3683a482f97f26e6c08290f6c8acccb764d16eac lrnassar Thu Mar 2 17:58:16 2023 -0800 Removing the use of refseq select because the annotations are too sparse. Refs #30320 diff --git src/hg/makeDb/doc/evaSnp4.txt src/hg/makeDb/doc/evaSnp4.txt index aaae0a9..0bdde85 100644 --- src/hg/makeDb/doc/evaSnp4.txt +++ src/hg/makeDb/doc/evaSnp4.txt @@ -207,32 +207,32 @@ chromDic = {} for chrom in chromSizes[1:-1]: chrom = chrom.split("\t") chromDic[chrom[0]] = chrom[1] return(chromDic) def splitChromsAndRunHgVai(workDir,dbs): """Split all the chroms in tenths in order to be able to run hgVai without running out of memory""" chromSizes = bash("fetchChromSizes %s" % (dbs)).split("\n") inputFile = workDir+"evaSnps.ucscChroms.vcf.gz" outputFile = workDir+"evaSnps"+dbs+"VaiResults.vep" n=0 allTables = bash("\hgsql -e \"show tables\" "+dbs+"") if dbs in ['galGal6','oviAri4','ponAbe3']: #Special exception to use refGene because ncbiRefSeq has incorrect protein sequence in Link table see #29262 geneTableToUse = "refGene" - elif "ncbiRefSeqSelect" in allTables: - geneTableToUse = "ncbiRefSeqSelect" +# elif "ncbiRefSeqSelect" in allTables: #Removing refseq select as the annotations are too sparse +# geneTableToUse = "ncbiRefSeqSelect" elif "ncbiRefSeqCurated" in allTables: geneTableToUse = "ncbiRefSeqCurated" elif "ensGene" in allTables: geneTableToUse = "ensGene" elif "refGene" in allTables: geneTableToUse = "refGene" elif "ncbiGene" in allTables: geneTableToUse = "ncbiGene" else: print(dbs) sys.exit("Could not find any tables to use for the following database: "+dbs) chromDic = buildChromSizesDic(workDir,dbs) #For function below, only bother with the chromosomes in the VCF chromsInVcf = bash("zcat "+inputFile+" | grep -v '^#' | cut -f1 | uniq").rstrip().split("\n")