e68cc6315fa92edde16a3e27015a15cb94290440 jnavarr5 Thu May 22 14:34:00 2025 -0700 Adding files that were staged as the otto user, no Redmine. diff --git src/hg/utils/otto/clinvar/clinVarToBed src/hg/utils/otto/clinvar/clinVarToBed index c1715aa23a4..e0f02fee390 100755 --- src/hg/utils/otto/clinvar/clinVarToBed +++ src/hg/utils/otto/clinvar/clinVarToBed @@ -6,37 +6,36 @@ from os.path import join, basename, dirname, isfile, abspath, isdir from datetime import date, datetime, timedelta dbToUrl = { "dbVar": "https://www.ncbi.nlm.nih.gov/dbvar/variants/%s/", "UniProtKB (variants)" : "http://www.uniprot.org/uniprot/%s", "OMIM Allelic Variant" : "http://www.omim.org/entry/%s", "MedGen": "https://www.ncbi.nlm.nih.gov/medgen/%s", "OMIM" : "http://www.omim.org/entry/%s", "MONDO" : "https://monarchinitiative.org/disease/%s", "ClinGen" : "https://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/by_caid?caid=%s", "Orphanet" : "http://www.orpha.net/consor/cgi-bin/OC_Exp.php?lng=EN&Expert=%s" } # since we're filtering on it, we make sure that we have all molecular consequences in the tdb file -# if Clinvar ever adds a new value, this script must fail and tdb must be updated. +# if they add a new one, this script must fail and tdb must be updated. possMolConseqs = set(["genic downstream transcript variant","no sequence alteration","inframe indel","stop lost","genic upstream transcript variant","initiator codon variant","inframe insertion","inframe deletion","","splice acceptor variant","splice donor variant","5 prime UTR variant","nonsense","non-coding transcript variant","3 prime UTR variant","frameshift variant","intron variant","synonymous variant","missense variant", ""]) # these consequences are highlighted using a triangle decorator. Similar to Decipher and Gnomad browsers truncConseqs = set(["nonsense", "frameshift variant", "splice acceptor variant", "splice donor variant"]) -# make sure that if the second list is edited, it is never out of sync with the first list assert(len(set(truncConseqs - possMolConseqs))==0) # === COMMAND LINE INTERFACE, OPTIONS AND HELP === parser = optparse.OptionParser("""usage: %prog [options] summaryFname varAllFname hgvsFname - check and convert the three main clinVar tab-sep files to four bed files, split into CNV and shorter mutations, for both hg19 and hg38 and convert all to bigBed. Output goes into the current dir Typical input files are at ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/ """) parser.add_option("-d", "--debug", dest="debug", action="store_true", help="show debug messages") parser.add_option("", "--alpha", dest="isAlpha", action="store_true", help="Default target is /gbdb/{hg19,hg38}/bbi/clinvar. With this option, we use /gbdb/{hg19,hg38}/bbi/clinvarAlpha, the hgwdev only version of the track, see human/clinvarAlpha.ra in trackDb") parser.add_option("-a", "--auto", dest="auto", action="store_true", help="download the file from NCBI into the current dir and convert to bigBed") parser.add_option("", "--skipDown", dest="skipDownload", action="store_true", help="Only with --auto: don't download again if it's already there, useful when debugging/developing") parser.add_option("", "--maxDiff", dest="maxDiff", action="store", type="float", help="look for last month's download file in current dir and accept this much difference, expressed as a ratio. Can only be used with --auto.") @@ -1015,31 +1014,31 @@ ofh = hg19BedCnv elif assembly=="GRCh38": ofh = hg38Bed decorOfh = hg38DecorBed if isCnv: ofh = hg38BedCnv else: noAssCount +=1 ofh.write("\t".join(row)) ofh.write("\n") if isTrunc: # truncating mutations get a special symbol written into another BED file, for decorators decorRow = list(row[:12]) - rgbColor = row[8] + rgbColor = row[10] mouseOverText = row[mouseOverIdx] decorRef = row[0]+":"+row[1]+"-"+row[2]+":"+row[3] decorRow[3] = "" # delete mouseover decorRow.append(decorRef) decorRow.append("glyph") decorRow.append(rgbColor) decorRow.append("Triangle") decorRow.append(mouseOverText) decorOfh.write("\t".join(decorRow)) decorOfh.write("\n") hg19Bed.close()