4d06b1e359477194ed8b3e50dfec23aa695396af
jnavarr5
  Wed Sep 24 10:53:20 2025 -0700
Committing changes from the clinVarToBed utility that was running on /hive to the kent source tree, no Redmine

diff --git src/hg/utils/otto/clinvar/clinVarToBed src/hg/utils/otto/clinvar/clinVarToBed
index e3daaebbbc6..a014dc29d7a 100755
--- src/hg/utils/otto/clinvar/clinVarToBed
+++ src/hg/utils/otto/clinvar/clinVarToBed
@@ -11,30 +11,31 @@
     "UniProtKB (variants)" : "http://www.uniprot.org/uniprot/%s",
     "OMIM Allelic Variant" : "http://www.omim.org/entry/%s",
     "MedGen": "https://www.ncbi.nlm.nih.gov/medgen/%s",
     "OMIM" : "http://www.omim.org/entry/%s",
     "MONDO" : "https://monarchinitiative.org/disease/%s",
     "ClinGen" : "https://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/by_caid?caid=%s",
     "Orphanet" : "http://www.orpha.net/consor/cgi-bin/OC_Exp.php?lng=EN&Expert=%s"
 }
 
 # since we're filtering on it, we make sure that we have all molecular consequences in the tdb file
 # if Clinvar ever adds a new value, this script must fail and tdb must be updated.
 possMolConseqs = set(["genic downstream transcript variant","no sequence alteration","inframe indel","stop lost","genic upstream transcript variant","initiator codon variant","inframe insertion","inframe deletion","","splice acceptor variant","splice donor variant","5 prime UTR variant","nonsense","non-coding transcript variant","3 prime UTR variant","frameshift variant","intron variant","synonymous variant","missense variant", ""])
 
 # these consequences are highlighted using a triangle decorator. Similar to Decipher and Gnomad browsers
 truncConseqs = set(["nonsense", "frameshift variant", "splice acceptor variant", "splice donor variant"])
+pinkConseqs = set(["splice acceptor variant", "splice donor variant"])
 
 # make sure that if the second list is edited, it is never out of sync with the first list
 assert(len(set(truncConseqs - possMolConseqs))==0)
 
 # === COMMAND LINE INTERFACE, OPTIONS AND HELP ===
 parser = optparse.OptionParser("""usage: %prog [options] summaryFname varAllFname hgvsFname - check and convert the three main clinVar tab-sep files to four bed files, split into CNV and shorter mutations, for both hg19 and hg38 and convert all to bigBed.
 
 Output goes into the current dir
 
 Typical input files are at ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/
 """) 
 
 parser.add_option("-d", "--debug", dest="debug", action="store_true", help="show debug messages")
 parser.add_option("", "--alpha", dest="isAlpha", action="store_true", help="Default target is /gbdb/{hg19,hg38}/bbi/clinvar. With this option, we use /gbdb/{hg19,hg38}/bbi/clinvarAlpha, the hgwdev only version of the track, see human/clinvarAlpha.ra in trackDb")
 parser.add_option("-a", "--auto", dest="auto", action="store_true", help="download the file from NCBI into the current dir and convert to bigBed")
@@ -1011,40 +1012,43 @@
         decorOfh = hg38DecorBed
         if isCnv:
             ofh = hg38BedCnv
     else:
         noAssCount +=1
         
     ofh.write("\t".join(row))
     ofh.write("\n")
 
     if isTrunc:
         # truncating mutations get a special symbol written into another BED file, for decorators
         decorRow = list(row[:12])
         rgbColor = row[8]
         #decorRow[8] = "0,0,0" # line color
         decorRow[8] = "255,255,255" # line color
+        if molConseq in pinkConseqs:
+            decorRow[8] = "255,192,203" # inspired by decipher colors
+
         mouseOverText = row[mouseOverIdx]
 
         decorRef = row[0]+":"+row[1]+"-"+row[2]+":"+row[3]
         decorRow[3] = "" # delete mouseover
 
         decorRow.append(decorRef)
         decorRow.append("glyph")
         #decorRow.append(rgbColor)
         decorRow.append(rgbColor) # fill color
-        decorRow.append("Triangle")
+        decorRow.append("Diamond")
         decorRow.append(mouseOverText)
 
         decorOfh.write("\t".join(decorRow))
         decorOfh.write("\n")
 
 hg19Bed.close()
 hg38Bed.close()
 
 hg19DecorBed.close()
 hg38DecorBed.close()
 
 hg19BedCnv.close()
 hg38BedCnv.close()
 
 logging.info("%d lines with feature name that was too long, shortened them" % longCount)