src/hg/utils/otto/clinvar/clinVarToBed 697b862c4a5e14cc41f03dfa5757209898c3d5b8

697b862c4a5e14cc41f03dfa5757209898c3d5b8
max
  Wed May 21 07:14:40 2025 -0700
triangle decorators for the clinvar track, #35750

diff --git src/hg/utils/otto/clinvar/clinVarToBed src/hg/utils/otto/clinvar/clinVarToBed
index 61354700780..c1715aa23a4 100755
--- src/hg/utils/otto/clinvar/clinVarToBed
+++ src/hg/utils/otto/clinvar/clinVarToBed
@@ -6,61 +6,67 @@
 from os.path import join, basename, dirname, isfile, abspath, isdir
 from datetime import date, datetime, timedelta
 
 dbToUrl = {
     "dbVar": "https://www.ncbi.nlm.nih.gov/dbvar/variants/%s/",
     "UniProtKB (variants)" : "http://www.uniprot.org/uniprot/%s",
     "OMIM Allelic Variant" : "http://www.omim.org/entry/%s",
     "MedGen": "https://www.ncbi.nlm.nih.gov/medgen/%s",
     "OMIM" : "http://www.omim.org/entry/%s",
     "MONDO" : "https://monarchinitiative.org/disease/%s",
     "ClinGen" : "https://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/by_caid?caid=%s",
     "Orphanet" : "http://www.orpha.net/consor/cgi-bin/OC_Exp.php?lng=EN&Expert=%s"
 }
 
 # since we're filtering on it, we make sure that we have all molecular consequences in the tdb file
-# if they add a new one, this script must fail and tdb must be updated.
-possMolConseqs = set(["genic downstream transcript variant","no sequence alteration","inframe indel","stop lost","genic upstream transcript variant","initiatior codon variant","inframe insertion","inframe deletion","","splice acceptor variant","splice donor variant","5 prime UTR variant","nonsense","non-coding transcript variant","3 prime UTR variant","frameshift variant","intron variant","synonymous variant","missense variant", "initiator codon variant", ""])
+# if Clinvar ever adds a new value, this script must fail and tdb must be updated.
+possMolConseqs = set(["genic downstream transcript variant","no sequence alteration","inframe indel","stop lost","genic upstream transcript variant","initiator codon variant","inframe insertion","inframe deletion","","splice acceptor variant","splice donor variant","5 prime UTR variant","nonsense","non-coding transcript variant","3 prime UTR variant","frameshift variant","intron variant","synonymous variant","missense variant", ""])
+
+# these consequences are highlighted using a triangle decorator. Similar to Decipher and Gnomad browsers
+truncConseqs = set(["nonsense", "frameshift variant", "splice acceptor variant", "splice donor variant"])
+
+# make sure that if the second list is edited, it is never out of sync with the first list
+assert(len(set(truncConseqs - possMolConseqs))==0)
 
 # === COMMAND LINE INTERFACE, OPTIONS AND HELP ===
 parser = optparse.OptionParser("""usage: %prog [options] summaryFname varAllFname hgvsFname - check and convert the three main clinVar tab-sep files to four bed files, split into CNV and shorter mutations, for both hg19 and hg38 and convert all to bigBed.
 
 Output goes into the current dir
 
 Typical input files are at ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/
 """) 
 
 parser.add_option("-d", "--debug", dest="debug", action="store_true", help="show debug messages")
 parser.add_option("", "--alpha", dest="isAlpha", action="store_true", help="Default target is /gbdb/{hg19,hg38}/bbi/clinvar. With this option, we use /gbdb/{hg19,hg38}/bbi/clinvarAlpha, the hgwdev only version of the track, see human/clinvarAlpha.ra in trackDb")
 parser.add_option("-a", "--auto", dest="auto", action="store_true", help="download the file from NCBI into the current dir and convert to bigBed")
 parser.add_option("", "--skipDown", dest="skipDownload", action="store_true", help="Only with --auto: don't download again if it's already there, useful when debugging/developing")
 parser.add_option("", "--maxDiff", dest="maxDiff", action="store", type="float", help="look for last month's download file in current dir and accept this much difference, expressed as a ratio. Can only be used with --auto.")
 (options, args) = parser.parse_args()
 
 if options.debug:
     logging.basicConfig(level=logging.DEBUG)
 else:
     logging.basicConfig(level=logging.INFO)
 
 if not options.isAlpha and os.getlogin()!="otto":
     print("Please do not run this script as a normal user, you will run into all sorts of permission problems.")
     print("to develop and test, ssh to otto, then run it with doUpdate.sh --alpha to generate the dev-only tracks.")
     print("When you're done, run doUpdate.sh without the --alpha and commit the change to the kent tree.")
     sys.exit(1)
 
 #clinvarExpectHeaders = "#AlleleID	Type	Name	GeneID	GeneSymbol	HGNC_ID	ClinicalSignificance	ClinSigSimple	LastEvaluated	RS# (dbSNP)	nsv/esv (dbVar)	RCVaccession	PhenotypeIDS	PhenotypeList	Origin	OriginSimple	Assembly	ChromosomeAccession	Chromosome	Start	Stop	ReferenceAllele	AlternateAllele	Cytogenetic	ReviewStatus	NumberSubmitters	Guidelines	TestedInGTR	OtherIDs	SubmitterCategories	VariationID	PositionVCF	ReferenceAlleleVCF	AlternateAlleleVCF\n"
-clinvarExpectHeaders = "#AlleleID|Type|Name|GeneID|GeneSymbol|HGNC_ID|ClinicalSignificance|ClinSigSimple|LastEvaluated|RS# (dbSNP)|nsv/esv (dbVar)|RCVaccession|PhenotypeIDS|PhenotypeList|Origin|OriginSimple|Assembly|ChromosomeAccession|Chromosome|Start|Stop|ReferenceAllele|AlternateAllele|Cytogenetic|ReviewStatus|NumberSubmitters|Guidelines|TestedInGTR|OtherIDs|SubmitterCategories|VariationID|PositionVCF|ReferenceAlleleVCF|AlternateAlleleVCF|SomaticClinicalImpact|SomaticClinicalImpactLastEvaluated|ReviewStatusClinicalImpact|Oncogenicity|OncogenicityLastEvaluated|ReviewStatusOncogenicity".split("|")
+clinvarExpectHeaders = "#AlleleID|Type|Name|GeneID|GeneSymbol|HGNC_ID|ClinicalSignificance|ClinSigSimple|LastEvaluated|RS# (dbSNP)|nsv/esv (dbVar)|RCVaccession|PhenotypeIDS|PhenotypeList|Origin|OriginSimple|Assembly|ChromosomeAccession|Chromosome|Start|Stop|ReferenceAllele|AlternateAllele|Cytogenetic|ReviewStatus|NumberSubmitters|Guidelines|TestedInGTR|OtherIDs|SubmitterCategories|VariationID|PositionVCF|ReferenceAlleleVCF|AlternateAlleleVCF|SomaticClinicalImpact|SomaticClinicalImpactLastEvaluated|ReviewStatusClinicalImpact|Oncogenicity|OncogenicityLastEvaluated|ReviewStatusOncogenicity|SCVsForAggregateGermlineClassification|SCVsForAggregateSomaticClinicalImpact|SCVsForAggregateOncogenicityClassification".split("|")
 
 # ==== FUNCTIONs =====
 
 # benign = BN, likely benign = LB, conflicting = CF, likely pathogenic= LP, 
 # pathogenic = PG, other = OT, uncertain = VUS, RF=risk factor
 # colors were defined by Ana Benet Pages
 # - WARNING ON FILTER CHANGES:  -  see #28562
 # Never change these codes. They are in old sessions. If you change them, change the across the whole script
 # and also change the name of the .as field, so the cart variables will be different. You can add new values through:
 cnvColors = {
         "INS" : {
             "OT" : "225,165,0",
             "BN" : "250,214,69",
             "LB" : "250,214,69",
             "CF" : "225,165,0",
@@ -386,31 +392,31 @@
     if not isfile(previousFname):
         print(("Cannot find %s, cannot compare new data with previous month" % previousFname))
         return
         
     previousCount = sum(1 for line in open(previousFname, encoding="utf8"))
     todayCount = sum(1 for line in open(fname, encoding="utf8"))
     percDiff = (todayCount-previousCount)/float(todayCount)
 
     if percDiff < 0 and maxDiff:
         raise Exception("line count change negative: %s %d, %s %d. Run without --maxDiff to ignore this error. Or run the cronjob script doUpdate.sh with -nocheck to ignore this error." % \
             (previousFname, previousCount, fname, todayCount))
     if percDiff > maxDiff:
         raise Exception("line count increased too much: yesterday %d , today %d. Check if the input file is OK. If yes, remove the --maxDiff option from the clinVarToBed, call it only with the --auto option. Or run the cronjob script doUpdate.sh with -nocheck to ignore this error." % (previousCount, todayCount))
     # see RM 14350 do more than just linecount
     # see http://stackoverflow.com/questions/1566461/how-to-count-differences-between-two-files-on-linux
-    cmd = "diff %s %s --speed-large-files | grep '^[\>\<]'  | wc -l" % (fname, previousFname)
+    cmd = "diff %s %s --speed-large-files | grep '^[><]'  | wc -l" % (fname, previousFname)
     diffCount = int(subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT))
     logging.info("%s: %d lines changed" % (fname, diffCount))
     percDiff = float(diffCount)/todayCount
     if percDiff > maxDiff:
         logging.info(cmd)
         diffFname =  abspath(join("downloads", "lastDiff.txt"))
         cmd = "diff %s %s --speed-large-files > %s" % (fname, previousFname, diffFname)
         os.system(cmd) # ret code is not 0, as there are differences, so no need to check
         raise Exception("too many differences: %d out of %d lines. Look at the differences n the file %s. If you are OK with them, run '/hive/data/outside/otto/clinvar/doUpdate.sh -nocheck'" % (diffCount, todayCount, diffFname))
 
 # I'm leaving this in the code, because I would not be surprised if one day we have to go back and parse
 # the other files again
 #def parseVariationAlleleFile(varFname):
 #    """ return dict allele Id -> variant ID """
 #    logging.info("Parsing %s" % varFname)
@@ -580,30 +586,31 @@
     return htmlStr, asciiDesc, starCount
 
 def loadSubmissions(summFname):
     " load the submissions into the Mysql table for the lollipop track details page "
     # not doing the load+rename strategy yet, I don't think it makes a difference here
     logging.info("Loading submissions table %s into mysql, hg19 and also hg38" % summFname)
     fh = tempfile.NamedTemporaryFile(prefix="clinVarToBed", suffix=".tsv")
     tmpFname = fh.name
     cmd= "zgrep -v '^#' %s > %s" % (summFname, tmpFname)
     assert(os.system(cmd)==0)
 
     cmd = "chmod a+r %s" % (tmpFname) # otherwise mysqld can't read the file
     assert(os.system(cmd)==0)
 
     cmd = "hgLoadSqlTab hg19 clinvarSub clinvarSub.sql %s" % (tmpFname)
+    print(cmd)
     assert(os.system(cmd)==0)
 
     cmd = "hgLoadSqlTab hg38 clinvarSub clinvarSub.sql %s" % (tmpFname)
     assert(os.system(cmd)==0)
 
 def mostCurrentVersionName():
     " return name of version on FTP site "
     cmd = 'curl -s https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarVCVRelease_00-latest.xml.gz | zcat | head -n2 | tail -n1'
     verLine = subprocess.check_output(cmd, shell=True, encoding="utf8")
     #<ClinVarVariationRelease xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/ClinVar_VCV_2.0.xsd" ReleaseDate="2024-03-06">
     version = verLine.split(" ")[3].split("=")[1].split('"')[1]
     return version
 
 def downloadFromNcbi(skipDownload):
     " download files from NCBI and return list of filenames "
@@ -727,79 +734,87 @@
 
 ifh = gzip.open(filename, "rt", encoding="latin_1")
 
 # check the header
 line1 = ifh.readline()
 fileHeaders = line1.rstrip().split("\t")
 if (fileHeaders != clinvarExpectHeaders): # test if clinvar has changed their header line again
     logging.error("ClinVar has changed their header line again")
     logging.error("current header line: %s" % fileHeaders)
     logging.error("expected header line: %s" % clinvarExpectHeaders)
     raise Exception("Sorry. The code will need to be fixed.")
 
 # open out files
 hg19Bed = open("bed/clinvarMain.hg19.%sbed" % outSuffix, "w", encoding="latin1")
 hg38Bed = open("bed/clinvarMain.hg38.%sbed" % outSuffix, "w", encoding="latin1")
+
+hg19DecorBed = open("bed/clinvarMainDecor.hg19.%sbed" % outSuffix, "w", encoding="latin1")
+hg38DecorBed = open("bed/clinvarMainDecor.hg38.%sbed" % outSuffix, "w", encoding="latin1")
+
 hg19BedCnv = open("bed/clinvarCnv.hg19.%sbed" % outSuffix, "w", encoding="latin1")
 hg38BedCnv = open("bed/clinvarCnv.hg38.%sbed" % outSuffix, "w", encoding="latin1")
 
+
 longCount = 0
 noAssCount = 0
 noMcCount = 0
 minusAccs = []
 
 # convert lines
 for line in ifh:
     line = line.replace(", ", ",").replace(";", ",") # replace, bigBed conventions
     line = line.rstrip("\n")
     row = line.split("\t")
     row = [f.replace("\0", "") for f in row ]
 
     alleleId, allType, name, geneId, geneSymbol, hgncId, clinSign, clinSignSimple, lastEval, snpAcc, dbVarAcc, \
       irvcAcc, phenotypeIds, phenotypeList, origin, originSimple, assembly, chromAcc, chrom, start, end, \
       refAll, varAll, cytogenetic, reviewStatus, numberSubmitters, guidelines, inGtr, otherIds, \
-      submCategories, varId, posVcf, refAllVcf, altAllVcf, somClinImpact, somClinLastEval, revStatusClinImpact, oncogen, oncogenLastEval, revStatusOncogen = row
+      submCategories, varId, posVcf, refAllVcf, altAllVcf, somClinImpact, somClinLastEval, revStatusClinImpact, oncogen, oncogenLastEval, revStatusOncogen, aggrGermClass, aggrSomClassImpact, aggrSomOnc = row
    # SomaticClinicalImpact|SomaticClinicalImpactLastEvaluated|ReviewStatusClinicalImpact|Oncogenicity|OncogenicityLastEvaluated|ReviewStatusOncogenicity
 
     # changed in July 2018 when the "varId" was merged into this table. It used to be 
     # in allToVar
-    if varId=="":
-        logging.warn("empty variantID for alleleId %s, %s" % (alleleId, irvcAcc))
-        vcvId = ""
-    else:
-        vcvId = "VCV%08d.1" % int(varId)
+    #if varId=="":
+        #logging.warn("empty variantID for alleleId %s, %s" % (alleleId, irvcAcc))
+        #vcvId = ""
+    #else:
+        #vcvId = "VCV%09d.1" % int(varId)
 
     if chrom=="" or assembly=="" or assembly=="NCBI36" or assembly=="na":
         noAssCount += 1
         continue
 
     if assembly=="GRCh37":
         hgvs = hg19Hgvs
     elif assembly=="GRCh38":
         hgvs = hg38Hgvs
     else:
         print("Invalid assembly: %s, row %s" % (repr(assembly), repr(row)))
         assert(False)
 
     #hgvsTable =  ";".join(hgvs[alleleId])
     hgvsTable =  json.dumps(hgvs[alleleId])
 
     molConseq = allToVcf.get(int(alleleId))
+    isTrunc = False
     if molConseq is None:
-        #logging.debug("%s has no molConseq" % alleleId)
         noMcCount += 1
         molConseq = ""
+    else:
+        if molConseq in truncConseqs:
+            isTrunc = True
 
     if chromAcc=="NT_187513.1":
         chrom = "chrUn_KI270742v1"
     elif chromAcc=="NT_167222.1":
         chrom = "chrUn_gl000228"
     elif chromAcc=='NT_187499.1':
         chrom = "chrUn_KI270744v1"
     elif chromAcc=="NT_113889.1":
         if assembly=="GRCh37":
             chrom = "chrUn_gl000218"
         else:
             chrom = "chrUn_GL000218v1"
     elif chrom=="Un":
         print("Unknown fix/alt/patch chrom, please adapt clinVarToBed: %s, row: %s" % (chrom, row))
         sys.exit(1)
@@ -829,31 +844,33 @@
         minusAccs.append(irvcAcc)
         continue
 
     if start=="" or end=="":
         print("empty start or end coordinate. record %s, start/end = %s/%s "% (irvcAcc, start, end))
         continue
 
     if int(start)<0 or int(end)<0:
         print("illegal start or end coordinate. record %s, start/end = %s/%s"% (irvcAcc, start, end))
         continue
 
     # sometimes they seem to inverse their coordinates
     if int(start)>int(end):
         start, end = end, start
 
-    if chrom=="chr17" and (end=="217748468" or end=="84062405"):
+    if (chrom=="chr17" and end=="217748468" or end=="84062405") or  \
+        (chrom=="chr9" and end=="140730116" and assembly=="GRCh38"): # VCV003340459.2 reported to clinvar May 2025
+
         print("blacklisted feature")
         continue
 
     # reorder columns for bigBed
     start = str(int(start)-1) # convert to zero-based, half-open
     score = "0"
     strand = "."
     thickStart = start
     thickEnd = end
     itemRgb = "0"
     blockCount = "1"
     blockSizes = str(int(end)-int(start))
     blockStarts = "0"
 
     # used until Oct 2017
@@ -904,150 +921,189 @@
     mouseOverName = name
 
     otherIds, dbVarSsvAcc = accListToHtml(otherIds)
 
     if isCnv and dbVarSsvAcc!="":
         shortName = dbVarSsvAcc
     elif name=="":
         name = "No display name was provided by ClinVar for this variant"
         shortName = "NoName"
         mouseOverName = ""
 
     if len(name)>90:
         name = name[:90]+"..."
 
     vcvId = "VCV%09d" % int(varId)
+    vcvIdVersion = "VCV%09d" % int(varId)
     name = varId+"|"+vcvId+" : "+name
 
     if len(mouseOverName)>80:
         mouseOverName = mouseOverName[:80]+"..."
 
     #if len(hgvsProt) > 90:
         #hgvsProt = hgvsProt[:90]+"..."
     #if len(hgvsCod) > 90:
         #hgvsCod = hgvsCod[:90]+"..."
 
     phenotypeIds, _ = accListToHtml(phenotypeIds)
 
     starRatingHtml, asciiStars, starCount = reviewStatusToHtmlStars(reviewStatus)
 
     phenotypeList = ", ".join(phenotypeList.split("|"))
 
     mouseOverParts = [
             mouseOverName,
             "<b>Review Status: </b>"+starRatingHtml,
             "<b>Type: </b>"+allType,
             "<b>Consequence: </b>"+ molConseq,
             "<b>Significance: </b>"+clinSign,
             "<b>Origin: </b>"+origin,
             "<b>Phenotypes: </b>"+phenotypeList
             ]
     # removed Apr 2020: numberSubmitters+" submitters", "Level: "+asciiStars
     mouseOver = "<br>".join(mouseOverParts)
 
+    if snpAcc=="-1":
+        snpAcc=""
+    else:
         snpAcc = "rs"+snpAcc # dbSnp links changed in mid/late 2022
 
     vcfDesc = chrom+":"+posVcf+":"+refAllVcf+">"+altAllVcf
     if len(vcfDesc)>=255:
 	    refAllVcf = shortenSeq(refAllVcf)
 	    altAllVcf = shortenSeq(altAllVcf)
 	    vcfDesc = chrom+":"+posVcf+":"+refAllVcf+">"+altAllVcf
 
     somImpactDesc = ""
     if somClinImpact!="" and somClinImpact!="-":
 	    somImpactDesc = "%s (%s, %s)" % (somClinImpact, somClinLastEval, revStatusClinImpact)
 
     oncogenDesc = ""
     if oncogen!="" and oncogen!="-":
 	    oncogenDesc = "%s (%s, %s)" % (oncogen, oncogenLastEval, revStatusOncogen)
 
+    mouseOverIdx = 33 # if you ever change the field order below, make sure that you adapt this number!
+
     row = [chrom, start, end, shortName, str(starCount), strand, thickStart, thickEnd, itemRgb, \
         blockCount, blockSizes, blockStarts,
         name, clinSign, starRatingHtml, allType, geneStr, molConseq,
         snpAcc, dbVarAcc, irvcAcc, inGtr, phenotypeList, phenotypeIds, origin, assembly, cytogenetic,
         hgvsTable, "", numberSubmitters, lastEval, guidelines, otherIds, mouseOver,
 	    vcfDesc,
 	    somImpactDesc,
 	    oncogenDesc,
+        # the new classes
+        aggrGermClass, aggrSomClassImpact, aggrSomOnc,
         # these fields are the for the filters, not for the display
         pathoCode, originCode, allTypeCode, str(varLen), str(starCount),
         # the variant ID got forgotten in the original bigBed schema, it was only part of the origName field
-        varId, dbVarSsvAcc, vcvId]
+        varId, dbVarSsvAcc, vcvId, vcvIdVersion
+        ]
 
     # replace clinvar's placeholders with real empty fields
     newRow = []
     for x in row:
         if x in ["-1", "-"]:
             newRow.append("")
         else:
             newRow.append(x)
     row = newRow
 
     if assembly=="GRCh37":
         ofh = hg19Bed
+        decorOfh = hg19DecorBed
         if isCnv:
             ofh = hg19BedCnv
     elif assembly=="GRCh38":
         ofh = hg38Bed
+        decorOfh = hg38DecorBed
         if isCnv:
             ofh = hg38BedCnv
     else:
         noAssCount +=1
         
     ofh.write("\t".join(row))
     ofh.write("\n")
 
+    if isTrunc:
+        # truncating mutations get a special symbol written into another BED file, for decorators
+        decorRow = list(row[:12])
+        rgbColor = row[8]
+        mouseOverText = row[mouseOverIdx]
+
+        decorRef = row[0]+":"+row[1]+"-"+row[2]+":"+row[3]
+        decorRow[3] = "" # delete mouseover
+
+        decorRow.append(decorRef)
+        decorRow.append("glyph")
+        decorRow.append(rgbColor)
+        decorRow.append("Triangle")
+        decorRow.append(mouseOverText)
+
+        decorOfh.write("\t".join(decorRow))
+        decorOfh.write("\n")
+
 hg19Bed.close()
 hg38Bed.close()
+
+hg19DecorBed.close()
+hg38DecorBed.close()
+
 hg19BedCnv.close()
 hg38BedCnv.close()
 
 logging.info("%d lines with feature name that was too long, shortened them" % longCount)
 logging.info("%d lines without, with old assembly or no chrom, skipped" % noAssCount)
 logging.info("%d lines with -1/-1 start/end positions skipped. Examples: %s" % (len(minusAccs), minusAccs[:3]))
 
-fnames = [hg19Bed.name, hg38Bed.name, hg19BedCnv.name, hg38BedCnv.name]
+fnames = [hg19Bed.name, hg38Bed.name, hg19DecorBed.name, hg38DecorBed.name, hg19BedCnv.name, hg38BedCnv.name]
 
 # sort the files
 for fname in fnames:
     cmd = "bedSort %s %s" % (fname, fname)
     logging.debug(cmd)
     assert(os.system(cmd)==0)
 
 # check new bed files against last months's bed files
-if options.maxDiff:
+if options.maxDiff and not options.isAlpha:
     for fname in fnames:
         compareWithLastMonth(fname, options.maxDiff)
 
 bigBedDir = "bigBed"
 if options.isAlpha:
     bigBedDir = "bigBedAlpha"
 
 # convert to bigBed without the date in the fname, like clinvarMain.hg19.bb
 tmpFnames = []
 for fname in fnames:
     parts = fname.split(".")
     if len(parts) == 4: # clinvarMain.hg19.2011-11-11.bed
         base, db, date, ext = parts
     else:
         base, db, ext = parts
 
     base = basename(base)
         
     tmpFname = "%s.%s.temp.bb" % (base, db)
 
-    cmd = "bedToBigBed -extraIndex=_dbVarSsvId,snpId -tab -type=bed9+ -as=clinvar.as %s /hive/data/genomes/%s/chrom.sizes %s" % (fname, db, tmpFname)
+    asFname = "clinvar.as"
+    extraOptions = "-extraIndex=_dbVarSsvId,snpId,vcvId,_vcvIdVersion "
+    if "Decor" in fname:
+        asFname = "clinvarDecoration.as"
+        extraOptions = ""
+
+    cmd = "bedToBigBed %s -tab -type=bed9+ -as=%s %s /hive/data/genomes/%s/chrom.sizes %s" % (extraOptions, asFname, fname, db, tmpFname)
     mustRun(cmd)
 
     finalFname = "%s/%s.%s.bb" % (bigBedDir, base, db)
     tableName = finalFname.split('.')[0] # e.g. clinvarMain
 
     tmpFnames.append( (db, tableName, tmpFname, finalFname) )
 
 # make it atomic by renaming the temp files to the final file names at the very end
 for db, tableName, tmpFname, finalFname in tmpFnames:
     logging.debug("Renaming %s to %s" % (tmpFname, finalFname))
     os.rename(tmpFname, finalFname)
 
 # create the version.txt file
 clinvarVersion = open(versionFname).read()
 versionString = "ClinVar Release: %s, converted at UCSC on %s" % (clinvarVersion, date.today().strftime("%Y-%m-%d"))