acfbaec13c08b80dc3c5138821a23ef4f905d573 max Mon Jan 20 09:17:30 2025 -0800 fixing color coding of CNVs in clinvar, refs #35062 diff --git src/hg/utils/otto/clinvar/clinVarToBed src/hg/utils/otto/clinvar/clinVarToBed index 959a831..6135470 100755 --- src/hg/utils/otto/clinvar/clinVarToBed +++ src/hg/utils/otto/clinvar/clinVarToBed @@ -170,30 +170,32 @@ # GRCh37/hg19 15q14-15.1 chr15:34638237..42057083 complex variant print(name) assert False # name that starts with GRCh but does not match our regex should not happen elif "." in name: # many names look like NM_000274.3(OAT):c.550_552delGCT (p.Ala184del) # so we remove the gene and the protein description if ":" in name: name = name.split(":")[1] longName = name.split(" ")[0] # handle genomic HGVS NC_000010.11:g.(?_87925503)_(87965482_?)del if name.startswith("NC_") and "g." in name and ")" in name: shortName = name else: # strip away the final description like " (p.Ala184del)" + if not name.startswith("g.("): + # do not strip anything if all we have is the g.( notation, as we would end up with just "g." name = name.split(" ")[0].split("[")[0].split("(")[0] shortName = name if name.endswith("del"): shortName = "del" elif name.endswith("ins"): shortName = "ins" elif name.endswith("dup"): shortName = "dup" else: posMatch = posRe.match(name) if posMatch!=None: pos, dupDelIns, change = posMatch.groups() if dupDelIns==None: dupDelIns= ""