9ecac913c6e52f7c7b3c957c22c6fd456f0b889c max Fri Jan 17 04:46:14 2020 -0800 adding chrMT support to clinVar, refs #24648 diff --git src/hg/utils/otto/clinvar/clinVarToBed src/hg/utils/otto/clinvar/clinVarToBed index c2b9ef3..b25275d 100755 --- src/hg/utils/otto/clinvar/clinVarToBed +++ src/hg/utils/otto/clinvar/clinVarToBed @@ -364,30 +364,33 @@ logging.warn("empty variantID for alleleId %s, %s" % (alleleId, irvcAcc)) hgvsCod, hgvsProt = allToHgvs.get(alleleId, ("", "")) if chrom=="" or assembly=="" or assembly=="NCBI36": noAssCount += 1 continue if chrom=="Un" and assembly=="GRCh38": print("wrong chrUn chrom on assembly hg38. Skipping %s" % irvcAcc) continue chrom = "chr"+chrom # Code-review/QA: Is it OK to pass through coordinates on chrM for hg19? if chrom=="chrMT": # why does NCBI use chrMT but we use chrM ? + if assembly=="GRCh37": + chrom = "chrMT" # our chrM is different from NCBI's MT, but chrMT got added hg19 in 2020 + else: chrom = "chrM" shortName, longName = shortenName(name) if len(shortName)>20: shortName = shortName[:17]+"..." longCount+=1 if len(longName)>60: longName = longName[:60]+"..." if start=="" or end=="": print("undefined start or end coordinate. record %s"% irvcAcc) continue @@ -475,32 +478,30 @@ blockCount, blockSizes, blockStarts, name, clinSign, starRatingHtml, allType, geneStr, snpAcc, dbVarAcc, irvcAcc, inGtr, phenotypeList, phenotypeIds, origin, assembly, cytogenetic, hgvsCod, hgvsProt, numberSubmitters, lastEval, guidelines, otherIds, mouseOver] # replace clinvar's placeholders with real empty fields newRow = [] for x in row: if x in ["-1", "-"]: newRow.append("") else: newRow.append(x) row = newRow if assembly=="GRCh37": - if chrom=="chrM": - continue # we don't have the same MT chrom as NCBI, skip these ofh = hg19Bed if isCnv: ofh = hg19BedCnv elif assembly=="GRCh38": ofh = hg38Bed if isCnv: ofh = hg38BedCnv else: noAssCount +=1 ofh.write("\t".join(row)) ofh.write("\n") hg19Bed.close() hg38Bed.close()