2b58971a98de87361a625375a7b1bf1fcc5ded03 lrnassar Fri Feb 28 17:36:45 2025 -0800 Fixing the mitomap track, I incorrectly assumed that the hg19 chrM was the same as hg38 chrM, but it was chrMT that matches with chrM on hg38. Refs #24849 diff --git src/hg/utils/otto/mitoMap/buildMitoMap.py src/hg/utils/otto/mitoMap/buildMitoMap.py index 09e10fc6597..306b091fd63 100755 --- src/hg/utils/otto/mitoMap/buildMitoMap.py +++ src/hg/utils/otto/mitoMap/buildMitoMap.py @@ -12,31 +12,31 @@ # Need -1 to all start positions # 3 T-C needs to end up as chrM 2 3 # 46 TG-del needs to end up as chrM 45 47 # One item at 16289 with no AA change # 3 items with 366 G-GAAAG pwd = "/hive/data/outside/otto/mitoMap/" mitoMapControlRegionVarsFileInputFile = pwd+"variantsControl.latest.tsv" mitoMapVarsOutputFile = pwd+"mitoMapVars.bed" fh = open(mitoMapControlRegionVarsFileInputFile,"r", encoding="utf-8", errors="replace") fh2 = open(mitoMapVarsOutputFile,"w") # $ head VariantsControlMITOMAPFoswiki.tsv -# Position Locus Nucleotide Change GB FreqFL (CR)*‡ GB Seqstotal (FL/CR)* Curated References +# Position Locus Nucleotide Change GB FreqFL (CR)*‡ GB Seqstotal (FL/CR)* Curated References # 3 Control Region T-C 0.000%(0.000%) 0 2 # I want the output to be bed9 for itemRgb and mouseover with 10 additional fields. The 6 original, plus 3 more from the coding file plus mouseOver: # chrom chromStart chromEnd name score strand thickStart thickEnd itemRgb position locus nucleotideChange GBfreq GBseqs curatedRefs _mouseOver for line in fh: if line.startswith("Position"): continue else: line = line.rstrip().split("\t") position = line[0] nucleotideChange = line[2] chrom = "chrM" chromStart = int(position)-1 if nucleotideChange == "": nucleotideChange = "Blank" @@ -63,30 +63,32 @@ GBfreq = line[3] GBseqs = line[4] curatedRefs = line[5] varType = "VariantsControl" mouseOver = "Nucl change: "+name+"
Position: "+position+"
Locus: "+locus+"
"+\ "GenBank freq FL(CR): "+GBfreq+"
GenBank seqs FL(CR): "+GBseqs+"
# of curated refs: "+curatedRefs fh2.write("\t".join(map(str, [chrom, chromStart, chromEnd, name, score, strand, thickStart, thickEnd, itemRgb, \ position, locus, nucleotideChange, codonNumber, codonPosition, AAchange, GBfreq, GBseqs, curatedRefs, varType, mouseOver])) + "\n") fh2.close() fh.close() mitoMapCodingRegionVarsFileInputFile = pwd+"variantsCoding.latest.tsv" mitoMapVarsOutputFile = pwd+"mitoMapVars.bed" +mitoMapVarsOutputFileHg19 = mitoMapVarsOutputFile.split(".")[0]+".hg19.bed" + fh = open(mitoMapCodingRegionVarsFileInputFile,"r", encoding="utf-8", errors="replace") fh2 = open(mitoMapVarsOutputFile,"a") # $ head VariantsCodingMITOMAPFoswiki.tsv #Position Locus Nucleotide Change Codon Number Codon Position Amino Acid Change GB Freq‡ GB Seqs Curated References #577 MT-TF G-A - - tRNA 0.000% 0 1 # I want the output to be bed9 for itemRgb and mouseover with 10 additional fields. The 6 original, plus 3 more from the coding file plus mouseOver: # chrom chromStart chromEnd name score strand thickStart thickEnd itemRgb position locus nucleotideChange GBfreq GBseqs curatedRefs _mouseOver for line in fh: if line.startswith("Position"): continue else: line = line.rstrip().split("\t") position = line[0] @@ -119,46 +121,50 @@ GBseqs = line[7] curatedRefs = line[8] varType = "VariantsCoding" mouseOver = "Nucl change: "+name+"
Position: "+position+"
Locus: "+locus+"
"+\ "Codon num: "+codonNumber+"
Codon pos: "+codonPosition+"
AA change: "+AAchange+\ "
GenBank freq FL(CR): "+GBfreq+"
GenBank seqs FL(CR): "+GBseqs+"
# of curated refs: "+curatedRefs fh2.write("\t".join(map(str, [chrom, chromStart, chromEnd, name, score, strand, thickStart, thickEnd, itemRgb, \ position, locus, nucleotideChange, codonNumber, codonPosition, AAchange, GBfreq, GBseqs, curatedRefs, varType, mouseOver])) + "\n") fh2.close() fh.close() bash("bedSort "+mitoMapVarsOutputFile+" "+mitoMapVarsOutputFile) bash("bedToBigBed -as="+pwd+"mitoMapVars.as -type=bed9+11 -tab "+mitoMapVarsOutputFile+" /cluster/data/hg38/chrom.sizes "+mitoMapVarsOutputFile.split(".")[0]+".new.bb") +bash("sed 's/chrM/chrMT/g' "+mitoMapVarsOutputFile+" > "+mitoMapVarsOutputFileHg19) +bash("bedToBigBed -as="+pwd+"mitoMapVars.as -type=bed9+11 -tab "+mitoMapVarsOutputFileHg19+" /cluster/data/hg19/chrom.sizes "+mitoMapVarsOutputFileHg19.split(".")[0]+".hg19.new.bb") print("Final file created: "+mitoMapVarsOutputFile.split(".")[0]+".new.bb") ###### # BEGIN PROCESSING OF DISEASE FILES ###### # Need -1 to all start positions # Everything is point mutations - only one of the files needs size calculation mitoMapRNAmutsFileInputFile = pwd+"mutationsRNA.latest.tsv" mitoMapDiseaseMutsOutputFile = pwd+"mitoMapDiseaseMuts.bed" +mitoMapDiseaseMutOutputFileHg19 = mitoMapDiseaseMutsOutputFile.split(".")[0]+".hg19.bed" + fh = open(mitoMapRNAmutsFileInputFile,"r", encoding="utf-8", errors="replace") fh2 = open(mitoMapDiseaseMutsOutputFile,"w") # $ head -2 MutationsRNAMITOMAPFoswiki.tsv -#Position Locus Disease Allele RNA Homoplasmy Heteroplasmy Status MitoTIP† GB Freq  FL (CR)*‡ GB Seqs FL (CR)* References +#Position Locus Disease Allele RNA Homoplasmy Heteroplasmy Status MitoTIP† GB Freq FL (CR)*‡ GB Seqs FL (CR)* References #576 MT-CR Hearing loss patient A576G noncoding MT-TF precursor nr nr Reported N/A 0.005%(0.012%) 3 (10) 1 # I want the output to be bed9 for itemRgb and mouseover with 14 additional fields. # chrom chromStart chromEnd name score strand thickStart thickEnd itemRgb position locus disease nucleotideChange allele rnaOrAAchange plasmy clinStatus mitoTIP GBfreq GBseqs curatedRefs _mouseOver for line in fh: if line.startswith("Position"): continue else: line = line.rstrip().split("\t") position = line[0] nucleotideChange = line[3] chrom = "chrM" chromStart = int(position)-1 chromEnd = chromStart + 1 #These are all base mutations name = nucleotideChange @@ -184,31 +190,31 @@ "GenBank freq FL(CR): "+GBfreq+"
GenBank seqs FL(CR): "+GBseqs+"
# of curated refs: "+curatedRefs fh2.write("\t".join(map(str, [chrom, chromStart, chromEnd, name, score, strand, thickStart, thickEnd, itemRgb, \ position, locus, disease, nucleotideChange, allele, rnaOrAAchange, plasmy, clinStatus, \ mitoTIP, GBfreq, GBseqs, curatedRefs, mutsCodingOrRNA, mouseOver])) + "\n") fh2.close() fh.close() mitoMapDiseaseMutsFileInputFile = pwd+"mutationsCodingControl.latest.tsv" mitoMapDiseaseMutsOutputFile = pwd+"mitoMapDiseaseMuts.bed" fh = open(mitoMapDiseaseMutsFileInputFile,"r", encoding="utf-8", errors="replace") fh2 = open(mitoMapDiseaseMutsOutputFile,"a") # $ head -2 MutationsCodingControlMITOMAPFoswiki.tsv -#Locus Allele Position NucleotideChange Amino AcidChange Plasmy Reports(Homo/Hetero) Disease Status GB Freq  FL (CR)*‡ GB Seqs FL (CR)* References +#Locus Allele Position NucleotideChange Amino AcidChange Plasmy Reports(Homo/Hetero) Disease Status GB Freq FL (CR)*‡ GB Seqs FL (CR)* References #MT-ATP6 m.8573G>A 8573 G-A G16D +/- Patient with suspected mitochondrial disease Reported by paper as Benign 0.107%(0.000%) 66 (0) 1 # I want the output to be bed9 for itemRgb and mouseover with 14 additional fields. # chrom chromStart chromEnd name score strand thickStart thickEnd itemRgb position locus disease nucleotideChange allele rnaOrAAchange plasmy clinStatus mitoTIP GBfreq GBseqs curatedRefs mutsCodingOrRNA _mouseOver for line in fh: if line.startswith("Locus"): continue else: line = line.rstrip().split("\t") position = line[2] nucleotideChange = line[3] chrom = "chrM" chromStart = int(position)-1 if nucleotideChange == "": print("This is an error!") @@ -243,16 +249,20 @@ curatedRefs = line[10] mutsCodingOrRNA = "MutationsCodingControl" mouseOver = "Allele: "+allele+"
Nucl change: "+nucleotideChange+"
Position: "+position+"
Locus: "+locus+"
"+\ "Disease: "+disease+"
RNA: "+rnaOrAAchange+"
Plasmy (Hom/Het): "+plasmy+"
"+\ "Status: "+clinStatus+"
"+\ "GenBank freq FL(CR): "+GBfreq+"
GenBank seqs FL(CR): "+GBseqs+"
# of curated refs: "+curatedRefs fh2.write("\t".join(map(str, [chrom, chromStart, chromEnd, name, score, strand, thickStart, thickEnd, itemRgb, \ position, locus, disease, nucleotideChange, allele, rnaOrAAchange, plasmy, clinStatus, \ mitoTIP, GBfreq, GBseqs, curatedRefs, mutsCodingOrRNA, mouseOver])) + "\n") fh2.close() fh.close() bash("bedSort "+mitoMapDiseaseMutsOutputFile+" "+mitoMapDiseaseMutsOutputFile) bash("bedToBigBed -as=/hive/data/genomes/hg38/bed/mitomap/mitoMapDiseaseMuts.as -type=bed9+14 -tab "+mitoMapDiseaseMutsOutputFile+" /cluster/data/hg38/chrom.sizes "+mitoMapDiseaseMutsOutputFile.split(".")[0]+".new.bb") + +bash("sed 's/chrM/chrMT/g' "+mitoMapDiseaseMutsOutputFile+" > "+mitoMapDiseaseMutOutputFileHg19) +bash("bedToBigBed -as=/hive/data/genomes/hg38/bed/mitomap/mitoMapDiseaseMuts.as -type=bed9+14 -tab "+mitoMapDiseaseMutOutputFileHg19+" /cluster/data/hg19/chrom.sizes "+mitoMapDiseaseMutOutputFileHg19.split(".")[0]+".hg19.new.bb") + print("Final file created: "+mitoMapDiseaseMutsOutputFile.split(".")[0]+".new.bb")