2b58971a98de87361a625375a7b1bf1fcc5ded03
lrnassar
  Fri Feb 28 17:36:45 2025 -0800
Fixing the mitomap track, I incorrectly assumed that the hg19 chrM was the same as hg38 chrM, but it was chrMT that matches with chrM on hg38. Refs #24849

diff --git src/hg/utils/otto/mitoMap/buildMitoMap.py src/hg/utils/otto/mitoMap/buildMitoMap.py
index 09e10fc6597..306b091fd63 100755
--- src/hg/utils/otto/mitoMap/buildMitoMap.py
+++ src/hg/utils/otto/mitoMap/buildMitoMap.py
@@ -12,31 +12,31 @@
 
 # Need -1 to all start positions
 # 3 T-C needs to end up as  chrM 2 3
 # 46 TG-del needs to end up as chrM 45 47
 # One item at 16289 with no AA change
 # 3 items with 366 G-GAAAG
 
 pwd = "/hive/data/outside/otto/mitoMap/"
 
 mitoMapControlRegionVarsFileInputFile = pwd+"variantsControl.latest.tsv"
 mitoMapVarsOutputFile = pwd+"mitoMapVars.bed"
 fh = open(mitoMapControlRegionVarsFileInputFile,"r", encoding="utf-8", errors="replace")
 fh2 = open(mitoMapVarsOutputFile,"w")
 
 # $ head VariantsControlMITOMAPFoswiki.tsv
-# Position        Locus   Nucleotide Change       GB FreqFL (CR)*‡        GB Seqstotal (FL/CR)*   Curated References
+# Position        Locus   Nucleotide Change       GB FreqFL (CR)*‡        GB Seqstotal (FL/CR)*   Curated References
 # 3       Control Region  T-C     0.000%(0.000%)  0       2
 
 # I want the output to be bed9 for itemRgb and mouseover with 10 additional fields. The 6 original, plus 3 more from the coding file plus mouseOver:
 # chrom chromStart chromEnd name score strand thickStart thickEnd itemRgb position locus nucleotideChange GBfreq GBseqs curatedRefs _mouseOver
 for line in fh:
     if line.startswith("Position"):
         continue
     else:
         line = line.rstrip().split("\t")
         position = line[0]
         nucleotideChange = line[2]
         chrom = "chrM"
         chromStart = int(position)-1
         if nucleotideChange == "":
             nucleotideChange = "Blank"
@@ -63,30 +63,32 @@
         GBfreq = line[3]
         GBseqs = line[4]
         curatedRefs = line[5]
         varType = "VariantsControl"
         mouseOver = "<b>Nucl change: </b>"+name+"<br><b>Position: </b>"+position+"<br><b>Locus: </b>"+locus+"<br>"+\
         "<b>GenBank freq FL(CR): </b>"+GBfreq+"<br><b>GenBank seqs FL(CR): </b>"+GBseqs+"<br><b># of curated refs: </b>"+curatedRefs
         
         fh2.write("\t".join(map(str, [chrom, chromStart, chromEnd, name, score, strand, thickStart, thickEnd, itemRgb, \
                                   position, locus, nucleotideChange, codonNumber, codonPosition, AAchange, GBfreq, GBseqs, curatedRefs, varType, mouseOver])) + "\n")
 
 fh2.close()
 fh.close()
 
 mitoMapCodingRegionVarsFileInputFile = pwd+"variantsCoding.latest.tsv"
 mitoMapVarsOutputFile = pwd+"mitoMapVars.bed"
+mitoMapVarsOutputFileHg19 = mitoMapVarsOutputFile.split(".")[0]+".hg19.bed"
+
 fh = open(mitoMapCodingRegionVarsFileInputFile,"r", encoding="utf-8", errors="replace")
 fh2 = open(mitoMapVarsOutputFile,"a")
 
 # $ head VariantsCodingMITOMAPFoswiki.tsv
 #Position        Locus   Nucleotide Change       Codon Number    Codon Position  Amino Acid Change       GB Freq‡        GB Seqs Curated References
 #577     MT-TF   G-A     -       -       tRNA    0.000%  0       1
 
 # I want the output to be bed9 for itemRgb and mouseover with 10 additional fields. The 6 original, plus 3 more from the coding file plus mouseOver:
 # chrom chromStart chromEnd name score strand thickStart thickEnd itemRgb position locus nucleotideChange GBfreq GBseqs curatedRefs _mouseOver
 for line in fh:
     if line.startswith("Position"):
         continue
     else:
         line = line.rstrip().split("\t")
         position = line[0]
@@ -119,46 +121,50 @@
         GBseqs = line[7]
         curatedRefs = line[8]
         varType = "VariantsCoding"
         mouseOver = "<b>Nucl change: </b>"+name+"<br><b>Position: </b>"+position+"<br><b>Locus: </b>"+locus+"<br>"+\
         "<b>Codon num: </b>"+codonNumber+"<br><b>Codon pos: </b>"+codonPosition+"<br><b>AA change: </b>"+AAchange+\
         "<br><b>GenBank freq FL(CR): </b>"+GBfreq+"<br><b>GenBank seqs FL(CR): </b>"+GBseqs+"<br><b># of curated refs: </b>"+curatedRefs
 
         fh2.write("\t".join(map(str, [chrom, chromStart, chromEnd, name, score, strand, thickStart, thickEnd, itemRgb, \
                                   position, locus, nucleotideChange, codonNumber, codonPosition, AAchange, GBfreq, GBseqs, curatedRefs, varType, mouseOver])) + "\n")
 
 fh2.close()
 fh.close()
 
 bash("bedSort "+mitoMapVarsOutputFile+" "+mitoMapVarsOutputFile)
 bash("bedToBigBed -as="+pwd+"mitoMapVars.as -type=bed9+11 -tab "+mitoMapVarsOutputFile+" /cluster/data/hg38/chrom.sizes "+mitoMapVarsOutputFile.split(".")[0]+".new.bb")
+bash("sed 's/chrM/chrMT/g' "+mitoMapVarsOutputFile+" > "+mitoMapVarsOutputFileHg19)
+bash("bedToBigBed -as="+pwd+"mitoMapVars.as -type=bed9+11 -tab "+mitoMapVarsOutputFileHg19+" /cluster/data/hg19/chrom.sizes "+mitoMapVarsOutputFileHg19.split(".")[0]+".hg19.new.bb")
 print("Final file created: "+mitoMapVarsOutputFile.split(".")[0]+".new.bb")
 
 ######
 # BEGIN PROCESSING OF DISEASE FILES
 ######
 
 # Need -1 to all start positions
 # Everything is point mutations - only one of the files needs size calculation
 
 mitoMapRNAmutsFileInputFile = pwd+"mutationsRNA.latest.tsv"
 mitoMapDiseaseMutsOutputFile = pwd+"mitoMapDiseaseMuts.bed"
+mitoMapDiseaseMutOutputFileHg19 = mitoMapDiseaseMutsOutputFile.split(".")[0]+".hg19.bed"
+
 fh = open(mitoMapRNAmutsFileInputFile,"r", encoding="utf-8", errors="replace")
 fh2 = open(mitoMapDiseaseMutsOutputFile,"w")
 
 # $ head -2 MutationsRNAMITOMAPFoswiki.tsv
-#Position        Locus   Disease Allele  RNA     Homoplasmy      Heteroplasmy    Status  MitoTIP†        GB Freq  FL (CR)*‡      GB Seqs FL (CR)*        References
+#Position        Locus   Disease Allele  RNA     Homoplasmy      Heteroplasmy    Status  MitoTIP†        GB Freq  FL (CR)*‡      GB Seqs FL (CR)*        References
 #576     MT-CR   Hearing loss patient    A576G   noncoding MT-TF precursor       nr      nr      Reported        N/A     0.005%(0.012%)  3 (10)  1
 
 # I want the output to be bed9 for itemRgb and mouseover with 14 additional fields. 
 # chrom chromStart chromEnd name score strand thickStart thickEnd itemRgb position locus disease nucleotideChange allele rnaOrAAchange plasmy clinStatus mitoTIP GBfreq GBseqs curatedRefs _mouseOver
 for line in fh:
     if line.startswith("Position"):
         continue
     else:
         line = line.rstrip().split("\t")
         position = line[0]
         nucleotideChange = line[3]
         chrom = "chrM"
         chromStart = int(position)-1
         chromEnd = chromStart + 1 #These are all base mutations
         name = nucleotideChange
@@ -184,31 +190,31 @@
         "<b>GenBank freq FL(CR): </b>"+GBfreq+"<br><b>GenBank seqs FL(CR): </b>"+GBseqs+"<br><b># of curated refs: </b>"+curatedRefs
         
         fh2.write("\t".join(map(str, [chrom, chromStart, chromEnd, name, score, strand, thickStart, thickEnd, itemRgb, \
                                   position, locus, disease, nucleotideChange, allele, rnaOrAAchange, plasmy, clinStatus, \
                                       mitoTIP, GBfreq, GBseqs, curatedRefs, mutsCodingOrRNA, mouseOver])) + "\n")
 
 fh2.close()
 fh.close()
 
 mitoMapDiseaseMutsFileInputFile = pwd+"mutationsCodingControl.latest.tsv"
 mitoMapDiseaseMutsOutputFile = pwd+"mitoMapDiseaseMuts.bed"
 fh = open(mitoMapDiseaseMutsFileInputFile,"r", encoding="utf-8", errors="replace")
 fh2 = open(mitoMapDiseaseMutsOutputFile,"a")
 
 # $ head -2 MutationsCodingControlMITOMAPFoswiki.tsv
-#Locus   Allele  Position        NucleotideChange        Amino AcidChange        Plasmy Reports(Homo/Hetero)     Disease Status  GB Freq  FL (CR)*‡      GB Seqs FL (CR)*        References
+#Locus   Allele  Position        NucleotideChange        Amino AcidChange        Plasmy Reports(Homo/Hetero)     Disease Status  GB Freq  FL (CR)*‡      GB Seqs FL (CR)*        References
 #MT-ATP6 m.8573G>A       8573    G-A     G16D    +/-     Patient with suspected mitochondrial disease    Reported by paper as Benign     0.107%(0.000%)  66 (0)  1
 
 # I want the output to be bed9 for itemRgb and mouseover with 14 additional fields. 
 # chrom chromStart chromEnd name score strand thickStart thickEnd itemRgb position locus disease nucleotideChange allele rnaOrAAchange plasmy clinStatus mitoTIP GBfreq GBseqs curatedRefs mutsCodingOrRNA _mouseOver
 for line in fh:
     if line.startswith("Locus"):
         continue
     else:
         line = line.rstrip().split("\t")
         position = line[2]
         nucleotideChange = line[3]
         chrom = "chrM"
         chromStart = int(position)-1
         if nucleotideChange == "":
             print("This is an error!")
@@ -243,16 +249,20 @@
         curatedRefs = line[10]
         mutsCodingOrRNA = "MutationsCodingControl"
         mouseOver = "<b>Allele: </b>"+allele+"<br><b>Nucl change: </b>"+nucleotideChange+"<br><b>Position: </b>"+position+"<br><b>Locus: </b>"+locus+"<br>"+\
         "<b>Disease: </b>"+disease+"<br><b>RNA: </b>"+rnaOrAAchange+"<br><b>Plasmy (Hom/Het): </b>"+plasmy+"<br>"+\
         "<b>Status: </b>"+clinStatus+"<br>"+\
         "<b>GenBank freq FL(CR): </b>"+GBfreq+"<br><b>GenBank seqs FL(CR): </b>"+GBseqs+"<br><b># of curated refs: </b>"+curatedRefs
         
         fh2.write("\t".join(map(str, [chrom, chromStart, chromEnd, name, score, strand, thickStart, thickEnd, itemRgb, \
                                   position, locus, disease, nucleotideChange, allele, rnaOrAAchange, plasmy, clinStatus, \
                                       mitoTIP, GBfreq, GBseqs, curatedRefs, mutsCodingOrRNA, mouseOver])) + "\n")
 fh2.close()
 fh.close()
 
 bash("bedSort "+mitoMapDiseaseMutsOutputFile+" "+mitoMapDiseaseMutsOutputFile)
 bash("bedToBigBed -as=/hive/data/genomes/hg38/bed/mitomap/mitoMapDiseaseMuts.as -type=bed9+14 -tab "+mitoMapDiseaseMutsOutputFile+" /cluster/data/hg38/chrom.sizes "+mitoMapDiseaseMutsOutputFile.split(".")[0]+".new.bb")
+
+bash("sed 's/chrM/chrMT/g' "+mitoMapDiseaseMutsOutputFile+" > "+mitoMapDiseaseMutOutputFileHg19)
+bash("bedToBigBed -as=/hive/data/genomes/hg38/bed/mitomap/mitoMapDiseaseMuts.as -type=bed9+14 -tab "+mitoMapDiseaseMutOutputFileHg19+" /cluster/data/hg19/chrom.sizes "+mitoMapDiseaseMutOutputFileHg19.split(".")[0]+".hg19.new.bb")
+
 print("Final file created: "+mitoMapDiseaseMutsOutputFile.split(".")[0]+".new.bb")