ec726ffa6050e6b7b65f5ed885789c850ce87e60
galt
  Wed Jan 26 03:20:35 2022 -0800
Tweaking the doBigDbSnp step fixHg19ChrM since NC_012920 is now just called chrMT in dbSNP data.

diff --git src/hg/utils/automation/doBigDbSnp.pl src/hg/utils/automation/doBigDbSnp.pl
index 447dda5..6b55025 100755
--- src/hg/utils/automation/doBigDbSnp.pl
+++ src/hg/utils/automation/doBigDbSnp.pl
@@ -80,31 +80,31 @@
 topDir is expected to have a subdirectory json in which refsnp-*.json.bz2
 files have already been downloaded, as well as files $refSeqToUcsc and $equivRegions
 (see usage statement for dbSnpJsonToTab).
 
 buildId is usually NNN where NNN is 152 or greater, same as topDir; it can also have a
 suffix to distinguish it, e.g. 152Test.  The names of all result files contain $outRoot\$buildId.
 
 freqSourceOrder is a comma-separated list of projects that submit frequency data to dbSNP
 (see usage statement for dbSnpJsonToTab).
 
 Steps:
     split: splits refsnp-*.json.bz2 files into chunks of 100,000 lines.
     convert: runs dbSnpJsonToTab on chunks.
     mergeToChrom: merges chunk result files into per-chrom results files.
     mergeChroms: merges per-chrom results files.
-    fixHg19ChrM: if annotations on hg19 are included, then liftOver NC_012920 to hg19 chrM.
+    fixHg19ChrM: if annotations on hg19 are included, then liftOver chrMT (NC_012920) to hg19 chrM.
     check: runs checkBigDbSnp to add ucscNotes about overlapping items and clustering anomalies.
     bigBed: Converts BED4+ .bigDbSnp files into bigBed.
     install: installs links to files in /gbdb.
     cleanup: Removes or compresses intermediate files.
 All operations are performed in the build directory which is
 topDir/bigDbSnp.\$date unless -buildDir is given.
 ";
   # Detailed help (-help):
   print STDERR "
 Assumptions:
 1. $HgAutomate::clusterData/\$db/\$db.2bit contains sequence for \$db.
 2. topDir/json/ contains downloaded files refsnp-*.json.bz2
 3. topDir/ contains files refSeqToUcsc.tab and equivRegions.tab - see dbSnpJsonToTab usage
 " if ($detailed);
   print "\n";
@@ -465,50 +465,50 @@
     echo pid \$pid FAILED
     exit 1
   fi
 done
 _EOF_
                   );
   $bossScript->execute();
 } # doMergeChroms
 
 
 #########################################################################
 # * step: fixHg19ChrM [workhorse]
 sub doFixHg19ChrM {
   my $runDir = $buildDir;
   if (grep(/hg19/, @dbList)) {
-    my $whatItDoes = "It does a liftOver from NC_012920.1 to hg19 chrM.";
+    my $whatItDoes = "It does a liftOver from chrMT (old name NC_012920) to hg19 chrM.";
     my $bossScript = newBash HgRemoteScript("$runDir/doFixHg19ChrM.sh", $workhorse,
                                             $runDir, $whatItDoes);
     $bossScript->add(<<_EOF_
-# For hg19, liftOver NC_012920.1 annotations to hg19 chrM.
-sed -e 's/NC_012920 /NC_012920.1 /' \\
+# For hg19, liftOver chrMT annotations to hg19 chrM.
+sed -e 's/NC_012920 /chrMT /' \\
   /hive/data/outside/dbSNP/131/human/NC_012920ToChrM.over.chain \\
   > hg19.mitoLiftover.chain
 # For liftOver, convert 0-base fully-closed to 0-based half-open because liftOver
 # doesn't deal with 0-base items.
 mv hg19.$outRoot.bigDbSnp hg19.preChrMFix.$outRoot.bigDbSnp
-time (grep ^NC_012920 hg19.preChrMFix.$outRoot.bigDbSnp \\
+time (grep ^chrMT hg19.preChrMFix.$outRoot.bigDbSnp \\
       | awk -F"\t" 'BEGIN{OFS="\t";} {\$3 += 1; print;}' \\
       | liftOver -tab -bedPlus=3 stdin \\
           hg19.mitoLiftover.chain stdout chrM.unmapped \\
       | awk -F"\t" 'BEGIN{OFS="\t";} {\$3 -= 1; print;}' \\
       | sort -k2n,2n \\
         > hg19.chrM.$outRoot.bigDbSnp)
 wc -l hg19.chrM.$outRoot.bigDbSnp chrM.unmapped
-time grep -v ^NC_012920 hg19.preChrMFix.$outRoot.bigDbSnp \\
+time grep -v ^chrMT hg19.preChrMFix.$outRoot.bigDbSnp \\
      | sort --merge -k1,1 -k2n,2n - hg19.chrM.$outRoot.bigDbSnp \\
        > hg19.$outRoot.bigDbSnp
 _EOF_
                     );
     $bossScript->execute()
   };
 } # doFixHg19ChrM
 
 
 #########################################################################
 # * step: check [workhorse]
 sub doCheck {
   my $runDir = $buildDir;
 
   my $whatItDoes = "It runs checkBigDbSnp on merged bigDbSnp files.";