2ce16411b0f909e7e1dfd27a01af34316c260f2f
hiram
  Mon Jul 5 11:58:27 2021 -0700
catching up the edge cases for pair wise alignment to assembly hub genome no redmine

diff --git src/hg/utils/automation/doBlastzChainNet.pl src/hg/utils/automation/doBlastzChainNet.pl
index 536ed63..0417402 100755
--- src/hg/utils/automation/doBlastzChainNet.pl
+++ src/hg/utils/automation/doBlastzChainNet.pl
@@ -48,31 +48,32 @@
 use vars qw/
     $opt_blastzOutRoot
     $opt_swap
     $opt_chainMinScore
     $opt_chainLinearGap
     $opt_tRepeats
     $opt_qRepeats
     $opt_readmeOnly
     $opt_ignoreSelf
     $opt_syntenicNet
     $opt_noDbNameCheck
     $opt_inclHap
     $opt_noLoadChainSplit
     $opt_loadChainSplit
     $opt_swapDir
-    $opt_asmId
+    $opt_tAsmId
+    $opt_qAsmId
     $opt_skipDownload
     $opt_trackHub
     /;
 
 # Specify the steps supported with -continue / -stop:
 my $stepper = new HgStepManager(
     [ { name => 'partition',  func => \&doPartition },
       { name => 'blastz',     func => \&doBlastzClusterRun },
       { name => 'cat',        func => \&doCatRun },
       { name => 'chainRun',   func => \&doChainRun },
       { name => 'chainMerge', func => \&doChainMerge },
       { name => 'net',        func => \&netChains },
       { name => 'load',       func => \&loadUp },
       { name => 'download',   func => \&doDownloads },
       { name => 'cleanup',    func => \&cleanup },
@@ -118,31 +119,33 @@
                           a new directory:
                           $HgAutomate::clusterData/\$qDb/$HgAutomate::trackBuild/blastz.\$tDb.swap/
     -chainMinScore n      Add -minScore=n (default: $defaultChainMinScore) to the
                                   axtChain command.
     -chainLinearGap type  Add -linearGap=<loose|medium|filename> to the
                                   axtChain command.  (default: loose)
     -tRepeats table       Add -tRepeats=table to netClass (default: rmsk)
     -qRepeats table       Add -qRepeats=table to netClass (default: rmsk)
     -ignoreSelf           Do not assume self alignments even if tDb == qDb
     -syntenicNet          Perform optional syntenicNet step
     -noDbNameCheck        ignore Db name format
     -inclHap              include haplotypes *_hap* in chain/net, default not
     -loadChainSplit       load split chain tables, default is not split tables
     -swapDir path         directory to work in for swap, default:
                           /hive/data/genomes/qDb/bed/blastz.tDb.swap/
-    -asmId assemblyHubId  full name for assembly hub,
+    -tAsmId assemblyHubId  full name for assembly hub as target:
+                          e.g.: GCF_007474595.1_mLynCan4_v1.p
+    -qAsmId assemblyHubId  full name for assembly hub as target:
                           e.g.: GCF_007474595.1_mLynCan4_v1.p
     -skipDownload         do not construct the downloads directory
     -trackHub             construct big* files for track hub
 _EOF_
   ;
 print STDERR &HgAutomate::getCommonOptionHelp('dbHost' => $dbHost,
 				      'workhorse' => $workhorse,
 				      'fileServer' => '',
 				      'bigClusterHub' => $bigClusterHub,
 				      'smallClusterHub' => $smallClusterHub);
 print STDERR "
 Automates UCSC's blastz/chain/net pipeline:
     1. Big cluster run of blastz.
     2. Small cluster consolidation of blastz result files.
     3. Small cluster chaining run.
@@ -259,31 +262,31 @@
 15. SEQ2_SELF=1 specifies the SEQ2 is already specially split for self
     alignments and to use SEQ2 sequence for self alignment, not just a
     copy of SEQ1
 16. TMPDIR - specifies directory on cluster node to keep temporary files
     Typically TMPDIR=/scratch/tmp
 17. All other variables in DEF will be ignored!
 
 " if ($detailed);
   exit $status;
 }
 
 
 # Globals:
 my %defVars = ();
 my ($DEF, $tDb, $qDb, $QDb, $isSelf, $selfSplit, $buildDir, $fileServer);
-my ($swapDir, $asmId, $splitRef, $inclHap, $secondsStart, $secondsEnd, $dbExists, $qDbExists);
+my ($swapDir, $tAsmId, $qAsmId, $splitRef, $inclHap, $secondsStart, $secondsEnd, $dbExists, $qDbExists);
 
 sub isInDirList {
   # Return TRUE if $dir is under (begins with) something in dirList.
   my ($dir, @dirList) = @_;
   my $pat = '^(' . join('|', @dirList) . ')(/.*)?$';
   return ($dir =~ m@$pat@);
 }
 
 sub enforceClusterNoNo {
   # Die right away if user is trying to put cluster output somewhere
   # off-limits.
   my ($dir, $desc) = @_;
   if (&isInDirList($dir, @clusterNoNo)) {
     die "\ncluster outputs are forbidden to go to " .
       join (' or ', @clusterNoNo) . " so please choose a different " .
@@ -306,31 +309,32 @@
 		      @HgAutomate::commonOptionSpec,
 		      "blastzOutRoot=s",
 		      "swap",
 		      "chainMinScore=i",
 		      "chainLinearGap=s",
 		      "tRepeats=s",
 		      "qRepeats=s",
 		      "readmeOnly",
 		      "ignoreSelf",
                       "syntenicNet",
                       "noDbNameCheck",
                       "inclHap",
                       "noLoadChainSplit",
                       "loadChainSplit",
                       "swapDir=s",
-                      "asmId=s",
+                      "tAsmId=s",
+                      "qAsmId=s",
                       "skipDownload",
                       "trackHub"
 		     );
   &usage(1) if (!$ok);
   &usage(0, 1) if ($opt_help);
   &HgAutomate::processCommonOptions();
   my $err = $stepper->processOptions();
   usage(1) if ($err);
   $dbHost = $opt_dbHost if ($opt_dbHost);
   if ($opt_swap) {
     if ($opt_continue) {
       if ($stepper->stepPrecedes($opt_continue, 'net')) {
 	warn "\nIf -swap is specified, then -continue must specify a step ".
 	  "of \"net\" or later.\n";
 	&usage(1);
@@ -921,33 +925,34 @@
 
 sub swapGlobals {
   # Swap our global variables ($buildDir, $tDb, $qDb and %defVars SEQ1/SEQ2)
   # so that the remaining steps need no tweaks for -swap.
   $buildDir = $swapDir;
   my $tmp = $qDb;
   $qDb = $tDb;
   $tDb = $tmp;
   $QDb = $isSelf ? 'Self' : ucfirst($qDb);
   foreach my $var ('DIR', 'LEN', 'CHUNK', 'LAP', 'SMSK') {
     $tmp = $defVars{"SEQ1_$var"};
     $defVars{"SEQ1_$var"} = $defVars{"SEQ2_$var"};
     $defVars{"SEQ2_$var"} = $tmp;
   }
   $defVars{'BASE'} = $swapDir;
+  $tAsmId = $opt_qAsmId ? $opt_qAsmId : "";
+  $qAsmId = $opt_tAsmId ? $opt_tAsmId : "";
 }
 
-
 sub doChainMerge {
   # If -swap, swap chains from other org;  otherwise, merge the results
   # from the chainRun step.
   if ($opt_swap) {
     &swapChains();
     &swapGlobals();
   } else {
     &postProcessChains();
   }
 }
 
 
 sub netChains {
   # Turn chains into nets (,axt,maf,.over.chain).
   # Don't do this for self alignments.
@@ -1211,31 +1216,31 @@
   # Make an md5sum.txt file.
   my $net = $isSelf ? "" : "$tDb.$qDb.net.gz";
   if (! -s "$runDir/$net") {
      $net = "";
   }
   my $whatItDoes =
 "It makes an md5sum.txt file for downloadable files, with relative paths
 matching what the user will see on the download server, and installs the
 over.chain file in the liftOver dir.";
   my $bossScript = new HgRemoteScript("$runDir/makeMd5sum.csh", $workhorse,
 				      $runDir, $whatItDoes, $DEF);
   my $over = $tDb . "To$QDb.over.chain.gz";
   my $altOver = "$tDb.$qDb.over.chain.gz";
   my $liftOverDir = "$HgAutomate::clusterData/$tDb/$HgAutomate::trackBuild/liftOver";
   if ($tDb =~ m/^GC/) {
-     $liftOverDir = &HgAutomate::asmHubBuildDir($asmId) . "/liftOver";
+     $liftOverDir = &HgAutomate::asmHubBuildDir($tAsmId) . "/liftOver";
   }
   $bossScript->add(<<_EOF_
 mkdir -p $liftOverDir
 md5sum $tDb.$qDb.all.chain.gz $net > md5sum.txt
 _EOF_
   );
   if (! $isSelf) {
     my $axt = ($splitRef ?
 	       "md5sum axtNet/*.gz >> axtChain/md5sum.txt" :
 	       "cd axtNet\nmd5sum *.gz >> ../axtChain/md5sum.txt");
     $bossScript->add(<<_EOF_
 rm -f $liftOverDir/$over
 cp -p $altOver $liftOverDir/$over
 cd ..
 $axt
@@ -1347,37 +1352,44 @@
 &commafy($defVars{SEQ2_LAP}) . ".";
   }
   $lap .= "  Following alignment, the
 coordinates of the chunk alignments were corrected by the
 blastz-normalizeLav script written by Scott Schwartz of Penn State.";
   return $lap;
 }
 
 
 sub dumpDownloadReadme {
   # Write a file (README.txt) describing the download files.
   my ($file) = @_;
   my $fh = &HgAutomate::mustOpen(">$file");
   my ($tGenome, $tDate, $tSource, $tAsmName);
   if ($tDb =~ m/^GC/) {
-    ($tGenome, $tDate, $tSource) = &HgAutomate::getAssemblyInfo($dbHost, $asmId);
-    $tAsmName = $asmId;
+    ($tGenome, $tDate, $tSource) = &HgAutomate::getAssemblyInfo($dbHost, $tAsmId);
+    $tAsmName = $tAsmId;
   } else {
     ($tGenome, $tDate, $tSource) = &HgAutomate::getAssemblyInfo($dbHost, $tDb);
     $tAsmName = $tDb;
   }
-  my ($qGenome, $qDate, $qSource) = &HgAutomate::getAssemblyInfo($dbHost, $qDb);
+  my ($qGenome, $qDate, $qSource, $qAsmName);
+  if ($qDb =~ m/^GC/) {
+  ($qGenome, $qDate, $qSource) = &HgAutomate::getAssemblyInfo($dbHost, $qAsmId);
+    $qAsmName = $qAsmId;
+  } else {
+     ($qGenome, $qDate, $qSource) = &HgAutomate::getAssemblyInfo($dbHost, $qDb);
+    $qAsmName = $qDb;
+  }
   my $dir = $splitRef ? 'axtNet/*.' : '';
   my $synNet = $splitRef ?
   "mafSynNet/*.maf.gz - filtered net files for syntenic alignments
                only, in MAF format, see also, description of MAF format:
                http://genome.ucsc.edu/FAQ/FAQformat.html#format5" :
   "$tDb.$qDb.synNet.maf.gz - filtered net file for syntenic alignments
                only, in MAF format, see also, description of MAF format:
                http://genome.ucsc.edu/FAQ/FAQformat.html#format5
 
   - $tDb.$qDb.syn.net.gz - filtered net file for syntenic alignments only";
 
   my ($matrix, $o, $e, $k, $l, $h, $blastzOther) = &getBlastzParams();
   my $defaultMatrix = $defVars{'BLASTZ_Q'} ? '' : ' the default matrix';
   my $lap = &describeOverlapping();
   my $abridging = "";
@@ -1398,31 +1410,31 @@
 alignment coordinates were adjusted) using the restore_rpts program from
 Penn State.";
     }
   }
   my $desc = $isSelf ?
 "This directory contains alignments of
     $tGenome ($tDb, $tDate,
     $tSource) to itself." :
 "This directory contains alignments of the following assemblies:
 
   - target/reference: $tGenome
     ($tAsmName, $tDate,
     $tSource)
 
   - query: $qGenome
-    ($qDb, $qDate,
+    ($qAsmName, $qDate,
     $qSource)";
 
   print $fh "$desc
 
 Files included in this directory:
 
   - md5sum.txt: md5sum checksums for the files in this directory
 
   - $tDb.$qDb.all.chain.gz: chained lastz alignments. The chain format is
     described in http://genome.ucsc.edu/goldenPath/help/chain.html .
 
 ";
   if (! $isSelf) {
     print $fh
 "  - $tDb.$qDb.net.gz: \"net\" file that describes rearrangements between
@@ -1441,31 +1453,31 @@
     for $tDb-$qDb
 
 ";
   }
   if ($opt_swap) {
     my $TDb = ucfirst($tDb);
     print $fh
 "The chainSwap program was used to translate $qDb-referenced chained lastz
 alignments to $tDb into $tDb-referenced chains aligned to $qDb.  See
 the download directory goldenPath/$qDb/vs$TDb/README.txt for more
 information about the $qDb-referenced lastz and chaining process.
 ";
   } else {
     print $fh ($isSelf ?
 "The $tAsmName assembly was aligned to itself" :
-"The $tAsmName and $qDb assemblies were aligned");
+"The $tAsmName and $qAsmName assemblies were aligned");
   my $chainMinScore = $opt_chainMinScore ? "$opt_chainMinScore" :
 	$defaultChainMinScore;
   my $chainLinearGap = $opt_chainLinearGap ? "$opt_chainLinearGap" :
 	$defaultChainLinearGap;
     print $fh " by the lastz alignment
 program, which is available from Webb Miller's lab at Penn State
 University (http://www.bx.psu.edu/miller_lab/).  $lap $abridging
 
 The lastz scoring matrix (Q parameter) used was$defaultMatrix:
 
 $matrix
 
 with a gap open penalty of O=$o and a gap extension penalty of E=$e.
 The minimum score for an alignment to be kept was K=$k for the first pass
 and L=$l for the second pass, which restricted the search space to the
@@ -1550,30 +1562,34 @@
   my $successFile = "$runDir/$tDb.$qDb.all.chain.gz";
   if (! $isSelf && -s "$runDir/$tDb.$qDb.net.gz") {
      $successFile = "$runDir/$tDb.$qDb.net.gz";
   }
   if (! -e $successFile && ! $opt_debug) {
     die "installDownloads: looks like previous stage was not successful " .
       "(can't find $successFile).\n";
   }
   my $goldenPath = $HgAutomate::goldenPath;
   if ($tDb =~ m/^GC/) {
      $goldenPath = &HgAutomate::asmHubDownloadDir($tDb);
   }
   &dumpDownloadReadme("$runDir/README.txt");
   my $over = $tDb . "To$QDb.over.chain.gz";
   my $liftOverDir = "$HgAutomate::clusterData/$tDb/$HgAutomate::trackBuild/liftOver";
+  if ($tDb =~ m/^GC/) {
+     $liftOverDir = &HgAutomate::asmHubBuildDir($tAsmId) . "/liftOver";
+  }
+printf STDERR "# DBG: tDb '%s' have liftOverDir: '%s'\n", $tDb, $liftOverDir;
   my $gpLiftOverDir = "$goldenPath/$tDb/liftOver";
   my $gbdbLiftOverDir = "$HgAutomate::gbdb/$tDb/liftOver";
   my $andNets = $isSelf ? "." :
     ", nets and axtNet,\n" .
     "# and copies the liftOver chains to the liftOver download dir.";
   my $whatItDoes = "It creates the download directory for chains$andNets";
   my $bossScript = new HgRemoteScript("$runDir/installDownloads.csh", $dbHost,
 				      $runDir, $whatItDoes, $DEF);
   $bossScript->add(<<_EOF_
 mkdir -p $goldenPath/$tDb
 rm -rf $goldenPath/$tDb/vs$QDb
 mkdir -p $goldenPath/$tDb/vs$QDb
 cd $goldenPath/$tDb/vs$QDb
 ln -s $runDir/$tDb.$qDb.all.chain.gz .
 ln -s $runDir/README.txt .
@@ -1888,31 +1904,32 @@
 $secondsStart = `date "+%s"`;
 chomp $secondsStart;
 ($DEF) = @ARGV;
 
 $inclHap = "";
 $inclHap = "-inclHap" if ($opt_inclHap);
 &loadDef($DEF);
 &checkDef();
 
 my $seq1IsSplit = (`wc -l < $defVars{SEQ1_LEN}` <=
 		   $HgAutomate::splitThreshold);
 my $seq2IsSplit = (`wc -l < $defVars{SEQ2_LEN}` <=
 		   $HgAutomate::splitThreshold);
 
 # might be an assembly hub build
-$asmId = $opt_asmId ? $opt_asmId : "";
+$tAsmId = $opt_tAsmId ? $opt_tAsmId : "";
+$qAsmId = $opt_qAsmId ? $opt_qAsmId : "";
 
 # Undocumented option for quickly generating a README from DEF:
 if ($opt_readmeOnly) {
   $splitRef = $opt_swap ? $seq2IsSplit : $seq1IsSplit;
   &swapGlobals() if $opt_swap;
   &dumpDownloadReadme("/tmp/README.txt");
   exit 0;
 }
 
 my $date = `date +%Y-%m-%d`;
 chomp $date;
 $buildDir = $defVars{'BASE'} ||
   "$HgAutomate::clusterData/$tDb/$HgAutomate::trackBuild/blastz.$qDb.$date";
 
 if ($opt_swap) {