5c59ddc77713387809ce52e0e553eeb3e7b7ee64 hiram Mon May 19 12:57:47 2025 -0700 better explicit recognition of hs1 oddities refs #35712 diff --git src/hg/utils/automation/doRecipBest.pl src/hg/utils/automation/doRecipBest.pl index 2f2fce4faba..32993fbfbf1 100755 --- src/hg/utils/automation/doRecipBest.pl +++ src/hg/utils/automation/doRecipBest.pl @@ -31,31 +31,33 @@ $opt_trackHub /; # Specify the steps supported with -continue / -stop: my $stepper = new HgStepManager( [ { name => 'recipBest', func => \&doRecipBest }, { name => 'download', func => \&doDownload }, { name => 'load', func => \&loadRBest }, { name => 'cleanup', func => \&cleanUp }, ] ); # Option defaults: my $dbHost = 'hgwdev'; -my ($dbExists, $qDbExists, $tChromInfoExists, $qChromInfoExists); +my ($dbExists, $qDbExists, $tChromInfoExists, $qChromInfoExists, $tTrackHub, $qTrackHub); +$tTrackHub = 0; # will become true if database is hs1 +$qTrackHub = 0; # will become true if database is hs1 my $base = $0; $base =~ s/^(.*\/)?//; sub usage { # Usage / help / self-documentation: my ($status, $detailed) = @_; # Basic help (for incorrect usage): print STDERR " usage: $base tDb qDb options: "; print STDERR $stepper->getOptionHelp(); print STDERR <<_EOF_ -buildDir dir Use dir instead of default @@ -221,70 +223,70 @@ end cd axtRBestNet md5sum *.axt.gz > md5sum.txt cd .. # Make rbest mafNet for multiz: one .maf per $tDb seq. mkdir mafRBestNet foreach f (axtRBestNet/*.$tDb.$qDb.net.axt.gz) axtToMaf -tPrefix=$tDb. -qPrefix=$qDb. \$f \\ $targetSizes $querySizes \\ stdout \\ | gzip -c > mafRBestNet/\$f:t:r:r:r:r:r.maf.gz end _EOF_ ); - if ($opt_trackHub) { + if ($tTrackHub) { $bossScript->add(<<_EOF_ mkdir -p bigMaf echo "##maf version=1 scoring=blastz" > bigMaf/$tDb.$qDb.rbestNet.maf zegrep -h -v "^#" mafRBestNet/*.maf.gz >> bigMaf/$tDb.$qDb.rbestNet.maf echo "##eof maf" >> bigMaf/$tDb.$qDb.rbestNet.maf gzip bigMaf/$tDb.$qDb.rbestNet.maf _EOF_ ); } } else { $bossScript->add(<<_EOF_ # Make rbest net axt's download mkdir ../axtRBestNet netToAxt $tDb.$qDb.rbest.net.gz $tDb.$qDb.rbest.chain.gz \\ $target2Bit $query2Bit stdout \\ | axtSort stdin stdout \\ | gzip -c > ../axtRBestNet/$tDb.$qDb.rbest.axt.gz # Make rbest mafNet for multiz mkdir ../mafRBestNet axtToMaf -tPrefix=$tDb. -qPrefix=$qDb. ../axtRBestNet/$tDb.$qDb.rbest.axt.gz \\ $targetSizes $querySizes \\ stdout \\ | gzip -c > ../mafRBestNet/$tDb.$qDb.rbest.maf.gz cd ../mafRBestNet md5sum *.maf.gz > md5sum.txt cd ../axtRBestNet md5sum *.axt.gz > md5sum.txt _EOF_ ); - if ($opt_trackHub) { + if ($tTrackHub) { $bossScript->add(<<_EOF_ mkdir -p ../bigMaf cd ../bigMaf ln -s ../mafRBestNet/$tDb.$qDb.rbest.maf.gz ./$tDb.$qDb.rbestNet.maf.gz _EOF_ ); } } - if ($opt_trackHub) { + if ($tTrackHub) { $bossScript->add(<<_EOF_ cd $buildDir/bigMaf wget --no-check-certificate -O bigMaf.as 'https://raw.githubusercontent.com/ucscGenomeBrowser/kent/refs/heads/master/src/hg/lib/bigMaf.as' wget --no-check-certificate -O mafSummary.as 'https://raw.githubusercontent.com/ucscGenomeBrowser/kent/refs/heads/master/src/hg/lib/mafSummary.as' mafToBigMaf $tDb $tDb.$qDb.rbestNet.maf.gz stdout \\ | sort -k1,1 -k2,2n > $tDb.$qDb.rbestNet.txt bedToBigBed -itemsPerSlot=4 -type=bed3+1 -as=bigMaf.as -tab $tDb.$qDb.rbestNet.txt \\ $targetSizes $tDb.$qDb.rbestNet.bb hgLoadMafSummary -minSeqSize=1 -test $tDb $tDb.$qDb.rbestNet.summary \\ $tDb.$qDb.rbestNet.maf.gz cut -f2- $tDb.$qDb.rbestNet.summary.tab | sort -k1,1 -k2,2n \\ > $tDb.$qDb.rbestNet.summary.bed bedToBigBed -type=bed3+4 -as=mafSummary.as -tab \\ $tDb.$qDb.rbestNet.summary.bed \\ $targetSizes $tDb.$qDb.rbestNet.summary.bb @@ -385,31 +387,31 @@ # Make sure previous stage was successful. my $successDir = "$runDir/$tDb.$qDb.rbest.net.gz"; if (! -e $successDir && ! $opt_debug) { die "loadRBest looks like previous stage was not successful " . "(can't find $successDir).\n"; } my $whatItDoes = "It loads the recip best chain tables into $tDb, adds gap/repeat stats to the recip best .net file, and loads the recip net table."; my $bossScript = new HgRemoteScript("$runDir/loadRBest.csh", $dbHost, $runDir, $whatItDoes); $bossScript->add(<<_EOF_ # Load reciprocal best chains: _EOF_ ); - if (! $opt_trackHub && $dbExists) { + if (! $tTrackHub && $dbExists) { $bossScript->add(<<_EOF_ cd $runDir hgLoadChain -tIndex $tDb chainRBest$QDb $tDb.$qDb.rbest.chain.gz _EOF_ ); if ($qDbExists && $qChromInfoExists) { $bossScript->add(<<_EOF_ # Add gap/repeat stats to the net file using database tables: cd $runDir netClass -verbose=0 -noAr $tDb.$qDb.rbest.net.gz $tDb $qDb stdout \\ | gzip -c > $tDb.$qDb.rbest.classed.net.gz # Load nets: @@ -443,31 +445,31 @@ rm -f chain.tab if ( -s "$buildDir/axtChain/chainRBest${QDb}.bb" ) then mkdir -p /gbdb/$tDb/chainNet rm -f "/gbdb/$tDb/chainNet/$tDb.chainRBest$QDb.bb" "/gbdb/$tDb/chainNet/$tDb.chainRBest${QDb}Link.bb" ln -s "$buildDir/axtChain/chainRBest${QDb}.bb" "/gbdb/$tDb/chainNet/$tDb.chainRBest$QDb.bb" ln -s "$buildDir/axtChain/chainRBest${QDb}Link.bb" "/gbdb/$tDb/chainNet/$tDb.chainRBest${QDb}Link.bb" endif if ( -s "$buildDir/bigMaf/$tDb.$qDb.rbestNet.bb" ) then mkdir -p /gbdb/$tDb/chainNet rm -f "/gbdb/$tDb/chainNet/$tDb.$qDb.rbestNet.bb" "/gbdb/$tDb/chainNet/$tDb.$qDb.rbestNet.summary.bb" ln -s "$buildDir/bigMaf/$tDb.$qDb.rbestNet.bb" "/gbdb/$tDb/chainNet/$tDb.$qDb.rbestNet.bb" ln -s "$buildDir/bigMaf/$tDb.$qDb.rbestNet.summary.bb" "/gbdb/$tDb/chainNet/$tDb.$qDb.rbestNet.summary.bb" endif _EOF_ ); - } # else if (! $opt_trackHub && $dbExists) + } # else if (! $tTrackHub && $dbExists) $bossScript->execute(); } # sub loadRBest {} sub cleanUp { my $runDir = "$buildDir"; my $whatItDoes = "cleanup temporary files used by RBest procedure."; my $bossScript = newBash HgRemoteScript("$runDir/rBestCleanUp.bash", $dbHost, $runDir, $whatItDoes); $bossScript->add(<<_EOF_ rm -fr axtChain/experiments rm -f axtChain/bigChain.as axtChain/bigLink.as rm -f bigMaf/bigMaf.as rm -f bigMaf/mafSummary.as rm -fr axtChain/rBestNet axtChain/rBestChain @@ -482,48 +484,53 @@ # main #$opt_debug = 1; # Prevent "Suspended (tty input)" hanging: &HgAutomate::closeStdin(); # Make sure we have valid options and correct number of args &checkOptions(); &usage(1) if (scalar(@ARGV) != 2); ($tDb, $qDb) = @ARGV; # may be working on a 2bit file that does not have a database browser $dbExists = 0; $dbExists = 1 if (&HgAutomate::databaseExists($dbHost, $tDb)); +$tTrackHub = $opt_trackHub ? 1 : 0; # db might exist, but it may not have chromInfo table (promoted hub) $tChromInfoExists = 0; if ($dbExists) { $tChromInfoExists = 1 if (&HgAutomate::dbTableExists($dbHost, $tDb, "chromInfo")); + $tTrackHub = 1 if ($tTrackHub || (0 == $tChromInfoExists)); } # may be working with a query that has no database $qDbExists = 0; $qDbExists = 1 if (&HgAutomate::databaseExists($dbHost, $qDb)); $qChromInfoExists = 0; +$qTrackHub = 0; if ($qDbExists) { $qChromInfoExists = 1 if (&HgAutomate::dbTableExists($dbHost, $qDb, "chromInfo")); + $qTrackHub = 1 if (0 == $qChromInfoExists); } printf STDERR "# target db exists: %s\n", $dbExists ? "TRUE" : "FALSE"; printf STDERR "# target chromInfo exists: %s\n", $tChromInfoExists ? "TRUE" : "FALSE"; printf STDERR "# query db exists: %s\n", $qDbExists ? "TRUE" : "FALSE"; printf STDERR "# query chromInfo exists: %s\n", $qChromInfoExists ? "TRUE" : "FALSE"; -printf STDERR "# trackHub: %s\n", $opt_trackHub ? "TRUE" : "FALSE"; +printf STDERR "# target trackHub: %s\n", $tTrackHub ? "TRUE" : "FALSE"; +printf STDERR "# query trackHub: %s\n", $qTrackHub ? "TRUE" : "FALSE"; $QDb = ucfirst($qDb); $target2Bit = "$HgAutomate::clusterData/$tDb/$tDb.2bit"; $query2Bit = "$HgAutomate::clusterData/$qDb/$qDb.2bit"; $target2Bit = $opt_target2Bit if ($opt_target2Bit); $query2Bit = $opt_query2Bit if ($opt_query2Bit); $targetSizes = "$HgAutomate::clusterData/$tDb/chrom.sizes"; $querySizes = "$HgAutomate::clusterData/$qDb/chrom.sizes"; $targetSizes = $opt_targetSizes if ($opt_targetSizes); $querySizes = $opt_querySizes if ($opt_querySizes); $splitRef = (`wc -l < $targetSizes` <= $HgAutomate::splitThreshold); # Establish what directory we will work in.