b0b850bdc2557fed7f69594bc8fe013d478dbe85 hiram Wed Sep 28 10:19:12 2022 -0700 fixups for hs1 to database assemblies refs #29819 diff --git src/hg/utils/automation/doBlastzChainNet.pl src/hg/utils/automation/doBlastzChainNet.pl index cf14f20..82467cc 100755 --- src/hg/utils/automation/doBlastzChainNet.pl +++ src/hg/utils/automation/doBlastzChainNet.pl @@ -918,30 +918,36 @@ &HgAutomate::nfsNoodge("$runDir/$swappedChain"); if ($splitRef) { &HgAutomate::run("$HgAutomate::runSSH $fileServer nice " . "chainSplit $runDir/chain $runDir/$swappedChain"); } } # sub swapChains {} sub swapGlobals { # Swap our global variables ($buildDir, $tDb, $qDb and %defVars SEQ1/SEQ2) # so that the remaining steps need no tweaks for -swap. $buildDir = $swapDir; my $tmp = $qDb; $qDb = $tDb; $tDb = $tmp; + $tmp = $dbExists; + $dbExists = $qDbExists; + $qDbExists = $tmp; + $tmp = $tChromInfoExists; + $tChromInfoExists = $qChromInfoExists; + $qChromInfoExists = $tmp; $QDb = $isSelf ? 'Self' : ucfirst($qDb); foreach my $var ('DIR', 'LEN', 'CHUNK', 'LAP', 'SMSK') { $tmp = $defVars{"SEQ1_$var"}; $defVars{"SEQ1_$var"} = $defVars{"SEQ2_$var"}; $defVars{"SEQ2_$var"} = $tmp; } $defVars{'BASE'} = $swapDir; $tAsmId = $opt_qAsmId ? $opt_qAsmId : ""; $qAsmId = $opt_tAsmId ? $opt_tAsmId : ""; } sub doChainMerge { # If -swap, swap chains from other org; otherwise, merge the results # from the chainRun step. if ($opt_swap) { @@ -1114,31 +1120,31 @@ ####### instead of this database loading table (split table code could be ####### eliminated . . . ) $bossScript->add(<<_EOF_ cd $runDir/chain foreach c (`awk '{print \$1;}' $defVars{SEQ1_LEN}`) set f = \$c.chain if (! -e \$f) then echo no chains for \$c set f = /dev/null endif hgLoadChain $tDb \${c}_chain$QDb \$f end _EOF_ ); } else { - if (! $opt_trackHub && $dbExists) { + if (! $opt_trackHub && $dbExists && $tChromInfoExists ) { $bossScript->add(<<_EOF_ cd $runDir hgLoadChain -tIndex $tDb chain$QDb $tDb.$qDb.all.chain.gz _EOF_ ); } else { $bossScript->add(<<_EOF_ cd $runDir hgLoadChain -test -noBin -tIndex $tDb chain$QDb $tDb.$qDb.all.chain.gz wget --no-check-certificate -O bigChain.as 'http://genome-source.soe.ucsc.edu/gitlist/kent.git/raw/master/src/hg/lib/bigChain.as' wget --no-check-certificate -O bigLink.as 'http://genome-source.soe.ucsc.edu/gitlist/kent.git/raw/master/src/hg/lib/bigLink.as' sed 's/.000000//' chain.tab | awk 'BEGIN {OFS="\\t"} {print \$2, \$4, \$5, \$11, 1000, \$8, \$3, \$6, \$7, \$9, \$10, \$1}' > chain${QDb}.tab bedToBigBed -type=bed6+6 -as=bigChain.as -tab chain${QDb}.tab $defVars{SEQ1_LEN} chain${QDb}.bb awk 'BEGIN {OFS="\\t"} {print \$1, \$2, \$3, \$5, \$4}' link.tab | sort -k1,1 -k2,2n > chain${QDb}Link.tab bedToBigBed -type=bed4+1 -as=bigLink.as -tab chain${QDb}Link.tab $defVars{SEQ1_LEN} chain${QDb}Link.bb @@ -1147,31 +1153,31 @@ set percentCovered = `echo \$basesCovered \$totalBases | awk '{printf "%.3f", 100.0*\$1/\$2}'` printf "%d bases of %d (%s%%) in intersection\\n" "\$basesCovered" "\$totalBases" "\$percentCovered" > ../fb.$tDb.chain${QDb}Link.txt rm -f link.tab rm -f chain.tab _EOF_ ); } } if (! $isSelf) { my $tRepeats = $opt_tRepeats ? "-tRepeats=$opt_tRepeats" : $defaultTRepeats; my $qRepeats = $opt_qRepeats ? "-qRepeats=$opt_qRepeats" : $defaultQRepeats; if ($opt_swap) { $tRepeats = $opt_qRepeats ? "-tRepeats=$opt_qRepeats" : $defaultQRepeats; $qRepeats = $opt_tRepeats ? "-qRepeats=$opt_tRepeats" : $defaultTRepeats; } - if (! $opt_trackHub && $dbExists) { + if (! $opt_trackHub && $dbExists && $tChromInfoExists) { if ($qDbExists && $qChromInfoExists) { $bossScript->add(<<_EOF_ # Add gap/repeat stats to the net file using database tables: cd $runDir netClass -verbose=0 $tRepeats $qRepeats -noAr noClass.net $tDb $qDb $tDb.$qDb.net # Load nets: netFilter -minGap=10 $tDb.$qDb.net \\ | hgLoadNet -verbose=0 $tDb net$QDb stdin _EOF_ ); } else { $bossScript->add(<<_EOF_ cp -p noClass.net $tDb.$qDb.net @@ -1766,31 +1772,31 @@ cd $goldenPath/$tDb/vs$QDb/mafSynNet ln -s $buildDir/mafSynNet/* . _EOF_ ); } } else { # scaffold-based assembly # filter net for synteny and create syntenic net mafs $bossScript->add(<<_EOF_ netFilter -syn $tDb.$qDb.net.gz | gzip -c > $tDb.$qDb.syn.net.gz netChainSubset -verbose=0 $tDb.$qDb.syn.net.gz $tDb.$qDb.all.chain.gz stdout \\ | chainStitchId stdin stdout | gzip -c > $tDb.$qDb.syn.chain.gz _EOF_ ); - if (! $opt_trackHub && $dbExists) { + if (! $opt_trackHub && $dbExists && $tChromInfoExists) { $bossScript->add(<<_EOF_ set lineCount = `zcat $tDb.$qDb.syn.chain.gz | wc -l` if (\$lineCount > 0) then hgLoadChain -tIndex $tDb chainSyn$QDb $tDb.$qDb.syn.chain.gz endif _EOF_ ); if ($qDbExists && $qChromInfoExists) { $bossScript->add(<<_EOF_ netFilter -minGap=10 $tDb.$qDb.syn.net.gz \\ | hgLoadNet -verbose=0 $tDb netSyn$QDb stdin endif _EOF_ ); } @@ -1863,43 +1869,43 @@ cd $goldenPath/$tDb/vs$QDb if (-s $runDir/synNet.md5sum.txt ) then rm -f $tDb.$qDb.syn.net.gz rm -f $tDb.$qDb.synNet.maf.gz ln -s $runDir/$tDb.$qDb.syn.net.gz . ln -s $runDir/$tDb.$qDb.synNet.maf.gz . cat $runDir/synNet.md5sum.txt >> md5sum.txt sort -u md5sum.txt > tmp.sum cat tmp.sum > md5sum.txt rm -f tmp.sum endif _EOF_ ); } - if (! $opt_trackHub && $dbExists) { + if (! $opt_trackHub && $dbExists && $tChromInfoExists) { $bossScript->add(<<_EOF_ cd "$buildDir" if (\$lineCount > 0) then featureBits $tDb chainSyn${QDb}Link >&fb.$tDb.chainSyn${QDb}Link.txt cat fb.$tDb.chainSyn${QDb}Link.txt endif _EOF_ ); } } $bossScript->execute(); -} +} # sub doSyntenicNet ######################################################################### # # -- main -- # Prevent "Suspended (tty input)" hanging: &HgAutomate::closeStdin(); #$opt_debug = 1; &checkOptions(); &usage(1) if (scalar(@ARGV) != 1); $secondsStart = `date "+%s"`; chomp $secondsStart; @@ -1974,30 +1980,31 @@ if ($dbExists) { $tChromInfoExists = 1 if (&HgAutomate::dbTableExists($dbHost, $tDb, "chromInfo")); } # may be working with a query that does not have a database $qDbExists = 0; $qDbExists = 1 if (&HgAutomate::databaseExists($dbHost, $qDb)); $qChromInfoExists = 0; if ($qDbExists) { $qChromInfoExists = 1 if (&HgAutomate::dbTableExists($dbHost, $qDb, "chromInfo")); } printf STDERR "# target db exists: %s\n", $dbExists ? "TRUE" : "FALSE"; printf STDERR "# target chromInfo exists: %s\n", $tChromInfoExists ? "TRUE" : "FALSE"; printf STDERR "# query db exists: %s\n", $qDbExists ? "TRUE" : "FALSE"; printf STDERR "# query chromInfo exists: %s\n", $qChromInfoExists ? "TRUE" : "FALSE"; +printf STDERR "# trackHub: %s\n", $opt_trackHub ? "TRUE" : "FALSE"; # When running -swap, swapGlobals() happens at the end of the chainMerge step. # However, if we also use -continue with some step later than chainMerge, we # need to call swapGlobals before executing the remaining steps. if ($opt_swap && $stepper->stepPrecedes('chainMerge', $stepper->getStartStep())) { &swapGlobals(); } $stepper->execute(); $secondsEnd = `date "+%s"`; chomp $secondsEnd; my $elapsedSeconds = $secondsEnd - $secondsStart; my $elapsedMinutes = int($elapsedSeconds/60);