6e7e7ed56bc4de54088e81e32068fb14c4dfaac3 hiram Thu Oct 30 11:44:32 2025 -0700 may be complete at this time diff --git src/hg/utils/automation/doLongdust.pl src/hg/utils/automation/doLongdust.pl index 66fc5dbaa2c..bf407430769 100755 --- src/hg/utils/automation/doLongdust.pl +++ src/hg/utils/automation/doLongdust.pl @@ -88,31 +88,31 @@ ); usage(1) if (!$ok); usage(0, 1) if ($opt_help); HgAutomate::processCommonOptions(); my $err = $stepper->processOptions(); usage(1) if ($err); $dbHost = $opt_dbHost if ($opt_dbHost); } ######################################################################### # * step: setup [workhorse] sub doSetup { my $runDir = "$buildDir"; if ( ! $opt_debug && (-s "$runDir/chrom.sizes" && -s "$runDir/part.list" )) { printf STDERR "# setup step already complete\n"; - return + return; } if (! $opt_debug) { my @outs = ("$runDir/doSetup.bash", "$runDir/chrom.sizes", "$runDir/part.list"); HgAutomate::checkCleanSlate('setup', 'cluster', @outs); } HgAutomate::mustMkdir($runDir); my $whatItDoes = "prepare files for longdust cluster run."; my $workhorse = $opt_debug ? "hgwdev" : HgAutomate::chooseWorkhorse(); my $bossScript = newBash HgRemoteScript("$runDir/doSetup.bash", $workhorse, $runDir, $whatItDoes); my $tmpDir = HgAutomate::tmpDir(); @@ -125,30 +125,34 @@ export seqMax=`head -1 chrom.sizes | awk '{printf "%d", \$2+1}'` partitionSequence.pl -lstDir listFiles \$seqMax 0 \\ unmasked.2bit chrom.sizes 10000 ls -S listFiles/*.lst > part.list _EOF_ ); $bossScript->execute() if (! $opt_debug); } # doSetup ######################################################################### # * step: cluster [bigClusterHub] sub doCluster { my $paraHub = $bigClusterHub; my $runDir = "$buildDir"; + if ( ! $opt_debug && -s "$runDir/run.time") { + printf STDERR "# cluster step already complete\n"; + return; + } my $partList = "part.list"; # from doSetup HgAutomate::checkExistsUnlessDebug('setup', 'bedResult', "$runDir/part.list"); my $whatItDoes = "Cluster run longdust on the part.list sequences. Results into ./result/*.bed.gz"; my $templateCmd = ('runOne $(path1) {check out exists result/$(root1).bed.gz}'); HgAutomate::makeGsub($runDir, $templateCmd); `touch "$runDir/para_hub_$paraHub"`; my $paraRun = <<'_EOF_'; para make -ram=3g jobList para check para time > run.time cat run.time _EOF_ my $bossScript = newBash HgRemoteScript("$runDir/doCluster.bash", $paraHub, $runDir, $whatItDoes); @@ -185,56 +189,64 @@ chmod +x runOne gensub2 $partList single gsub jobList $paraRun _EOF_ ); $bossScript->execute() if (! $opt_debug); } # doCluster ######################################################################### # * step: bedResult [fileServer] sub doBedResult { my $runDir = "$buildDir"; + if ( ! $opt_debug && -s "$runDir/longdust.bb") { + printf STDERR "# bedResult step already complete\n"; + return; + } my $whatItDoes = "Consolidate the cluster run bed.gz files. Make single bed and bigBed file."; HgAutomate::checkExistsUnlessDebug('cluster', 'cleanup', ("$runDir/longdust.bed.gz", - "$runDir/windowmasker.sdust.bed")); + "$runDir/longdust.bb")); my $fileServer = $opt_debug ? "hgwdev" : HgAutomate::chooseFileServer($runDir); my $bossScript = newBash HgRemoteScript("$runDir/makeBed.bash", $fileServer, $runDir, $whatItDoes); $bossScript->add(<<_EOF_ -ls -S result/*.bed | xargs gzcat | gzip -c > longdust.bed.gz +ls -S result/*.bed.gz | xargs zcat | gzip -c > longdust.bed.gz bedToBigBed -type=bed3 longdust.bed.gz chrom.sizes longdust.bb export totalBases=`ave -col=2 chrom.sizes | grep total | awk '{printf "%d", \$NF}'` export basesCovered=`bigBedInfo longdust.bb | grep basesCovered | awk '{printf "%s", \$NF}' | tr -d ','` export percentCovered=`echo \$basesCovered \$totalBases | awk '{printf "%.2f", 100*\$1/\$2}'` printf "%d bases of %d (%s%%) in intersection\n" "\$basesCovered" "\$totalBases" "\$percentCovered" > fb.longdust.txt cat fb.longdust.txt _EOF_ ); $bossScript->execute() if (! $opt_debug); } #doBedResult ######################################################################### # * step: cleanup [fileServer] sub doCleanup { my $runDir = "$buildDir"; + if ( ! $opt_debug && ( ! -d "$runDir/tmp" && ! -d "$runDir/result")) { + printf STDERR "# cleanup step already complete\n"; + return; + } my $whatItDoes = "Cleans up or compresses intermediate files."; my $fileServer = $opt_debug ? "hgwdev" : HgAutomate::chooseFileServer($runDir); - my $bossScript = newBash HgRemoteScript("$runDir/doCleanup.bash", $fileServer, + my $bossScript = newBash HgRemoteScript("$runDir/cleanup.bash", $fileServer, $runDir, $whatItDoes); $bossScript->add(<<_EOF_ rm -fr tmp result _EOF_ ); $bossScript->execute() if (! $opt_debug); } # doCleanup ######################################################################### # main # Prevent "Suspended (tty input)" hanging: HgAutomate::closeStdin(); # Make sure we have valid options and exactly 1 argument: