4f1877c2974ee37a984000e1add2c4c27da0de7b hiram Fri Oct 25 18:48:17 2024 -0700 the TMPDIR needs to be set locally on the para node not where the driver script is making the sub-script refs #34685 diff --git src/hg/utils/automation/doWindowMasker.pl src/hg/utils/automation/doWindowMasker.pl index 1b8d577..c980898 100755 --- src/hg/utils/automation/doWindowMasker.pl +++ src/hg/utils/automation/doWindowMasker.pl @@ -1,284 +1,314 @@ #!/usr/bin/env perl # DO NOT EDIT the /cluster/bin/scripts copy of this file -- # edit ~/kent/src/hg/utils/automation/doWindowMasker.pl instead. # $Id: doWindowMasker.pl,v 1.8 2009/03/13 22:27:12 hiram Exp $ use Getopt::Long; use warnings; use strict; use FindBin qw($Bin); use lib "$Bin"; use HgAutomate; use HgRemoteScript; use HgStepManager; # Option variable names, both common and peculiar to this script: use vars @HgAutomate::commonOptionVars; use vars @HgStepManager::optionVars; use vars qw/ $opt_buildDir $opt_unmaskedSeq /; # Specify the steps supported with -continue / -stop: my $stepper = new HgStepManager( [ { name => 'count', func => \&doCount }, { name => 'mask', func => \&doMask }, { name => 'sdust', func => \&doSdust }, { name => 'twobit', func => \&doTwoBit }, { name => 'load', func => \&doLoad }, { name => 'cleanup', func => \&doCleanup }, ] ); # Option defaults: my $defaultWorkhorse = 'least loaded'; my $dbHost = 'hgwdev'; my $unmaskedSeq = "$HgAutomate::clusterData/\$db/\$db.unmasked.2bit"; my $base = $0; $base =~ s/^(.*\/)?//; sub usage { # Usage / help / self-documentation: my ($status, $detailed) = @_; # Basic help (for incorrect usage): print STDERR " usage: $base db options: "; print STDERR $stepper->getOptionHelp(); print STDERR <<_EOF_ -buildDir dir Use dir instead of default $HgAutomate::clusterData/\$db/$HgAutomate::trackBuild/WindowMasker.\$date (necessary when continuing at a later date). -unmaskedSeq <file.2bit> Use file.2bit for unmasked sequence, default is: $unmaskedSeq _EOF_ ; print STDERR &HgAutomate::getCommonOptionHelp('dbHost' => $dbHost, 'workhorse' => $defaultWorkhorse); print STDERR " Automates UCSC's WindowMasker process for genome database \$db. Steps: count: Do the first pass of WindowMasker: collecting the counts. mask: The second pass of WindowMasker and collect output. sdust: Another pass of WindowMasker using -sdust true. twobit: Make masked twobit files. load: load and clean of gaps, reload cleaned table. cleanup: Removes or compresses intermediate files. All operations are performed in the build directory which is $HgAutomate::clusterData/\$db/$HgAutomate::trackBuild/WindowMasker.\$date unless -buildDir is given. "; # Detailed help (-help): print STDERR " Assumptions: 1. $HgAutomate::clusterData/\$db/\$db.unmasked.2bit contains sequence for database/assembly \$db. (This can be overridden with -unmaskedSeq.) " if ($detailed); print "\n"; exit $status; } # Globals: # Command line args: db my ($db); # Other: my ($buildDir); sub checkOptions { # Make sure command line options are valid/supported. my $ok = GetOptions(@HgStepManager::optionSpec, 'buildDir=s', 'unmaskedSeq=s', @HgAutomate::commonOptionSpec, ); &usage(1) if (!$ok); &usage(0, 1) if ($opt_help); &HgAutomate::processCommonOptions(); my $err = $stepper->processOptions(); usage(1) if ($err); $dbHost = $opt_dbHost if ($opt_dbHost); } ######################################################################### # * step: count [workhorse] sub doCount { my $runDir = "$buildDir"; &HgAutomate::checkCleanSlate('count', 'mask', "$runDir/windowmasker.counts"); &HgAutomate::mustMkdir($runDir); my $whatItDoes = "It does WindowMasker counts step."; my $workhorse = &HgAutomate::chooseWorkhorse(); my $bossScript = new HgRemoteScript("$runDir/doCount.csh", $workhorse, $runDir, $whatItDoes); my $tmpDir = &HgAutomate::tmpDir(); $bossScript->add(<<_EOF_ +if ( -d "/data/tmp" ) then + setenv TMPDIR "/data/tmp" +else if ( -d "/scratch/tmp" ) then + setenv TMPDIR "/scratch/tmp" +else + set tmpSz = `df --output=avail -k /tmp | tail -1` + set shmSz = `df --output=avail -k /dev/shm | tail -1` + if ( "\${shmSz}" > "\${tmpSz}" ) then + mkdir -p /dev/shm/tmp + chmod 777 /dev/shm/tmp + setenv TMPDIR "/dev/shm/tmp" + else + setenv TMPDIR "/tmp" + endif +endif set windowMaskerDir = /cluster/bin/\$MACHTYPE set windowMasker = \$windowMaskerDir/windowmasker set fa = $db.fa -set tmpDir = `mktemp -d -p $tmpDir doWindowMasker.XXXXXX` +set tmpDir = `mktemp -d -p \$TMPDIR doWindowMasker.XXXXXX` chmod 775 \$tmpDir set inputTwoBit = $unmaskedSeq pushd \$tmpDir twoBitToFa \$inputTwoBit \$fa \$windowMasker -mk_counts true -input \$fa -output windowmasker.counts popd cp \$tmpDir/windowmasker.counts . rm -rf \$tmpDir _EOF_ ); $bossScript->execute(); } # doCount ######################################################################### # * step: mask [workhorse] sub doMask { printf STDERR "# doMask: obsolete step, no longer needed\n"; return 0; } # doMask ######################################################################### # * step: sdust [workhorse] sub doSdust { my $runDir = "$buildDir"; &HgAutomate::checkExistsUnlessDebug('mask', 'sdust', "$runDir/windowmasker.counts"); my $whatItDoes = "It does WindowMasker masking step with -sdust true."; my $workhorse = &HgAutomate::chooseWorkhorse(); my $bossScript = new HgRemoteScript("$runDir/doSdust.csh", $workhorse, $runDir, $whatItDoes); my $tmpDir = &HgAutomate::tmpDir(); $bossScript->add(<<_EOF_ +if ( -d "/data/tmp" ) then + setenv TMPDIR "/data/tmp" +else if ( -d "/scratch/tmp" ) then + setenv TMPDIR "/scratch/tmp" +else + set tmpSz = `df --output=avail -k /tmp | tail -1` + set shmSz = `df --output=avail -k /dev/shm | tail -1` + if ( "\${shmSz}" > "\${tmpSz}" ) then + mkdir -p /dev/shm/tmp + chmod 777 /dev/shm/tmp + setenv TMPDIR "/dev/shm/tmp" + else + setenv TMPDIR "/tmp" + endif +endif set windowMaskerDir = /cluster/bin/\$MACHTYPE set windowMasker = \$windowMaskerDir/windowmasker set fa = $db.fa -set tmpDir = `mktemp -d -p $tmpDir doWindowMasker.XXXXXX` +set tmpDir = `mktemp -d -p \$TMPDIR doWindowMasker.XXXXXX` chmod 775 \$tmpDir set inputTwoBit = $unmaskedSeq cp windowmasker.counts \$tmpDir pushd \$tmpDir twoBitToFa \$inputTwoBit \$fa \$windowMasker -ustat windowmasker.counts -sdust true -input \$fa -output windowmasker.intervals perl -wpe \'if \(s\/^\>lcl\\\|\(\.\*\)\\n\$\/\/\) { \$chr = \$1\; } \\ if \(\/^\(\\d+\) \- \(\\d+\)\/\) { \\ \$s=\$1\; \$e=\$2+1\; s\/\(\\d+\) \- \(\\d+\)\/\$chr\\t\$s\\t\$e\/\; \\ }\' windowmasker.intervals > windowmasker.sdust.bed popd cp \$tmpDir/windowmasker.sdust.bed . rm -rf \$tmpDir _EOF_ ); $bossScript->execute(); } # doSdust ######################################################################### # * step: twobit [fileServer] sub doTwoBit { my $runDir = "$buildDir"; my $whatItDoes = "Make .2bit files from the beds."; &HgAutomate::checkExistsUnlessDebug('sdust', 'twobit', ("$runDir/windowmasker.counts", "$runDir/windowmasker.sdust.bed")); my $fileServer = &HgAutomate::chooseFileServer($runDir); my $bossScript = new HgRemoteScript("$runDir/doTwoBit.csh", $fileServer, $runDir, $whatItDoes); $bossScript->add(<<_EOF_ twoBitMask $unmaskedSeq windowmasker.sdust.bed $db.wmsk.sdust.2bit twoBitToFa $db.wmsk.sdust.2bit stdout | faSize stdin >&faSize.$db.wmsk.sdust.txt _EOF_ ); $bossScript->execute(); } #doTwoBit ######################################################################### # * step: load [dbHost] sub doLoad { my $runDir = "$buildDir"; my $whatItDoes = "load sdust.bed and filter with gaps to clean"; &HgAutomate::checkExistsUnlessDebug('twobit', 'load', ("$runDir/$db.wmsk.sdust.2bit", "$runDir/faSize.$db.wmsk.sdust.txt")); my $bossScript = new HgRemoteScript("$runDir/doLoad.csh", $dbHost, $runDir, $whatItDoes); $bossScript->add(<<_EOF_ hgLoadBed $db windowmaskerSdust windowmasker.sdust.bed featureBits -countGaps $db windowmaskerSdust >&fb.$db.windowmaskerSdust.beforeClean.txt featureBits $db -not gap -bed=notGap.bed featureBits $db windowmaskerSdust notGap.bed -bed=stdout | gzip -c > cleanWMask.bed.gz hgLoadBed $db windowmaskerSdust cleanWMask.bed.gz featureBits -countGaps $db windowmaskerSdust >&fb.$db.windowmaskerSdust.clean.txt zcat cleanWMask.bed.gz | twoBitMask ../../$db.unmasked.2bit stdin -type=.bed $db.cleanWMSdust.2bit twoBitToFa $db.cleanWMSdust.2bit stdout | faSize stdin >& faSize.$db.cleanWMSdust.txt featureBits -countGaps $db rmsk windowmaskerSdust >&fb.$db.rmsk.windowmaskerSdust.txt _EOF_ ); $bossScript->execute(); } #doLoad ######################################################################### # * step: cleanup [fileServer] sub doCleanup { my $runDir = "$buildDir"; my $whatItDoes = "It cleans up or compresses intermediate files."; my $fileServer = &HgAutomate::chooseFileServer($runDir); my $bossScript = new HgRemoteScript("$runDir/doCleanup.csh", $fileServer, $runDir, $whatItDoes); $bossScript->add(<<_EOF_ gzip $runDir/windowmasker.counts gzip $runDir/windowmasker.sdust.bed _EOF_ ); $bossScript->execute(); } # doCleanup ######################################################################### # main # Prevent "Suspended (tty input)" hanging: &HgAutomate::closeStdin(); # Make sure we have valid options and exactly 1 argument: &checkOptions(); &usage(1) if (scalar(@ARGV) != 1); my $secondsStart = `date "+%s"`; chomp $secondsStart; ($db) = @ARGV; # Force debug and verbose until this is looking pretty solid: #$opt_debug = 1; $opt_verbose = 3 if ($opt_verbose < 3); # Establish what directory we will work in. my $date = `date +%Y-%m-%d`; chomp $date; $buildDir = $opt_buildDir ? $opt_buildDir : "$HgAutomate::clusterData/$db/$HgAutomate::trackBuild/WindowMasker.$date"; $unmaskedSeq = $opt_unmaskedSeq ? $opt_unmaskedSeq : "$HgAutomate::clusterData/$db/$db.unmasked.2bit"; # Do everything. $stepper->execute(); # Tell the user anything they should know. my $stopStep = $stepper->getStopStep(); my $upThrough = ($stopStep eq 'cleanup') ? "" : " (through the '$stopStep' step)"; my $secondsEnd = `date "+%s"`; chomp $secondsEnd; my $elapsedSeconds = $secondsEnd - $secondsStart; my $elapsedMinutes = int($elapsedSeconds/60); $elapsedSeconds -= $elapsedMinutes * 60; &HgAutomate::verbose(1, "\n *** All done !$upThrough - Elapsed time: ${elapsedMinutes}m${elapsedSeconds}s\n"); &HgAutomate::verbose(1, " *** Steps were performed in $buildDir\n"); &HgAutomate::verbose(1, "\n");