4165c71e12cef21e41d7042b9f8f1ba71d9cd910 hiram Fri Oct 18 13:29:11 2024 -0700 eliminate direct use of /scratch/tmp and instead use appropriate tmpDir function no redmine diff --git src/hg/utils/automation/doSimpleRepeat.pl src/hg/utils/automation/doSimpleRepeat.pl index 1fc05f4..e80e975 100755 --- src/hg/utils/automation/doSimpleRepeat.pl +++ src/hg/utils/automation/doSimpleRepeat.pl @@ -150,57 +150,58 @@ my $clusterSeq = "$clusterSeqDir/$db.doSimp.2bit"; if ($inHive) { $clusterSeq = "$clusterSeqDir/$db.unmasked.2bit"; } $clusterSeq = "$unmaskedSeq" if ($opt_unmaskedSeq); my $partDir .= "$okOut[0]/$db/TrfPart"; $partDir = "$buildDir/TrfPart" if ($opt_unmaskedSeq); my $trf409Option = ""; my $trfCmd = "trf"; if ($trf409 ne 0) { $trf409Option = "-l=$trf409"; $trfCmd = "trf.4.09"; } # Cluster job script: + my $tmpDir = &HgAutomate::tmpDir(); my $fh = &HgAutomate::mustOpen(">$runDir/TrfRun.csh"); print $fh <<_EOF_ #!/bin/csh -ef set finalOut = \$1 set inLst = \$finalOut:r set inLft = \$inLst:r.lft $HgAutomate::setMachtype # Use local disk for output, and move the final result to \$finalOut # when done, to minimize I/O. -set tmpDir = `mktemp -d -p /scratch/tmp doSimpleRepeat.cluster.XXXXXX` +set tmpDir = `mktemp -d -p $tmpDir doSimpleRepeat.cluster.XXXXXX` pushd \$tmpDir foreach spec (`cat \$inLst`) # Remove path and .2bit filename to get just the seq:start-end spec: set base = `echo \$spec | sed -r -e 's/^[^:]+://'` # If \$spec is the whole sequence, twoBitToFa removes the :start-end part, # which causes liftUp to barf later. So tweak the header back to # seq:start-end for liftUp's sake: twoBitToFa \$spec stdout \\ | sed -e "s/^>.*/>\$base/" \\ | $clusterBin/trfBig $trf409Option -trf=$clusterBin/$trfCmd \\ - stdin /dev/null -bedAt=\$base.bed -tempDir=/scratch/tmp + stdin /dev/null -bedAt=\$base.bed -tempDir=$tmpDir end # Due to the large chunk size, .lft files can have thousands of lines. # Even though the liftUp code does &lineFileClose, somehow we still # run out of filehandles. So limit the size of liftSpecs: split -a 3 -d -l 500 \$inLft SplitLft. # Lift up: foreach splitLft (SplitLft.*) set bedFiles = `awk '{print \$2 ".bed"};' \$splitLft` endsInLf -zeroOk \$bedFiles set lineCount=`cat \$bedFiles | wc -l` if (\$lineCount > 0) then cat \$bedFiles \\ | liftUp -type=.bed tmpOut.\$splitLft \$splitLft error stdin @@ -284,35 +285,36 @@ sub doSingle { my $runDir = "$buildDir"; &HgAutomate::mustMkdir($runDir); my $workhorse = &HgAutomate::chooseWorkhorse(); my $whatItDoes = "It runs trfBig on the entire (small) genome in one pass."; my $bossScript = new HgRemoteScript("$runDir/doTrf.csh", $workhorse, $runDir, $whatItDoes); my $trf409Option = ""; my $trfCmd = "trf"; if ($trf409 ne 0) { $trf409Option = "-l=$trf409"; $trfCmd = "trf.4.09"; } + my $tmpDir = &HgAutomate::tmpDir(); $bossScript->add(<<_EOF_ $HgAutomate::setMachtype twoBitToFa $unmaskedSeq stdout \\ | $clusterBin/trfBig $trf409Option -trf=$clusterBin/$trfCmd \\ - stdin /dev/null -bedAt=simpleRepeat.bed -tempDir=/scratch/tmp + stdin /dev/null -bedAt=simpleRepeat.bed -tempDir=$tmpDir _EOF_ ); $bossScript->execute(); } # doSingle sub doTrf { if ($useCluster) { &doCluster(); } else { &doSingle(); } } # doTrf #########################################################################