0cf3bcb238f681f3df0decac63b08b4953d3f455 hiram Sat Oct 26 23:25:21 2024 -0700 getting TMPDIR set correctly refs #34685 diff --git src/hg/utils/automation/doSimpleRepeat.pl src/hg/utils/automation/doSimpleRepeat.pl index a7c3407..450d001 100755 --- src/hg/utils/automation/doSimpleRepeat.pl +++ src/hg/utils/automation/doSimpleRepeat.pl @@ -150,40 +150,40 @@ my $clusterSeq = "$clusterSeqDir/$db.doSimp.2bit"; if ($inHive) { $clusterSeq = "$clusterSeqDir/$db.unmasked.2bit"; } $clusterSeq = "$unmaskedSeq" if ($opt_unmaskedSeq); my $partDir .= "$okOut[0]/$db/TrfPart"; $partDir = "$buildDir/TrfPart" if ($opt_unmaskedSeq); my $trf409Option = ""; my $trfCmd = "trf"; if ($trf409 ne 0) { $trf409Option = "-l=$trf409"; $trfCmd = "trf.4.09"; } # Cluster job script: - my $tmpDir = &HgAutomate::tmpDir(); my $fh = &HgAutomate::mustOpen(">$runDir/TrfRun.csh"); print $fh <<_EOF_ #!/bin/csh -ef set finalOut = \$1 set inLst = \$finalOut:r set inLft = \$inLst:r.lft +unsetenv TMPDIR if ( -d "/data/tmp" ) then setenv TMPDIR "/data/tmp" else if ( -d "/scratch/tmp" ) then setenv TMPDIR "/scratch/tmp" else set tmpSz = `df --output=avail -k /tmp | tail -1` set shmSz = `df --output=avail -k /dev/shm | tail -1` if ( "\${shmSz}" > "\${tmpSz}" ) then mkdir -p /dev/shm/tmp chmod 777 /dev/shm/tmp setenv TMPDIR "/dev/shm/tmp" else setenv TMPDIR "/tmp" endif endif @@ -192,31 +192,31 @@ # Use local disk for output, and move the final result to \$finalOut # when done, to minimize I/O. set tmpDir = `mktemp -d -p \$TMPDIR doSimpleRepeat.cluster.XXXXXX` pushd \$tmpDir foreach spec (`cat \$inLst`) # Remove path and .2bit filename to get just the seq:start-end spec: set base = `echo \$spec | sed -r -e 's/^[^:]+://'` # If \$spec is the whole sequence, twoBitToFa removes the :start-end part, # which causes liftUp to barf later. So tweak the header back to # seq:start-end for liftUp's sake: twoBitToFa \$spec stdout \\ | sed -e "s/^>.*/>\$base/" \\ | $clusterBin/trfBig $trf409Option -trf=$clusterBin/$trfCmd \\ - stdin /dev/null -bedAt=\$base.bed -tempDir=$tmpDir + stdin /dev/null -bedAt=\$base.bed -tempDir=\$tmpDir end # Due to the large chunk size, .lft files can have thousands of lines. # Even though the liftUp code does &lineFileClose, somehow we still # run out of filehandles. So limit the size of liftSpecs: split -a 3 -d -l 500 \$inLft SplitLft. # Lift up: foreach splitLft (SplitLft.*) set bedFiles = `awk '{print \$2 ".bed"};' \$splitLft` endsInLf -zeroOk \$bedFiles set lineCount=`cat \$bedFiles | wc -l` if (\$lineCount > 0) then cat \$bedFiles \\ | liftUp -type=.bed tmpOut.\$splitLft \$splitLft error stdin @@ -303,33 +303,49 @@ my $workhorse = &HgAutomate::chooseWorkhorse(); my $whatItDoes = "It runs trfBig on the entire (small) genome in one pass."; my $bossScript = new HgRemoteScript("$runDir/doTrf.csh", $workhorse, $runDir, $whatItDoes); my $trf409Option = ""; my $trfCmd = "trf"; if ($trf409 ne 0) { $trf409Option = "-l=$trf409"; $trfCmd = "trf.4.09"; } my $tmpDir = &HgAutomate::tmpDir(); $bossScript->add(<<_EOF_ $HgAutomate::setMachtype +unsetenv TMPDIR +if ( -d "/data/tmp" ) then + setenv TMPDIR "/data/tmp" +else if ( -d "/scratch/tmp" ) then + setenv TMPDIR "/scratch/tmp" +else + set tmpSz = `df --output=avail -k /tmp | tail -1` + set shmSz = `df --output=avail -k /dev/shm | tail -1` + if ( "\${shmSz}" > "\${tmpSz}" ) then + mkdir -p /dev/shm/tmp + chmod 777 /dev/shm/tmp + setenv TMPDIR "/dev/shm/tmp" + else + setenv TMPDIR "/tmp" + endif +endif twoBitToFa $unmaskedSeq stdout \\ | $clusterBin/trfBig $trf409Option -trf=$clusterBin/$trfCmd \\ - stdin /dev/null -bedAt=simpleRepeat.bed -tempDir=$tmpDir + stdin /dev/null -bedAt=simpleRepeat.bed -tempDir=\$TMPDIR _EOF_ ); $bossScript->execute(); } # doSingle sub doTrf { if ($useCluster) { &doCluster(); } else { &doSingle(); } } # doTrf #########################################################################