9110d73a77da715fc6a5a708c4de6e3ecd381110 hiram Fri Oct 17 10:23:07 2025 -0700 fixup correct TMPDIR with new cluster environment refs #36483 diff --git src/hg/utils/automation/doSameSpeciesLiftOver.pl src/hg/utils/automation/doSameSpeciesLiftOver.pl index c76c4135068..fa89b9260c5 100755 --- src/hg/utils/automation/doSameSpeciesLiftOver.pl +++ src/hg/utils/automation/doSameSpeciesLiftOver.pl @@ -66,31 +66,31 @@ "; print STDERR $stepper->getOptionHelp(); print STDERR <<_EOF_ -buildDir dir Use dir instead of default $HgAutomate::clusterData/\$fromDb/$HgAutomate::trackBuild/blat.\$toDb.\$date (necessary when continuing at a later date). -ooc /path/11.ooc Use this instead of the default /hive/data/genomes/fromDb/11.ooc Can be "none". -target2Bit /path/target.2bit Full path to target sequence (fromDb) -query2Bit /path/query.2bit Full path to query sequence (toDb) -targetSizes /path/target.chrom.sizes Full path to target chrom.sizes (fromDb) -querySizes /path/query.chrom.sizes Full path to query chrom.sizes (toDb) -chainRam Ng Cluster ram size for chain step, default: -chainRam=$chainRam -chainCpu N Cluster CPUs number for chain step, default: -chainCpu=$chainCpu - -localTmp /tmp Full path to temporary storage for heavy I/O usage + -localTmp /tmp Full path to temporary storage for heavy I/O usage - UNUSED 2025-10 _EOF_ ; print STDERR &HgAutomate::getCommonOptionHelp('dbHost' => $dbHost, 'workhorse' => '', 'fileServer' => '', 'ram' => $ramG, 'cpu' => $cpu, 'bigClusterHub' => ''); print STDERR " Automates UCSC's same-species liftOver (blat/chain/net) pipeline, based on Kate's suite of makeLo-* scripts: align: Aligns the assemblies using blat -fastMap on a big cluster. chain: Chains the alignments on a big cluster. net: Nets the alignments, uses netChainSubset to extract liftOver chains. load: Installs liftOver chain files, calls hgAddLiftOverChain on $dbHost. @@ -225,36 +225,53 @@ die "align: $paraHub does not have $ooc -- if that is not the correct " . "location, please run again with -ooc.\n"; } # script for a single job: split query partition further into # $splitSize bites while building a .lft liftUp spec; blat; lift. my $size = $splitSize; my $fh = &HgAutomate::mustOpen(">$runDir/job.csh"); print $fh <<_EOF_ #!/bin/csh -ef set targetList = \$1 set queryListIn = \$2 set outPsl = \$3 +unsetenv TMPDIR +if ( -d "/data/tmp" ) then + setenv TMPDIR "/data/tmp" +else if ( -d "/scratch/tmp" ) then + setenv TMPDIR "/scratch/tmp" +else + set tmpSz = `df --output=avail -k /tmp | tail -1` + set shmSz = `df --output=avail -k /dev/shm | tail -1` + if ( "\${shmSz}" > "\${tmpSz}" ) then + mkdir -p /dev/shm/tmp + chmod 777 /dev/shm/tmp + setenv TMPDIR "/dev/shm/tmp" + else + setenv TMPDIR "/tmp" + endif +endif + if (\$targetList:e == "lst") set targetList = $runDir/\$targetList if (\$queryListIn:e == "lst") set queryListIn = $runDir/\$queryListIn # Use local disk for output, and move the final result to \$outPsl # when done, to minimize I/O. -set tmpDir = `mktemp -d -p $localTmp doSame.blat.XXXXXX` +set tmpDir = `mktemp -d -p \$TMPDIR doSame.blat.XXXXXX` pushd \$tmpDir # We might get a .lst or a 2bit spec here -- convert to (list of) 2bit spec: if (\$queryListIn:e == "lst") then set specList = `cat \$queryListIn` else set specList = \$queryListIn endif # Further partition the query spec(s) into $splitSize coord ranges, building up # a .lst of 2bit specs for blat and a .lft liftUp spec for the results: cp /dev/null reSplitQuery.lst cp /dev/null query.lft foreach spec (\$specList) set file = `echo \$spec | awk -F: '{print \$1;}'` @@ -409,31 +426,48 @@ &HgAutomate::checkCleanSlate('chain', 'net', 'chainRaw'); &HgAutomate::checkExistsUnlessDebug('align', 'chain', $pslDir, $blatDoneFile); &makePslPartsLst($pslDir); my ($paraHub, $tSeqScratch, $qSeqScratch) = &getClusterSeqs(); # script for a single job: cat inputs if necessary and chain. my $fh = &HgAutomate::mustOpen(">$runDir/job.csh"); print $fh <<_EOF_ #!/bin/csh -ef set inPattern = \$1 set outChain = \$2 -set tmpOut = `mktemp -p $localTmp doSame.chain.XXXXXX` +unsetenv TMPDIR +if ( -d "/data/tmp" ) then + setenv TMPDIR "/data/tmp" +else if ( -d "/scratch/tmp" ) then + setenv TMPDIR "/scratch/tmp" +else + set tmpSz = `df --output=avail -k /tmp | tail -1` + set shmSz = `df --output=avail -k /dev/shm | tail -1` + if ( "\${shmSz}" > "\${tmpSz}" ) then + mkdir -p /dev/shm/tmp + chmod 777 /dev/shm/tmp + setenv TMPDIR "/dev/shm/tmp" + else + setenv TMPDIR "/tmp" + endif +endif + +set tmpOut = `mktemp -p \$TMPDIR doSame.chain.XXXXXX` cat $pslDir/\$inPattern* \\ | axtChain -verbose=0 -linearGap=medium -psl stdin \\ $tSeqScratch $qSeqScratch stdout \\ | chainBridge -linearGap=medium stdin $tSeqScratch $qSeqScratch \\ \$tmpOut mv \$tmpOut \$outChain chmod 664 \$outChain _EOF_ ; close($fh); &HgAutomate::run("chmod a+x $runDir/job.csh"); &HgAutomate::makeGsub($runDir, 'job.csh $(path1) ' . @@ -463,32 +497,48 @@ my $runDir = "$buildDir/run.chain"; my @outs = ("$runDir/$tDb.$qDb.all.chain.gz", "$runDir/$tDb.$qDb.noClass.net.gz"); &HgAutomate::checkCleanSlate('net', 'load', @outs); &HgAutomate::checkExistsUnlessDebug('chain', 'net', "$runDir/chainRaw/"); my $whatItDoes = "It nets the chained blat alignments and runs netChainSubset to produce liftOver chains."; my $mach = &HgAutomate::chooseWorkhorse(); my $bossScript = new HgRemoteScript("$runDir/doNet.csh", $mach, $runDir, $whatItDoes); my $chromBased = (`wc -l < $tSizes` <= $HgAutomate::splitThreshold); my $lump = $chromBased ? "" : "-lump=100"; $bossScript->add(<<_EOF_ +unsetenv TMPDIR +if ( -d "/data/tmp" ) then + setenv TMPDIR "/data/tmp" +else if ( -d "/scratch/tmp" ) then + setenv TMPDIR "/scratch/tmp" +else + set tmpSz = `df --output=avail -k /tmp | tail -1` + set shmSz = `df --output=avail -k /dev/shm | tail -1` + if ( "\${shmSz}" > "\${tmpSz}" ) then + mkdir -p /dev/shm/tmp + chmod 777 /dev/shm/tmp + setenv TMPDIR "/dev/shm/tmp" + else + setenv TMPDIR "/tmp" + endif +endif # Use local scratch disk... this can be quite I/O intensive: -set tmpDir = `mktemp -d -p $localTmp doSame.blat.XXXXXX` +set tmpDir = `mktemp -d -p \$TMPDIR doSame.blat.XXXXXX` # Merge up the hierarchy and assign unique chain IDs: mkdir \$tmpDir/chainMerged foreach d (chainRaw/*) set tChunk = \$d:t chainMergeSort \$d/*.chain > \$tmpDir/chainMerged/\$tChunk.chain end chainMergeSort \$tmpDir/chainMerged/*.chain \\ | chainSplit $lump \$tmpDir/chainSplit stdin endsInLf \$tmpDir/chainSplit/*.chain rm -rf \$tmpDir/chainMerged/ mkdir \$tmpDir/netSplit \$tmpDir/overSplit foreach f (\$tmpDir/chainSplit/*.chain)