4f1877c2974ee37a984000e1add2c4c27da0de7b hiram Fri Oct 25 18:48:17 2024 -0700 the TMPDIR needs to be set locally on the para node not where the driver script is making the sub-script refs #34685 diff --git src/hg/utils/automation/doCpgIslands.pl src/hg/utils/automation/doCpgIslands.pl index fe4b21b..78819b2 100755 --- src/hg/utils/automation/doCpgIslands.pl +++ src/hg/utils/automation/doCpgIslands.pl @@ -160,31 +160,47 @@ print $fh <<_EOF_ #!/bin/bash set -beEu -o pipefail export partName=\$1 export part2bit=partFa/\$partName.2bit export result=\$2 twoBitToFa \$part2bit stdout | /hive/data/staging/data/cpgIslandExt/cpglh /dev/stdin > \$result _EOF_ ; close($fh); my $fh = &HgAutomate::mustOpen(">$runDir/oneSplit.bash"); print $fh <<_EOF_ #!/bin/bash set -beEu -o pipefail -export tmpFile=`mktemp -p /dev/shm doCpg.\$\$.XXXXX` +if [ -d "/data/tmp" ]; then + export TMPDIR="/data/tmp" +elif [ -d "/scratch/tmp" ]; then + export TMPDIR="/scratch/tmp" +else + tmpSz=`df --output=avail -k /tmp | tail -1` + shmSz=`df --output=avail -k /dev/shm | tail -1` + if [ "\${shmSz}" -gt "\${tmpSz}" ]; then + mkdir -p /dev/shm/tmp + chmod 777 /dev/shm/tmp + export TMPDIR="/dev/shm/tmp" + else + export TMPDIR="/tmp" + fi +fi + +export tmpFile=`mktemp -p \$TMPDIR doCpg.\$\$.XXXXX` export chromSizes=$chromSizes export fileSpec="\${1}" export file=`echo \$fileSpec | cut -d':' -f1` export seq=`echo \$fileSpec | cut -d':' -f2` export range=`echo \$fileSpec | cut -d':' -f3` export start=`echo \$range | cut -d'-' -f1` export end=`echo \$range | cut -d'-' -f2` export seqSize=`grep -w "\${seq}" \$chromSizes | awk '{print \$2}'` twoBitToFa \$fileSpec stdout | maskOutFa stdin hard stdout | /hive/data/staging/data/cpgIslandExt/cpglh \\ /dev/stdin | sed -e "s/\\t /\\t/g;" > "\${tmpFile}" printf "%d\\t%s:%d-%d\\t%d\\t%s\\t%d\\n" "\${start}" "\${seq}" "\${start}" "\${end}" "\${seqSize}" "\${seq}" "\${seqSize}" \\ | liftUp -type=.bed results/\${seq}:\${start}-\${end}.cpg stdin error "\${tmpFile}" rm -f "\${tmpFile}" @@ -337,31 +353,31 @@ "$HgAutomate::clusterData/$db/$HgAutomate::trackBuild/cpgIslands"; $maskedSeq = $opt_maskedSeq ? $opt_maskedSeq : "$HgAutomate::clusterData/$db/$db.2bit"; $chromSizes = $opt_chromSizes ? $opt_chromSizes : "$HgAutomate::clusterData/$db/chrom.sizes"; $tableName = $opt_tableName ? $opt_tableName : "cpgIslandExt"; my $maxSeqSize=`sort -k2,2nr $chromSizes | head -1 | awk '{printf "%d", \$NF}'`; my $totalSeqSize=`ave -col=2 $chromSizes | grep -w total | awk '{printf "%d", \$NF}'`; chomp $maxSeqSize; chomp $totalSeqSize; $maxSplitSize = $maxSeqSize; $splitRun = 0; # big genomes are over 4Gb: 4*1024*1024*1024 = 4294967296 # or if maxSeqSize over 1Gb -if ( ($maxSeqSize > 4*1024**3) || ($maxSeqSize > 1024**3) ) { +if ( ($totalSeqSize > 4*1024**3) || ($maxSeqSize > 1024**3) ) { $splitRun = 1; $maxSplitSize = 1000000000; } printf STDERR "# total genome size %d, max sequence size: %d, splitRun: %s\n", $totalSeqSize, $maxSeqSize, $splitRun ? "TRUE" : "FALSE"; # Do everything. $stepper->execute(); # Tell the user anything they should know. my $stopStep = $stepper->getStopStep(); my $upThrough = ($stopStep eq 'cleanup') ? "" : " (through the '$stopStep' step)"; $secondsEnd = `date "+%s"`; chomp $secondsEnd;