4f1877c2974ee37a984000e1add2c4c27da0de7b hiram Fri Oct 25 18:48:17 2024 -0700 the TMPDIR needs to be set locally on the para node not where the driver script is making the sub-script refs #34685 diff --git src/hg/utils/automation/asmHubRepeatMasker.sh src/hg/utils/automation/asmHubRepeatMasker.sh index 4678efe..c061941 100755 --- src/hg/utils/automation/asmHubRepeatMasker.sh +++ src/hg/utils/automation/asmHubRepeatMasker.sh @@ -2,30 +2,45 @@ # asmHubRepeatMasker.sh - process a specified *.fa.out.gz file into a set # of bigBed files for assembly hub display # # fail on any error: set -beEu -o pipefail # ensure sort functions properly despite kluster node environment export LC_COLLATE=C if [ $# -ne 3 ]; then printf "%s\n" "usage: asmHubRepeatMasker.sh " 1>&2 exit 255 fi +if [ -d "/data/tmp" ]; then + export TMPDIR="/data/tmp" +elif [ -d "/scratch/tmp" ]; then + export TMPDIR="/scratch/tmp" +else + tmpSz=`df --output=avail -k /tmp | tail -1` + shmSz=`df --output=avail -k /dev/shm | tail -1` + if [ "${shmSz}" -gt "${tmpSz}" ]; then + mkdir -p /dev/shm/tmp + chmod 777 /dev/shm/tmp + export TMPDIR="/dev/shm/tmp" + else + export TMPDIR="/tmp" + fi +fi export dateStamp=`date "+%FT%T %s"` export asmId=$1 export rmOutFile=$2 export destDir=$3 export chrSizes="" if [ -s "$destDir/$asmId.chrom.sizes" ]; then chrSizes="$destDir/$asmId.chrom.sizes" elif [ -s "../../$asmId.chrom.sizes" ]; then chrSizes="../../$asmId.chrom.sizes" fi # assume this file name pattern export faAlign=`echo "${rmOutFile}" | sed -e 's/sorted.fa.out/fa.align/; s/.gz//;'` @@ -68,31 +83,31 @@ "${asmId}.rmsk.align.bb" & fi wait if [ -s "${asmId}.fa.align.tsv" ]; then gzip "${asmId}.fa.align.tsv" & fi gzip "${asmId}.sorted.fa.join.tsv" & wait rm -fr classBed rmskClass ${asmId}.rmsk.tab bbi/*.rmsk.*.bb \ bbi/*.rmsk.*.bb mkdir classBed rmskClass rm -f ${asmId}.rm.out mkdir -p bbi dateStamp=`date "+%FT%T %s"` printf "# %s processing %s\n" "${dateStamp}" "${rmOutFile}" 1>&2 - export rmOutTmp=`mktemp -p /dev/shm rmskProcess.${asmId}.XXXXX` + export rmOutTmp=`mktemp -p "${TMPDIR}" rmskProcess.${asmId}.XXXXX` printf "%s\n" ' SW perc perc perc query position in query matching repeat position in repeat score div. del. ins. sequence begin end (left) repeat class/family begin end (left) ID ' > "${rmOutTmp}" zcat "${rmOutFile}" | headRest 3 stdin >> "${rmOutTmp}" hgLoadOut -verbose=2 -tabFile=${asmId}.rmsk.tab -table=rmsk -nosplit test "${rmOutTmp}" 2> rmsk.bad.records.txt dateStamp=`date "+%FT%T %s"` if [ -s ${asmId}.rmsk.tab ]; then printf "# %s splitting into categories %s\n" "${dateStamp}" "${asmId}.rmsk.tab" 1>&2 sort -k12,12 ${asmId}.rmsk.tab \ | splitFileByColumn -ending=tab -col=12 -tab stdin rmskClass for T in SINE LINE LTR DNA Simple Low_complexity Satellite do fileCount=`(ls rmskClass/${T}*.tab 2> /dev/null || true) | wc -l` if [ "$fileCount" -gt 0 ]; then dateStamp=`date "+%FT%T %s"`