b032eee9e1cec0a1cb46ccff207a7a99b0a94e00 hiram Wed Dec 28 22:17:38 2022 -0800 run repeatModeler in /dev/shm for better performance no redmine diff --git src/hg/utils/automation/doRepeatModeler.pl src/hg/utils/automation/doRepeatModeler.pl index eb7be51..1a86ca7 100755 --- src/hg/utils/automation/doRepeatModeler.pl +++ src/hg/utils/automation/doRepeatModeler.pl @@ -133,38 +133,49 @@ die "Error: required file $unmaskedSeq does not exist."; } my $whatItDoes = "Construct .fa file from unmasked.2bit, then run BuildDatabase from RepeatModeler to prepare rmblastn index files."; my $bossScript = newBash HgRemoteScript("$runDir/blastDb.bash", $workhorse, $runDir, $whatItDoes); $bossScript->add(<<_EOF_ export asmId="${db}" export unmasked2Bit="${unmaskedSeq}" export bDatabase="${BuildDatabase}" +export tmpDir=`mktemp -d -p /dev/shm rModeler.XXXXXX` + +# working directory +cd "\${tmpDir}" + if [ "\${unmasked2Bit}" -nt "\${asmId}.fa" ]; then twoBitToFa "\${unmasked2Bit}" "\${asmId}.fa" touch -r "\${unmasked2Bit}" "\${asmId}.fa" fi if [ "\${asmId}.fa" -nt "\${asmId}.nsq" ]; then time (\$bDatabase -name "\${asmId}" -engine ncbi "\${asmId}.fa") > blastDb.log 2>&1 fi + +cd ${runDir} +rsync -a -P "\${tmpDir}/" ./ +rm -fr "\${tmpDir}/" +chmod 775 ${runDir} + _EOF_ ); $bossScript->execute() if (! $opt_debug); } # sub doBlastDb ######################################################################### # * step: cluster [bigClusterHub] sub doCluster { my $runDir = "$buildDir"; my $paraHub = $bigClusterHub; # First, make sure previous step has completed: if ( ! $opt_debug ) { if ( ! -s "$runDir/$db.nsq" ) { @@ -178,42 +189,54 @@ &HgAutomate::verbose(1, "\ncluster step previously completed\n"); return; } } my $whatItDoes = "runs single cluster job to perform the RepeatModeler process."; my $bossScript = newBash HgRemoteScript("$runDir/doCluster.bash", $paraHub, $runDir, $whatItDoes); $bossScript->add(<<_EOF_ printf '#!/bin/bash set -beEu -o pipefail +export tmpDir=`mktemp -d -p /dev/shm rModeler.XXXXXX` + +# working directory +cd "\${tmpDir}" +rsync -a -P "${runDir}/" "\${tmpDir}/" + export asmId="\${1}" export threadCount="${threadCount}" export rModeler="${RepeatModeler}" time (\$rModeler -engine ncbi \$threadCount -database "\${asmId}") > modeler.log 2>&1 ' > oneJob chmod +x oneJob printf "oneJob ${db} {check out line+ ${db}-rmod.log}\n" > jobList para make $parasolOpts jobList para check para time > run.time cat run.time + +cd ${runDir} +rsync -a -P "\${tmpDir}/" ./ +rm -fr "\${tmpDir}/" +chmod 775 ${runDir} + _EOF_ ); $bossScript->execute() if (! $opt_debug); } # doCluster ######################################################################### # * step: cleanup [workhorse] sub doCleanup { my $runDir = "$buildDir"; # First, make sure previous step has completed: if ( ! $opt_debug ) { if ( -s "$runDir/run.time" && ! -s "$runDir/${db}-families.fa" ) { die "cleanup: previous 'cluster' step appears to be broken, run.time is present but ${db}-families.fa is not present ?"; }