167a944687bd484f0dc6c8821749c08053a2a0a5 hiram Sun Aug 13 18:47:43 2023 -0700 add -ram argument to doAugustus to allow smaller ram size on the paraRun, this should be an argument to the paraRun setup no redmine diff --git src/hg/utils/automation/doAugustus.pl src/hg/utils/automation/doAugustus.pl index 6534100..b410ff0 100755 --- src/hg/utils/automation/doAugustus.pl +++ src/hg/utils/automation/doAugustus.pl @@ -8,76 +8,79 @@ use strict; use FindBin qw($Bin); use lib "$Bin"; use HgAutomate; use HgRemoteScript; use HgStepManager; # Option variable names, both common and peculiar to this script: use vars @HgAutomate::commonOptionVars; use vars @HgStepManager::optionVars; use vars qw/ $opt_buildDir $opt_maskedSeq $opt_species $opt_utr + $opt_ram $opt_noDbGenePredCheck /; # Specify the steps supported with -continue / -stop: my $stepper = new HgStepManager( [ { name => 'partition', func => \&doPartition }, { name => 'augustus', func => \&doAugustus }, { name => 'makeGp', func => \&doMakeGp }, { name => 'load', func => \&doLoadAugustus }, { name => 'cleanup', func => \&doCleanup }, ] ); # Option defaults: # my $bigClusterHub = 'swarm'; my $bigClusterHub = 'ku'; my $workhorse = 'hgwdev'; my $dbHost = 'hgwdev'; my $defaultWorkhorse = 'hgwdev'; my $maskedSeq = "$HgAutomate::clusterData/\$db/\$db.2bit"; my $utr = "off"; +my $ramG = "-ram=8g"; my $noDbGenePredCheck = 1; # default yes, use -db for genePredCheck my $species = "human"; my $augustusDir = "/hive/data/outside/augustus/augustus-3.3.1"; my $augustusConfig="$augustusDir/config"; my $base = $0; $base =~ s/^(.*\/)?//; sub usage { # Usage / help / self-documentation: my ($status, $detailed) = @_; # Basic help (for incorrect usage): print STDERR " usage: $base db options: "; print STDERR $stepper->getOptionHelp(); print STDERR <<_EOF_ -buildDir dir Use dir instead of default $HgAutomate::clusterData/\$db/$HgAutomate::trackBuild/augustus (necessary when continuing at a later date). 
-maskedSeq seq.2bit Use seq.2bit as the masked input sequence instead of default ($maskedSeq). -utr Obsolete, now is automatic (was: Use augustus arg: --UTR=on, default is --UTR=off) -noDbGenePredCheck do not use -db= on genePredCheck, there is no real db + -ram Ng set -ram=Ng argument to para create command (default 8g) -species <name> name from list: human chicken zebrafish, default: human _EOF_ ; print STDERR &HgAutomate::getCommonOptionHelp('dbHost' => $dbHost, 'bigClusterHub' => $bigClusterHub, 'workhorse' => $defaultWorkhorse); print STDERR " Automates UCSC's Augustus track construction for the database \$db. Steps: partition: Creates hard-masked fastas needed for the CpG Island program. augustus: Run gsBig on the hard-masked fastas makeGp: Transform output from gsBig into augustus.gtf augustus.pep and load: Load augustus.gtf and into \$db. cleanup: Removes hard-masked fastas and output from gsBig. All operations are performed in the build directory which is $HgAutomate::clusterData/\$db/$HgAutomate::trackBuild/augustus unless -buildDir is given. 
@@ -236,49 +239,51 @@ --predictionStart=\$start --predictionEnd=\$end \\ \$fasta --outfile=\$gtfFile --errfile=\$errFile:t endif gzip \$gtfFile popd mv \$tmpDir/\$gtfFile.gz \$resultGz mv \$tmpDir/\$errFile:t \$errFile rm -fr \$tmpDir _EOF_ ); $whatItDoes = "Run augustus on chunked fasta sequences."; $bossScript = newBash HgRemoteScript("$runDir/runAugustus.bash", $paraHub, $runDir, $whatItDoes); - my $paraRun = &HgAutomate::paraRun(); $bossScript->add(<<_EOF_ (grep -v partBundles ../partition/part.list || /bin/true) | while read twoBit do chr=`echo \$twoBit | sed -e 's/.*2bit://;' | awk -F':' '{print \$1}'` chrStart=`echo \$twoBit | sed -e 's/.*2bit://;' | awk -F':' '{print \$2}' | sed -e 's/-.*//;'` chrEnd=`echo \$twoBit | sed -e 's/.*2bit://;' | awk -F':' '{print \$2}' | sed -e 's/.*-//;'` echo "runOne \$chrStart \$chrEnd {check in exists+ $buildDir/fasta/\${chr}.fa} {check out exists+ gtf/\$chr/\$chr.\$chrStart.\$chrEnd.gtf.gz}" done > jobList (grep partBundles ../partition/part.list || /bin/true) | while read bundleName do B=`basename \$bundleName | sed -e 's/.lst//;'` echo "runOne 0 0 {check in exists+ $buildDir/fasta/\${B}.fa} {check out exists+ gtf/\${B}/\${B}.0.0.gtf.gz}" done >> jobList chmod +x runOne -$paraRun +/parasol/bin/para $ramG make jobList +/parasol/bin/para check +/parasol/bin/para time > run.time +cat run.time _EOF_ ); $bossScript->execute(); } # doAugustus ######################################################################### # * step: make gp [workhorse] sub doMakeGp { my $runDir = $buildDir; &HgAutomate::mustMkdir($runDir); # First, make sure we're starting clean. if (! -e "$runDir/run.augustus/run.time") { die "doMakeGp: the previous step augustus did not complete \n" . "successfully ($buildDir/run.augustus/run.time does not exist).\nPlease " . 
# (from diff hunk @@ -397,30 +402,31 @@)
#########################################################################
# main: validate the command line, resolve every option to its effective
# value, then hand control to the step manager.

# Prevent "Suspended (tty input)" hanging:
&HgAutomate::closeStdin();

# Make sure we have valid options and exactly 1 argument:
&checkOptions();
&usage(1) if (scalar(@ARGV) != 1);

# Record the start time as epoch seconds, taken from date(1).
$secondsStart = `date "+%s"`;
chomp $secondsStart;

# The single positional argument is the database name.
($db) = @ARGV;

# Force debug and verbose until this is looking pretty solid:
#$opt_debug = 1;
#$opt_verbose = 3 if ($opt_verbose < 3);

# -noDbGenePredCheck turns the flag off; otherwise the default is kept.
$noDbGenePredCheck = $opt_noDbGenePredCheck ? 0 : $noDbGenePredCheck;

# $ramG holds the complete "-ram=Ng" argument that is placed on the para
# command line.  Use the value given with -ram when there is one, otherwise
# keep the built-in default.  (The branches were previously reversed: a
# supplied -ram value was ignored, and omitting -ram produced an empty
# "-ram=" argument.)
# NOTE(review): assumes checkOptions() registers a string-valued 'ram'
# option so that $opt_ram is populated -- not visible in this hunk, confirm.
$ramG = $opt_ram ? "-ram=$opt_ram" : $ramG;

# Establish what directory we will work in.
$buildDir = $opt_buildDir ? $opt_buildDir :
  "$HgAutomate::clusterData/$db/$HgAutomate::trackBuild/augustus";
$maskedSeq = $opt_maskedSeq ? $opt_maskedSeq :
  "$HgAutomate::clusterData/$db/$db.2bit";
$species = $opt_species ? $opt_species : $species;

# UTR mode is automatic: it is "on" only when a non-empty UTR parameter
# file exists for the selected species in the augustus config directory.
if ( -s "${augustusConfig}/species/$species/${species}_utr_probs.pbl" ) {
  $utr = "on";
} else {
  $utr = "off";
}

# Do everything.
$stepper->execute();