5f33fc0ebd5b0d47a27fb88cb78eff9aa0278e25 hiram Thu Aug 24 13:52:09 2023 -0700 add -ram argument to allow smaller ram size on the paraRun, this should be an argument to the paraRun setup no redmine diff --git src/hg/utils/automation/doXenoRefGene.pl src/hg/utils/automation/doXenoRefGene.pl index ac82f68..5668662 100755 --- src/hg/utils/automation/doXenoRefGene.pl +++ src/hg/utils/automation/doXenoRefGene.pl @@ -24,30 +24,32 @@ # Specify the steps supported with -continue / -stop: my $stepper = new HgStepManager( [ { name => 'splitTarget', func => \&doSplitTarget }, { name => 'blatRun', func => \&doBlatRun }, { name => 'filterPsl', func => \&doFilterPsl }, { name => 'makeGp', func => \&doMakeGp }, { name => 'cleanup', func => \&doCleanup }, ] ); # Option defaults: my $bigClusterHub = 'ku'; my $workhorse = 'hgwdev'; my $dbHost = 'hgwdev'; +my $ram = '4g'; +my $cpu = 1; my $defaultWorkhorse = 'hgwdev'; my $maskedSeq = "$HgAutomate::clusterData/\$db/\$db.2bit"; my $mrnas = "/hive/data/genomes/asmHubs/xenoRefSeq"; my $noDbGenePredCheck = 1; # default yes, use -db for genePredCheck my $base = $0; $base =~ s/^(.*\/)?//; sub usage { # Usage / help / self-documentation: my ($status, $detailed) = @_; # Basic help (for incorrect usage): print STDERR " usage: $base db options: @@ -56,30 +58,32 @@ print STDERR <<_EOF_ -buildDir dir Use dir instead of default $HgAutomate::clusterData/\$db/$HgAutomate::trackBuild/xenoRefGene (necessary when continuing at a later date). -maskedSeq seq.2bit Use seq.2bit as the masked input sequence instead of default ($maskedSeq). -mrnas </path/to/xenoRefSeqMrna> - location of xenoRefMrna.fa.gz expanded directory of mrnas/ and xenoRefMrna.sizes, default $mrnas -noDbGenePredCheck do not use -db= on genePredCheck, there is no real db _EOF_ ; print STDERR &HgAutomate::getCommonOptionHelp('dbHost' => $dbHost, 'bigClusterHub' => $bigClusterHub, + 'ram' => $ram, + 'cpu' => $cpu, 'workhorse' => $defaultWorkhorse); print STDERR " Automates construction of a xeno RefSeq gene track from RefSeq mRNAs. Steps: splitTarget split the masked target sequence into individual fasta sequences blatRun: Run blat with the xenoRefSeq mRNAs query to target sequence filterPsl: Run pslCDnaFilter on the blat psl results makeGp: Transform the filtered PSL into a genePred file and create bigGenePred from the genePred file cleanup: Removes hard-masked fastas and output from gsBig. All operations are performed in the build directory which is $HgAutomate::clusterData/\$db/$HgAutomate::trackBuild/xenoRefGene unless -buildDir is given. "; # Detailed help (-help): print STDERR " Assumptions: @@ -208,31 +212,31 @@ export queryDir="$mrnas/mrnas"; export target=\$1 export query=\$2 export result=\$3 mkdir -p `dirname \$result` blat -noHead -q=rnax -t=dnax -mask=lower ../target/\$target.fa.gz \$queryDir/\$query.fa.gz \$result _EOF_ ); $whatItDoes = "Operate the blat run of the mRNAs query sequence to the target split sequence."; $bossScript = newBash HgRemoteScript("$runDir/runBlat.bash", $paraHub, $runDir, $whatItDoes); - my $paraRun = &HgAutomate::paraRun(); + my $paraRun = &HgAutomate::paraRun($ram, $cpu); $bossScript->add(<<_EOF_ chmod +x blatOne gensub2 ../target.list $mrnas/query.list gsub jobList $paraRun _EOF_ ); $bossScript->execute(); } # doBlatRun ######################################################################### # * step: filterPsl [workhorse] sub doFilterPsl { my $runDir = $buildDir;