5bd057d435611f90f3767e1c5bc9a73a56b62e8b hiram Mon Oct 28 21:11:14 2024 -0700 adding ram and cpu arguments since this process can work in 4gb with 1cpu refs #34685 diff --git src/hg/utils/automation/doSimpleRepeat.pl src/hg/utils/automation/doSimpleRepeat.pl index bc4cdca..8e82288 100755 --- src/hg/utils/automation/doSimpleRepeat.pl +++ src/hg/utils/automation/doSimpleRepeat.pl @@ -30,59 +30,63 @@ /; # Specify the steps supported with -continue / -stop: my $stepper = new HgStepManager( [ { name => 'trf', func => \&doTrf }, { name => 'filter', func => \&doFilter }, { name => 'load', func => \&doLoad }, { name => 'cleanup', func => \&doCleanup }, ] ); # Option defaults: my $defaultSmallClusterHub = 'most available'; my $defaultWorkhorse = 'least loaded'; my $dbHost = 'hgwdev'; +my $ram = '4g'; +my $cpu = 1; my $unmaskedSeq = "\$db.unmasked.2bit"; my $trf409 = ""; my $base = $0; $base =~ s/^(.*\/)?//; sub usage { # Usage / help / self-documentation: my ($status, $detailed) = @_; # Basic help (for incorrect usage): print STDERR " usage: $base db options: "; print STDERR $stepper->getOptionHelp(); print STDERR <<_EOF_ -buildDir dir Use dir instead of default $HgAutomate::clusterData/\$db/$HgAutomate::trackBuild/simpleRepeat.\$date (necessary when continuing at a later date). -unmaskedSeq seq.2bit Use seq.2bit as the unmasked input sequence instead of default ($unmaskedSeq). -trf409 n use new -l option to trf v4.09 (l=n) maximum TR length expected (in millions) (eg, -l=3 for 3 million) Human genome hg38 uses: -trf409=6 -> -l=6 _EOF_ ; print STDERR &HgAutomate::getCommonOptionHelp('dbHost' => $dbHost, 'workhorse' => '', + 'ram' => $ram, + 'cpu' => $cpu, 'smallClusterHub' => ''); my ($sizeM, $chunkM) = ($singleRunSize, $chunkSize); $sizeM =~ s/000000$/Mb/; $chunkM =~ s/000000$/Mb/; print STDERR " Automates UCSC's simpleRepeat (TRF) process for genome database \$db. Steps: trf: If total genome size is <= $sizeM, run trfBig on a workhorse; otherwise do a cluster run of trfBig on $chunkM sequence chunks. filter: If a cluster run was performed, concatenate the results into simpleRepeat.bed. Filter simpleRepeat.bed (period <= 12) to trfMask.bed. If \$db is chrom-based, split trfMaskBed into trfMaskChrom/chr*.bed for downloads. load: Load simpleRepeat.bed into the simpleRepeat table in \$db. cleanup: Removes or compresses intermediate files. All operations are performed in the build directory which is $HgAutomate::clusterData/\$db/$HgAutomate::trackBuild/simpleRepeat.\$date unless -buildDir is given. @@ -238,45 +242,45 @@ $chunkM =~ s/000000$/Mb/; my $whatItDoes = "It computes a logical partition of unmasked 2bit into $chunkM chunks and runs it on the cluster with the most available bandwidth."; my $bossScript = new HgRemoteScript("$runDir/doTrf.csh", $paraHub, $runDir, $whatItDoes); if ( ! $opt_unmaskedSeq && ! $inHive) { $bossScript->add(<<_EOF_ mkdir -p $clusterSeqDir rsync -av $unmaskedSeq $clusterSeq _EOF_ ); } - my $paraRun = &HgAutomate::paraRun(); + my $paraRun = &HgAutomate::paraRun($ram, $cpu); my $gensub2 = &HgAutomate::gensub2(); if ($opt_unmaskedSeq) { $bossScript->add(<<_EOF_ chmod a+x TrfRun.csh rm -rf $partDir $Bin/simplePartition.pl $clusterSeq $chunkSize $partDir $gensub2 $partDir/partitions.lst single gsub jobList $paraRun _EOF_ ); } else { - my $paraRun = &HgAutomate::paraRun(); + my $paraRun = &HgAutomate::paraRun($ram, $cpu); my $gensub2 = &HgAutomate::gensub2(); $bossScript->add(<<_EOF_ chmod a+x TrfRun.csh rm -rf $partDir $Bin/simplePartition.pl $clusterSeq $chunkSize $partDir rm -f $buildDir/TrfPart ln -s $partDir $buildDir/TrfPart $gensub2 $partDir/partitions.lst single gsub jobList $paraRun _EOF_ ); }