5bd057d435611f90f3767e1c5bc9a73a56b62e8b
hiram
  Mon Oct 28 21:11:14 2024 -0700
Add ram and cpu arguments, since this process can work in 4 GB of RAM with 1 CPU. refs #34685

diff --git src/hg/utils/automation/doSimpleRepeat.pl src/hg/utils/automation/doSimpleRepeat.pl
index bc4cdca..8e82288 100755
--- src/hg/utils/automation/doSimpleRepeat.pl
+++ src/hg/utils/automation/doSimpleRepeat.pl
@@ -30,59 +30,63 @@
     /;
 
 # Specify the steps supported with -continue / -stop:
 my $stepper = new HgStepManager(
     [ { name => 'trf',     func => \&doTrf },
       { name => 'filter',  func => \&doFilter },
       { name => 'load',    func => \&doLoad },
       { name => 'cleanup', func => \&doCleanup },
     ]
 				);
 
 # Option defaults:
 my $defaultSmallClusterHub = 'most available';
 my $defaultWorkhorse = 'least loaded';
 my $dbHost = 'hgwdev';
+my $ram = '4g';
+my $cpu = 1;
 my $unmaskedSeq = "\$db.unmasked.2bit";
 my $trf409 = "";
 
 my $base = $0;
 $base =~ s/^(.*\/)?//;
 
 sub usage {
   # Usage / help / self-documentation:
   my ($status, $detailed) = @_;
   # Basic help (for incorrect usage):
   print STDERR "
 usage: $base db
 options:
 ";
   print STDERR $stepper->getOptionHelp();
   print STDERR <<_EOF_
     -buildDir dir         Use dir instead of default
                           $HgAutomate::clusterData/\$db/$HgAutomate::trackBuild/simpleRepeat.\$date
                           (necessary when continuing at a later date).
     -unmaskedSeq seq.2bit Use seq.2bit as the unmasked input sequence instead
                           of default ($unmaskedSeq).
     -trf409 n             use new -l option to trf v4.09 (l=n)
                           maximum TR length expected (in millions)
                           (eg, -l=3 for 3 million)
                           Human genome hg38 uses: -trf409=6 -> -l=6
 _EOF_
   ;
   print STDERR &HgAutomate::getCommonOptionHelp('dbHost' => $dbHost,
 						'workhorse' => '',
+						'ram' => $ram,
+						'cpu' => $cpu,
 						'smallClusterHub' => '');
   my ($sizeM, $chunkM) = ($singleRunSize, $chunkSize);
   $sizeM =~ s/000000$/Mb/;  $chunkM =~ s/000000$/Mb/;
   print STDERR "
 Automates UCSC's simpleRepeat (TRF) process for genome database \$db.  Steps:
     trf:     If total genome size is <= $sizeM, run trfBig on a workhorse;
              otherwise do a cluster run of trfBig on $chunkM sequence chunks.
     filter:  If a cluster run was performed, concatenate the results into
              simpleRepeat.bed.  Filter simpleRepeat.bed (period <= 12) to
              trfMask.bed.  If \$db is chrom-based, split trfMaskBed into
              trfMaskChrom/chr*.bed for downloads.
     load:    Load simpleRepeat.bed into the simpleRepeat table in \$db.
     cleanup: Removes or compresses intermediate files.
 All operations are performed in the build directory which is
 $HgAutomate::clusterData/\$db/$HgAutomate::trackBuild/simpleRepeat.\$date unless -buildDir is given.
@@ -238,45 +242,45 @@
   $chunkM =~ s/000000$/Mb/;
   my $whatItDoes =
 "It computes a logical partition of unmasked 2bit into $chunkM chunks
 and runs it on the cluster with the most available bandwidth.";
   my $bossScript = new HgRemoteScript("$runDir/doTrf.csh", $paraHub,
 				      $runDir, $whatItDoes);
 
   if ( ! $opt_unmaskedSeq && ! $inHive) {
     $bossScript->add(<<_EOF_
 mkdir -p $clusterSeqDir
 rsync -av $unmaskedSeq $clusterSeq
 _EOF_
     );
   }
 
-  my $paraRun = &HgAutomate::paraRun();
+  my $paraRun = &HgAutomate::paraRun($ram, $cpu);
   my $gensub2 = &HgAutomate::gensub2();
   if ($opt_unmaskedSeq) {
     $bossScript->add(<<_EOF_
 chmod a+x TrfRun.csh
 
 rm -rf $partDir
 $Bin/simplePartition.pl $clusterSeq $chunkSize $partDir
 
 $gensub2 $partDir/partitions.lst single gsub jobList
 $paraRun
 _EOF_
     );
   } else {
-  my $paraRun = &HgAutomate::paraRun();
+  my $paraRun = &HgAutomate::paraRun($ram, $cpu);
   my $gensub2 = &HgAutomate::gensub2();
   $bossScript->add(<<_EOF_
 chmod a+x TrfRun.csh
 
 rm -rf $partDir
 $Bin/simplePartition.pl $clusterSeq $chunkSize $partDir
 rm -f $buildDir/TrfPart
 ln -s $partDir $buildDir/TrfPart
 
 $gensub2 $partDir/partitions.lst single gsub jobList
 $paraRun
 _EOF_
   );
   }