2df91f181b1639297e87adfaee146fedcebd2f4a
hiram
  Mon Oct 21 13:53:37 2024 -0700
now using hgwdev cluster for lastz runs and lastz only needs 3gb memory to run refs #34688

diff --git src/hg/utils/automation/doBlastzChainNet.pl src/hg/utils/automation/doBlastzChainNet.pl
index 4ad6ae7..e448b0f 100755
--- src/hg/utils/automation/doBlastzChainNet.pl
+++ src/hg/utils/automation/doBlastzChainNet.pl
@@ -75,30 +75,32 @@
       { name => 'chainMerge', func => \&doChainMerge },
       { name => 'net',        func => \&netChains },
       { name => 'load',       func => \&loadUp },
       { name => 'download',   func => \&doDownloads },
       { name => 'cleanup',    func => \&cleanup },
       { name => 'syntenicNet',func => \&doSyntenicNet }
     ]
 			       );
 
 # Option defaults:
 # my $bigClusterHub = 'swarm';
 my $bigClusterHub = 'ku';
 # my $smallClusterHub = 'encodek';
 my $smallClusterHub = 'ku';
 my $dbHost = 'hgwdev';
+my $ram = '3g';		# passed to HgAutomate::paraRun; presumably per-job memory (lastz needs ~3gb)
+my $cpu = 1;		# passed to HgAutomate::paraRun with $ram; presumably CPUs per job
 my $workhorse = 'hgwdev';
 my $defaultChainLinearGap = "loose";
 my $defaultChainMinScore = "1000";	# from axtChain itself
 my $defaultTRepeats = "";		# for netClass option tRepeats
 my $defaultQRepeats = "";		# for netClass option qRepeats
 my $defaultSeq1Limit = 30;
 my $defaultSeq2Limit = 100;
 
 sub usage {
   # Usage / help / self-documentation:
   my ($status, $detailed) = @_;
   my $base = $0;
   $base =~ s/^(.*\/)?//;
   # Basic help (for incorrect usage):
   print STDERR "
@@ -131,30 +133,32 @@
     -loadChainSplit       load split chain tables, default is not split tables
     -swapDir path         directory to work in for swap, default:
                           /hive/data/genomes/qDb/bed/blastz.tDb.swap/
     -tAsmId assemblyHubId  full name for assembly hub as target:
                           e.g.: GCF_007474595.1_mLynCan4_v1.p
     -qAsmId assemblyHubId  full name for assembly hub as target:
                           e.g.: GCF_007474595.1_mLynCan4_v1.p
     -skipDownload         do not construct the downloads directory
     -trackHub             construct big* files for track hub
 _EOF_
   ;
 print STDERR &HgAutomate::getCommonOptionHelp('dbHost' => $dbHost,
 				      'workhorse' => $workhorse,
 				      'fileServer' => '',
 				      'bigClusterHub' => $bigClusterHub,
+				      'ram' => $ram,
+				      'cpu' => $cpu,
 				      'smallClusterHub' => $smallClusterHub);
 print STDERR "
 Automates UCSC's blastz/chain/net pipeline:
     1. Big cluster run of blastz.
     2. Small cluster consolidation of blastz result files.
     3. Small cluster chaining run.
     4. Sorting and netting of chains on the fileserver
        (no nets for self-alignments).
     5. Generation of liftOver-suitable chains from nets+chains on fileserver
        (not done for self-alignments).
     6. Generation of axtNet and mafNet files on the fileserver (not for self).
     7. Addition of gap/repeat info to nets on hgwdev (not for self).
     8. Loading of chain and net tables on hgwdev (no nets for self).
     9. Setup of download directory on hgwdev.
     10.Optional (-syntenicNet flag): Generation of syntenic mafNet files.
@@ -632,31 +636,31 @@
   }
   if (! -e "$runDir/$queryList" && ! $opt_debug) {
     die "doBlastzClusterRun: there's no query list file" .
         "so start over without the -continue align.\n";
   }
   my $templateCmd = ("$blastzRunUcsc -outFormat psl " .
 		     ($isSelf ? '-dropSelf ' : '') .
 		     '$(path1) $(path2) ../DEF ' .
 		     '{check out exists ' .
 		     $outRoot . '/$(file1)/$(file1)_$(file2).psl }');
   &HgAutomate::makeGsub($runDir, $templateCmd);
   `touch "$runDir/para_hub_$paraHub"`;
   my $whatItDoes = "It sets up and performs the big cluster blastz run.";
   my $bossScript = new HgRemoteScript("$runDir/doClusterRun.csh", $paraHub,
 				      $runDir, $whatItDoes, $DEF);
-  my $paraRun = &HgAutomate::paraRun();
+  my $paraRun = &HgAutomate::paraRun($ram, $cpu);
   my $gensub2 = &HgAutomate::gensub2();
   $bossScript->add(<<_EOF_
 $gensub2 $targetList $queryList gsub jobList
 $paraRun
 _EOF_
     );
   $bossScript->execute();
 }	#	sub doBlastzClusterRun {}
 
 sub doCatRun {
   # Do a small cluster run to concatenate the lowest level of chunk result
   # files from the big cluster blastz run.  This brings results up to the
   # next level: per-target-chunk results, which may still need to be
   # concatenated into per-target-sequence in the next step after this one --
   # chaining.
@@ -685,31 +689,31 @@
   my $outRoot = $opt_blastzOutRoot ? "$opt_blastzOutRoot/psl" : '../psl';
 
   my $fh = &HgAutomate::mustOpen(">$runDir/cat.csh");
   print $fh <<_EOF_
 #!/bin/csh -ef
 find $outRoot/\$1/ -name "*.psl" | xargs cat | gzip -c > \$2
 _EOF_
   ;
   close($fh);
 
   my $whatItDoes =
 "It sets up and performs a small cluster run to concatenate all files in
 each subdirectory of $outRoot into a per-target-chunk file.";
   my $bossScript = new HgRemoteScript("$runDir/doCatRun.csh", $paraHub,
 				      $runDir, $whatItDoes, $DEF);
-  my $paraRun = &HgAutomate::paraRun();
+  my $paraRun = &HgAutomate::paraRun($ram, $cpu);
   my $gensub2 = &HgAutomate::gensub2();
   $bossScript->add(<<_EOF_
 (cd $outRoot; find . -maxdepth 1 -type d | grep '^./') \\
         | sed -e 's#/\$##; s#^./##' > tParts.lst
 chmod a+x cat.csh
 $gensub2 tParts.lst single gsub jobList
 mkdir -p ../pslParts
 $paraRun
 _EOF_
     );
   $bossScript->execute();
 }	#	sub doCatRun {}
 
 
 sub makePslPartsLst {