2df91f181b1639297e87adfaee146fedcebd2f4a hiram Mon Oct 21 13:53:37 2024 -0700 now using hgwdev cluster for lastz runs and lastz only needs 3gb memory to run refs #34688 diff --git src/hg/utils/automation/doBlastzChainNet.pl src/hg/utils/automation/doBlastzChainNet.pl index 4ad6ae7..e448b0f 100755 --- src/hg/utils/automation/doBlastzChainNet.pl +++ src/hg/utils/automation/doBlastzChainNet.pl @@ -75,30 +75,32 @@ { name => 'chainMerge', func => \&doChainMerge }, { name => 'net', func => \&netChains }, { name => 'load', func => \&loadUp }, { name => 'download', func => \&doDownloads }, { name => 'cleanup', func => \&cleanup }, { name => 'syntenicNet',func => \&doSyntenicNet } ] ); # Option defaults: # my $bigClusterHub = 'swarm'; my $bigClusterHub = 'ku'; # my $smallClusterHub = 'encodek'; my $smallClusterHub = 'ku'; my $dbHost = 'hgwdev'; +my $ram = '3g'; +my $cpu = 1; my $workhorse = 'hgwdev'; my $defaultChainLinearGap = "loose"; my $defaultChainMinScore = "1000"; # from axtChain itself my $defaultTRepeats = ""; # for netClass option tRepeats my $defaultQRepeats = ""; # for netClass option qRepeats my $defaultSeq1Limit = 30; my $defaultSeq2Limit = 100; sub usage { # Usage / help / self-documentation: my ($status, $detailed) = @_; my $base = $0; $base =~ s/^(.*\/)?//; # Basic help (for incorrect usage): print STDERR " @@ -131,30 +133,32 @@ -loadChainSplit load split chain tables, default is not split tables -swapDir path directory to work in for swap, default: /hive/data/genomes/qDb/bed/blastz.tDb.swap/ -tAsmId assemblyHubId full name for assembly hub as target: e.g.: GCF_007474595.1_mLynCan4_v1.p -qAsmId assemblyHubId full name for assembly hub as target: e.g.: GCF_007474595.1_mLynCan4_v1.p -skipDownload do not construct the downloads directory -trackHub construct big* files for track hub _EOF_ ; print STDERR &HgAutomate::getCommonOptionHelp('dbHost' => $dbHost, 'workhorse' => $workhorse, 'fileServer' => '', 'bigClusterHub' => $bigClusterHub, + 'ram' => $ram, + 'cpu' => $cpu, 'smallClusterHub' => $smallClusterHub); print STDERR " Automates UCSC's blastz/chain/net pipeline: 1. Big cluster run of blastz. 2. Small cluster consolidation of blastz result files. 3. Small cluster chaining run. 4. Sorting and netting of chains on the fileserver (no nets for self-alignments). 5. Generation of liftOver-suitable chains from nets+chains on fileserver (not done for self-alignments). 6. Generation of axtNet and mafNet files on the fileserver (not for self). 7. Addition of gap/repeat info to nets on hgwdev (not for self). 8. Loading of chain and net tables on hgwdev (no nets for self). 9. Setup of download directory on hgwdev. 10.Optional (-syntenicNet flag): Generation of syntenic mafNet files. @@ -632,31 +636,31 @@ } if (! -e "$runDir/$queryList" && ! $opt_debug) { die "doBlastzClusterRun: there's no query list file" . "so start over without the -continue align.\n"; } my $templateCmd = ("$blastzRunUcsc -outFormat psl " . ($isSelf ? '-dropSelf ' : '') . '$(path1) $(path2) ../DEF ' . '{check out exists ' . $outRoot . '/$(file1)/$(file1)_$(file2).psl }'); &HgAutomate::makeGsub($runDir, $templateCmd); `touch "$runDir/para_hub_$paraHub"`; my $whatItDoes = "It sets up and performs the big cluster blastz run."; my $bossScript = new HgRemoteScript("$runDir/doClusterRun.csh", $paraHub, $runDir, $whatItDoes, $DEF); - my $paraRun = &HgAutomate::paraRun(); + my $paraRun = &HgAutomate::paraRun($ram, $cpu); my $gensub2 = &HgAutomate::gensub2(); $bossScript->add(<<_EOF_ $gensub2 $targetList $queryList gsub jobList $paraRun _EOF_ ); $bossScript->execute(); } # sub doBlastzClusterRun {} sub doCatRun { # Do a small cluster run to concatenate the lowest level of chunk result # files from the big cluster blastz run. This brings results up to the # next level: per-target-chunk results, which may still need to be # concatenated into per-target-sequence in the next step after this one -- # chaining. @@ -685,31 +689,31 @@ my $outRoot = $opt_blastzOutRoot ? "$opt_blastzOutRoot/psl" : '../psl'; my $fh = &HgAutomate::mustOpen(">$runDir/cat.csh"); print $fh <<_EOF_ #!/bin/csh -ef find $outRoot/\$1/ -name "*.psl" | xargs cat | gzip -c > \$2 _EOF_ ; close($fh); my $whatItDoes = "It sets up and performs a small cluster run to concatenate all files in each subdirectory of $outRoot into a per-target-chunk file."; my $bossScript = new HgRemoteScript("$runDir/doCatRun.csh", $paraHub, $runDir, $whatItDoes, $DEF); - my $paraRun = &HgAutomate::paraRun(); + my $paraRun = &HgAutomate::paraRun($ram, $cpu); my $gensub2 = &HgAutomate::gensub2(); $bossScript->add(<<_EOF_ (cd $outRoot; find . -maxdepth 1 -type d | grep '^./') \\ | sed -e 's#/\$##; s#^./##' > tParts.lst chmod a+x cat.csh $gensub2 tParts.lst single gsub jobList mkdir -p ../pslParts $paraRun _EOF_ ); $bossScript->execute(); } # sub doCatRun {} sub makePslPartsLst {