cb9f070d5e860a9ebadd21e975d7f718b79aec8f
hiram
  Mon Dec 6 14:41:51 2021 -0800
adjusting the cluster used for the simple repeats when there are many small contigs; no redmine

diff --git src/hg/utils/automation/doAssemblyHub.pl src/hg/utils/automation/doAssemblyHub.pl
index 77376e3..c87a945 100755
--- src/hg/utils/automation/doAssemblyHub.pl
+++ src/hg/utils/automation/doAssemblyHub.pl
@@ -1187,42 +1187,56 @@
 _EOF_
   );
   $bossScript->execute();
 } # repeatMasker
 
 #########################################################################
 # * step: simpleRepeat [workhorse]
 sub doSimpleRepeat {
   my $runDir = "$buildDir/trackData/simpleRepeat";
   &HgAutomate::mustMkdir($runDir);
 
   my $whatItDoes = "construct TRF/simpleRepeat track data";
   my $bossScript = newBash HgRemoteScript("$runDir/doSimpleRepeat.bash",
                     $workhorse, $runDir, $whatItDoes);
 
+  my $trfClusterHub = $smallClusterHub;
+
+  my $seqCount = `cat $buildDir/$asmId.chrom.sizes | wc -l`;
+  chomp $seqCount;
+  # check for large seqCount and large genome, then use bigCluster
+  # the 100000 (sequence count) and 200000000 (genome size) are from doSimpleRepeat.pl
+  if ( $seqCount > 100000 ) {
+     my $genomeSize = `ave -col=2 $buildDir/$asmId.chrom.sizes | grep -w total | awk '{printf "%d", \$NF}'`;
+     chomp $genomeSize;
+     if ($genomeSize > 200000000) {
+	$trfClusterHub = $bigClusterHub;
+     }
+  }
+
   $bossScript->add(<<_EOF_
 export asmId=$asmId
 export buildDir=$buildDir
 
 if [ \$buildDir/\$asmId.2bit -nt trfMask.bed.gz ]; then
   doSimpleRepeat.pl -stop=filter -buildDir=`pwd` \\
     -unmaskedSeq=\$buildDir/\$asmId.2bit \\
-      -trf409=6 -dbHost=$dbHost -smallClusterHub=$smallClusterHub \\
+      -trf409=6 -dbHost=$dbHost -smallClusterHub=$trfClusterHub \\
         -workhorse=$workhorse \$asmId
   doSimpleRepeat.pl -buildDir=`pwd` \\
     -continue=cleanup -stop=cleanup -unmaskedSeq=\$buildDir/\$asmId.2bit \\
-      -trf409=6 -dbHost=$dbHost -smallClusterHub=$smallClusterHub \\
+      -trf409=6 -dbHost=$dbHost -smallClusterHub=$trfClusterHub \\
         -workhorse=$workhorse \$asmId
   gzip simpleRepeat.bed trfMask.bed
 fi
 _EOF_
   );
   $bossScript->execute();
 } # simpleRepeat
 
 ##   my $rmskResult = "$buildDir/trackData/repeatMasker/$asmId.rmsk.2bit";
 ##   if (! -s $rmskResult) {
 ##     die "simpleRepeat: previous step repeatMasker has not completed\n" .
 ##       "# not found: $rmskResult\n";
 ##   }
 ##   twoBitMask ../repeatMasker/\$asmId.rmsk.2bit -add trfMask.bed \\
 ##     \$asmId.RM_TRF_masked.2bit