cb9f070d5e860a9ebadd21e975d7f718b79aec8f hiram Mon Dec 6 14:41:51 2021 -0800 adjusting the cluster for the simple repeats when many small contigs no redmine diff --git src/hg/utils/automation/doAssemblyHub.pl src/hg/utils/automation/doAssemblyHub.pl index 77376e3..c87a945 100755 --- src/hg/utils/automation/doAssemblyHub.pl +++ src/hg/utils/automation/doAssemblyHub.pl @@ -1187,42 +1187,56 @@ _EOF_ ); $bossScript->execute(); } # repeatMasker ######################################################################### # * step: simpleRepeat [workhorse] sub doSimpleRepeat { my $runDir = "$buildDir/trackData/simpleRepeat"; &HgAutomate::mustMkdir($runDir); my $whatItDoes = "construct TRF/simpleRepeat track data"; my $bossScript = newBash HgRemoteScript("$runDir/doSimpleRepeat.bash", $workhorse, $runDir, $whatItDoes); + my $trfClusterHub = $smallClusterHub; + + my $seqCount = `cat $buildDir/$asmId.chrom.sizes | wc -l`; + chomp $seqCount; + # check for large seqCount and large genome, then use bigCluster + # the 100000 and 200000000 are from doSimpleRepeat.pl + if ( $seqCount > 100000 ) { + my $genomeSize = `ave -col=2 $buildDir/$asmId.chrom.sizes | grep -w total | awk '{printf "%d", \$NF}'`; + chomp $genomeSize; + if ($genomeSize > 200000000) { + $trfClusterHub = $bigClusterHub; + } + } + $bossScript->add(<<_EOF_ export asmId=$asmId export buildDir=$buildDir if [ \$buildDir/\$asmId.2bit -nt trfMask.bed.gz ]; then doSimpleRepeat.pl -stop=filter -buildDir=`pwd` \\ -unmaskedSeq=\$buildDir/\$asmId.2bit \\ - -trf409=6 -dbHost=$dbHost -smallClusterHub=$smallClusterHub \\ + -trf409=6 -dbHost=$dbHost -smallClusterHub=$trfClusterHub \\ -workhorse=$workhorse \$asmId doSimpleRepeat.pl -buildDir=`pwd` \\ -continue=cleanup -stop=cleanup -unmaskedSeq=\$buildDir/\$asmId.2bit \\ - -trf409=6 -dbHost=$dbHost -smallClusterHub=$smallClusterHub \\ + -trf409=6 -dbHost=$dbHost -smallClusterHub=$trfClusterHub \\ -workhorse=$workhorse \$asmId gzip simpleRepeat.bed trfMask.bed fi _EOF_ ); 
$bossScript->execute(); } # simpleRepeat ## my $rmskResult = "$buildDir/trackData/repeatMasker/$asmId.rmsk.2bit"; ## if (! -s $rmskResult) { ## die "simpleRepeat: previous step repeatMasker has not completed\n" . ## "# not found: $rmskResult\n"; ## } ## twoBitMask ../repeatMasker/\$asmId.rmsk.2bit -add trfMask.bed \\ ## \$asmId.RM_TRF_masked.2bit