55a4d2f27b5f2be54928d7cc454b062f67045b21 hiram Thu Dec 15 11:53:15 2022 -0800 correct thru cleanup no redmine diff --git src/hg/utils/automation/doRepeatModeler.pl src/hg/utils/automation/doRepeatModeler.pl index 04a8658..1ac6202 100755 --- src/hg/utils/automation/doRepeatModeler.pl +++ src/hg/utils/automation/doRepeatModeler.pl @@ -49,47 +49,46 @@ $base =~ s/^(.*\/)?//; sub usage { # Usage / help / self-documentation: my ($status, $detailed) = @_; # Basic help (for incorrect usage): print STDERR " usage: $base db options: the db argument is a UCSC database name or the assembly identifier for a GenArk assembly hub build "; print STDERR $stepper->getOptionHelp(); print STDERR <<_EOF_ -buildDir dir Use dir instead of default - $HgAutomate::clusterData/\$db/$HgAutomate::trackBuild/RepeatModeler.\$date + $HgAutomate::clusterData/\$db/$HgAutomate::trackBuild/repeatModeler.\$date (necessary when continuing at a later date). -unmaskedSeq seq.2bit Use seq.2bit as the unmasked input sequence instead of default ($unmaskedSeq). _EOF_ ; print STDERR &HgAutomate::getCommonOptionHelp('dbHost' => $dbHost, 'workhorse' => '', 'bigClusterHub' => ''); print STDERR " Automates the RepeatModeler process for genome assembly \$db. Steps: blastDb: construct fasta file from unmasked.2bit and rmblastn index files. cluster: Parasol cluster run of RepeatModeler. - libResult: Collect the consensus library file from the RepeatModeler output. cleanup: Removes or compresses intermediate files. All operations are performed in the build directory which is -$HgAutomate::clusterData/\$db/$HgAutomate::trackBuild/RepeatModeler.\$date unless -buildDir is given. +$HgAutomate::clusterData/\$db/$HgAutomate::trackBuild/repeatModeler.\$date unless -buildDir is given. Run -help to see what files are required for this script. "; # Detailed help (-help): print STDERR " Assumptions: 1. $HgAutomate::clusterData/\$db/\$db.unmasked.2bit contains sequence for database/assembly \$db. (This can be overridden with -unmaskedSeq.) 2. When complete, the resulting RepeatMasker library file will be in the build directory with the name: asmId-families.fa " if ($detailed); print STDERR "\n"; exit $status; } # Globals: @@ -221,71 +220,76 @@ if ( ! -s "$runDir/${db}-families.fa" ) { die "cleanup previous 'libResult' step has not completed, ${db}-families.fa not present\n"; } # And, verify this step has not run before if ( ! -s "$runDir/${db}.fa" ) { &HgAutomate::verbose(1, "\ncleanup step previously completed\n"); return; } } my $whatItDoes = "Cleans up or compresses intermediate files."; my $bossScript = newBash HgRemoteScript("$runDir/doCleanup.bash", $workhorse, $runDir, $whatItDoes); $bossScript->add(<<_EOF_ export asmId="${db}" -rm -f \${asmId}.fa -rm -f \${asmId}.n?? -gzip -c \${asmId}.stk +if [ ! -s "\${asmId}-families.fa" ]; then + printf "cleanup expected result file: \${asmId}-families.fa does not exist\n" 1>&2 + exit 255 +fi +rm -fr \${asmId}.fa \${asmId}.n?? ./err/ +if [ -s "\${asmId}-families.stk" ]; then + gzip \${asmId}-families.stk +fi c=`ls -d RM_* | wc -l` if [ "\${c}" -eq 1 ]; then RM_dir=`ls -d RM_*` if [ -d "\${RM_dir}" ]; then rm -fr "\${RM_dir}" else - printf "directory RM_* not found ?\n" 1>&2 + printf "directory RM_* not found ?\\n" 1>&2 ls -d RM* 1>&2 exit 255 fi else - printf "single directory RM_* not found ?\n" 1>&2 + printf "single directory RM_* not found ?\\n" 1>&2 ls -d RM* 1>&2 exit 255 fi _EOF_ ); $bossScript->execute() if (! $opt_debug); } # doCleanup ######################################################################### # main # Prevent "Suspended (tty input)" hanging: &HgAutomate::closeStdin(); # Make sure we have valid options and exactly 1 argument: &checkOptions(); &usage(1) if (scalar(@ARGV) != 1); $secondsStart = `date "+%s"`; chomp $secondsStart; ($db) = @ARGV; # Now that we know the $db, figure out our paths: my $date = `date +%Y-%m-%d`; chomp $date; $buildDir = $opt_buildDir ? $opt_buildDir : - "$HgAutomate::clusterData/$db/$HgAutomate::trackBuild/RepeatModeler.$date"; + "$HgAutomate::clusterData/$db/$HgAutomate::trackBuild/repeatModeler.$date"; $unmaskedSeq = $opt_unmaskedSeq ? $opt_unmaskedSeq : "$HgAutomate::clusterData/$db/$db.unmasked.2bit"; # Do everything. $stepper->execute(); # Tell the user anything they should know. my $stopStep = $stepper->getStopStep(); my $upThrough = ($stopStep eq 'cleanup') ? "" : " (through the '$stopStep' step)"; $secondsEnd = `date "+%s"`; chomp $secondsEnd; my $elapsedSeconds = $secondsEnd - $secondsStart; my $elapsedMinutes = int($elapsedSeconds/60);