55a4d2f27b5f2be54928d7cc454b062f67045b21
hiram
  Thu Dec 15 11:53:15 2022 -0800
correct thru cleanup no redmine

diff --git src/hg/utils/automation/doRepeatModeler.pl src/hg/utils/automation/doRepeatModeler.pl
index 04a8658..1ac6202 100755
--- src/hg/utils/automation/doRepeatModeler.pl
+++ src/hg/utils/automation/doRepeatModeler.pl
@@ -49,47 +49,46 @@
 $base =~ s/^(.*\/)?//;
 
 sub usage {
   # Usage / help / self-documentation:
   my ($status, $detailed) = @_;
   # Basic help (for incorrect usage):
   print STDERR "
 usage: $base db
 options:
     the db argument is a UCSC database name or the assembly identifier
     for a GenArk assembly hub build
 ";
   print STDERR $stepper->getOptionHelp();
   print STDERR <<_EOF_
     -buildDir dir         Use dir instead of default
-                          $HgAutomate::clusterData/\$db/$HgAutomate::trackBuild/RepeatModeler.\$date
+                          $HgAutomate::clusterData/\$db/$HgAutomate::trackBuild/repeatModeler.\$date
                           (necessary when continuing at a later date).
     -unmaskedSeq seq.2bit Use seq.2bit as the unmasked input sequence instead
                           of default ($unmaskedSeq).
 _EOF_
   ;
   print STDERR &HgAutomate::getCommonOptionHelp('dbHost' => $dbHost,
 						'workhorse' => '',
 						'bigClusterHub' => '');
   print STDERR "
 Automates the RepeatModeler process for genome assembly \$db.  Steps:
     blastDb: construct fasta file from unmasked.2bit and rmblastn index files.
     cluster: Parasol cluster run of RepeatModeler.
-    libResult: Collect the consensus library file from the RepeatModeler output.
     cleanup: Removes or compresses intermediate files.
 All operations are performed in the build directory which is
-$HgAutomate::clusterData/\$db/$HgAutomate::trackBuild/RepeatModeler.\$date unless -buildDir is given.
+$HgAutomate::clusterData/\$db/$HgAutomate::trackBuild/repeatModeler.\$date unless -buildDir is given.
 Run -help to see what files are required for this script.
 ";
   # Detailed help (-help):
   print STDERR "
 Assumptions:
 1. $HgAutomate::clusterData/\$db/\$db.unmasked.2bit contains sequence for
    database/assembly \$db.  (This can be overridden with -unmaskedSeq.)
 2. When complete, the resulting RepeatMasker library file will be in the build
    directory with the name: asmId-families.fa
 " if ($detailed);
   print STDERR "\n";
   exit $status;
 }
 
 # Globals:
@@ -221,71 +220,76 @@
     if ( ! -s "$runDir/${db}-families.fa" ) {
       die "cleanup previous 'libResult' step has not completed, ${db}-families.fa not present\n";
     }
     # And, verify this step has not run before
     if ( ! -s "$runDir/${db}.fa" ) {
          &HgAutomate::verbose(1, "\ncleanup step previously completed\n");
          return;
     }
   }
   my $whatItDoes = "Cleans up or compresses intermediate files.";
   my $bossScript = newBash HgRemoteScript("$runDir/doCleanup.bash", $workhorse,
 				      $runDir, $whatItDoes);
   $bossScript->add(<<_EOF_
 export asmId="${db}"
 
-rm -f \${asmId}.fa
-rm -f \${asmId}.n??
-gzip -c \${asmId}.stk
+if [ ! -s "\${asmId}-families.fa" ]; then
+  printf "cleanup expected result file: \${asmId}-families.fa does not exist\\n" 1>&2
+  exit 255
+fi
+rm -fr \${asmId}.fa \${asmId}.n?? ./err/
+if [ -s "\${asmId}-families.stk" ]; then
+  gzip \${asmId}-families.stk
+fi
 c=`ls -d RM_* | wc -l`
 if [ "\${c}" -eq 1 ]; then
    RM_dir=`ls -d RM_*`
    if [ -d "\${RM_dir}" ]; then
      rm -fr "\${RM_dir}"
    else
-     printf "directory RM_* not found ?\n" 1>&2
+     printf "directory RM_* not found ?\\n" 1>&2
      ls -d RM* 1>&2
      exit 255
    fi
 else
-   printf "single directory RM_* not found ?\n" 1>&2
+   printf "single directory RM_* not found ?\\n" 1>&2
    ls -d RM* 1>&2
    exit 255
 fi
 _EOF_
   );
   $bossScript->execute() if (! $opt_debug);
 } # doCleanup
 
 #########################################################################
 # main
 
 # Prevent "Suspended (tty input)" hanging:
 &HgAutomate::closeStdin();
 
 # Make sure we have valid options and exactly 1 argument:
 &checkOptions();
 &usage(1) if (scalar(@ARGV) != 1);
 $secondsStart = `date "+%s"`;
 chomp $secondsStart;
 ($db) = @ARGV;
 
 # Now that we know the $db, figure out our paths:
 my $date = `date +%Y-%m-%d`;
 chomp $date;
 $buildDir = $opt_buildDir ? $opt_buildDir :
-  "$HgAutomate::clusterData/$db/$HgAutomate::trackBuild/RepeatModeler.$date";
+  "$HgAutomate::clusterData/$db/$HgAutomate::trackBuild/repeatModeler.$date";
 $unmaskedSeq = $opt_unmaskedSeq ? $opt_unmaskedSeq :
   "$HgAutomate::clusterData/$db/$db.unmasked.2bit";
 
 # Do everything.
 $stepper->execute();
 
 # Tell the user anything they should know.
 my $stopStep = $stepper->getStopStep();
 my $upThrough = ($stopStep eq 'cleanup') ? "" :
   "  (through the '$stopStep' step)";
 
 $secondsEnd = `date "+%s"`;
 chomp $secondsEnd;
 my $elapsedSeconds = $secondsEnd - $secondsStart;
 my $elapsedMinutes = int($elapsedSeconds/60);