7c88fd330d657bfc6007f75c75545ba63e94d87d
hiram
  Thu Sep 11 15:17:40 2025 -0700
adding -uncurated to the version 4.2.1 operation

diff --git src/hg/utils/automation/doRepeatMasker.pl src/hg/utils/automation/doRepeatMasker.pl
index 17dd0f2663b..0126f268c50 100755
--- src/hg/utils/automation/doRepeatMasker.pl
+++ src/hg/utils/automation/doRepeatMasker.pl
@@ -8,31 +8,33 @@
 use Getopt::Long;
 use warnings;
 use strict;
 use Carp;
 use FindBin qw($Bin);
 use lib "$Bin";
 use HgAutomate;
 use HgRemoteScript;
 use HgStepManager;
 
 # Hardcoded command path:
 my $RepeatMaskerPath = "/hive/data/outside/RepeatMasker/RepeatMasker-4.2.1";
 my $RepeatMasker = "$RepeatMaskerPath/RepeatMasker";
 # default engine changed from crossmatch to rmblast as of 2022-12
 # with RM version 4.1.4
-my $RepeatMaskerEngine = "-engine rmblast -pa 1";
+# RepeatMasker 4.2.1 changed the way the libraries are used; the -uncurated
+#   option is now needed to get behavior similar to earlier versions
+my $RepeatMaskerEngine = "-uncurated -engine rmblast -pa 1";
 # per RM doc, rmblast uses 4 CPUs for each job
 my $parasolRAM = "-cpu=4 -ram=32g";
 
 # Option variable names, both common and peculiar to this script:
 use vars @HgAutomate::commonOptionVars;
 use vars @HgStepManager::optionVars;
 use vars qw/
     $opt_buildDir
     $opt_ncbiRmsk
     $opt_dupList
     $opt_liftSpec
     $opt_species
     $opt_unmaskedSeq
     $opt_customLib
     $opt_useHMMER
@@ -197,38 +199,38 @@
   my $species = $opt_species ? $opt_species : &HgAutomate::getSpecies($dbHost, $db);
   my $customLib = $opt_customLib;
   my $repeatLib = "";
   if ($opt_customLib && $opt_species) {
      $repeatLib = "-species \'$species\' -lib $customLib";
   }
   elsif ($opt_customLib) {
      $repeatLib = "-lib $customLib";
   }
   else {
      $repeatLib = "-species \'$species\'";
   }
 
   # updated for ku kluster operation -cpu option instead of ram option
   if ( $opt_useRMBlastn ) {
-    $RepeatMaskerEngine = "-engine rmblast -pa 1";
+    $RepeatMaskerEngine = "-uncurated -engine rmblast -pa 1";
     $parasolRAM = "-cpu=4 -ram=32g";
   } elsif ( $opt_useCrossMatch ) {
-    $RepeatMaskerEngine = "-engine crossmatch -s";
+    $RepeatMaskerEngine = "-uncurated -engine crossmatch -s";
     $parasolRAM = "-cpu=1";
   } elsif ( $opt_useHMMER ) {
     # NOTE: This is only applicable for 8gb one-job-per-node scheduling
-    $RepeatMaskerEngine = "-engine hmmer -pa 4";
+    $RepeatMaskerEngine = "-uncurated -engine hmmer -pa 4";
     $parasolRAM = "-cpu=4 -ram=32g";
   }
 
   # Script to do a dummy run of RepeatMasker, to test our invocation and
   # unpack library files before kicking off a large cluster run.
   #  And now that RM is being run from local /scratch/data/RepeatMasker/
   #  this is also done in the cluster run script so each node will have
   #	its library initialized
   my $fh = &HgAutomate::mustOpen(">$runDir/dummyRun.csh");
   print $fh <<_EOF_
 #!/bin/csh -ef
 
 $RepeatMasker $RepeatMaskerEngine $repeatLib /dev/null
 _EOF_
   ;