7c88fd330d657bfc6007f75c75545ba63e94d87d hiram Thu Sep 11 15:17:40 2025 -0700 adding -uncurated to the version 4.2.1 operation diff --git src/hg/utils/automation/doRepeatMasker.pl src/hg/utils/automation/doRepeatMasker.pl index 17dd0f2663b..0126f268c50 100755 --- src/hg/utils/automation/doRepeatMasker.pl +++ src/hg/utils/automation/doRepeatMasker.pl @@ -8,31 +8,33 @@ use Getopt::Long; use warnings; use strict; use Carp; use FindBin qw($Bin); use lib "$Bin"; use HgAutomate; use HgRemoteScript; use HgStepManager; # Hardcoded command path: my $RepeatMaskerPath = "/hive/data/outside/RepeatMasker/RepeatMasker-4.2.1"; my $RepeatMasker = "$RepeatMaskerPath/RepeatMasker"; # default engine changed from crossmatch to rmblast as of 2022-12 # with RM version 4.1.4 -my $RepeatMaskerEngine = "-engine rmblast -pa 1"; +# version 4.2.1 changed the way the libraries are used, now need +# the -uncurated option to have behavior similar to before +my $RepeatMaskerEngine = "-uncurated -engine rmblast -pa 1"; # per RM doc, rmblast uses 4 CPUs for each job my $parasolRAM = "-cpu=4 -ram=32g"; # Option variable names, both common and peculiar to this script: use vars @HgAutomate::commonOptionVars; use vars @HgStepManager::optionVars; use vars qw/ $opt_buildDir $opt_ncbiRmsk $opt_dupList $opt_liftSpec $opt_species $opt_unmaskedSeq $opt_customLib $opt_useHMMER @@ -197,38 +199,38 @@ my $species = $opt_species ? $opt_species : &HgAutomate::getSpecies($dbHost, $db); my $customLib = $opt_customLib; my $repeatLib = ""; if ($opt_customLib && $opt_species) { $repeatLib = "-species \'$species\' -lib $customLib"; } elsif ($opt_customLib) { $repeatLib = "-lib $customLib"; } else { $repeatLib = "-species \'$species\'"; } # updated for ku kluster operation -cpu option instead of ram option if ( $opt_useRMBlastn ) { - $RepeatMaskerEngine = "-engine rmblast -pa 1"; + $RepeatMaskerEngine = "-uncurated -engine rmblast -pa 1"; $parasolRAM = "-cpu=4 -ram=32g"; } elsif ( $opt_useCrossMatch ) { - $RepeatMaskerEngine = "-engine crossmatch -s"; + $RepeatMaskerEngine = "-uncurated -engine crossmatch -s"; $parasolRAM = "-cpu=1"; } elsif ( $opt_useHMMER ) { # NOTE: This is only applicable for 8gb one-job-per-node scheduling - $RepeatMaskerEngine = "-engine hmmer -pa 4"; + $RepeatMaskerEngine = "-uncurated -engine hmmer -pa 4"; $parasolRAM = "-cpu=4 -ram=32g"; } # Script to do a dummy run of RepeatMasker, to test our invocation and # unpack library files before kicking off a large cluster run. # And now that RM is being run from local /scratch/data/RepeatMasker/ # this is also done in the cluster run script so each node will have # its library initialized my $fh = &HgAutomate::mustOpen(">$runDir/dummyRun.csh"); print $fh <<_EOF_ #!/bin/csh -ef $RepeatMasker $RepeatMaskerEngine $repeatLib /dev/null _EOF_ ;