909ff18f8b72ca31971508ca13eed84633e1dc1e
hiram
  Sun Feb 2 10:24:45 2025 -0800
upgrading to new version of RepeatMasker (4.1.7-p1), no redmine

diff --git src/hg/utils/automation/doRepeatMasker.pl src/hg/utils/automation/doRepeatMasker.pl
index a45cb523cb3..b00d903e403 100755
--- src/hg/utils/automation/doRepeatMasker.pl
+++ src/hg/utils/automation/doRepeatMasker.pl
@@ -4,31 +4,31 @@
 # edit ~/kent/src/hg/utils/automation/doRepeatMasker.pl instead.
 
 # $Id: doRepeatMasker.pl,v 1.14 2009/03/19 16:15:29 hiram Exp $
 
 use Getopt::Long;
 use warnings;
 use strict;
 use Carp;
 use FindBin qw($Bin);
 use lib "$Bin";
 use HgAutomate;
 use HgRemoteScript;
 use HgStepManager;
 
 # Hardcoded command path:
-my $RepeatMaskerPath = "/hive/data/staging/data/RepeatMasker221107";
+my $RepeatMaskerPath = "/hive/data/outside/RepeatMasker/RepeatMasker-4.1.7-p1";
 my $RepeatMasker = "$RepeatMaskerPath/RepeatMasker";
 # default engine changed from crossmatch to rmblast as of 2022-12
 # with RM version 4.1.4
 my $RepeatMaskerEngine = "-engine rmblast -pa 1";
 # per RM doc, rmblast uses 4 CPUs for each job
 my $parasolRAM = "-cpu=4 -ram=32g";
 
 # Option variable names, both common and peculiar to this script:
 use vars @HgAutomate::commonOptionVars;
 use vars @HgStepManager::optionVars;
 use vars qw/
     $opt_buildDir
     $opt_ncbiRmsk
     $opt_dupList
     $opt_liftSpec
@@ -217,50 +217,48 @@
   } elsif ( $opt_useHMMER ) {
     # NOTE: This is only applicable for 8gb one-job-per-node scheduling
     $RepeatMaskerEngine = "-engine hmmer -pa 4";
     $parasolRAM = "-cpu=4 -ram=32g";
   }
 
   # Script to do a dummy run of RepeatMasker, to test our invocation and
   # unpack library files before kicking off a large cluster run.
   #  And now that RM is being run from local /scratch/data/RepeatMasker/
   #  this is also done in the cluster run script so each node will have
   #	its library initialized
   my $fh = &HgAutomate::mustOpen(">$runDir/dummyRun.csh");
   print $fh <<_EOF_
 #!/bin/csh -ef
 
-set path = (/cluster/software/bin \$path)
 $RepeatMasker $RepeatMaskerEngine $repeatLib /dev/null
 _EOF_
   ;
   close($fh);
 
   my $tmpDir = &HgAutomate::tmpDir();
   # Cluster job script:
   $fh = &HgAutomate::mustOpen(">$runDir/RMRun.csh");
   print $fh <<_EOF_
 #!/bin/csh -ef
 
 if ( -d "/data/tmp" ) then
   setenv TMPDIR "/data/tmp"
 else if ( -d "/scratch/tmp" ) then
   setenv TMPDIR "/scratch/tmp"
 else
   setenv TMPDIR "/tmp"
 endif
-set path = (/cluster/software/bin \$path)
 
 set finalOut = \$1
 
 set inLst = \$finalOut:r
 set inLft = \$inLst:r.lft
 set alignOut = \$finalOut:r.align
 set catOut = \$finalOut:r.cat
 
 # Use local disk for output, and move the final result to \$outPsl
 # when done, to minimize I/O.
 set tmpDir = `mktemp -d -p \$TMPDIR doRepeatMasker.cluster.XXXXXX`
 pushd \$tmpDir
 
 # Initialize local library
 $RepeatMasker $RepeatMaskerEngine $repeatLib /dev/null
@@ -320,32 +318,30 @@
 _EOF_
   ;
   close($fh);
 
   &HgAutomate::makeGsub($runDir,
       "./RMRun.csh {check out line $partDir/\$(path1).out}");
 
   my $whatItDoes =
 "It computes a logical partition of unmasked 2bit into 500k chunks
 and runs it on the cluster with the most available bandwidth.";
   my $bossScript = new HgRemoteScript("$runDir/doCluster.csh", $paraHub,
 				      $runDir, $whatItDoes);
 
   $bossScript->add(<<_EOF_
 
-set path = (/cluster/software/bin \$path)
-
 chmod a+x dummyRun.csh
 chmod a+x RMRun.csh
 
 # Record RM version used:
 printf "The repeat files provided for this assembly were generated using RepeatMasker.\\
   Smit, AFA, Hubley, R & Green, P.,\\
   RepeatMasker version 4.1.4\\
   1996-2010 <http://www.repeatmasker.org>.\\
 \\
 VERSION:\\n" > ../versionInfo.txt
 
 ./dummyRun.csh | grep -v "dev/null" >> ../versionInfo.txt
 
 $RepeatMasker -v >> ../versionInfo.txt
 printf "# RMRBMeta.embl library version: %s\\n" "`grep RELEASE $RepeatMaskerPath/Libraries/RMRBMeta.embl`" >> ../versionInfo.txt