909ff18f8b72ca31971508ca13eed84633e1dc1e hiram Sun Feb 2 10:24:45 2025 -0800 upgrading to new version of RM no redmine diff --git src/hg/utils/automation/doRepeatMasker.pl src/hg/utils/automation/doRepeatMasker.pl index a45cb523cb3..b00d903e403 100755 --- src/hg/utils/automation/doRepeatMasker.pl +++ src/hg/utils/automation/doRepeatMasker.pl @@ -4,31 +4,31 @@ # edit ~/kent/src/hg/utils/automation/doRepeatMasker.pl instead. # $Id: doRepeatMasker.pl,v 1.14 2009/03/19 16:15:29 hiram Exp $ use Getopt::Long; use warnings; use strict; use Carp; use FindBin qw($Bin); use lib "$Bin"; use HgAutomate; use HgRemoteScript; use HgStepManager; # Hardcoded command path: -my $RepeatMaskerPath = "/hive/data/staging/data/RepeatMasker221107"; +my $RepeatMaskerPath = "/hive/data/outside/RepeatMasker/RepeatMasker-4.1.7-p1"; my $RepeatMasker = "$RepeatMaskerPath/RepeatMasker"; # default engine changed from crossmatch to rmblast as of 2022-12 # with RM version 4.1.4 my $RepeatMaskerEngine = "-engine rmblast -pa 1"; # per RM doc, rmblast uses 4 CPUs for each job my $parasolRAM = "-cpu=4 -ram=32g"; # Option variable names, both common and peculiar to this script: use vars @HgAutomate::commonOptionVars; use vars @HgStepManager::optionVars; use vars qw/ $opt_buildDir $opt_ncbiRmsk $opt_dupList $opt_liftSpec @@ -217,50 +217,48 @@ } elsif ( $opt_useHMMER ) { # NOTE: This is only applicable for 8gb one-job-per-node scheduling $RepeatMaskerEngine = "-engine hmmer -pa 4"; $parasolRAM = "-cpu=4 -ram=32g"; } # Script to do a dummy run of RepeatMasker, to test our invocation and # unpack library files before kicking off a large cluster run. # And now that RM is being run from local /scratch/data/RepeatMasker/ # this is also done in the cluster run script so each node will have # its library initialized my $fh = &HgAutomate::mustOpen(">$runDir/dummyRun.csh"); print $fh <<_EOF_ #!/bin/csh -ef -set path = (/cluster/software/bin \$path) $RepeatMasker $RepeatMaskerEngine $repeatLib /dev/null _EOF_ ; close($fh); my $tmpDir = &HgAutomate::tmpDir(); # Cluster job script: $fh = &HgAutomate::mustOpen(">$runDir/RMRun.csh"); print $fh <<_EOF_ #!/bin/csh -ef if ( -d "/data/tmp" ) then setenv TMPDIR "/data/tmp" else if ( -d "/scratch/tmp" ) then setenv TMPDIR "/scratch/tmp" else setenv TMPDIR "/tmp" endif -set path = (/cluster/software/bin \$path) set finalOut = \$1 set inLst = \$finalOut:r set inLft = \$inLst:r.lft set alignOut = \$finalOut:r.align set catOut = \$finalOut:r.cat # Use local disk for output, and move the final result to \$outPsl # when done, to minimize I/O. set tmpDir = `mktemp -d -p \$TMPDIR doRepeatMasker.cluster.XXXXXX` pushd \$tmpDir # Initialize local library $RepeatMasker $RepeatMaskerEngine $repeatLib /dev/null @@ -320,32 +318,30 @@ _EOF_ ; close($fh); &HgAutomate::makeGsub($runDir, "./RMRun.csh {check out line $partDir/\$(path1).out}"); my $whatItDoes = "It computes a logical partition of unmasked 2bit into 500k chunks and runs it on the cluster with the most available bandwidth."; my $bossScript = new HgRemoteScript("$runDir/doCluster.csh", $paraHub, $runDir, $whatItDoes); $bossScript->add(<<_EOF_ -set path = (/cluster/software/bin \$path) - chmod a+x dummyRun.csh chmod a+x RMRun.csh # Record RM version used: printf "The repeat files provided for this assembly were generated using RepeatMasker.\\ Smit, AFA, Hubley, R & Green, P.,\\ RepeatMasker version 4.1.4\\ 1996-2010 <http://www.repeatmasker.org>.\\ \\ VERSION:\\n" > ../versionInfo.txt ./dummyRun.csh | grep -v "dev/null" >> ../versionInfo.txt $RepeatMasker -v >> ../versionInfo.txt printf "# RMRBMeta.embl library version: %s\\n" "`grep RELEASE $RepeatMaskerPath/Libraries/RMRBMeta.embl`" >> ../versionInfo.txt