src/hg/utils/automation/doWindowMasker.pl 1.7
1.7 2009/03/07 00:06:33 aamp
Forgot to commit some stuff.
Index: src/hg/utils/automation/doWindowMasker.pl
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/utils/automation/doWindowMasker.pl,v
retrieving revision 1.6
retrieving revision 1.7
diff -b -B -U 1000000 -r1.6 -r1.7
--- src/hg/utils/automation/doWindowMasker.pl 13 Aug 2007 20:45:19 -0000 1.6
+++ src/hg/utils/automation/doWindowMasker.pl 7 Mar 2009 00:06:33 -0000 1.7
@@ -1,272 +1,272 @@
#!/usr/bin/env perl
# DO NOT EDIT the /cluster/bin/scripts copy of this file --
# edit ~/kent/src/hg/utils/automation/doWindowMasker.pl instead.
# $Id$
use Getopt::Long;
use warnings;
use strict;
use FindBin qw($Bin);
use lib "$Bin";
use HgAutomate;
use HgRemoteScript;
use HgStepManager;
# Option variable names, both common and peculiar to this script:
use vars @HgAutomate::commonOptionVars;
use vars @HgStepManager::optionVars;
use vars qw/
$opt_buildDir
$opt_unmaskedSeq
/;
# Specify the steps supported with -continue / -stop:
# The step table is ordered: each entry pairs a step name (as given to
# -continue / -stop) with the sub in this file that implements it.
my $stepper = HgStepManager->new(
  [
    { name => 'count',   func => \&doCount },
    { name => 'mask',    func => \&doMask },
    { name => 'sdust',   func => \&doSdust },
    { name => 'twobit',  func => \&doTwoBit },
    { name => 'cleanup', func => \&doCleanup },
  ]
);
# Option defaults:
# Default workhorse description; passed to the common option help in usage().
my $defaultWorkhorse = 'least loaded';
# Default database host; replaced by -dbHost in checkOptions() when given.
my $dbHost = 'hgwdev';
# Default unmasked sequence path with a literal "$db" placeholder; the real
# path is assigned in the main section once db is known.
-my $unmaskedSeq = "$HgAutomate::clusterData/\$db/\$db.unmasked.2bit";
+my $unmaskedSeq = "$HgAutomate::hiveDataGenomes/\$db/\$db.unmasked.2bit";
# Script name with any leading directory path removed, for the usage message.
my $base = $0;
$base =~ s/^(.*\/)?//;
sub usage {
# Usage / help / self-documentation:
# Prints the help text and exits; this sub never returns.
#   $status   - exit code handed to exit().
#   $detailed - when true, the "Assumptions" section is printed as well.
my ($status, $detailed) = @_;
# Basic help (for incorrect usage):
print STDERR "
usage: $base db
options:
";
# Help for the step-control options comes from the step manager.
print STDERR $stepper->getOptionHelp();
# Options peculiar to this script.
print STDERR <<_EOF_
-buildDir dir Use dir instead of default
- $HgAutomate::clusterData/\$db/$HgAutomate::trackBuild/WindowMasker.\$date
+ $HgAutomate::hiveDataGenomes/\$db/$HgAutomate::trackBuild/WindowMasker.\$date
(necessary when continuing at a later date).
_EOF_
;
# Help for the options shared with other automation scripts, showing this
# script's defaults for dbHost and workhorse.
print STDERR &HgAutomate::getCommonOptionHelp('dbHost' => $dbHost,
'workhorse' => $defaultWorkhorse);
# Summary of what each step does.
print STDERR "
Automates UCSC's WindowMasker process for genome database \$db. Steps:
count: Do the first pass of WindowMasker: collecting the counts.
mask: The second pass of WindowMasker and collect output.
sdust: Another pass of WindowMasker using -sdust true.
twobit: Make masked twobit files.
cleanup: Removes or compresses intermediate files.
All operations are performed in the build directory which is
-$HgAutomate::clusterData/\$db/$HgAutomate::trackBuild/WindowMasker.\$date unless -buildDir is given.
+$HgAutomate::hiveDataGenomes/\$db/$HgAutomate::trackBuild/WindowMasker.\$date unless -buildDir is given.
";
# Detailed help (-help):
print STDERR "
Assumptions:
-1. $HgAutomate::clusterData/\$db/\$db.unmasked.2bit contains sequence for
+1. $HgAutomate::hiveDataGenomes/\$db/\$db.unmasked.2bit contains sequence for
database/assembly \$db. (This can be overridden with -unmaskedSeq.)
" if ($detailed);
# NOTE(review): this trailing newline goes to STDOUT, unlike the rest of the
# help text which goes to STDERR -- confirm whether that is intended.
print "\n";
exit $status;
}
# Globals:
# Command line args: db
my ($db);
# Other:
my ($buildDir);
sub checkOptions {
  # Make sure command line options are valid/supported.
  # Parses @ARGV with GetOptions (leaving the non-option arguments in place),
  # prints usage and exits on a parse error or on -help, then lets the
  # shared automation code and the step manager validate their own options.
  my $ok = GetOptions(@HgStepManager::optionSpec,
                      'buildDir=s',
                      # -unmaskedSeq is declared in "use vars", described in the
                      # detailed help and read in the main section, so it must
                      # be registered here or GetOptions rejects it as unknown.
                      'unmaskedSeq=s',
                      @HgAutomate::commonOptionSpec,
                     );
  &usage(1) if (!$ok);
  &usage(0, 1) if ($opt_help);
  &HgAutomate::processCommonOptions();
  # A true return from the step manager is treated as an option error.
  my $err = $stepper->processOptions();
  usage(1) if ($err);
  # Command line -dbHost overrides the built-in default.
  $dbHost = $opt_dbHost if ($opt_dbHost);
}
#########################################################################
# * step: count [workhorse]
sub doCount {
  # Step 'count': write and run doCount.csh in the build directory.  The
  # generated csh script converts the unmasked 2bit to fasta in a scratch
  # temp directory, runs windowmasker with -mk_counts, copies
  # windowmasker.counts back to the build directory and removes the temp dir.
  my $workDir = "$buildDir";
  # Check for a clean slate before creating the directory.
  &HgAutomate::checkCleanSlate('count', 'mask', "$workDir/windowmasker.counts");
  &HgAutomate::mustMkdir($workDir);

  my $purpose = "It does WindowMasker counts step.";
  my $host = &HgAutomate::chooseWorkhorse();
  my $script = HgRemoteScript->new("$workDir/doCount.csh", $host,
                                   $workDir, $purpose);

  # csh commands; \$ marks csh variables, unescaped $ is filled in by perl.
  my $cshCommands = <<_EOF_;
set windowMaskerDir = /cluster/bin/\$MACHTYPE
set windowMasker = \$windowMaskerDir/windowmasker
set fa = $db.fa
set tmpDir = `mktemp -d -p /scratch/tmp doWindowMasker.XXXXXX`
chmod 775 \$tmpDir
set inputTwoBit = $unmaskedSeq
pushd \$tmpDir
twoBitToFa \$inputTwoBit \$fa
\$windowMasker -mk_counts true -input \$fa -output windowmasker.counts
popd
cp \$tmpDir/windowmasker.counts .
rm -rf \$tmpDir
_EOF_
  $script->add($cshCommands);
  $script->execute();
} # doCount
#########################################################################
# * step: mask [workhorse]
sub doMask {
  # Step 'mask': write and run doMask.csh in the build directory.  The
  # generated csh script runs windowmasker with the counts from the 'count'
  # step, then uses a perl one-liner to turn the interval output into
  # windowmasker.bed, which is copied back to the build directory.
  my $workDir = "$buildDir";
  # The counts file from the previous step must exist (unless debugging).
  &HgAutomate::checkExistsUnlessDebug('count', 'mask', "$workDir/windowmasker.counts");

  my $purpose = "It does WindowMasker masking step.";
  my $host = &HgAutomate::chooseWorkhorse();
  my $script = HgRemoteScript->new("$workDir/doMask.csh", $host,
                                   $workDir, $purpose);

  # csh commands; \$ marks csh variables, unescaped $ is filled in by perl.
  # The embedded perl one-liner remembers the sequence name from each
  # ">lcl|name" line and rewrites each "start - end" line as
  # name<TAB>start<TAB>end+1.
  my $cshCommands = <<_EOF_;
set windowMaskerDir = /cluster/bin/\$MACHTYPE
set windowMasker = \$windowMaskerDir/windowmasker
set fa = $db.fa
set tmpDir = `mktemp -d -p /scratch/tmp doWindowMasker.XXXXXX`
chmod 775 \$tmpDir
set inputTwoBit = $unmaskedSeq
cp windowmasker.counts \$tmpDir
pushd \$tmpDir
twoBitToFa \$inputTwoBit \$fa
\$windowMasker -ustat windowmasker.counts -input \$fa -output windowmasker.intervals
perl -wpe \'if \(s\/^\>lcl\\\|\(\.\*\)\\n\$\/\/\) { \$chr = \$1\; } \\
if \(\/^\(\\d+\) \- \(\\d+\)\/\) { \\
\$s=\$1\; \$e=\$2+1\; s\/\(\\d+\) \- \(\\d+\)\/\$chr\\t\$s\\t\$e\/\; \\
}\' windowmasker.intervals > windowmasker.bed
popd
cp \$tmpDir/windowmasker.bed .
rm -rf \$tmpDir
_EOF_
  $script->add($cshCommands);
  $script->execute();
} # doMask
#########################################################################
# * step: sdust [workhorse]
sub doSdust {
  # Step 'sdust': same as the 'mask' step, but windowmasker is run with
  # "-sdust true" and the result is written to windowmasker.sdust.bed.
  my $workDir = "$buildDir";
  # The counts file must exist (unless debugging).
  &HgAutomate::checkExistsUnlessDebug('mask', 'sdust', "$workDir/windowmasker.counts");

  my $purpose = "It does WindowMasker masking step with -sdust true.";
  my $host = &HgAutomate::chooseWorkhorse();
  my $script = HgRemoteScript->new("$workDir/doSdust.csh", $host,
                                   $workDir, $purpose);

  # csh commands; \$ marks csh variables, unescaped $ is filled in by perl.
  # The embedded perl one-liner remembers the sequence name from each
  # ">lcl|name" line and rewrites each "start - end" line as
  # name<TAB>start<TAB>end+1.
  my $cshCommands = <<_EOF_;
set windowMaskerDir = /cluster/bin/\$MACHTYPE
set windowMasker = \$windowMaskerDir/windowmasker
set fa = $db.fa
set tmpDir = `mktemp -d -p /scratch/tmp doWindowMasker.XXXXXX`
chmod 775 \$tmpDir
set inputTwoBit = $unmaskedSeq
cp windowmasker.counts \$tmpDir
pushd \$tmpDir
twoBitToFa \$inputTwoBit \$fa
\$windowMasker -ustat windowmasker.counts -sdust true -input \$fa -output windowmasker.intervals
perl -wpe \'if \(s\/^\>lcl\\\|\(\.\*\)\\n\$\/\/\) { \$chr = \$1\; } \\
if \(\/^\(\\d+\) \- \(\\d+\)\/\) { \\
\$s=\$1\; \$e=\$2+1\; s\/\(\\d+\) \- \(\\d+\)\/\$chr\\t\$s\\t\$e\/\; \\
}\' windowmasker.intervals > windowmasker.sdust.bed
popd
cp \$tmpDir/windowmasker.sdust.bed .
rm -rf \$tmpDir
_EOF_
  $script->add($cshCommands);
  $script->execute();
} # doSdust
#########################################################################
# * step: twobit [fileServer]
sub doTwoBit {
  # Step 'twobit': write and run doTwoBit.csh in the build directory.  The
  # generated csh script runs twoBitMask on the unmasked sequence twice, once
  # with windowmasker.bed and once with windowmasker.sdust.bed, producing
  # $db.wmsk.2bit and $db.wmsk.sdust.2bit.
  my $runDir = "$buildDir";
  my $whatItDoes = "Make .2bit files from the beds.";
  # Require the outputs of the earlier steps (unless debugging).  The step
  # labels follow the (previous step, this step) order used by the mask and
  # sdust steps; they were previously copied verbatim from the sdust step,
  # so a missing file would have been reported against the wrong step.
  &HgAutomate::checkExistsUnlessDebug('sdust', 'twobit',
                                      ("$runDir/windowmasker.counts",
                                       "$runDir/windowmasker.bed",
                                       "$runDir/windowmasker.sdust.bed"));
  my $fileServer = &HgAutomate::chooseFileServer($runDir);
  my $bossScript = HgRemoteScript->new("$runDir/doTwoBit.csh", $fileServer,
                                       $runDir, $whatItDoes);
  $bossScript->add(<<_EOF_
twoBitMask $unmaskedSeq windowmasker.bed $db.wmsk.2bit
twoBitMask $unmaskedSeq windowmasker.sdust.bed $db.wmsk.sdust.2bit
_EOF_
  );
  $bossScript->execute();
} #doTwoBit
#########################################################################
# * step: cleanup [fileServer]
sub doCleanup {
  # Step 'cleanup': write and run doCleanup.csh, which gzips the three
  # intermediate files left in the build directory by the earlier steps.
  my $runDir = "$buildDir";
  my $purpose = "It cleans up or compresses intermediate files.";
  my $host = &HgAutomate::chooseFileServer($runDir);
  my $script = HgRemoteScript->new("$runDir/doCleanup.csh", $host,
                                   $runDir, $purpose);

  # One "gzip <file>" command line per intermediate file, in this order.
  my @intermediates = qw(windowmasker.counts windowmasker.bed windowmasker.sdust.bed);
  my $cshCommands = join('', map { "gzip $runDir/$_\n" } @intermediates);

  $script->add($cshCommands);
  $script->execute();
} # doCleanup
#########################################################################
# main
# Prevent "Suspended (tty input)" hanging:
&HgAutomate::closeStdin();
# Make sure we have valid options and exactly 1 argument:
&checkOptions();
&usage(1) if (scalar(@ARGV) != 1);
# The single remaining argument is the database name.
($db) = @ARGV;
# Force debug and verbose until this is looking pretty solid:
#$opt_debug = 1;
# Verbosity is raised to at least 3 regardless of what was requested.
$opt_verbose = 3 if ($opt_verbose < 3);
# Establish what directory we will work in.
# Today's date (YYYY-MM-DD, from the external date command) names the default
# build directory; -buildDir takes precedence when given.
my $date = `date +%Y-%m-%d`;
chomp $date;
$buildDir = $opt_buildDir ? $opt_buildDir :
- "$HgAutomate::clusterData/$db/$HgAutomate::trackBuild/WindowMasker.$date";
+ "$HgAutomate::hiveDataGenomes/$db/$HgAutomate::trackBuild/WindowMasker.$date";
# Input sequence: -unmaskedSeq when given, otherwise the db's unmasked.2bit.
$unmaskedSeq = $opt_unmaskedSeq ? $opt_unmaskedSeq :
- "$HgAutomate::clusterData/$db/$db.unmasked.2bit";
+ "$HgAutomate::hiveDataGenomes/$db/$db.unmasked.2bit";
# Do everything.
$stepper->execute();
# Tell the user anything they should know.
# The "(through ...)" note is added only when the run stopped before 'cleanup'.
my $stopStep = $stepper->getStopStep();
my $upThrough = ($stopStep eq 'cleanup') ? "" :
" (through the '$stopStep' step)";
&HgAutomate::verbose(1,
"\n *** All done!$upThrough\n");
&HgAutomate::verbose(1,
" *** Steps were performed in $buildDir\n");
&HgAutomate::verbose(1, "\n");