src/hg/utils/automation/doWindowMasker.pl 1.7

1.7 2009/03/07 00:06:33 aamp
Forgot to commit some stuff.
Index: src/hg/utils/automation/doWindowMasker.pl
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/utils/automation/doWindowMasker.pl,v
retrieving revision 1.6
retrieving revision 1.7
diff -b -B -U 1000000 -r1.6 -r1.7
--- src/hg/utils/automation/doWindowMasker.pl	13 Aug 2007 20:45:19 -0000	1.6
+++ src/hg/utils/automation/doWindowMasker.pl	7 Mar 2009 00:06:33 -0000	1.7
@@ -1,272 +1,272 @@
 #!/usr/bin/env perl
 
 # DO NOT EDIT the /cluster/bin/scripts copy of this file --
 # edit ~/kent/src/hg/utils/automation/doWindowMasker.pl instead.
 
 # $Id$
 
 use Getopt::Long;
 use warnings;
 use strict;
 use FindBin qw($Bin);
 use lib "$Bin";
 use HgAutomate;
 use HgRemoteScript;
 use HgStepManager;
 
 # Option variable names, both common and peculiar to this script:
 use vars @HgAutomate::commonOptionVars;
 use vars @HgStepManager::optionVars;
 use vars qw/
     $opt_buildDir
     $opt_unmaskedSeq
     /;
 
 # The steps this script knows about, in the order listed, each paired with
 # the sub that implements it.  These are the step names supported with
 # -continue / -stop:
 my $stepper = HgStepManager->new([
     { name => 'count',   func => \&doCount   },
     { name => 'mask',    func => \&doMask    },
     { name => 'sdust',   func => \&doSdust   },
     { name => 'twobit',  func => \&doTwoBit  },
     { name => 'cleanup', func => \&doCleanup },
 ]);
 
 # Option defaults:
 my $defaultWorkhorse = 'least loaded';
 my $dbHost = 'hgwdev';
 # Placeholder only: the \$db escapes keep the literal text "$db" in this
 # path, because the db argument is not known yet.  The real path is
 # assigned in main after the command line has been parsed.
-my $unmaskedSeq = "$HgAutomate::clusterData/\$db/\$db.unmasked.2bit";
+my $unmaskedSeq = "$HgAutomate::hiveDataGenomes/\$db/\$db.unmasked.2bit";
 
 # Script name with any leading directory path stripped, for the usage text:
 my $base = $0;
 $base =~ s/^(.*\/)?//;
 
 sub usage {
   # Usage / help / self-documentation:
   # Prints the help text and then exits with $status.
   #   $status   - exit code handed to exit().
   #   $detailed - when true (as for -help), the "Assumptions" section is
   #               printed in addition to the basic help.
   my ($status, $detailed) = @_;
   # Basic help (for incorrect usage):
   print STDERR "
 usage: $base db
 options:
 ";
   print STDERR $stepper->getOptionHelp();
   print STDERR <<_EOF_
     -buildDir dir         Use dir instead of default
-                          $HgAutomate::clusterData/\$db/$HgAutomate::trackBuild/WindowMasker.\$date
+                          $HgAutomate::hiveDataGenomes/\$db/$HgAutomate::trackBuild/WindowMasker.\$date
                           (necessary when continuing at a later date).
 _EOF_
   ;
   print STDERR &HgAutomate::getCommonOptionHelp('dbHost' => $dbHost,
 					'workhorse' => $defaultWorkhorse);
   print STDERR "
 Automates UCSC's WindowMasker process for genome database \$db.  Steps:
     count: Do the first pass of WindowMasker: collecting the counts.
     mask: The second pass of WindowMasker and collect output.
     sdust: Another pass of WindowMasker using -sdust true.
     twobit: Make masked twobit files.
     cleanup: Removes or compresses intermediate files.
 All operations are performed in the build directory which is
-$HgAutomate::clusterData/\$db/$HgAutomate::trackBuild/WindowMasker.\$date unless -buildDir is given.
+$HgAutomate::hiveDataGenomes/\$db/$HgAutomate::trackBuild/WindowMasker.\$date unless -buildDir is given.
 ";
   # Detailed help (-help):
   print STDERR "
 Assumptions:
-1. $HgAutomate::clusterData/\$db/\$db.unmasked.2bit contains sequence for
+1. $HgAutomate::hiveDataGenomes/\$db/\$db.unmasked.2bit contains sequence for
    database/assembly \$db.  (This can be overridden with -unmaskedSeq.)
 " if ($detailed);
   # NOTE(review): this trailing newline goes to STDOUT, while all of the
   # help text above goes to STDERR -- confirm that this is intended.
   print "\n";
   exit $status;
 }
 
 
 # Globals:
 # Command line args: db
 my ($db);
 # Other:
 my ($buildDir);
 
 sub checkOptions {
   # Make sure command line options are valid/supported.
   # Parses the command line into the $opt_* globals.  Exits through usage()
   # when an option is not recognized, when the step options are rejected by
   # the step manager, or when -help is given.  Also applies -dbHost.
   my $ok = GetOptions(@HgStepManager::optionSpec,
 		      'buildDir=s',
 		      # -unmaskedSeq is documented in the help text and read
 		      # in main through $opt_unmaskedSeq, so it has to be
 		      # registered here; without this entry GetOptions treats
 		      # it as an unknown option and the script exits via usage.
 		      'unmaskedSeq=s',
 		      @HgAutomate::commonOptionSpec,
 		      );
   &usage(1) if (!$ok);
   &usage(0, 1) if ($opt_help);
   &HgAutomate::processCommonOptions();
   my $err = $stepper->processOptions();
   usage(1) if ($err);
   $dbHost = $opt_dbHost if ($opt_dbHost);
 }
 
 
 #########################################################################
 # * step: count [workhorse]
 sub doCount {
   # Step 'count': first WindowMasker pass (-mk_counts true).  The generated
   # csh script unpacks the unmasked .2bit into a scratch directory, runs
   # windowmasker there, copies windowmasker.counts back to the directory
   # the script runs in, and removes the scratch directory.
   my $workDir = "$buildDir";
   my $countsFile = "$workDir/windowmasker.counts";
   &HgAutomate::checkCleanSlate('count', 'mask', $countsFile);
   &HgAutomate::mustMkdir($workDir);
 
   my $host = &HgAutomate::chooseWorkhorse();
   my $script = HgRemoteScript->new("$workDir/doCount.csh", $host, $workDir,
 				   "It does WindowMasker counts step.");
 
   # \$name is left for csh to expand; $db and $unmaskedSeq are filled in now.
   my $cshBody = <<_EOF_;
 set windowMaskerDir = /cluster/bin/\$MACHTYPE 
 set windowMasker = \$windowMaskerDir/windowmasker
 set fa = $db.fa
 set tmpDir = `mktemp -d -p /scratch/tmp doWindowMasker.XXXXXX`
 chmod 775 \$tmpDir
 set inputTwoBit = $unmaskedSeq
 pushd \$tmpDir
 twoBitToFa \$inputTwoBit \$fa
 \$windowMasker -mk_counts true -input \$fa -output windowmasker.counts
 popd 
 cp \$tmpDir/windowmasker.counts .
 rm -rf \$tmpDir
 _EOF_
   $script->add($cshBody);
   $script->execute();
 } # doCount
 
 
 #########################################################################
 # * step: mask [workhorse]
 sub doMask {
   # Step 'mask': second WindowMasker pass.  Uses windowmasker.counts (the
   # output of the 'count' step) to produce windowmasker.intervals, converts
   # that to BED, and copies windowmasker.bed back to the directory the
   # generated script runs in.
   my $runDir = "$buildDir";
   # The counts file from the previous step must exist (judging by the
   # helper's name, the check is presumably skipped in debug mode).
   &HgAutomate::checkExistsUnlessDebug('count', 'mask', "$runDir/windowmasker.counts");
   my $whatItDoes = "It does WindowMasker masking step.";
   my $workhorse = &HgAutomate::chooseWorkhorse();
   my $bossScript = new HgRemoteScript("$runDir/doMask.csh", $workhorse,
 				      $runDir, $whatItDoes);
   # In the heredoc below, \$name is left for csh to expand while $db and
   # $unmaskedSeq are interpolated now.  The embedded perl one-liner is
   # escaped so that it survives this heredoc; once unescaped it remembers
   # the sequence name from each ">lcl|name" line (and deletes that line),
   # then rewrites each "start - end" line as "name<TAB>start<TAB>end+1".
   $bossScript->add(<<_EOF_
 set windowMaskerDir = /cluster/bin/\$MACHTYPE 
 set windowMasker = \$windowMaskerDir/windowmasker
 set fa = $db.fa
 set tmpDir = `mktemp -d -p /scratch/tmp doWindowMasker.XXXXXX`
 chmod 775 \$tmpDir
 set inputTwoBit = $unmaskedSeq
 cp windowmasker.counts \$tmpDir
 pushd \$tmpDir
 twoBitToFa \$inputTwoBit \$fa
 \$windowMasker -ustat windowmasker.counts -input \$fa -output windowmasker.intervals
 perl -wpe \'if \(s\/^\>lcl\\\|\(\.\*\)\\n\$\/\/\) { \$chr = \$1\; } \\
    if \(\/^\(\\d+\) \- \(\\d+\)\/\) { \\
    \$s=\$1\; \$e=\$2+1\; s\/\(\\d+\) \- \(\\d+\)\/\$chr\\t\$s\\t\$e\/\; \\
    }\' windowmasker.intervals > windowmasker.bed
 popd 
 cp \$tmpDir/windowmasker.bed .
 rm -rf \$tmpDir
 _EOF_
   );
 
   $bossScript->execute();
 } # doMask
 
 #########################################################################
 # * step: sdust [workhorse]
 sub doSdust {
   # Step 'sdust': same as the 'mask' step but runs windowmasker with
   # "-sdust true" and writes windowmasker.sdust.bed instead of
   # windowmasker.bed.
   my $runDir = "$buildDir";
   # NOTE(review): the file checked here, windowmasker.counts, is written by
   # the 'count' step, yet 'mask' is passed as the previous step -- confirm
   # which step name the helper should report.
   &HgAutomate::checkExistsUnlessDebug('mask', 'sdust', "$runDir/windowmasker.counts");
   my $whatItDoes = "It does WindowMasker masking step with -sdust true.";
   my $workhorse = &HgAutomate::chooseWorkhorse();
   my $bossScript = new HgRemoteScript("$runDir/doSdust.csh", $workhorse,
 				      $runDir, $whatItDoes);
   # In the heredoc below, \$name is left for csh to expand while $db and
   # $unmaskedSeq are interpolated now.  The embedded perl one-liner is
   # escaped so that it survives this heredoc; once unescaped it remembers
   # the sequence name from each ">lcl|name" line (and deletes that line),
   # then rewrites each "start - end" line as "name<TAB>start<TAB>end+1".
   $bossScript->add(<<_EOF_
 set windowMaskerDir = /cluster/bin/\$MACHTYPE 
 set windowMasker = \$windowMaskerDir/windowmasker
 set fa = $db.fa
 set tmpDir = `mktemp -d -p /scratch/tmp doWindowMasker.XXXXXX`
 chmod 775 \$tmpDir
 set inputTwoBit = $unmaskedSeq
 cp windowmasker.counts \$tmpDir
 pushd \$tmpDir
 twoBitToFa \$inputTwoBit \$fa
 \$windowMasker -ustat windowmasker.counts -sdust true -input \$fa -output windowmasker.intervals
 perl -wpe \'if \(s\/^\>lcl\\\|\(\.\*\)\\n\$\/\/\) { \$chr = \$1\; } \\
    if \(\/^\(\\d+\) \- \(\\d+\)\/\) { \\
    \$s=\$1\; \$e=\$2+1\; s\/\(\\d+\) \- \(\\d+\)\/\$chr\\t\$s\\t\$e\/\; \\
    }\' windowmasker.intervals > windowmasker.sdust.bed
 popd 
 cp \$tmpDir/windowmasker.sdust.bed .
 rm -rf \$tmpDir
 _EOF_
   );
 
   $bossScript->execute();
 } # doSdust
 
 
 #########################################################################
 # * step: twobit [fileServer]
 sub doTwoBit {
   # Step 'twobit': apply each mask BED (windowmasker.bed and
   # windowmasker.sdust.bed) to the unmasked sequence with twoBitMask,
   # writing $db.wmsk.2bit and $db.wmsk.sdust.2bit.
   my $runDir = "$buildDir";
   my $whatItDoes = "Make .2bit files from the beds.";
   # Prerequisite checks.  The arguments follow the (previous step, this
   # step, files) order used by doMask.  Each file is paired with the step
   # that writes it, and 'twobit' is named as the step doing the checking;
   # previously 'mask', 'sdust' was passed for all three files, which
   # pointed at the wrong steps.  Files are checked in the same order as
   # before.
   &HgAutomate::checkExistsUnlessDebug('count', 'twobit',
 				      "$runDir/windowmasker.counts");
   &HgAutomate::checkExistsUnlessDebug('mask', 'twobit',
 				      "$runDir/windowmasker.bed");
   &HgAutomate::checkExistsUnlessDebug('sdust', 'twobit',
 				      "$runDir/windowmasker.sdust.bed");
   my $fileServer = &HgAutomate::chooseFileServer($runDir);
   my $bossScript = new HgRemoteScript("$runDir/doTwoBit.csh", $fileServer,
 				      $runDir, $whatItDoes);
   $bossScript->add(<<_EOF_
 twoBitMask $unmaskedSeq windowmasker.bed $db.wmsk.2bit
 twoBitMask $unmaskedSeq windowmasker.sdust.bed $db.wmsk.sdust.2bit
 _EOF_
   );
   $bossScript->execute();
 } #doTwoBit
 
 #########################################################################
 # * step: cleanup [fileServer]
 sub doCleanup {
   # Step 'cleanup': gzip the intermediate files left in the build directory
   # by the count, mask and sdust steps.
   my $workDir = "$buildDir";
   my $server = &HgAutomate::chooseFileServer($workDir);
   my $script = HgRemoteScript->new("$workDir/doCleanup.csh", $server, $workDir,
 				   "It cleans up or compresses intermediate files.");
 
   # One "gzip <dir>/<file>" command line per intermediate file.
   my @intermediates = qw(
       windowmasker.counts
       windowmasker.bed
       windowmasker.sdust.bed
   );
   my $commands = join('', map { "gzip $workDir/$_\n" } @intermediates);
   $script->add($commands);
   $script->execute();
 } # doCleanup
 
 
 #########################################################################
 # main
 
 # Prevent "Suspended (tty input)" hanging:
 &HgAutomate::closeStdin();
 
 # Make sure we have valid options and exactly 1 argument:
 &checkOptions();
 &usage(1) if (scalar(@ARGV) != 1);
 ($db) = @ARGV;
 
 # Force verbose to at least 3 until this is looking pretty solid
 # (forcing debug is currently disabled):
 #$opt_debug = 1;
 $opt_verbose = 3 if ($opt_verbose < 3);
 
 # Establish what directory we will work in.
 # The default build directory name includes today's date, which is why
 # -buildDir is needed when continuing a run on a later date.
 my $date = `date +%Y-%m-%d`;
 chomp $date;
 $buildDir = $opt_buildDir ? $opt_buildDir :
-  "$HgAutomate::clusterData/$db/$HgAutomate::trackBuild/WindowMasker.$date";
+  "$HgAutomate::hiveDataGenomes/$db/$HgAutomate::trackBuild/WindowMasker.$date";
 # Use $opt_unmaskedSeq when it is set, otherwise the default unmasked .2bit
 # path for this db:
 $unmaskedSeq = $opt_unmaskedSeq ? $opt_unmaskedSeq :
-  "$HgAutomate::clusterData/$db/$db.unmasked.2bit";
+  "$HgAutomate::hiveDataGenomes/$db/$db.unmasked.2bit";
 
 # Do everything.
 $stepper->execute();
 
 # Tell the user anything they should know.
 # The stop step is mentioned only when it is not 'cleanup'.
 my $stopStep = $stepper->getStopStep();
 my $upThrough = ($stopStep eq 'cleanup') ? "" :
   "  (through the '$stopStep' step)";
 
 &HgAutomate::verbose(1,
 	"\n *** All done!$upThrough\n");
 &HgAutomate::verbose(1,
 	" *** Steps were performed in $buildDir\n");
 &HgAutomate::verbose(1, "\n");