src/hg/utils/automation/doRecipBest.pl 1.12
1.12 2010/02/04 18:33:56 hiram
allow use of lastz or blastz directory
Index: src/hg/utils/automation/doRecipBest.pl
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/utils/automation/doRecipBest.pl,v
retrieving revision 1.11
retrieving revision 1.12
diff -b -B -U 1000000 -r1.11 -r1.12
--- src/hg/utils/automation/doRecipBest.pl 21 Jul 2008 18:07:16 -0000 1.11
+++ src/hg/utils/automation/doRecipBest.pl 4 Feb 2010 18:33:56 -0000 1.12
@@ -1,318 +1,325 @@
#!/usr/bin/env perl
# DO NOT EDIT the /cluster/bin/scripts copy of this file --
# edit ~/kent/src/hg/utils/automation/doRecipBest.pl instead.
# This script should probably be folded back into doBlastzChainNet.pl
# eventually.
# $Id$
use Getopt::Long;
use warnings;
use strict;
use FindBin qw($Bin);
use lib "$Bin";
use HgAutomate;
use HgRemoteScript;
use HgStepManager;
# Option variable names, both common and peculiar to this script:
use vars @HgAutomate::commonOptionVars;
use vars @HgStepManager::optionVars;
use vars qw/
$opt_buildDir
/;
# Specify the steps supported with -continue / -stop:
my $stepper = new HgStepManager(
[ { name => 'recipBest', func => \&doRecipBest },
{ name => 'download', func => \&doDownload },
]
);
# Option defaults:
my $dbHost = 'hgwdev';
my $base = $0;
$base =~ s/^(.*\/)?//;
sub usage {
# Usage / help / self-documentation:
my ($status, $detailed) = @_;
# Basic help (for incorrect usage):
print STDERR "
usage: $base tDb qDb
options:
";
print STDERR $stepper->getOptionHelp();
print STDERR <<_EOF_
-buildDir dir Use dir instead of default
$HgAutomate::clusterData/\$tDb/$HgAutomate::trackBuild/blastz.\$qDb
_EOF_
;
print STDERR &HgAutomate::getCommonOptionHelp('dbHost' => $dbHost,
'workhorse' => '');
print STDERR "
Automates addition of reciprocal best chains/nets to regular chains/nets which
have already been created using doBlastzChainNet.pl. Steps:
recipBest: Net in both directions to get reciprocal best.
download: Make a reciprocalBest subdir of the existing download dir.
All work is done in the axtChain subdir of the build directory:
$HgAutomate::clusterData/\$tDb/$HgAutomate::trackBuild/blastz.\$qDb unless -buildDir is given.
";
# Detailed help (-help):
print STDERR "
Assumptions:
1. $HgAutomate::clusterData/\$db/\$db.2bit contains RepeatMasked sequence for
database/assembly \$db (for both \$tDb and \$qDb).
2. $HgAutomate::clusterData/\$db/chrom.sizes contains all sequence names and sizes from
\$db.2bit (for both \$tDb and \$qDb).
3. The buildDir contains axtChain/\$tDb.\$qDb.over.chain.gz and the download dir
goldenPath/\$tDb/vs\$QDb already exists.
" if ($detailed);
print "\n";
exit $status;
}
# Globals:
my %defVars = ();
# Command line args: tDb qDb
my ($tDb, $qDb);
# Other:
my ($QDb);
my ($buildDir);
my ($splitRef);
sub checkOptions {
# Make sure command line options are valid/supported.
my $ok = GetOptions(@HgStepManager::optionSpec,
'buildDir=s',
@HgAutomate::commonOptionSpec,
);
&usage(1) if (!$ok);
&usage(0, 1) if ($opt_help);
&HgAutomate::processCommonOptions();
my $err = $stepper->processOptions();
usage(1) if ($err);
$dbHost = $opt_dbHost if ($opt_dbHost);
}
#########################################################################
# * step: recipBest [workhorse]
sub doRecipBest {
my $runDir = "$buildDir/axtChain";
&HgAutomate::mustMkdir($runDir);
&HgAutomate::checkExistsUnlessDebug("doBlastzChainNet.pl", "recipBest",
"$runDir/$tDb.$qDb.over.chain.gz");
&HgAutomate::checkCleanSlate("recipBest", "download",
"$runDir/$qDb.$tDb.rbest.net.gz");
my $whatItDoes =
"It nets in both directions to get reciprocal best chains and nets.";
my $workhorse = &HgAutomate::chooseWorkhorse();
my $bossScript = new HgRemoteScript("$runDir/doRecipBest.csh", $workhorse,
$runDir, $whatItDoes);
my $t2bit = "$HgAutomate::clusterData/$tDb/$tDb.2bit";
my $q2bit = "$HgAutomate::clusterData/$qDb/$qDb.2bit";
$bossScript->add(<<_EOF_
# Swap $tDb-best chains to be $qDb-referenced:
chainStitchId $tDb.$qDb.over.chain.gz stdout \\
| chainSwap stdin stdout \\
| chainSort stdin $qDb.$tDb.tBest.chain
# Net those on $qDb to get $qDb-ref'd reciprocal best net:
chainPreNet $qDb.$tDb.tBest.chain \\
$HgAutomate::clusterData/{$qDb,$tDb}/chrom.sizes stdout \\
| chainNet -minSpace=1 -minScore=0 \\
stdin $HgAutomate::clusterData/{$qDb,$tDb}/chrom.sizes stdout /dev/null \\
| netSyntenic stdin stdout \\
| gzip -c > $qDb.$tDb.rbest.net.gz
# Extract $qDb-ref'd reciprocal best chain:
netChainSubset $qDb.$tDb.rbest.net.gz $qDb.$tDb.tBest.chain stdout \\
| chainStitchId stdin stdout \\
| gzip -c > $qDb.$tDb.rbest.chain.gz
# Swap to get $tDb-ref'd reciprocal best chain:
chainSwap $qDb.$tDb.rbest.chain.gz stdout \\
| chainSort stdin stdout \\
| gzip -c > $tDb.$qDb.rbest.chain.gz
# Net those on $tDb to get $tDb-ref'd reciprocal best net:
chainPreNet $tDb.$qDb.rbest.chain.gz \\
$HgAutomate::clusterData/{$tDb,$qDb}/chrom.sizes stdout \\
| chainNet -minSpace=1 -minScore=0 \\
stdin $HgAutomate::clusterData/{$tDb,$qDb}/chrom.sizes stdout /dev/null \\
| netSyntenic stdin stdout \\
| gzip -c > $tDb.$qDb.rbest.net.gz
# Clean up the one temp file and make md5sum:
rm $qDb.$tDb.tBest.chain
md5sum *.rbest.*.gz > md5sum.rbest.txt
# Create files for testing coverage of *.rbest.*.
netToBed -maxGap=1 $qDb.$tDb.rbest.net.gz $qDb.$tDb.rbest.net.bed
netToBed -maxGap=1 $tDb.$qDb.rbest.net.gz $tDb.$qDb.rbest.net.bed
chainToPsl $qDb.$tDb.rbest.chain.gz \\
$HgAutomate::clusterData/{$qDb,$tDb}/chrom.sizes \\
$q2bit $t2bit \\
$qDb.$tDb.rbest.chain.psl
chainToPsl $tDb.$qDb.rbest.chain.gz \\
$HgAutomate::clusterData/{$tDb,$qDb}/chrom.sizes \\
$t2bit $q2bit \\
$tDb.$qDb.rbest.chain.psl
# Verify that all coverage figures are equal:
set tChCov = `awk '{print \$19;}' $tDb.$qDb.rbest.chain.psl | sed -e 's/,/\\n/g' | awk 'BEGIN {N = 0;} {N += \$1;} END {printf "\%d\\n", N;}'`
set qChCov = `awk '{print \$19;}' $qDb.$tDb.rbest.chain.psl | sed -e 's/,/\\n/g' | awk 'BEGIN {N = 0;} {N += \$1;} END {printf "\%d\\n", N;}'`
set tNetCov = `awk 'BEGIN {N = 0;} {N += (\$3 - \$2);} END {printf "\%d\\n", N;}' $tDb.$qDb.rbest.net.bed`
set qNetCov = `awk 'BEGIN {N = 0;} {N += (\$3 - \$2);} END {printf "\%d\\n", N;}' $qDb.$tDb.rbest.net.bed`
if (\$tChCov != \$qChCov) then
echo "Warning: $tDb rbest chain coverage \$tChCov != $qDb \$qChCov"
endif
if (\$tNetCov != \$qNetCov) then
echo "Warning: $tDb rbest net coverage \$tNetCov != $qDb \$qNetCov"
endif
if (\$tChCov != \$tNetCov) then
echo "Warning: $tDb rbest chain coverage \$tChCov != net cov \$tNetCov"
endif
mkdir experiments
mv *.bed *.psl experiments
_EOF_
);
# Create axt and maf files
if ($splitRef) {
$bossScript->add(<<_EOF_
# Make rbest net axt's download: one .axt per $tDb seq.
netSplit $tDb.$qDb.rbest.net.gz rBestNet
chainSplit rBestChain $tDb.$qDb.rbest.chain.gz
cd ..
mkdir axtRBestNet
foreach f (axtChain/rBestNet/*.net)
netToAxt \$f axtChain/rBestChain/\$f:t:r.chain \\
$t2bit $q2bit stdout \\
| axtSort stdin stdout \\
| gzip -c > axtRBestNet/\$f:t:r.$tDb.$qDb.net.axt.gz
end
# Make rbest mafNet for multiz: one .maf per $tDb seq.
mkdir mafRBestNet
foreach f (axtRBestNet/*.$tDb.$qDb.net.axt.gz)
axtToMaf -tPrefix=$tDb. -qPrefix=$qDb. \$f \\
$HgAutomate::clusterData/{$tDb,$qDb}/chrom.sizes \\
stdout \\
| gzip -c > mafRBestNet/\$f:t:r:r:r:r:r.maf.gz
end
_EOF_
);
} else {
$bossScript->add(<<_EOF_
# Make rbest net axt's download
mkdir ../axtRBestNet
netToAxt $tDb.$qDb.rbest.net.gz $tDb.$qDb.rbest.chain.gz \\
$t2bit $q2bit stdout \\
| axtSort stdin stdout \\
| gzip -c > ../axtRBestNet/$tDb.$qDb.rbest.axt.gz
# Make rbest mafNet for multiz
mkdir ../mafRBestNet
axtToMaf -tPrefix=$tDb. -qPrefix=$qDb. ../axtRBestNet/$tDb.$qDb.rbest.axt.gz \\
$HgAutomate::clusterData/{$tDb,$qDb}/chrom.sizes \\
stdout \\
| gzip -c > ../mafRBestNet/$tDb.$qDb.rbest.maf.gz
_EOF_
);
}
$bossScript->execute();
} # doRecipBest
#########################################################################
# * step: download [dbHost]
sub makeRbestReadme {
my ($fname) = @_;
if ($opt_debug) {
return;
}
my $fh = &HgAutomate::mustOpen(">$fname");
print $fh <<_EOF_
This directory contains reciprocal-best netted chains for $tDb-$qDb.
- $tDb.$qDb.rbest.net.gz: $tDb-referenced recip.best net to $qDb.
- $tDb.$qDb.rbest.chain.gz: chains extracted from the recip.best
net. These can be passed to the liftOver program to translate coords
from $tDb to $qDb through the recip.best net.
- $qDb.$tDb.rbest.net.gz: $qDb-referenced recip.best net.
- $qDb.$tDb.rbest.chain.gz: recip.best "liftOver" chains.
_EOF_
;
close($fh);
} # makeRbestReadme
sub doDownload {
my $runDir = "$buildDir/axtChain";
my $whatItDoes = "It makes a download (sub)dir.";
my $bossScript = new HgRemoteScript("$runDir/doRBDownload.csh", $dbHost,
$runDir, $whatItDoes);
my $downloadDir = "$HgAutomate::goldenPath/$tDb/vs$QDb";
&HgAutomate::checkExistsUnlessDebug("recipBest", "download",
"$runDir/$tDb.$qDb.rbest.net.gz",
$downloadDir);
my $readme = "$runDir/README.rbest.txt";
&makeRbestReadme($readme);
$bossScript->add(<<_EOF_
mkdir $downloadDir/reciprocalBest
cd $downloadDir/reciprocalBest
ln -s $runDir/*.rbest.*.gz .
ln -s $runDir/md5sum.rbest.txt md5sum.txt
ln -s $readme README.txt
mkdir axtRBestNet
ln -s $buildDir/axtRBestNet/*.axt.gz axtRBestNet/
_EOF_
);
$bossScript->execute();
} # doDownload
#########################################################################
# main
#$opt_debug = 1;
# Prevent "Suspended (tty input)" hanging:
&HgAutomate::closeStdin();
# Make sure we have valid options and correct number of args
&checkOptions();
&usage(1) if (scalar(@ARGV) != 2);
($tDb, $qDb) = @ARGV;
$QDb = ucfirst($qDb);
$splitRef = (`wc -l < $HgAutomate::clusterData/$tDb/chrom.sizes`
<= $HgAutomate::splitThreshold);
# Establish what directory we will work in.
-$buildDir = $opt_buildDir ? $opt_buildDir :
- "$HgAutomate::clusterData/$tDb/$HgAutomate::trackBuild/blastz.$qDb";
+if ($opt_buildDir) {
+ $buildDir = $opt_buildDir;
+} else {
+$buildDir = "$HgAutomate::clusterData/$tDb/$HgAutomate::trackBuild/blastz.$qDb";
+if (! -d $buildDir) {
+$buildDir = "$HgAutomate::clusterData/$tDb/$HgAutomate::trackBuild/lastz.$qDb";
+}
+die "can not find existing build directory:\n$buildDir\n";
+}
# Do everything.
$stepper->execute();
# Tell the user anything they should know.
my $stopStep = $stepper->getStopStep();
my $upThrough = ($stopStep eq 'download') ? "" :
" (through the '$stopStep' step)";
&HgAutomate::verbose(1,
"\n *** All done!$upThrough\n");
&HgAutomate::verbose(1,
" *** Steps were performed in $buildDir\n");
&HgAutomate::verbose(1, "\n");