e69db257480ddc4aae24ca6ae0130c9450abc362 hiram Mon Oct 28 14:07:47 2024 -0700
do not use /dev/shm as a temporary directory refs #34685

diff --git src/hg/utils/automation/doSameSpeciesLiftOver.pl src/hg/utils/automation/doSameSpeciesLiftOver.pl
index ed36cff..c76c413 100755
--- src/hg/utils/automation/doSameSpeciesLiftOver.pl
+++ src/hg/utils/automation/doSameSpeciesLiftOver.pl
@@ -1,708 +1,708 @@
#!/usr/bin/env perl

# DO NOT EDIT the /cluster/bin/scripts copy of this file --
# edit ~/kent/src/hg/utils/automation/doSameSpeciesLiftOver.pl instead.

# $Id: doSameSpeciesLiftOver.pl,v 1.5 2008/09/19 04:38:08 angie Exp $

use Getopt::Long;
use warnings;
use strict;
use FindBin qw($Bin);
use lib "$Bin";
use HgAutomate;
use HgRemoteScript;
use HgStepManager;

# Option variable names, both common and peculiar to this script:
use vars @HgAutomate::commonOptionVars;
use vars @HgStepManager::optionVars;
use vars qw/
    $opt_buildDir
    $opt_ooc
    $opt_target2Bit
    $opt_targetSizes
    $opt_query2Bit
    $opt_querySizes
    $opt_chainRam
    $opt_chainCpu
    $opt_localTmp
    /;

# Specify the steps supported with -continue / -stop:
my $stepper = new HgStepManager(
    [ { name => 'align',   func => \&doAlign },
      { name => 'chain',   func => \&doChain },
      { name => 'net',     func => \&doNet },
      { name => 'load',    func => \&doLoad },
      { name => 'cleanup', func => \&doCleanup },
    ]
				);

# Option defaults:
my $dbHost = 'hgwdev';
my $ramG = '4g';
my $cpu = 1;
my $blatRam = '4g';	# -ram=Ng argument
my $blatCpu = 1;	# -cpu=N argument
my $chainRam = '16g';	# -chainRam=Ng argument
my $chainCpu = 1;	# -chainCpu=N argument

# This could be made into an option:
# BLAT -fastMap will not work with query chunks greater than 5000
my $splitSize = '5000';
my $splitOverlap = '500';

my $base = $0;
$base =~ s/^(.*\/)?//;
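# Example invocations (illustrative only; hg38/hg19 are placeholder db names):
#   doSameSpeciesLiftOver.pl hg38 hg19
#   doSameSpeciesLiftOver.pl -buildDir=/hive/data/genomes/hg38/bed/blat.hg19.2024-10-28 \
#       -continue=chain hg38 hg19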
sub usage {
  # Usage / help / self-documentation:
  my ($status, $detailed) = @_;
  # Basic help (for incorrect usage):
  print STDERR "
usage: $base fromDb toDb
options:
";
  print STDERR $stepper->getOptionHelp();
  print STDERR <<_EOF_
    -buildDir dir         Use dir instead of default
                          $HgAutomate::clusterData/\$fromDb/$HgAutomate::trackBuild/blat.\$toDb.\$date
                          (necessary when continuing at a later date).
    -ooc /path/11.ooc     Use this instead of the default
                          /hive/data/genomes/fromDb/11.ooc
                          Can be "none".
    -target2Bit /path/target.2bit  Full path to target sequence (fromDb)
    -query2Bit /path/query.2bit    Full path to query sequence (toDb)
    -targetSizes /path/target.chrom.sizes  Full path to target chrom.sizes (fromDb)
    -querySizes /path/query.chrom.sizes    Full path to query chrom.sizes (toDb)
    -chainRam Ng          Cluster RAM size for the chain step, default: -chainRam=$chainRam
    -chainCpu N           Number of cluster CPUs for the chain step, default: -chainCpu=$chainCpu
-    -localTmp /dev/shm    Full path to temporary storage for heavy I/O usage
+    -localTmp /tmp        Full path to temporary storage for heavy I/O usage
_EOF_
  ;
  print STDERR &HgAutomate::getCommonOptionHelp('dbHost' => $dbHost,
						'workhorse' => '',
						'fileServer' => '',
						'ram' => $ramG,
						'cpu' => $cpu,
						'bigClusterHub' => '');
  print STDERR "
Automates UCSC's same-species liftOver (blat/chain/net) pipeline, based on
Kate's suite of makeLo-* scripts:
    align:   Aligns the assemblies using blat -fastMap on a big cluster.
    chain:   Chains the alignments on a big cluster.
    net:     Nets the alignments, uses netChainSubset to extract liftOver chains.
    load:    Installs liftOver chain files, calls hgAddLiftOverChain on $dbHost.
    cleanup: Removes or compresses intermediate files.
All operations are performed in the build directory which is
$HgAutomate::clusterData/\$fromDb/$HgAutomate::trackBuild/blat.\$toDb.\$date
unless -buildDir is given.
";
  # Detailed help (-help):
  print STDERR "
Assumptions:
1. /scratch/data/\$db/\$db.2bit contains RepeatMasked sequence for
   database/assembly \$db.
2. $HgAutomate::clusterData/\$db/chrom.sizes contains all sequence names
   and sizes from \$db.2bit.
3. The \$db.2bit files have already been distributed to cluster-scratch
   (/scratch/data/<db>/).
" if ($detailed);
  print "\n";
  exit $status;
}


# Globals:
# Command line args: tDb=fromDb, qDb=toDb
my ($tDb, $qDb);
my $localTmp = &HgAutomate::tmpDir();	# do the right thing in UCSC environment
# Other:
my ($buildDir);
my ($tSeq, $tSizes, $qSeq, $qSizes, $QDb, $fileServer);
my ($liftOverChainDir, $liftOverChainFile, $liftOverChainPath, $dbExists);

sub checkOptions {
  # Make sure command line options are valid/supported.
  my $ok = GetOptions(@HgStepManager::optionSpec,
		      'buildDir=s',
		      'ooc=s',
		      'target2Bit=s',
		      'targetSizes=s',
		      'query2Bit=s',
		      'querySizes=s',
		      'chainRam=s',
		      'chainCpu=i',
		      'localTmp=s',
		      @HgAutomate::commonOptionSpec,
		      );
  &usage(1) if (!$ok);
  &usage(0, 1) if ($opt_help);
  &HgAutomate::processCommonOptions();
  my $err = $stepper->processOptions();
  usage(1) if ($err);
  $dbHost = $opt_dbHost if ($opt_dbHost);
  $localTmp = $opt_localTmp ? $opt_localTmp : $localTmp;
}

sub getClusterSeqs {
  # Choose cluster and look for already-installed 2bit files on appropriate
  # cluster-scratch storage.  Exit with an error message if we can't find them.
  my $paraHub = $opt_bigClusterHub ? $opt_bigClusterHub :
    &HgAutomate::chooseClusterByBandwidth();
  my ($tSeqScratch, $qSeqScratch);
  if ($opt_target2Bit) {
    $tSeqScratch = $opt_target2Bit
  } else {
    my @okFilesystems =
      &HgAutomate::chooseFilesystemsForCluster($paraHub, 'in');
    if ( -e "/scratch/data/$tDb/$tDb.2bit" ) {
      $tSeqScratch = "/scratch/data/$tDb/$tDb.2bit";
    } else {
      foreach my $fs (@okFilesystems) {
        &HgAutomate::verbose(1, "checking $fs/$tDb/$tDb.2bit\n");
        if (&HgAutomate::machineHasFile($paraHub, "$fs/$tDb/$tDb.2bit")) {
          $tSeqScratch = "$fs/$tDb/$tDb.2bit";
          last;
        }
      }
    }
    if (! defined $tSeqScratch) {
      die "align: can't find $tDb/$tDb.2bit in " .
        join("/, ", @okFilesystems) . "/ -- please distribute.\n";
    }
  }
  if ($opt_query2Bit) {
    $qSeqScratch = $opt_query2Bit;
  } else {
    my @okFilesystems =
      &HgAutomate::chooseFilesystemsForCluster($paraHub, 'in');
    if ( -e "/scratch/data/$qDb/$qDb.2bit" ) {
      $qSeqScratch = "/scratch/data/$qDb/$qDb.2bit";
    } else {
      foreach my $fs (@okFilesystems) {
        if (&HgAutomate::machineHasFile($paraHub, "$fs/$qDb/$qDb.2bit")) {
          $qSeqScratch = "$fs/$qDb/$qDb.2bit";
          last;
        }
      }
    }
    if (! defined $qSeqScratch) {
      die "align: can't find $qDb/$qDb.2bit in " .
        join("/, ", @okFilesystems) . "/ -- please distribute.\n";
    }
  }
  &HgAutomate::verbose(1, "Using $paraHub, $tSeqScratch and $qSeqScratch\n");
  return ($paraHub, $tSeqScratch, $qSeqScratch);
} # getClusterSeqs
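# The align step below splits each query partition into $splitSize-base
# chunks overlapping by $splitOverlap bases, so blat -fastMap stays within
# its query-size limit.  Worked example (illustrative) for a 12,000-base
# query sequence:
#   chunk 1: 0-5000, chunk 2: 4500-9500, chunk 3: 9000-12000
# A liftUp spec (query.lft) maps chunk coordinates back to full sequences.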
#########################################################################
# * step: align [bigClusterHub]

sub doAlign {
  my $runDir = "$buildDir/run.blat";
  &HgAutomate::mustMkdir($runDir);

  my $ooc = "/hive/data/genomes/$tDb/11.ooc";
  if ($opt_ooc) {
    if ($opt_ooc eq 'none') {
      $ooc = "";
    } else {
      $ooc = "$opt_ooc";
    }
  }
  my $dashOoc = "-ooc=$ooc";

  my $pslDir = "$runDir/psl";
  &HgAutomate::checkCleanSlate('align', 'chain', $pslDir, 'run.time');
  my ($paraHub, $tSeqScratch, $qSeqScratch) = &getClusterSeqs();

  if (! &HgAutomate::machineHasFile($paraHub, $ooc)) {
    die "align: $paraHub does not have $ooc -- if that is not the correct " .
      "location, please run again with -ooc.\n";
  }

  # script for a single job: split query partition further into
  # $splitSize bites while building a .lft liftUp spec; blat; lift.
  my $size = $splitSize;
  my $fh = &HgAutomate::mustOpen(">$runDir/job.csh");
  print $fh <<_EOF_
#!/bin/csh -ef

set targetList = \$1
set queryListIn = \$2
set outPsl = \$3

if (\$targetList:e == "lst") set targetList = $runDir/\$targetList
if (\$queryListIn:e == "lst") set queryListIn = $runDir/\$queryListIn

# Use local disk for output, and move the final result to \$outPsl
# when done, to minimize I/O.
set tmpDir = `mktemp -d -p $localTmp doSame.blat.XXXXXX`
pushd \$tmpDir

# We might get a .lst or a 2bit spec here -- convert to (list of) 2bit spec:
if (\$queryListIn:e == "lst") then
  set specList = `cat \$queryListIn`
else
  set specList = \$queryListIn
endif

# Further partition the query spec(s) into $splitSize coord ranges, building up
# a .lst of 2bit specs for blat and a .lft liftUp spec for the results:
cp /dev/null reSplitQuery.lst
cp /dev/null query.lft
foreach spec (\$specList)
  set file  = `echo \$spec | awk -F: '{print \$1;}'`
  set seq   = `echo \$spec | awk -F: '{print \$2;}'`
  set range = `echo \$spec | awk -F: '{print \$3;}'`
  set start = `echo \$range | awk -F- '{print \$1;}'`
  set end   = `echo \$range | awk -F- '{print \$2;}'`
  if (! -e q.sizes) twoBitInfo \$file q.sizes
  set seqSize = `awk '\$1 == "'\$seq'" {print \$2;}' q.sizes`
  set chunkEnd = '0'
  while (\$chunkEnd < \$end)
    set chunkEnd = `echo \$start $size | awk '{print \$1+\$2}'`
    if (\$chunkEnd > \$end) set chunkEnd = \$end
    set chunkSize = `echo \$chunkEnd \$start | awk '{print \$1-\$2}'`
    echo \$file\\:\$seq\\:\$start-\$chunkEnd >> reSplitQuery.lst
    if ((\$start == 0) && (\$chunkEnd == \$seqSize)) then
      echo "\$start \$seq \$seqSize \$seq \$seqSize" >> query.lft
    else
      echo "\$start \$seq"":\$start-\$chunkEnd \$chunkSize \$seq \$seqSize" >> query.lft
    endif
    set start = `echo \$chunkEnd $splitOverlap | awk '{print \$1-\$2}'`
  end
end

# Align unsplit target sequence(s) to .lst of 2bit specs for $splitSize chunks
# of query:
blat \$targetList reSplitQuery.lst tmpUnlifted.psl \\
  -tileSize=11 $dashOoc -minScore=100 -minIdentity=98 -fastMap -noHead

# Lift query coords back up:
liftUp -pslQ -nohead tmpOut.psl query.lft warn tmpUnlifted.psl

# Move final result into place:
mv tmpOut.psl \$outPsl

popd
rm -rf \$tmpDir
_EOF_
  ;
  close($fh);
  &HgAutomate::run("chmod a+x $runDir/job.csh");

  &HgAutomate::makeGsub($runDir,
      'job.csh $(path1) $(path2) {check out line ' . $pslDir .
      '/$(file1)/$(file2).psl}');

  my $paraRun = &HgAutomate::paraRun($blatRam, $blatCpu);
  my $whatItDoes = "It performs a cluster run of blat -fastMap.";
  my $bossScript = new HgRemoteScript("$runDir/doAlign.csh", $paraHub,
				      $runDir, $whatItDoes);
  # Don't allow target sequences to be split because we don't lift them
  # nor do we cat them before chaining.  Use the max target seq size
  # as the chunkSize for partitionSequence.pl on the target.
  my $tpSize = `awk '{print \$2;}' $tSizes | sort -nr | head -1`;
  chomp $tpSize;
  # However, $tDb might be a collection of zillions of tiny scaffolds.
  # So to ensure reasonable cluster batch size, make sure that chunkSize
  # is at least 10,000,000 for the target.
  my $minTpSize = 10000000;
  $tpSize = $minTpSize if ($tpSize < $minTpSize);
  my $gensub2 = &HgAutomate::gensub2();
  $bossScript->add(<<_EOF_
# Compute partition (coordinate ranges) for cluster job.  This does
# not need to be run on the build fileserver because it does not actually
# split any sequences -- it merely computes ranges based on the chrom.sizes.
rm -rf tParts
$Bin/partitionSequence.pl $tpSize 0 $tSeqScratch \\
    $tSizes 2000 \\
    -lstDir=tParts > t.lst
rm -rf qParts
$Bin/partitionSequence.pl 10000000 0 $qSeqScratch \\
    $qSizes 1000 \\
    -lstDir=qParts > q.lst
mkdir $pslDir
foreach f (`cat t.lst`)
  mkdir $pslDir/\$f:t
end
$gensub2 t.lst q.lst gsub jobList
$paraRun
_EOF_
  );
  $bossScript->execute();
} # doAlign
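# Note (illustrative): each align job writes one psl file under
# run.blat/psl/<targetPart>/, named either partNNN.lst.psl or
# <qDb>.2bit:<seq>:<start>-<end>.psl; the chain step below groups these
# into per-target-part patterns via makePslPartsLst.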
#########################################################################
# * step: chain [smallClusterHub]

sub makePslPartsLst {
  # $pslDir/$tPart/ files look like either
  #   part006.lst.psl --> make this into a single job.
  #   $qDb.2bit:$seq:$start-$end.psl --> cat $qDb.2bit:$seq:*.psl into a job.
  # ==> Make a list of any part*.lst.psl plus collapsed roots
  #     (one $qDb.2bit:$seq: per $seq).
  my ($pslDir) = @_;
  &HgAutomate::verbose(2, "Making a list of patterns from the contents of " .
		       "$pslDir\n");
  return if ($opt_debug);
  opendir(P, "$pslDir")
    || die "Couldn't open directory $pslDir for reading: $!\n";
  my @tParts = readdir(P);
  closedir(P);
  my $fh = &HgAutomate::mustOpen(">$buildDir/run.chain/pslParts.lst");
  my $totalCount = 0;
  foreach my $tPart (@tParts) {
    next if ($tPart =~ /^\.\.?$/);
    my %seqs = ();
    my $count = 0;
    opendir(P, "$pslDir/$tPart")
      || die "Couldn't open directory $pslDir/$tPart for reading: $!\n";
    my @qParts = readdir(P);
    closedir(P);
    foreach my $q (@qParts) {
      if ($q =~ /^part\d+.lst.psl$/) {
	print $fh "$tPart/$q\n";
	$count++;
      } elsif ($q =~ s@^(\S+\.2bit:\w+):\d+.*@$1@) {
	# Collapse subsequences (subranges of a sequence) down to one entry
	# per sequence:
	$seqs{$q} = 1;
      } elsif ($q ne '.' && $q ne '..') {
	warn "makePslPartsLst: Unrecognized partition file format \"$q\"";
      }
    }
    foreach my $q (keys %seqs) {
      print $fh "$tPart/$q:\n";
      $count++;
    }
    if ($count < 1) {
      die "makePslPartsLst: didn't find anything in $pslDir/$tPart/ .";
    }
    $totalCount += $count;
  }
  close($fh);
  if ($totalCount < 1) {
    die "makePslPartsLst: didn't find anything in $pslDir/ .";
  }
  &HgAutomate::verbose(2, "Found $totalCount patterns in $pslDir/*/.\n");
} # makePslPartsLst

sub doChain {
  my $runDir = "$buildDir/run.chain";
  &HgAutomate::mustMkdir($runDir);
  my $pslDir = "$buildDir/run.blat/psl";
  my $blatDoneFile = "$buildDir/run.blat/run.time";
  &HgAutomate::checkCleanSlate('chain', 'net', 'chainRaw');
  &HgAutomate::checkExistsUnlessDebug('align', 'chain', $pslDir, $blatDoneFile);

  &makePslPartsLst($pslDir);

  my ($paraHub, $tSeqScratch, $qSeqScratch) = &getClusterSeqs();

  # script for a single job: cat inputs if necessary and chain.
  my $fh = &HgAutomate::mustOpen(">$runDir/job.csh");
  print $fh <<_EOF_
#!/bin/csh -ef

set inPattern = \$1
set outChain = \$2

set tmpOut = `mktemp -p $localTmp doSame.chain.XXXXXX`
cat $pslDir/\$inPattern* \\
| axtChain -verbose=0 -linearGap=medium -psl stdin \\
    $tSeqScratch $qSeqScratch stdout \\
| chainBridge -linearGap=medium stdin $tSeqScratch $qSeqScratch \\
    \$tmpOut
mv \$tmpOut \$outChain
chmod 664 \$outChain
_EOF_
  ;
  close($fh);
  &HgAutomate::run("chmod a+x $runDir/job.csh");

  &HgAutomate::makeGsub($runDir, 'job.csh $(path1) ' .
		'{check out line+ chainRaw/$(path1).chain}');

  my $whatItDoes = "It does a cluster run to chain the blat alignments.";
  my $bossScript = new HgRemoteScript("$runDir/doChain.csh", $paraHub,
				      $runDir, $whatItDoes);
  my $paraRun = &HgAutomate::paraRun($chainRam, $chainCpu);
  my $gensub2 = &HgAutomate::gensub2();
  $bossScript->add(<<_EOF_
mkdir chainRaw
foreach d ($pslDir/*)
  mkdir chainRaw/\$d:t
end
$gensub2 pslParts.lst single gsub jobList
$paraRun
_EOF_
  );
  $bossScript->execute();
} # doChain
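# The net step below proceeds roughly: chainMergeSort (merge + unique chain
# IDs) -> chainSplit -> chainNet per split -> netChainSubset | chainStitchId
# to produce the final liftOver chain file.  (Informal summary of the csh
# generated in doNet; the here-doc there is the authoritative sequence.)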
#########################################################################
# * step: net [workhorse]

sub doNet {
  my $runDir = "$buildDir/run.chain";
  my @outs = ("$runDir/$tDb.$qDb.all.chain.gz",
	      "$runDir/$tDb.$qDb.noClass.net.gz");
  &HgAutomate::checkCleanSlate('net', 'load', @outs);
  &HgAutomate::checkExistsUnlessDebug('chain', 'net', "$runDir/chainRaw/");

  my $whatItDoes = "It nets the chained blat alignments and runs " .
    "netChainSubset to produce liftOver chains.";
  my $mach = &HgAutomate::chooseWorkhorse();
  my $bossScript = new HgRemoteScript("$runDir/doNet.csh", $mach,
				      $runDir, $whatItDoes);
  my $chromBased = (`wc -l < $tSizes` <= $HgAutomate::splitThreshold);
  my $lump = $chromBased ? "" : "-lump=100";
  $bossScript->add(<<_EOF_
# Use local scratch disk... this can be quite I/O intensive:
set tmpDir = `mktemp -d -p $localTmp doSame.blat.XXXXXX`

# Merge up the hierarchy and assign unique chain IDs:
mkdir \$tmpDir/chainMerged
foreach d (chainRaw/*)
  set tChunk = \$d:t
  chainMergeSort \$d/*.chain > \$tmpDir/chainMerged/\$tChunk.chain
end
chainMergeSort \$tmpDir/chainMerged/*.chain \\
| chainSplit $lump \$tmpDir/chainSplit stdin
endsInLf \$tmpDir/chainSplit/*.chain
rm -rf \$tmpDir/chainMerged/

mkdir \$tmpDir/netSplit \$tmpDir/overSplit
foreach f (\$tmpDir/chainSplit/*.chain)
  set split = \$f:t:r
  chainNet \$f \\
    $tSizes $qSizes \\
    \$tmpDir/netSplit/\$split.net /dev/null
  netChainSubset \$tmpDir/netSplit/\$split.net \$f stdout \\
  | chainStitchId stdin \$tmpDir/overSplit/\$split.chain
end
endsInLf \$tmpDir/netSplit/*.net
endsInLf \$tmpDir/overSplit/*.chain

cat \$tmpDir/chainSplit/*.chain | gzip -c > $tDb.$qDb.all.chain.gz
cat \$tmpDir/netSplit/*.net | gzip -c > $tDb.$qDb.noClass.net.gz
cat \$tmpDir/overSplit/*.chain | gzip -c > $buildDir/$liftOverChainFile

rm -rf \$tmpDir/
_EOF_
  );
  $bossScript->execute();
} # doNet
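# The load step has two modes: if $tDb has a database on $dbHost, it links
# the chain file under gbdb/ and goldenPath/ and registers it with
# hgAddLiftOverChain; otherwise it builds bigChain/bigLink bigBed files for
# a database-less (hub-style) installation.  (Summary of doLoad below.)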
#########################################################################
# * step: load [dbHost]

sub doLoad {
  my $runDir = "$buildDir";
  &HgAutomate::checkExistsUnlessDebug('net', 'load',
				      "$buildDir/$liftOverChainFile");

  my $whatItDoes = "It makes links from $HgAutomate::gbdb/ and goldenPath/ " .
    "(download area) to the liftOver chains file, and calls " .
    "hgAddLiftOverChain to register the $HgAutomate::gbdb location.";
  my $bossScript = new HgRemoteScript("$runDir/doLoad.csh", $dbHost,
				      $runDir, $whatItDoes);
  if ($dbExists) {
    $bossScript->add(<<_EOF_
# Link to standardized location of liftOver files:
mkdir -p $liftOverChainDir
rm -f $liftOverChainPath
ln -s $buildDir/$liftOverChainFile $liftOverChainPath
-set tmpFile = `mktemp -t -p /dev/shm tmpMd5.XXXXXX`
+set tmpFile = `mktemp -t -p /tmp tmpMd5.XXXXXX`
csh -c "grep -v $liftOverChainFile $liftOverChainDir/md5sum.txt || true" > \$tmpFile
md5sum $buildDir/$liftOverChainFile | sed -e "s#$buildDir/##;" >> \$tmpFile
sort \$tmpFile > $liftOverChainDir/md5sum.txt
rm -f \$tmpFile

# Link from download area:
mkdir -p $HgAutomate::goldenPath/$tDb/liftOver
rm -f $HgAutomate::goldenPath/$tDb/liftOver/$liftOverChainFile
ln -s $liftOverChainPath $HgAutomate::goldenPath/$tDb/liftOver/

# Link from genome browser fileserver:
mkdir -p $HgAutomate::gbdb/$tDb/liftOver
rm -f $HgAutomate::gbdb/$tDb/liftOver/$liftOverChainFile
ln -s $liftOverChainPath $HgAutomate::gbdb/$tDb/liftOver/

# Add an entry to liftOverChain table in central database (specified in
# ~/.hg.conf) so that hgLiftOver will know that this is available:
hgAddLiftOverChain $tDb $qDb
_EOF_
    );
  } else {
    $bossScript->add(<<_EOF_
hgLoadChain -test -noBin -tIndex $tDb chain$QDb $buildDir/$liftOverChainFile
wget --no-check-certificate -O bigChain.as 'https://raw.githubusercontent.com/ucscGenomeBrowser/kent/refs/heads/master/src/hg/lib/bigChain.as'
wget --no-check-certificate -O bigLink.as 'https://raw.githubusercontent.com/ucscGenomeBrowser/kent/refs/heads/master/src/hg/lib/bigLink.as'
sed 's/.000000//' chain.tab | awk 'BEGIN {OFS="\\t"} {print \$2, \$4, \$5, \$11, 1000, \$8, \$3, \$6, \$7, \$9, \$10, \$1}' > chain${QDb}.tab
bedToBigBed -type=bed6+6 -as=bigChain.as -tab chain${QDb}.tab $tSizes chain${QDb}.bb
awk 'BEGIN {OFS="\\t"} {print \$1, \$2, \$3, \$5, \$4}' link.tab | sort -k1,1 -k2,2n > chain${QDb}Link.tab
bedToBigBed -type=bed4+1 -as=bigLink.as -tab chain${QDb}Link.tab $tSizes chain${QDb}Link.bb
set totalBases = `ave -col=2 $tSizes | grep "^total" | awk '{printf "%d", \$2}'`
set basesCovered = `bedSingleCover.pl chain${QDb}Link.tab | ave -col=4 stdin | grep "^total" | awk '{printf "%d", \$2}'`
set percentCovered = `echo \$basesCovered \$totalBases | awk '{printf "%.3f", 100.0*\$1/\$2}'`
printf "%d bases of %d (%s%%) in intersection\\n" "\$basesCovered" "\$totalBases" "\$percentCovered" > fb.$tDb.chain.${QDb}Link.txt
rm -f link.tab chain.tab bigChain.as bigLink.as chain${QDb}.tab chain${QDb}Link.tab
_EOF_
    );
  }
  $bossScript->execute();
} # doLoad

#########################################################################
# * step: cleanup [fileServer]

sub doCleanup {
  my $runDir = "$buildDir";
  my $whatItDoes = "It cleans up or compresses intermediate files.";
  $fileServer = &HgAutomate::chooseFileServer($runDir);
  my $bossScript = new HgRemoteScript("$runDir/doCleanup.csh", $fileServer,
				      $runDir, $whatItDoes);
  my $pslDir = "run.blat/psl";
  $bossScript->add(<<_EOF_
rm -rf $pslDir/
rm -rf run.chain/chainRaw/
rm -rf run.chain/chainMerged/
rm -rf run.chain/chainSplit/
rm -rf run.chain/netSplit/
rm -rf run.chain/overSplit/
_EOF_
  );
  $bossScript->execute();
} # doCleanup

sub getSeqAndSizes {
  if ($opt_target2Bit) {
    $tSeq = $opt_target2Bit
  } else {
    # Test assumptions about 2bit and chrom.sizes files.
    $tSeq = "/scratch/data/$tDb/$tDb.2bit";
    if (! -e $tSeq) {
      # allow it to exist here too:
      my $fs = "$HgAutomate::clusterData";
      &HgAutomate::verbose(1, "checking $fs/$tDb/$tDb.2bit\n");
      if (-e "$fs/$tDb/$tDb.2bit") {
        $tSeq = "$fs/$tDb/$tDb.2bit";
      }
    }
  }
  if ($opt_targetSizes) {
    $tSizes = $opt_targetSizes;
  } else {
    $tSizes = "$HgAutomate::clusterData/$tDb/chrom.sizes";
  }
  if ($opt_query2Bit) {
    $qSeq = $opt_query2Bit;
  } else {
    $qSeq = "/scratch/data/$qDb/$qDb.2bit";
    if (! -e $qSeq) {
      # allow it to exist here too:
      my $fs = "$HgAutomate::clusterData";
      &HgAutomate::verbose(1, "checking $fs/$qDb/$qDb.2bit\n");
      if (-e "$fs/$qDb/$qDb.2bit") {
        $qSeq = "$fs/$qDb/$qDb.2bit";
      }
    }
  }
  if ($opt_querySizes) {
    $qSizes = $opt_querySizes;
  } else {
    $qSizes = "$HgAutomate::clusterData/$qDb/chrom.sizes";
  }
  my $problem = 0;
  foreach my $file ($tSeq, $tSizes, $qSeq, $qSizes) {
    if (! -e $file) {
      warn "Error: cannot find required file \"$file\"\n";
      $problem = 1;
    }
  }
  if ($problem && !$opt_debug) {
    warn "Run $base -help for a description of expected files.\n";
    exit 1;
  }
}
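# Required inputs (checked by getSeqAndSizes above): target and query .2bit
# plus chrom.sizes files, found under /scratch/data/<db>/ or
# $HgAutomate::clusterData/<db>/, or given explicitly with -target2Bit,
# -targetSizes, -query2Bit, -querySizes.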
#########################################################################
# main

# Prevent "Suspended (tty input)" hanging:
&HgAutomate::closeStdin();

&checkOptions();

&usage(1) if (scalar(@ARGV) != 2);
($tDb, $qDb) = @ARGV;

# may be working on a 2bit file that does not have a database browser
$dbExists = 0;
$dbExists = 1 if (&HgAutomate::databaseExists($dbHost, $tDb));

&getSeqAndSizes();
$QDb = ucfirst($qDb);
$liftOverChainDir = "$HgAutomate::clusterData/$tDb/$HgAutomate::trackBuild/liftOver";
$liftOverChainFile = "${tDb}To${QDb}.over.chain.gz";
$liftOverChainPath = "$liftOverChainDir/$liftOverChainFile";
$chainRam = $opt_chainRam ? $opt_chainRam : $chainRam;
$chainCpu = $opt_chainCpu ? $opt_chainCpu : $chainCpu;
$blatRam = $opt_ram ? $opt_ram : $blatRam;
$blatCpu = $opt_cpu ? $opt_cpu : $blatCpu;

my $date = `date +%Y-%m-%d`;
chomp $date;
$buildDir = $opt_buildDir ? $opt_buildDir :
  "$HgAutomate::clusterData/$tDb/$HgAutomate::trackBuild/blat.$qDb.$date";

if (! -d $buildDir) {
  if ($stepper->stepPrecedes('align', $stepper->getStartStep())) {
    die "$buildDir does not exist; try running again with -buildDir.\n";
  }
  &HgAutomate::mustMkdir($buildDir);
}

$stepper->execute();

my $stopStep = $stepper->getStopStep();
my $upThrough = ($stopStep eq 'cleanup') ? "" :
  " (through the '$stopStep' step)";

&HgAutomate::verbose(1,
	"\n *** All done!$upThrough\n");
&HgAutomate::verbose(1,
	" *** Steps were performed in $buildDir\n");
if ($stepper->stepPrecedes('net', $stopStep)) {
  &HgAutomate::verbose(1,
	" *** Test installation ($HgAutomate::gbdb, goldenPath, hgLiftover " .
	"operation) on $dbHost.\n");
}
&HgAutomate::verbose(1, "\n");