src/hg/utils/automation/blastz-run-ucsc 3f7ec313053fa504911f69798236f571b8932e3f

3f7ec313053fa504911f69798236f571b8932e3f
hiram
  Sun May 17 13:55:40 2020 -0700
fixup use of type command for debian dash shell refs #25528

diff --git src/hg/utils/automation/blastz-run-ucsc src/hg/utils/automation/blastz-run-ucsc
index 0e6528d..74fc78d 100755
--- src/hg/utils/automation/blastz-run-ucsc
+++ src/hg/utils/automation/blastz-run-ucsc
@@ -1,728 +1,728 @@
 #!/usr/bin/env perl
 
 # DO NOT EDIT the /cluster/bin/scripts copy of this file --
 # edit ~/kent/src/hg/utils/automation/blastz-run-ucsc instead.
 
 # Based on Scott Schwartz's bash script blastz-run;
 # rewritten in perl and extended to handle .2bit inputs and
 # multi-record fasta for target as well as query by Angie Hinrichs.
 
 # $Id: blastz-run-ucsc,v 1.11 2009/08/13 23:15:57 hiram Exp $
 
 use Getopt::Long;
 use strict;
 use warnings;
 
 use vars qw/
     $opt_outFormat
     $opt_gz
     $opt_dropSelf
     $opt_help
     /;
 
 sub usage {
   my ($status) = @_;
   my $base = $0;
   $base =~ s/^(.*\/)?//;
     print STDERR "
 usage: $base target query DEF out
 options:
     -outFormat (psl|axt)  Convert blastz output from lav to psl or axt.
     -gz                   Compress output with gzip.
     -dropSelf             When converting to psl or axt, drop alignment
                           blocks that cross the diagonal (trivial self-
                           alignments).
     Unpacks sequence (if necessary), runs blastz and lifts/\"normalizes\"
     output (if necessary).
     Target and query can be list files containing one sequence specifier
     per line, or sequence specifiers.  A sequence specifier is a .2bit,
     .nib or .fa(.gz) files, possibly with a sequence and range specifier
     appended.
     DEF is a Scott Schwartz-style bash script containing blastz parameters.
     out is the (lifted/\"normalized\") output of blastz (optionally converted
     to axt or psl, and/or gzipped).  This script does not pay attention to
     out's file suffixes -- it is up to you to make sure that they are
     consistent with output format and compression options.
 \n";
   exit $status;
 }
 
 sub checkOptions {
   # Make sure command line options are valid/supported.
   my $ok = GetOptions("outFormat=s",
 		      "gz",
 		      "dropSelf",
 		      "help");
   &usage(1) if (!$ok);
   &usage(0) if ($opt_help);
   if ($opt_outFormat) {
     $opt_outFormat =~ tr/A-Z/a-z/;
     if ($opt_outFormat eq 'lav') {
       $opt_outFormat = undef;
     } elsif ($opt_outFormat !~ /^(psl|axt)$/) {
       print STDERR "\nUnsupported -outFormat $opt_outFormat.\n";
       &usage(1);
     }
   }
   if ($opt_dropSelf && !($opt_outFormat && $opt_outFormat =~ /^(psl|axt)$/)) {
     print STDERR "\n-dropSelf is only supported for -outFormat axt or psl.\n";
     &usage(1);
   }
 }
 
 # my $machtype = $ENV{'MACHTYPE'};
 my $machtype = "x86_64";
 # This hash will contain the params defined in the Scott-style DEF script:
 my %defVars = ();
 # Temporary local directory for intermediate files:
 my $TMP;
 # For lifting, we'll need to know the sequence sizes:
 my %tSizes = ();
 my %qSizes = ();
 my $machType = `uname -m`;
 chomp $machType;
 $machType = "i386" if ($machType eq "i686");
 my @potentialPath = ("/cluster/bin/penn/$machType", "/cluster/bin/penn",
 		   "/cluster/bin/scripts", "/cluster/bin/$machType",
 		   "/bin", "/usr/bin");
 # this defaultPath will be cleaned up at start of program
 my $defaultPath = ("/cluster/bin/penn/$machType:/cluster/bin/penn:" .
 		   "/cluster/bin/scripts:/cluster/bin/$machType:" .
 		   '/bin:/usr/bin');
 
 # these commands will be filled in with full path names to
 # avoid constant shell searches for commands through PATH
 # more efficient when commands are often repeated in a cluster job
 my $rmCmd = "rm";
 my $cpCmd = "cp";
 my $mvCmd = "mv";
 my $shCmd = "sh";
 my $lsCmd = "ls";
 my $catCmd = "cat";
 my $gzipCmd = "gzip";
 my $gunzipCmd = "gunzip";
 my $twoBitToFaCmd = "twoBitToFa";
 
 sub cleanDie {
   # Clean up $TMP (if it has been created) and then die.
   my ($msg) = @_;
   if ($TMP && -d $TMP) {
     system("$shCmd", "-c", "$rmCmd -rf $TMP");
   }
   die $msg;
 }
 
 sub run {
   # Run a command in sh, with PATH specified in %defVars.
   my ($cmd) = @_;
   my $setPath = "export PATH=";
   $setPath .= "$defVars{PATH}:" if (defined $defVars{'PATH'});
   $setPath .= "$defaultPath; ";
   my $failPath = "set -xbeEu -o pipefail; ";
   system("$shCmd", "-c", $setPath . $failPath . $cmd) == 0 ||
     &cleanDie("Command failed:\n$setPath\n$cmd\n");
 }
 
 sub findCommand {
   my ($cmd) = @_;
   my $setPath = "PATH=";
   $setPath .= "$defVars{PATH}:" if (defined $defVars{'PATH'});
   $setPath .= "$defaultPath; ";
-  my $which = qx{$setPath type -ap $cmd 2> /dev/null | head -1};
+  my $which = qx{$setPath type -ap $cmd 2> /dev/null | grep -v "not found" | head -1 | awk '{print \$NF}' };
   chomp $which;
   $which = $cmd if ($which eq "");
   return $which;
 }
 
 sub nfsNoodge {
   # Try to access the given file/dir; if it fails, don't die, just
   # wait a second, try again.
   # Scott's blastz-run included a similar trick.
   my ($file) = @_;
   my $ret = system("$shCmd", "-c", "$lsCmd -d $file > /dev/null");
   if ($ret != 0) {
     sleep 1;
     $ret = system("$shCmd", "-c", "$lsCmd -d $file > /dev/null");
     sleep 1 if ($ret != 0);
   }
 }
 
 sub loadDef {
   # Read parameters from a bash script with Scott's param variable names:
   my ($def) = @_;
   &nfsNoodge($def);
   open(DEF, "$def") || &cleanDie("Can't open def file $def: $!\n");
   while (<DEF>) {
     s/^\s*export\s+//;
     next if (/^\s*#/ || /^\s*$/);
     if (/(\w+)\s*=\s*(.*)/) {
       my ($var, $val) = ($1, $2);
       while ($val =~ /\$(\w+)/) {
 	my $subst = $defVars{$1};
 	$val =~ s/\$$1/$subst/ if (defined $subst);
       }
       $defVars{$var} = $val;
     }
   }
   close(DEF);
 }
 
 sub loadSeqSizes {
   # Load up sequence -> size mapping from $sizeFile into $hashRef.
   my ($sizeFile, $hashRef) = @_;
   &nfsNoodge($sizeFile);
   open(SIZE, "$sizeFile") || &cleanDie("Can't open size file $sizeFile: $!\n");
   while (<SIZE>) {
     chomp;
     my ($seq, $size) = split;
     $hashRef->{$seq} = $size;
   }
   close(SIZE);
 }
 
 sub fileBase {
   # Get rid of leading path and .suffix (and .gz too if it's there).
   my ($path) = @_;
   $path =~ s/^(.*\/)?(\S+)\.\w+(\.gz)?/$2/;
   return $path;
 }
 
 sub parseFileSpec {
   # Take a file spec with possible seq name, start and end specifiers and
   # split into components.
   my ($info, $dir) = @_;
   my ($fName, $seq, $start, $end);
   if ($info =~ /^(\S+):(\S+):(\d+)-(\d+)$/) {
     ($fName, $seq, $start, $end) = ($1, $2, $3, $4);
   } elsif ($info =~ /^(\S+):(\d+)-(\d+)$/) {
     ($fName, $seq, $start, $end) = ($1, &fileBase($info), $2, $3);
   } else {
     ($fName, $seq, $start, $end) = ($info, &fileBase($info), 0, 0);
   }
   $fName = "$dir/$fName" if ($fName !~ /^[\/.]/);
   return ($fName, $seq, $start, $end);
 }
 
 sub enumerateFileSpecs {
   # Given a sequence file name, sequence file name + offset spec, or
   # a name of a file containing a list of such file specs, return a
   # list of file specs.
   my ($listOrSpec, $seqDir) = @_;
   my @specs = ();
   my ($fName, $seq, $start, $end) = &parseFileSpec($listOrSpec, $seqDir);
   if ($fName =~ /\.(fa(\.gz)?|nib|2bit)$/) {
     # It appears to be a single spec not a list.
     push @specs, $listOrSpec;
   } else {
     # It doesn't have a recognized file ending -- it had better be a list.
     # Assume each line contains a single spec.
     &nfsNoodge($listOrSpec);
     open(LST, "$listOrSpec")
       || &cleanDie("Couldn't open list file $listOrSpec: $!\n");
     while (<LST>) {
       chomp;
       push @specs, $_;
     }
     close(LST);
   }
   return @specs;
 }
 
 sub collapseFileSpecs {
   # If we get a big old list of query sequences that are not subranged,
   # then cat them all into one big .fa file so we can invoke blastz
   # only once, and don't have to lift afterwards.
   my @fileSpecs = @_;
   my $collapsed = 0;
   if (scalar(@fileSpecs) == 1) {
     # Only one spec in the list ==> no collapsing to do.
     return ($collapsed, @fileSpecs);
   }
   # Now see if there are any specs that look like subranges:
   foreach my $spec (@fileSpecs) {
     my ($fname, $seq, $start, $end) = &parseFileSpec($spec, ".");
     if ($start > 0) {
       # Found a subrange; no collapsing.
       return ($collapsed, @fileSpecs);
     }
   }
   # OK, we're in the clear to concatenate all of these complete sequences
   # onto one local file.
   my $big = "$TMP/bigCollapse.fa";
   # avoiding exec of shell here instead of going through &run
   # and making an empty new file $big
   &run("$cpCmd /dev/null $big");
   foreach my $spec (@fileSpecs) {
     my ($seq, $local) = unpackForBlastz($spec, '.', 'tmp');
     $local =~ s/\[\S+\]$//;
     if ($local =~ /\.nib$/) {
       my $nib = $local;
       $local =~ s/\.nib/.fa/;
       $local =~ s/(.*\/)/tmp./;
       my $size = `nibSize $nib | awk '{print \$3;}'`;
       chomp $size;
       &run("nibFrag -masked $nib 0 $size + $local");
     }
     &run("$catCmd $local >> $big");
     &run("$rmCmd -f $local") if ($local =~ /^$TMP/);
   }
   $collapsed = 1;
   @fileSpecs = ($big);
   return ($collapsed, @fileSpecs);
 }
 
 sub getBlastzParams {
   # Look for BLASTZ_? param specs in %defVars; turn into blastz command line
   # option string.  (blastz command line option names are all 1-character,
   # and all 1-char BLASTZ_? DEF params are blastz command line options.)
   my $params = " ";
   foreach my $p (keys %defVars) {
     if ($p =~ /^BLASTZ_(\w)$/) {
       $params .= "$1=$defVars{$p} ";
     }
   }
   return $params;
 }
 
 sub blastzRangeSpec {
   # Return a blastz-style range spec unless a null range is passed in.
   my ($start, $end) = @_;
   if ($end != 0) {
     $start += 1;
     return "[$start,$end]";
   } else {
     return "";
   }
 }
 
 sub unpackForBlastz {
   # If the sequence file is .fa.gz or .2bit, uncompress/extract fa into
   # a local file;  if it's something that blastz already supports like
   # .fa or .nib, translate the offset spec (if any) into blastz's format.
   my ($spec, $seqDir, $prefix) = @_;
   my ($fname, $seq, $start, $end) = &parseFileSpec($spec, $seqDir);
   my $local = "$TMP/$prefix.$seq.$start.$end.fa";
   if ($fname =~ /\.2bit$/) {
     # Don't use twoBitToFa's range extraction capabilities -- that
     # confuses blastz/normalizeLav.  Just extract the whole thing as
     # fa and tell blastz what range we're using:
     &run("$twoBitToFaCmd $fname:$seq $local");
     $local .= &blastzRangeSpec($start, $end);
   } elsif ($fname =~ /\.fa(\.gz)?$/) {
     if ($1) {
       &run("$gunzipCmd -c $fname > $local");
     } else {
       $local = $fname;
     }
     $local .= &blastzRangeSpec($start, $end);
   } elsif ($fname =~ /\.nib$/) {
     $local = $fname;
     $local .= &blastzRangeSpec($start, $end);
   } else {
     &cleanDie("Don't know what type of sequence file this is: $fname");
   }
   return ($seq, $local);
 }
 
 sub parseBlastzSpec {
   # Extract info back out of blastz spec.  Leave start 1-based.
   my ($spec) = @_;
   my ($fname, $seq, $start, $end);
   if ($spec =~ /^(\S*\/)?(\S+)\.(fa|nib)(\[(\d+),(\d+)\])?$/) {
     $fname = "$1$2.$3";
     $seq = $2;
     if ($4) {
       $start = $5;
       $end = $6;
     } else {
       $start = 0;
       $end = 0;
     }
   } else {
     &cleanDie("Can't parse this blastz spec: $spec");
   }
   return ($fname, $seq, $start, $end);
 }
 
 sub emptyLav {
   #  satisify later processing steps with an empty lav result
   my ($raw) = @_;
   open (FH,">$raw") || &cleanDie("emptyLav: can not write to $raw $!\n");
   print FH <<_EOF_
 #:lav
 d {
   "blastz target query H=2000
      A    C    G    T
     91 -114  -31 -123
   -114  100 -125  -31
    -31 -125  100 -114
   -123  -31 -114   91
   O = 400, E = 30"
 }
 m {
   n 0
 }
 #:eof
 _EOF_
   ;
   close (FH);
 }
 
 sub checkSubRange {
 # given an fa or nib file name: file.fa|nib[one_based_start,end]
 # verify that bit of sequence has usable sequence for blastz
   my ($faNibFile) = @_;
   my ($file, $seq, $one_start, $end) = &parseBlastzSpec($faNibFile);
   my $start = $one_start - 1;  # 1 based becomes 0 based coord
   my $answer = 0;  # default answer is usable sequence exists
   return($answer) if ((0 == $end) || (($end - $start) > 10000));
   if ($file =~ m/.nib$/) {
     $answer =
       `nibFrag -masked $file $start $end "+" stdout | faSize stdin | grep " 0 upper " | wc -l`;
   } else {
     $answer =
       `faFrag -mixed $file $start $end stdout | faSize stdin | grep " 0 upper " | wc -l`;
   }
   return($answer);
 }
 
 sub plainBlastz {
   # just run blastz -- target and query can have "[$start,$end]" specifiers
   # where $start is 1-based.  Put output in file name $raw.
   my ($target, $query, $options, $raw) = @_;
   my $answer = 1;  # default answer is that blastz was run OK
   my $blastz = $defVars{'BLASTZ'} || "lastz";
   #  these variables will be 1 if there is zero usable sequence
   #  this could be a more extensive check than just for zero.
   my $targetNoSequence = 0;
   if ($target !~ m/.2bit$/) {
     if ($target =~ m/\[/) {
       $targetNoSequence = &checkSubRange($target);
     } else {
       $targetNoSequence =
 	`cat $target | faSize stdin | grep " 0 upper " | wc -l`;
     }
   }
   chomp $targetNoSequence;
   my $queryNoSequence = 0;
   if ($query =~ m/\[/) {
     $queryNoSequence = &checkSubRange($query);
   } elsif ($query !~ m/.2bit/) {
     $queryNoSequence =
 	`cat $query | faSize stdin | grep " 0 upper " | wc -l`;
   }
   chomp $queryNoSequence;
   if ($targetNoSequence || $queryNoSequence) {
     &emptyLav($raw);
     $answer = 0;  # not really running blastz
   } else {
     if ($target =~ m/.2bit$/) {
       $target .= "[multiple]";
       $options .= " --format=axt+";
     }
     &run("$blastz $target $query $options > $raw");
   }
   return($answer);
 }
 
 sub selectRpts {
   # This used to be an awk, piped to select_rpts, piped to sort -n.
   # select_rpts is a little perl script, and getting the DEF PATH into the
   # pipe command seems more trouble than it's worth -- so inline not only
   # the awk but also the select_rpts here.  Still pipe to sort -n just in case.
   my ($allRepFile, $start, $end, $selRepFile) = @_;
   &nfsNoodge($allRepFile);
   open(ALL, "<$allRepFile") || &cleanDie("Can't open $allRepFile: $!\n");
   open(SEL, "| sort -n > $selRepFile")
     || &cleanDie("Can't open pipe to sort -n > $selRepFile: $!\n");
   print SEL "\%:repeats\n";
   while (<ALL>) {
     if (/^\s*\d+/) {
       my @words = split;
       my ($repStart, $repEnd) = ($words[5], $words[6]);
       next if ((! defined $repStart) || (! defined $repEnd));
       $repStart += 1;
       next if ($repStart > $repEnd);
       next if (($repEnd < $start) || ($repStart > $end));
       $repStart = $start if ($repStart < $start);
       $repEnd   = $end   if ($repEnd   > $end);
       $repStart -= ($start - 1);
       $repEnd   -= ($start - 1);
       print SEL "$repStart $repEnd rpt\n";
     }
   }
   close(SEL);
   close(ALL);
 }
 
 sub abridgeLinSpecReps {
   # Before running blastz, snip out lineage-specific repeats from each
   # sequence.  Then run blastz only on that strand.  After running blastz,
   # fix up the alignment coords to reflect the original sequence including
   # the lineage-specific repeats.
   # Currently this only works on per-chromosome sequence and repeat files.
   my ($tSpec, $qSpec, $blastzOptions, $raw) = @_;
   my ($tFname, $tSeq, $tStart, $tEnd) = &parseBlastzSpec($tSpec);
   my ($qFname, $qSeq, $qStart, $qEnd) = &parseBlastzSpec($qSpec);
 
   if ($tEnd == 0 || $qEnd == 0) {
     &cleanDie("When abridging repeats, sequence filenames must be followed " .
 	      "by range specs even if the whole sequence is aligned.\n");
   }
 
   my $S1X = "$defVars{SEQ1_SMSK}/$tSeq.out.spec";
   my $S2X = "$defVars{SEQ2_SMSK}/$qSeq.out.spec";
   &cleanDie("$S1X not found") if (! -f $S1X);
   &cleanDie("$S2X not found") if (! -f $S2X);
 
   my $TS1X = "$TMP/s1.rpts";
   my $TS2X = "$TMP/s2.rpts";
   my $TS1 = "$TMP/s1.fa";
   my $TS2 = "$TMP/s2.fa";
   my $TS1S = "$TMP/s1.strip.fa";
   my $TS2S = "$TMP/s2.strip.fa";
   my $TS2R = "$TMP/s2.rev.fa";
   my $TS2SR = "$TMP/s2.strip.rev.fa";
   my $TZF = "$TMP/fwd.lav";
   my $TZR = "$TMP/rev.lav";
   my $TZS = "$TMP/std.lav";
 
   # Get repeats in range:
   &selectRpts($S1X, $tStart, $tEnd, $TS1X);
   &selectRpts($S2X, $qStart, $qEnd, $TS2X);
 
   # Get sequence in range:
   &run("fasta-subseq $tFname $tStart $tEnd > $TS1");
   &run("fasta-subseq $qFname $qStart $qEnd > $TS2");
 
   # Snip out the repeats:
   &run("strip_rpts $TS1 $TS1X > $TS1S");
   &run("strip_rpts $TS2 $TS2X > $TS2S");
 
   # Run blastz on query forward strand and restore repeats:
   my $blastzOK = &plainBlastz($TS1S, $TS2S, "B=0" . $blastzOptions, $TZF);
   if ($blastzOK ) {
       &run("restore_rpts $TZF " .
             "$TS1 \"\\\"$tFname\\\" $tStart $tEnd\" " .
             "$TS2 \"\\\"$qFname\\\" $qStart $qEnd\" " .
             "$TS1X $TS2X > $raw");
   } else {
     &run("$cpCmd -p $TZF $raw");
   }
   # Run blastz on query reverse strand and restore repeats:
   &run("revcomp $TS2S > $TS2SR");
   $blastzOK = &plainBlastz($TS1S, $TS2SR, "B=0" . $blastzOptions, $TZR);
   if ($blastzOK ) {
       &run("restore_rpts $TZR " .
             "$TS1 \"\\\"$tFname\\\" $tStart $tEnd\" " .
             "$TS2 \"\\\"$qFname-\\\" $qStart $qEnd\" " .
             "$TS1X $TS2X reverse >> $raw");
   } else {
     &run("$catCmd $TZF >> $raw");
   }
 }
 
 sub liftLav {
   # Run blastz-normalizeLav to lift up chunk coords to sequence level.
   my ($raw, $out, $tSeq, $qSeq) = @_;
   my $tLen = $tSizes{$tSeq};
   my $qLen = $qSeq ? $qSizes{$qSeq} : "0";
   &run("blastz-normalizeLav $tLen $qLen < $raw > $out");
 }
 
 sub convertOutput {
   # Convert lav file to psl or axt file, optionally dropping trivial self al's.
   my ($lav, $out) = @_;
   if ($opt_outFormat eq 'axt') {
     my $dropSelf = ($opt_dropSelf ? "-dropSelf " : "");
     my $tSeq = $defVars{'SEQ1_CTGDIR'} || $defVars{'SEQ1_DIR'};
     my $qSeq = $defVars{'SEQ2_CTGDIR'} || $defVars{'SEQ2_DIR'};
     &run("lavToAxt $dropSelf $lav $tSeq $qSeq $out");
   } elsif ($opt_dropSelf) {
     &run("lavToPsl $lav stdout " .
          "| pslDropOverlap stdin $out");
     &run("pslCheck $out");
   } else {
     &run("lavToPsl $lav $out");
     &run("pslCheck $out");
   }
 }
 
 
 #########################################################################
 #
 # -- main --
 # cleanup the defaultPath variable to eliminate non-existent paths:
 $defaultPath = "";
 foreach my $path (@potentialPath) {
  if ( -d "$path" ) {
     if (length($defaultPath)) {
       $defaultPath .= ":$path";
     } else {
       $defaultPath = $path;
     }
  }
 }
 
 &checkOptions();
 
 &usage(1) if (scalar(@ARGV) != 4);
 my ($target, $query, $DEF, $out) = @ARGV;
 
 # It is OK to have a previous result existing
 if ( -f "$out") {
   print STDERR "Output file $out exists; exiting.\n";
   exit 0;
 }
 
 
 &loadDef($DEF);
 # find full pathNames for commands:
 $rmCmd = &findCommand("$rmCmd");
 $cpCmd = &findCommand("$cpCmd");
 $mvCmd = &findCommand("$mvCmd");
 $shCmd = &findCommand("$shCmd");
 $lsCmd = &findCommand("$lsCmd");
 $catCmd = &findCommand("$catCmd");
 $gzipCmd = &findCommand("$gzipCmd");
 $gunzipCmd = &findCommand("$gunzipCmd");
 $twoBitToFaCmd = &findCommand("$twoBitToFaCmd");
 
 my $seq1Dir = $defVars{'SEQ1_CTGDIR'} || $defVars{'SEQ1_DIR'};
 my $seq2Dir = $defVars{'SEQ2_CTGDIR'} || $defVars{'SEQ2_DIR'};
 my $seq1Len = $defVars{'SEQ1_CTGLEN'} || $defVars{'SEQ1_LEN'};
 my $seq2Len = $defVars{'SEQ2_CTGLEN'} || $defVars{'SEQ2_LEN'};
 &loadSeqSizes($seq1Len, \%tSizes);
 &loadSeqSizes($seq2Len, \%qSizes);
 
 
 # Make a temporary directory for intermediate files:
 if (exists($defVars{'TMPDIR'})) {
   $TMP = `mktemp -d $defVars{'TMPDIR'}/blastz.XXXXXX` || &cleanDie("Can't mktemp");
 } else {
   $TMP = `mktemp -d /tmp/blastz.XXXXXX` || &cleanDie("Can't mktemp");
 }
 chomp $TMP;
 print STDERR "temp files in $TMP -- will attempt cleanup before die-ing.\n";
 
 # Some internal defaults for blastz params, carried over from
 # Scott's blastz-run:
 $defVars{'BLASTZ_H'} = 2000 if (! defined $defVars{'BLASTZ_H'});
 
 my $blastzOptions = &getBlastzParams(%defVars);
 
 my @tFileSpecs = &enumerateFileSpecs($target, $seq1Dir);
 my @qFileSpecs = &enumerateFileSpecs($query, $seq2Dir);
 if ($defVars{'BLASTZ_ABRIDGE_REPEATS'} && scalar(@qFileSpecs) > 1) {
   &cleanDie("Sorry, DEF can't have BLASTZ_ABRIDGE_REPEATS set if a list of " .
 	    "query sequences is passed in -- not supported.\n");
 }
 my $collapsed = 0;
 my $query2bit = $query;
 $query2bit =~ s/.lst$/.2bit/;
 # if partition step constructed the 2bit file for this query, use it:
 if ( -s $query2bit ) {
   &run("$cpCmd -p $query2bit $TMP");  # it is used repeatedly for each tSpec
   $query2bit =~ s#qParts/##;
   $query2bit =~ s#tParts/##;  # could be this for self alignment
   @qFileSpecs = undef;
   $qFileSpecs[0] = "$TMP/$query2bit";
   $collapsed = 1;
 } else {
   ($collapsed, @qFileSpecs) = &collapseFileSpecs(@qFileSpecs);
 }
 # if partition step constructed the 2bit file for this target bit, use it
 # using a target 2bit can only result in axt or psl output, lav not possible
 # so don't do this if lav required output
 if ($opt_outFormat) {
   my $target2bit = $target;
   $target2bit =~ s/.lst$/.2bit/;
   if (-s $target2bit ) {
     @tFileSpecs = undef;
     $tFileSpecs[0] = "$target2bit";  # used only once for all qSpec
   }
 }
 
 # Local file to hold concatenated results of all {target, query} runs:
 my $localOut = "$TMP/local.lav";
 &run("$cpCmd /dev/null $localOut");
 # Local temporary files that are overwritten by each {target, query} run:
 my $littleRaw = "$TMP/little.raw";
 my $littleOut = "$TMP/little.lav";
 my $littleConv = "$TMP/little.conv";
 
 # Run blastz on each {target, query} pair;  lift and collect results.
 foreach my $tSpec (@tFileSpecs) {
   my ($tSeq, $tLocal) = ("", "");
   my $axtResult = 0;
   if ($tSpec =~ m/.2bit$/) {
      $tLocal = $tSpec;   # use .2bit file when it exists
      $axtResult = 1;     # lastz can only output axt when using 2bit[multiple]
   } else {
      ($tSeq, $tLocal) = &unpackForBlastz($tSpec, $seq1Dir, 't');
   }
   foreach my $qSpec (@qFileSpecs) {
     my ($qSeq, $qLocal) = ("", "");
     if ($qSpec =~ m/.2bit$/) {
       $qLocal = $qSpec;
     } else {
       ($qSeq, $qLocal) = &unpackForBlastz($qSpec, $seq2Dir, 'q');
     }
     if ($defVars{'BLASTZ_ABRIDGE_REPEATS'}) {
       &abridgeLinSpecReps($tLocal, $qLocal, $blastzOptions, $littleRaw);
     } else {
       &plainBlastz($tLocal, $qLocal, $blastzOptions, $littleRaw);
     }
     # there may be no lifting involved when axtResult TBD XXX
     if ($axtResult) {
       &run("$cpCmd -p $littleRaw $littleOut");
     } else {
       if ($collapsed) {
         # Lift target side only:
         &liftLav($littleRaw, $littleOut, $tSeq, undef);
       } else {
         &liftLav($littleRaw, $littleOut, $tSeq, $qSeq);
         if ($qLocal =~ /^$TMP/) {
             $qLocal =~ s/\[.*\]$//;
             &run("$rmCmd $qLocal");
         }
       }
     }
     if ($opt_outFormat) {
       if ($axtResult) {
         if ($opt_outFormat eq 'axt') {
           if ($opt_dropSelf) {
             die "-dropSelf not supported for AXT output from 2bit";
           }
           &run("$catCmd $littleOut >> $localOut");
         } else {
           my $tSizes = $defVars{'SEQ1_CTGLEN'} || $defVars{'SEQ1_LEN'};
           my $qSizes = $defVars{'SEQ2_CTGLEN'} || $defVars{'SEQ2_LEN'};
           my $dropSelfCmd = $opt_dropSelf ? "| pslDropOverlap stdin stdout" : "";
           # carry through comments
           &run("grep '^#' $littleOut | egrep -v 'identity|coverage|num_masked' > $littleConv");
           &run("axtToPsl $littleOut $tSizes $qSizes stdout $dropSelfCmd >> $littleConv");
           &run("$catCmd $littleConv >> $localOut");
         }
       } else {
         &convertOutput($littleOut, $littleConv);
         &run("$catCmd $littleConv >> $localOut");
       }
     } else {
       &run("$catCmd $littleOut >> $localOut");
     }
   }
   if ($tLocal =~ /^$TMP/) {
       $tLocal =~ s/\[.*\]$//;
       &run("$rmCmd $tLocal");
   }
 }
 if ($opt_gz) {
   &run("$gzipCmd $localOut");
   $localOut .= ".gz";
 }
 # Move collected results to final location.
 &run("$mvCmd $localOut $out");
 
 # Clean up temporary directory:
 &run("$rmCmd -rf $TMP");