a76cda6a69557a8c71016489147e4afd31266295
angie
  Mon Feb 3 15:19:13 2020 -0800
blastz-run-ucsc: catch pipe errors; use pslDropOverlap for -dropSelf, for both lav and axt output.  refs #24694, #24695
In MLQ #24587 a user reported some trivial self-alignments that should have been filtered out.  When lastz produced axt output (for 2bit without suffix), we were not doing anything to filter them; now we use pslDropOverlap there when converting to psl.  Worse, lavToAxt was failing when lastz output included double-sided gaps, and the failure was not propagated through the piped system() command, so undetected errors caused lots of alignments to be lost.  So run() now sets pipefail so that pipe errors are caught, and for psl output, instead of piping lavToAxt -dropSelf to axtToPsl (lavToAxt was used there only for its -dropSelf option), we use lavToPsl | pslDropOverlap.

diff --git src/hg/utils/automation/blastz-run-ucsc src/hg/utils/automation/blastz-run-ucsc
index 5e57dea..0e6528d 100755
--- src/hg/utils/automation/blastz-run-ucsc
+++ src/hg/utils/automation/blastz-run-ucsc
@@ -106,31 +106,32 @@
 sub cleanDie {
   # Clean up $TMP (if it has been created) and then die.
   my ($msg) = @_;
   if ($TMP && -d $TMP) {
     system("$shCmd", "-c", "$rmCmd -rf $TMP");
   }
   die $msg;
 }
 
 sub run {
   # Run a command in sh, with PATH specified in %defVars.
   my ($cmd) = @_;
   my $setPath = "export PATH=";
   $setPath .= "$defVars{PATH}:" if (defined $defVars{'PATH'});
   $setPath .= "$defaultPath; ";
-  system("$shCmd", "-c", $setPath . $cmd) == 0 ||
+  my $failPath = "set -xbeEu -o pipefail; ";
+  system("$shCmd", "-c", $setPath . $failPath . $cmd) == 0 ||
     &cleanDie("Command failed:\n$setPath\n$cmd\n");
 }
 
 sub findCommand {
   my ($cmd) = @_;
   my $setPath = "PATH=";
   $setPath .= "$defVars{PATH}:" if (defined $defVars{'PATH'});
   $setPath .= "$defaultPath; ";
   my $which = qx{$setPath type -ap $cmd 2> /dev/null | head -1};
   chomp $which;
   $which = $cmd if ($which eq "");
   return $which;
 }
 
 sub nfsNoodge {
@@ -520,72 +521,69 @@
     &run("$catCmd $TZF >> $raw");
   }
 }
 
 sub liftLav {
   # Run blastz-normalizeLav to lift up chunk coords to sequence level.
   my ($raw, $out, $tSeq, $qSeq) = @_;
   my $tLen = $tSizes{$tSeq};
   my $qLen = $qSeq ? $qSizes{$qSeq} : "0";
   &run("blastz-normalizeLav $tLen $qLen < $raw > $out");
 }
 
 sub convertOutput {
   # Convert lav file to psl or axt file, optionally dropping trivial self al's.
   my ($lav, $out) = @_;
-  my $tSeq = $defVars{'SEQ1_CTGDIR'} || $defVars{'SEQ1_DIR'};
-  my $qSeq = $defVars{'SEQ2_CTGDIR'} || $defVars{'SEQ2_DIR'};
-  my $tSizes = $defVars{'SEQ1_CTGLEN'} || $defVars{'SEQ1_LEN'};
-  my $qSizes = $defVars{'SEQ2_CTGLEN'} || $defVars{'SEQ2_LEN'};
   if ($opt_outFormat eq 'axt') {
     my $dropSelf = ($opt_dropSelf ? "-dropSelf " : "");
+    my $tSeq = $defVars{'SEQ1_CTGDIR'} || $defVars{'SEQ1_DIR'};
+    my $qSeq = $defVars{'SEQ2_CTGDIR'} || $defVars{'SEQ2_DIR'};
     &run("lavToAxt $dropSelf $lav $tSeq $qSeq $out");
   } elsif ($opt_dropSelf) {
-    # lavToPsl doesn't have lavToAxt's -dropSelf functionality, so pipe
-    # lavToAxt -dropSelf to axtToPsl.
-    &run("lavToAxt -dropSelf $lav $tSeq $qSeq stdout " .
-	 "| axtToPsl stdin $tSizes $qSizes $out");
+    &run("lavToPsl $lav stdout " .
+         "| pslDropOverlap stdin $out");
     &run("pslCheck $out");
   } else {
     &run("lavToPsl $lav $out");
     &run("pslCheck $out");
   }
 }
 
 
 #########################################################################
 #
 # -- main --
 # cleanup the defaultPath variable to eliminate non-existent paths:
 $defaultPath = "";
 foreach my $path (@potentialPath) {
  if ( -d "$path" ) {
     if (length($defaultPath)) {
       $defaultPath .= ":$path";
     } else {
       $defaultPath = $path;
     }
  }
 }
 
 &checkOptions();
 
 &usage(1) if (scalar(@ARGV) != 4);
 my ($target, $query, $DEF, $out) = @ARGV;
 
 # It is OK to have a previous result existing
 if ( -f "$out") {
+  print STDERR "Output file $out exists; exiting.\n";
   exit 0;
 }
 
 
 &loadDef($DEF);
 # find full pathNames for commands:
 $rmCmd = &findCommand("$rmCmd");
 $cpCmd = &findCommand("$cpCmd");
 $mvCmd = &findCommand("$mvCmd");
 $shCmd = &findCommand("$shCmd");
 $lsCmd = &findCommand("$lsCmd");
 $catCmd = &findCommand("$catCmd");
 $gzipCmd = &findCommand("$gzipCmd");
 $gunzipCmd = &findCommand("$gunzipCmd");
 $twoBitToFaCmd = &findCommand("$twoBitToFaCmd");
@@ -681,37 +679,41 @@
     } else {
       if ($collapsed) {
         # Lift target side only:
         &liftLav($littleRaw, $littleOut, $tSeq, undef);
       } else {
         &liftLav($littleRaw, $littleOut, $tSeq, $qSeq);
         if ($qLocal =~ /^$TMP/) {
             $qLocal =~ s/\[.*\]$//;
             &run("$rmCmd $qLocal");
         }
       }
     }
     if ($opt_outFormat) {
       if ($axtResult) {
         if ($opt_outFormat eq 'axt') {
+          if ($opt_dropSelf) {
+            die "-dropSelf not supported for AXT output from 2bit";
+          }
           &run("$catCmd $littleOut >> $localOut");
         } else {
           my $tSizes = $defVars{'SEQ1_CTGLEN'} || $defVars{'SEQ1_LEN'};
           my $qSizes = $defVars{'SEQ2_CTGLEN'} || $defVars{'SEQ2_LEN'};
+          my $dropSelfCmd = $opt_dropSelf ? "| pslDropOverlap stdin stdout" : "";
           # carry through comments
           &run("grep '^#' $littleOut | egrep -v 'identity|coverage|num_masked' > $littleConv");
-          &run("axtToPsl $littleOut $tSizes $qSizes stdout >> $littleConv");
+          &run("axtToPsl $littleOut $tSizes $qSizes stdout $dropSelfCmd >> $littleConv");
           &run("$catCmd $littleConv >> $localOut");
         }
       } else {
         &convertOutput($littleOut, $littleConv);
         &run("$catCmd $littleConv >> $localOut");
       }
     } else {
       &run("$catCmd $littleOut >> $localOut");
     }
   }
   if ($tLocal =~ /^$TMP/) {
       $tLocal =~ s/\[.*\]$//;
       &run("$rmCmd $tLocal");
   }
 }