ce07fe9f6695acd811e90fbd646830ca7858b525
markd
  Mon Sep 28 13:41:50 2015 -0700
fixed various problems detected by making pslCheck more stringent

diff --git src/lib/psl.c src/lib/psl.c
index 46d0253..23f921f 100644
--- src/lib/psl.c
+++ src/lib/psl.c
@@ -1548,40 +1548,40 @@
     "+", "-", "++", "+-", "-+", "--", NULL
 };
 int i, errCount = 0;
 int tBlockSizeMult = pslIsProtein(psl) ? 3 : 1;
 
 /* check strand value */
 for (i = 0; VALID_STRANDS[i] != NULL; i++)
     {
     if (strcmp(psl->strand, VALID_STRANDS[i]) == 0)
         break;
     }
 if (VALID_STRANDS[i] == NULL)
     chkError(pslDesc, out, psl, &errCount,
              "\tinvalid PSL strand: \"%s\"\n", psl->strand);
 
-/* check target */
-chkRanges(pslDesc, out, psl, psl->tName, "target", 't', pslTStrand(psl), psl->tSize, psl->tStart, psl->tEnd,
-          psl->tStarts, tBlockSizeMult, &errCount);
-chkInsertCounts(pslDesc, out, psl, psl->tName, 't', psl->tStarts, psl->tNumInsert, psl->tBaseInsert, &errCount);
-
 /* check query */
 chkRanges(pslDesc, out, psl, psl->qName, "query", 'q', pslQStrand(psl), psl->qSize, psl->qStart, psl->qEnd,
           psl->qStarts, 1, &errCount);
 chkInsertCounts(pslDesc, out, psl, psl->qName, 'q', psl->qStarts, psl->qNumInsert, psl->qBaseInsert, &errCount);
 
+/* check target */
+chkRanges(pslDesc, out, psl, psl->tName, "target", 't', pslTStrand(psl), psl->tSize, psl->tStart, psl->tEnd,
+          psl->tStarts, tBlockSizeMult, &errCount);
+chkInsertCounts(pslDesc, out, psl, psl->tName, 't', psl->tStarts, psl->tNumInsert, psl->tBaseInsert, &errCount);
+
 return errCount;
 }
 
 struct hash *readPslToBinKeeper(char *sizeFileName, char *pslFileName)
 /* read a list of psls and return results in hash of binKeeper structure for fast query*/
 {
 struct binKeeper *bk; 
 struct psl *psl;
 struct lineFile *sf = lineFileOpen(sizeFileName, TRUE);
 struct lineFile *pf = lineFileOpen(pslFileName , TRUE);
 struct hash *hash = newHash(0);
 char *chromRow[2];
 char *row[21] ;
 
 while (lineFileRow(sf, chromRow))
@@ -1827,30 +1827,54 @@
  * updated to with the new amount of space. */
 {
 int blockSpace = *blockSpacePtr;
 int newSpace = 2 * blockSpace;
 ExpandArray(psl->blockSizes, blockSpace, newSpace);
 ExpandArray(psl->qStarts, blockSpace, newSpace);
 ExpandArray(psl->tStarts, blockSpace, newSpace);
 if (psl->qSequence != NULL)
     {
     ExpandArray(psl->qSequence, blockSpace, newSpace);
     ExpandArray(psl->tSequence, blockSpace, newSpace);
     }
 *blockSpacePtr = newSpace;
 }
 
+void pslComputeInsertCounts(struct psl *psl)
+/* compute numInsert and baseInsert fields from the blocks */
+{
+psl->qNumInsert = psl->qBaseInsert = 0;
+psl->tNumInsert = psl->tBaseInsert = 0;
+int iBlk;
+
+for (iBlk = 1; iBlk < psl->blockCount; iBlk++)
+    {
+    unsigned qGapSize = psl->qStarts[iBlk] - (psl->qStarts[iBlk-1]+psl->blockSizes[iBlk-1]);
+    if (qGapSize != 0)
+        {
+        psl->qNumInsert++;
+        psl->qBaseInsert += qGapSize;
+        }
+    unsigned tGapSize = psl->tStarts[iBlk] - (psl->tStarts[iBlk-1]+psl->blockSizes[iBlk-1]);
+    if (tGapSize != 0)
+        {
+        psl->tNumInsert++;
+        psl->tBaseInsert += tGapSize;
+        }
+    }
+}
+
 static boolean getNextCigarOp(char *startPtr, boolean reverse, char **ptr, char *op, int *size)
 /* gets one cigar op out of the CIGAR string.  Reverse the order if asked */
 {
 char *str = *ptr;
 
 if (str == NULL)
     return FALSE;
 
 if ((!reverse && (*str == 0)) || (reverse && (str == startPtr)))
     return FALSE;
 
 // between each cigar op there could be nothing, or a space, or a plus
 if (reverse)
     {
     char *end = str - 1;
@@ -1953,30 +1977,31 @@
 psl->qStart = psl->qStarts[0];
 psl->qEnd = pslQEnd(psl, psl->blockCount-1);
 if (strand[0] == '-')
     reverseIntRange(&psl->qStart, &psl->qEnd, qSize);
 psl->tStart = psl->tStarts[0];
 psl->tEnd = pslTEnd(psl, psl->blockCount-1);
 if (strand[1] == '-')
     reverseIntRange(&psl->tStart, &psl->tEnd, tSize);
 
 /* sanity check */
 if (qNext != qBlkEnd)
     errAbort("CIGAR query length does not match specified query range %s:%d-%d", qName, qStart, qEnd);
 if (tNext != tBlkEnd)
     errAbort("CIGAR target length does not match specified target range %s:%d-%d", tName, tStart, tEnd);
 psl->match = totalSize;
+pslComputeInsertCounts(psl);
 return psl;
 }
 
 int pslRangeTreeOverlap(struct psl *psl, struct rbTree *rangeTree)
 /* Return amount that psl overlaps (on target side) with rangeTree. */
 {
 int i;
 int overlap = 0;
 boolean isRc = (psl->strand[1] == '-');
 for (i=0; i<psl->blockCount; ++i)
     {
     int start = psl->tStarts[i];
     int end = start + psl->blockSizes[i];
     if (isRc)
         reverseIntRange(&start, &end, psl->tSize);