61917474c6c267cd21f007c1f2d5b5f243ee7d5f
angie
  Mon Jun 19 09:25:58 2017 -0700
Prevent unsigned int underflow when sum of blockSizes is greater than actual qSize.  Collapse contiguous 'exons' that are split to represent internal frameshift (e.g. ribosomal frameshift) in genePredExt but need to be a single block to avoid pslCheck errors downstream.  This prevents a few PSLs from being lost from ncbiRefSeqPsl.

diff --git src/hg/genePredToFakePsl/genePredToFakePsl.c src/hg/genePredToFakePsl/genePredToFakePsl.c
index 8b38107..a4e37b1 100644
--- src/hg/genePredToFakePsl/genePredToFakePsl.c
+++ src/hg/genePredToFakePsl/genePredToFakePsl.c
@@ -77,51 +77,65 @@
 fprintf(cdsFh,"%s\t%d..%d\n", gp->name, qCdsStart+1, qCdsEnd); /* genbank cds is closed 1-based */
 }
 
 static void cnvGenePred(struct hash *chromHash, struct genePred *gp, FILE *pslFh, FILE *cdsFh)
 /* convert a genePred to a psl and CDS */
 {
 int chromSize = hashIntValDefault(chromHash, gp->chrom, 0);
 if (chromSize == 0)
     errAbort("Couldn't find chromosome/scaffold '%s' in chromInfo", gp->chrom);
 int e = 0, qSize=0;
 
 
 for (e = 0; e < gp->exonCount; ++e)
     qSize+=(gp->exonEnds[e] - gp->exonStarts[e]);
 
+int realSize = 0;
 int sizeAdjust = 0;
 if (qSizes != NULL)
     {
-    int realSize = hashIntValDefault(qSizeHash, gp->name, 0);
-    if (realSize > 0)
+    realSize = hashIntValDefault(qSizeHash, gp->name, 0);
+    // If there is a realSize (>0), but realSize is less than qSize, this subtraction would
+    // cause unsigned underflow so don't do it.  qSize > realSize implies that one of the genePred
+    // "exons" is glossing over a query gap.
+    if (realSize > qSize)
        sizeAdjust = realSize - qSize;
     }
 
-struct psl *psl = pslNew(gp->name, qSize+sizeAdjust, 0, qSize,
+struct psl *psl = pslNew(gp->name, realSize ? realSize : qSize, 0, qSize,
                          gp->chrom, chromSize, gp->txStart, gp->txEnd,
                          gp->strand, gp->exonCount, 0);
 /* no size adjustment to qStarts for positive strand */
 if (gp->strand[0] == '+')
    sizeAdjust = 0;
-psl->blockCount = gp->exonCount;		    
+int i = -1;
 for (e = 0; e < gp->exonCount; ++e)
     {
-    psl->blockSizes[e] = (gp->exonEnds[e] - gp->exonStarts[e]);
-    psl->qStarts[e] = e==0 ? 0 + sizeAdjust : psl->qStarts[e-1] + psl->blockSizes[e-1];
-    psl->tStarts[e] = gp->exonStarts[e];
+    if (e == 0 || gp->exonStarts[e] != gp->exonEnds[e-1])
+        {
+        i++;
+        psl->blockSizes[i] = (gp->exonEnds[e] - gp->exonStarts[e]);
+        psl->qStarts[i] = i==0 ? 0 + sizeAdjust : psl->qStarts[i-1] + psl->blockSizes[i-1];
+        psl->tStarts[i] = gp->exonStarts[e];
+        }
+    else
+        {
+        // Merge "exons" that have a 0-length gap between them to avoid pslCheck failure
+        psl->blockSizes[i] += (gp->exonEnds[e] - gp->exonStarts[e]);
+        }
     }
+psl->blockCount = i+1;
 psl->match = qSize;	
 psl->tNumInsert = psl->blockCount-1; 
 psl->tBaseInsert = (gp->txEnd - gp->txStart) - qSize;
 pslTabOut(psl, pslFh);
 pslFree(&psl);
 if (gp->cdsStart < gp->cdsEnd)
     cnvGenePredCds(gp, qSize, cdsFh);
 }
 
 static struct hash *getChromHash(char *db)
 /* Return a hash of chrom names and sizes, from either -chromSize=file or db */
 {
 struct hash *chromHash = NULL;
 if (chromSizes != NULL)
     chromHash = hChromSizeHashFromFile(chromSizes);