ec58e08d65dcad145657e4c4420988ce5c3e4b18
angie
  Mon Feb 22 14:22:40 2021 -0800
Increase bufLen in genomeTxMismatch to accommodate overlapping blocks for ribosomal frameshift as in SARS-CoV-2.

diff --git src/hg/lib/variantProjector.c src/hg/lib/variantProjector.c
index 760c347..ca94850 100644
--- src/hg/lib/variantProjector.c
+++ src/hg/lib/variantProjector.c
@@ -276,31 +276,33 @@
     }
 bufSize++;
 return bufSize;
 }
 
 static void appendOverlap(struct seqWindow *gSeqWin, uint blkStart, uint blkEnd,
                           uint gStart, uint gEnd, char *buf, size_t bufSize, int *pBufOffset)
 /* If there is any overlap between [blkStart,blkEnd) and [gStart,gEnd) then append the genomic
  * sequence to buf and update *pBufOffset. */
 {
 int startInBlk = max(blkStart, gStart);
 int endInBlk = min(blkEnd, gEnd);
 int len = endInBlk - startInBlk;
 if (len > 0)
     {
-    assert(bufSize > *pBufOffset + len);
+    if (bufSize <= *pBufOffset + len)
+        errAbort("appendOverlap: bufSize=%u <= bufOffset %d + len %d",
+                 (unsigned int)bufSize, *pBufOffset, len);
     seqWindowCopy(gSeqWin, startInBlk, len, buf+*pBufOffset, bufSize-*pBufOffset);
     *pBufOffset += len;
     }
 }
 
 static void spliceGenomicInRange(struct seqWindow *gSeqWin, uint gStart, uint gEnd,
                                  struct psl *txAli, boolean checkIntrons, char *buf, size_t bufSize)
 /* Splice genomic exons in range into buf and reverse-complement if necessary.
  * If checkIntrons is true then include genomic sequence from "introns" that are too short to be
  * actual introns; use bufSizeForSplicedPlusGInsBases to calc bufSize before calling this. */
 {
 int splicedLen = 0;
 buf[0] = 0;
 int ix;
 for (ix = 0;  ix < txAli->blockCount;  ix++)
@@ -326,31 +328,33 @@
 boolean isRc = (pslQStrand(txAli) == '-');
 if (isRc && splicedLen)
     reverseComplement(buf, splicedLen);
 }
 
 static boolean genomeTxMismatch(char *txRef, struct seqWindow *gSeqWin,
                                 uint gStart, uint gEnd, struct psl *txAli)
 /* If the variant overlaps aligned blocks then compare spliced strand-corrected genomic reference
  * sequence with transcript reference sequence.  vpTx start, end and txRef must be in place.
  * Note: this will detect substitutions but not indels by design -- leave it to processIndels
  * and vpTxSetTxAlt to detect indel mismatches. */
 {
 boolean mismatch = FALSE;
 if (isNotEmpty(txRef))
     {
-    int bufLen = gEnd - gStart + 1;
+    // Watch out for overlapping blocks due to ribosomal slippage as in SARS-CoV-2.
+    int fudge = 3;
+    int bufLen = gEnd - gStart + 1 + fudge;;
     char splicedGSeq[bufLen];
     splicedGSeq[0] = '\0';
     spliceGenomicInRange(gSeqWin, gStart, gEnd, txAli, FALSE, splicedGSeq, sizeof(splicedGSeq));
     if (isNotEmpty(splicedGSeq) && differentString(splicedGSeq, txRef))
         mismatch = TRUE;
     }
 return mismatch;
 }
 
 boolean vpTxPosIsInsertion(struct vpTxPosition *startPos, struct vpTxPosition *endPos)
 /* Return TRUE if startPos and endPos describe a zero-length region. */
 {
 // Sometimes an insertion happens at the boundary between regions, in which case startPos->region
 // and endPos->region will be different even though they land on the same point.
 // At the boundary, endPos->region is looking left/5' and startPos->region is looking right/3'.