ec58e08d65dcad145657e4c4420988ce5c3e4b18 angie Mon Feb 22 14:22:40 2021 -0800 Increase bufLen in genomeTxMismatch to accommodate overlapping blocks for ribosomal frameshift as in SARS-CoV-2. diff --git src/hg/lib/variantProjector.c src/hg/lib/variantProjector.c index 760c347..ca94850 100644 --- src/hg/lib/variantProjector.c +++ src/hg/lib/variantProjector.c @@ -276,31 +276,33 @@ } bufSize++; return bufSize; } static void appendOverlap(struct seqWindow *gSeqWin, uint blkStart, uint blkEnd, uint gStart, uint gEnd, char *buf, size_t bufSize, int *pBufOffset) /* If there is any overlap between [blkStart,blkEnd) and [gStart,gEnd) then append the genomic * sequence to buf and update *pBufOffset. */ { int startInBlk = max(blkStart, gStart); int endInBlk = min(blkEnd, gEnd); int len = endInBlk - startInBlk; if (len > 0) { - assert(bufSize > *pBufOffset + len); + if (bufSize <= *pBufOffset + len) + errAbort("appendOverlap: bufSize=%u <= bufOffset %d + len %d", + (unsigned int)bufSize, *pBufOffset, len); seqWindowCopy(gSeqWin, startInBlk, len, buf+*pBufOffset, bufSize-*pBufOffset); *pBufOffset += len; } } static void spliceGenomicInRange(struct seqWindow *gSeqWin, uint gStart, uint gEnd, struct psl *txAli, boolean checkIntrons, char *buf, size_t bufSize) /* Splice genomic exons in range into buf and reverse-complement if necessary. * If checkIntrons is true then include genomic sequence from "introns" that are too short to be * actual introns; use bufSizeForSplicedPlusGInsBases to calc bufSize before calling this. */ { int splicedLen = 0; buf[0] = 0; int ix; for (ix = 0; ix < txAli->blockCount; ix++) @@ -326,31 +328,33 @@ boolean isRc = (pslQStrand(txAli) == '-'); if (isRc && splicedLen) reverseComplement(buf, splicedLen); } static boolean genomeTxMismatch(char *txRef, struct seqWindow *gSeqWin, uint gStart, uint gEnd, struct psl *txAli) /* If the variant overlaps aligned blocks then compare spliced strand-corrected genomic reference * sequence with transcript reference sequence. vpTx start, end and txRef must be in place. * Note: this will detect substitutions but not indels by design -- leave it to processIndels * and vpTxSetTxAlt to detect indel mismatches. */ { boolean mismatch = FALSE; if (isNotEmpty(txRef)) { - int bufLen = gEnd - gStart + 1; + // Watch out for overlapping blocks due to ribosomal slippage as in SARS-CoV-2. + int fudge = 3; + int bufLen = gEnd - gStart + 1 + fudge;; char splicedGSeq[bufLen]; splicedGSeq[0] = '\0'; spliceGenomicInRange(gSeqWin, gStart, gEnd, txAli, FALSE, splicedGSeq, sizeof(splicedGSeq)); if (isNotEmpty(splicedGSeq) && differentString(splicedGSeq, txRef)) mismatch = TRUE; } return mismatch; } boolean vpTxPosIsInsertion(struct vpTxPosition *startPos, struct vpTxPosition *endPos) /* Return TRUE if startPos and endPos describe a zero-length region. */ { // Sometimes an insertion happens at the boundary between regions, in which case startPos->region // and endPos->region will be different even though they land on the same point. // At the boundary, endPos->region is looking left/5' and startPos->region is looking right/3'.