src/hg/lib/variantProjector.c 04f1eb41233f2cd7dd80fb51f36eda1462f2899f

04f1eb41233f2cd7dd80fb51f36eda1462f2899f
angie
  Wed Mar 3 15:26:23 2021 -0800
Code review feedback: ';;' --> ';'.  refs #27102

diff --git src/hg/lib/variantProjector.c src/hg/lib/variantProjector.c
index ca94850..f0a926e 100644
--- src/hg/lib/variantProjector.c
+++ src/hg/lib/variantProjector.c
@@ -1,2007 +1,2007 @@
 /* variantProjector -- use sequence alignments and indel shifting to transform genomic variants
  * to transcript/CDS and protein variants.  Compute sufficient information to predict functional
  * effects on proteins and to form HGVS g., n., and if applicable c. and p. terms. */
 
 /* Copyright (C) 2017 The Regents of the University of California
  * See README in this or parent directory for licensing information. */
 
 #include "common.h"
 #include "indelShift.h"
 #include "variantProjector.h"
 
 static enum vpTxRegion complementRegion(enum vpTxRegion regionIn)
 /* Reverse the +-strand-computed region for a transcript on the - strand. */
 {
 enum vpTxRegion regionOut = regionIn;
 if (regionIn == vpUpstream)
     regionOut = vpDownstream;
 else if (regionIn == vpDownstream)
     regionOut = vpUpstream;
 return regionOut;
 }
 
 INLINE void swapUint(uint *pA, uint *pB)
 /* Swap the values of a and b */
 {
 uint tmp = *pA;
 *pA = *pB;
 *pB = tmp;
 }
 
 static void vpTxPosRc(struct vpTxPosition *txPos, uint txSize)
 /* Reverse/complement all components of txPos. */
 {
 txPos->region = complementRegion(txPos->region);
 txPos->txOffset = txSize - txPos->txOffset;
 // No change to distances -- except for introns where we swap 5' / 3':
 if (txPos->region == vpIntron)
     {
     txPos->intron3TxOffset = txSize - txPos->intron3TxOffset;
     swapUint(&txPos->txOffset, &txPos->intron3TxOffset);
     swapUint(&txPos->gDistance, &txPos->intron3Distance);
     }
 // No change to aliBlkIx -- no change to alignment, it is still genomic + strand.
 }
 
 static void vpTxPosIntronic(struct vpTxPosition *txPos, struct psl *txAli, uint gOffset, int ix)
 /* For introns [could there ever be double-sided gaps??] we need tx offsets and distances
  * for both 5' exon (before the intron) and 3' exon (after the intron).  */
 {
 uint intronStart = txAli->tStarts[ix] + txAli->blockSizes[ix];
 if (gOffset < intronStart)
     errAbort("vpTxIntronicPos: gOffset (%u) < start of intron %d (%u)", gOffset, ix, intronStart);
 uint intronEnd = txAli->tStarts[ix+1];
 if (gOffset > intronEnd)
     errAbort("vpTxIntronicPos: gOffset (%u) > end of intron %d (%u)", gOffset, ix, intronEnd);
 uint txLeft = txAli->qStarts[ix] + txAli->blockSizes[ix];
 uint txRight = txAli->qStarts[ix+1];
 txPos->txOffset = txLeft;
 txPos->gDistance = gOffset - intronStart;
 txPos->intron3TxOffset = txRight;
 txPos->intron3Distance = intronEnd - gOffset;
 }
 
 static boolean genomeHasDeletion(struct psl *txAli, int ix)
 /* Return TRUE if block ix has a gap to the right that skips fewer bases on target (genome)
  * than on query (transcript) -- i.e. not an intron, but rather some missing base(s)
  * in the reference genome. */
 {
 if (ix < 0 || ix >= txAli->blockCount - 1)
     return FALSE;
 int blockSize = txAli->blockSizes[ix];
 int tGapSize = txAli->tStarts[ix+1] - txAli->tStarts[ix] - blockSize;
 int qGapSize = txAli->qStarts[ix+1] - txAli->qStarts[ix] - blockSize;
 return (tGapSize < qGapSize);
 }
 
 // Google search on "minimal intron size" turned up a study of shortest known introns, ~48-50 in
 // most species surveyed at the time.
 #define MIN_INTRON 45
 
 static boolean pslIntronTooShort(struct psl *psl, int blkIx, int minIntronSize)
 /* Return TRUE if the target gap between blkIx and blkIx+1 is too short to be a plausible intron. */
 {
 if (blkIx >= psl->blockCount - 1 || blkIx < 0)
     errAbort("pslIntronTooShort: %s blkIx %d is out of range [0, %d]",
              psl->qName, blkIx, psl->blockCount - 1);
 int tGapLen = psl->tStarts[blkIx+1] - psl->tStarts[blkIx] - psl->blockSizes[blkIx];
 int qGapLen = psl->qStarts[blkIx+1] - psl->qStarts[blkIx] - psl->blockSizes[blkIx];
 int intronLen = tGapLen - qGapLen;
 return (intronLen < minIntronSize);
 }
 
 void vpPosGenoToTx(uint gOffset, struct psl *txAli, struct vpTxPosition *txPos, boolean isTxEnd)
 /* Use txAli to project gOffset onto transcript-relative coords in txPos.
  * Set isTxEnd to TRUE if we are projecting to the end coordinate in transcript space:
  * higher genomic coord if transcript on '+' strand, lower genomic coord if tx on '-' strand. */
 {
 ZeroVar(txPos);
 txPos->gOffset = gOffset;
 boolean isRc = (pslQStrand(txAli) == '-');
 // Coordinate transforms of start and end coordinates can be done the same way, but
 // when determining which region of the transcript the variant falls in, we need to
 // treat the open end differently (looking backward) from the closed start (looking forward).
 int endCmp = (isTxEnd != isRc) ? 1 : 0;
 int gOffsetCmp = gOffset - endCmp;
 if (gOffsetCmp < txAli->tStart)
     {
     txPos->region = vpUpstream;
     // Can't use qStart here -- when isRc, qStarts are reversed but qStart and qEnd are not
     txPos->txOffset = txAli->qStarts[0];
     txPos->gDistance = txAli->tStart - gOffset;
     txPos->aliBlkIx = 0;
     }
 else if (gOffsetCmp < txAli->tEnd)
     {
     int ix;
     for (ix = 0;  ix < txAli->blockCount;  ix++)
         {
         int tBlkStart = txAli->tStarts[ix];
         int tBlkEnd = tBlkStart + txAli->blockSizes[ix];
         if (!endCmp && ix > 0 && gOffset == tBlkStart && genomeHasDeletion(txAli, ix-1))
             {
             // Include adjacent skipped transcript bases to the left
             txPos->region = vpExon;
             txPos->txOffset = txAli->qStarts[ix-1] + txAli->blockSizes[ix-1];
             txPos->aliBlkIx = ix-1;
             break;
             }
         else if (endCmp && gOffset == tBlkEnd && genomeHasDeletion(txAli, ix))
             {
             // Include adjacent skipped transcript bases to the right
             txPos->region = vpExon;
             txPos->txOffset = txAli->qStarts[ix+1];
             txPos->aliBlkIx = ix+1;
             break;
             }
         else if (gOffsetCmp >= tBlkStart && gOffsetCmp < tBlkEnd)
             {
             // Normal exonic base
             txPos->region = vpExon;
             txPos->txOffset = txAli->qStarts[ix] + gOffset - tBlkStart;
             txPos->aliBlkIx = ix;
             break;
             }
         else if (ix < txAli->blockCount-1 &&
                  gOffsetCmp >= tBlkEnd && gOffsetCmp < txAli->tStarts[ix+1])
             {
             // Intronic -- or genomic insertion
             if (pslIntronTooShort(txAli, ix, MIN_INTRON))
                 {
                 // Genome has extra bases relative to transcript -- use tx exon position
                 txPos->region = vpExon;
                 if (endCmp)
                     {
                     txPos->txOffset = txAli->qStarts[ix+1];
                     txPos->gInsBases = txAli->tStarts[ix+1] - gOffset;
                     }
                 else
                     {
                     txPos->txOffset = txAli->qStarts[ix] + txAli->blockSizes[ix];
                     txPos->gInsBases = gOffset - tBlkEnd;
                     }
                 }
             else
                 {
                 txPos->region = vpIntron;
                 vpTxPosIntronic(txPos, txAli, gOffset, ix);
                 }
             txPos->aliBlkIx = ix;
             break;
             }
         }
     }
 else
     {
     txPos->region = vpDownstream;
     // Can't use qEnd here -- when isRc, qStarts are reversed but qStart and qEnd are not
     int lastIx = txAli->blockCount - 1;
     txPos->txOffset = txAli->qStarts[lastIx] + txAli->blockSizes[lastIx];
     txPos->gDistance = gOffset - txAli->tEnd;
     txPos->aliBlkIx = lastIx;
     }
 if (isRc)
     vpTxPosRc(txPos, txAli->qSize);
 }
 
 static char *getTxInRange(struct dnaSeq *txSeq, struct vpTxPosition *startPos,
                           struct vpTxPosition *endPos)
 /* If [startPos, endPos) overlaps the actual transcript (i.e. is not intronic or up/down,
  * return the transcript sequence in that range (possibly empty).  If there is no overlap
  * then return NULL. */
  {
 char *txRef = NULL;
 if (endPos->txOffset >= startPos->txOffset)
     {
     int txRefLen = endPos->txOffset - startPos->txOffset;
     txRef = cloneStringZ(txSeq->dna + startPos->txOffset, txRefLen);
     }
 if (txRef)
     touppers(txRef);
 return txRef;
 }
 
 static void vpTxSetRef(struct vpTx *vpTx, struct dnaSeq *txSeq)
 /* Set vpTx->txRef to transcript variant reference sequence from txSeq using vpTx start & end,
  * or NULL if variant has no overlap with transcript exons. */
 {
 freez(&vpTx->txRef);
 vpTx->txRef = getTxInRange(txSeq, &vpTx->start, &vpTx->end);
 }
 
 static int pslCountExons(struct psl *psl, int minIntronSize)
 /* Return the number of exons separated by gaps whose length on target is at least minIntronSize. */
 {
 int exonCount = 1;
 int ix;
 for (ix = 0;  ix < psl->blockCount-1;  ix++)
     {
     if (!pslIntronTooShort(psl, ix, minIntronSize))
         exonCount++;
     }
 return exonCount;
 }
 
 static uint pslBlkIxToExonIx(struct psl *psl, int blkIx, int minIntronSize)
 /* Convert PSL block to exon number, glossing over too-short introns */
 {
 uint exonIx = 0, ix;
 if (blkIx < 0 || blkIx >= psl->blockCount)
     errAbort("pslBlkIxToExonIx: illegal blkIx %d (expecting [0..%d]", blkIx, psl->blockCount-1);
 // Increment exonIx every time we pass an intron that's not too short.
 if (pslQStrand(psl) == '-')
     for (ix = psl->blockCount - 1; ix > blkIx && ix > 0;  ix--)
         {
         if (!pslIntronTooShort(psl, ix-1, minIntronSize))
             exonIx++;
         }
 else
     for (ix = 0;  ix < blkIx;  ix++)
         {
         if (! pslIntronTooShort(psl, ix, minIntronSize))
             exonIx++;
         }
 return exonIx;
 }
 
 static size_t bufSizeForSplicedPlusGInsBases(struct psl *txAli, uint gStart, uint gEnd)
 // Calculate the buffer size required for spliced genomic sequence plus sequence of
 // suspiciously short "introns".
 {
 size_t bufSize = 0;
 int ix;
 for (ix = 0;  ix < txAli->blockCount;  ix++)
     {
     int tBlkStart = txAli->tStarts[ix];
     if (gEnd <= tBlkStart)
         break;
     int tBlkEnd = tBlkStart + txAli->blockSizes[ix];
     if (gStart < tBlkEnd)
         {
         int startInBlk = max(tBlkStart, gStart);
         int endInBlk = min(tBlkEnd, gEnd);
         int len = endInBlk - startInBlk;
         if (len > 0)
             bufSize += len;
         }
     int tNextBlkStart = txAli->tStarts[ix+1];
     if (ix < txAli->blockCount - 1 && gEnd >= tBlkEnd && gStart <= tNextBlkStart &&
         pslIntronTooShort(txAli, ix, MIN_INTRON))
         {
         // It's an indel between genome and transcript, not an intron -- add genomic sequence
         int len = tNextBlkStart - tBlkEnd;
         if (len > 0)
             bufSize += len;
         }
     }
 bufSize++;
 return bufSize;
 }
 
 static void appendOverlap(struct seqWindow *gSeqWin, uint blkStart, uint blkEnd,
                           uint gStart, uint gEnd, char *buf, size_t bufSize, int *pBufOffset)
 /* If there is any overlap between [blkStart,blkEnd) and [gStart,gEnd) then append the genomic
  * sequence to buf and update *pBufOffset. */
 {
 int startInBlk = max(blkStart, gStart);
 int endInBlk = min(blkEnd, gEnd);
 int len = endInBlk - startInBlk;
 if (len > 0)
     {
     if (bufSize <= *pBufOffset + len)
         errAbort("appendOverlap: bufSize=%u <= bufOffset %d + len %d",
                  (unsigned int)bufSize, *pBufOffset, len);
     seqWindowCopy(gSeqWin, startInBlk, len, buf+*pBufOffset, bufSize-*pBufOffset);
     *pBufOffset += len;
     }
 }
 
 static void spliceGenomicInRange(struct seqWindow *gSeqWin, uint gStart, uint gEnd,
                                  struct psl *txAli, boolean checkIntrons, char *buf, size_t bufSize)
 /* Splice genomic exons in range into buf and reverse-complement if necessary.
  * If checkIntrons is true then include genomic sequence from "introns" that are too short to be
  * actual introns; use bufSizeForSplicedPlusGInsBases to calc bufSize before calling this. */
 {
 int splicedLen = 0;
 buf[0] = 0;
 int ix;
 for (ix = 0;  ix < txAli->blockCount;  ix++)
     {
     int tBlkStart = txAli->tStarts[ix];
     if (tBlkStart >= gEnd)
         break;
     int tBlkEnd = tBlkStart + txAli->blockSizes[ix];
     appendOverlap(gSeqWin, tBlkStart, tBlkEnd, gStart, gEnd, buf, bufSize, &splicedLen);
     int tNextBlkStart = txAli->tStarts[ix+1];
     if (checkIntrons && ix < txAli->blockCount - 1 && gEnd >= tBlkEnd && gStart <= tNextBlkStart &&
         pslIntronTooShort(txAli, ix, MIN_INTRON))
         {
         // It's an indel between genome and transcript, not an intron -- add genomic sequence
         int len = tNextBlkStart - tBlkEnd;
         if (len > 0)
             {
             seqWindowCopy(gSeqWin, tBlkEnd, len, buf+splicedLen, bufSize-splicedLen);
             splicedLen += len;
             }
         }
     }
 boolean isRc = (pslQStrand(txAli) == '-');
 if (isRc && splicedLen)
     reverseComplement(buf, splicedLen);
 }
 
 static boolean genomeTxMismatch(char *txRef, struct seqWindow *gSeqWin,
                                 uint gStart, uint gEnd, struct psl *txAli)
 /* If the variant overlaps aligned blocks then compare spliced strand-corrected genomic reference
  * sequence with transcript reference sequence.  vpTx start, end and txRef must be in place.
  * Note: this will detect substitutions but not indels by design -- leave it to processIndels
  * and vpTxSetTxAlt to detect indel mismatches. */
 {
 boolean mismatch = FALSE;
 if (isNotEmpty(txRef))
     {
     // Watch out for overlapping blocks due to ribosomal slippage as in SARS-CoV-2.
     int fudge = 3;
-    int bufLen = gEnd - gStart + 1 + fudge;;
+    int bufLen = gEnd - gStart + 1 + fudge;
     char splicedGSeq[bufLen];
     splicedGSeq[0] = '\0';
     spliceGenomicInRange(gSeqWin, gStart, gEnd, txAli, FALSE, splicedGSeq, sizeof(splicedGSeq));
     if (isNotEmpty(splicedGSeq) && differentString(splicedGSeq, txRef))
         mismatch = TRUE;
     }
 return mismatch;
 }
 
 boolean vpTxPosIsInsertion(struct vpTxPosition *startPos, struct vpTxPosition *endPos)
 /* Return TRUE if startPos and endPos describe a zero-length region. */
 {
 // Sometimes an insertion happens at the boundary between regions, in which case startPos->region
 // and endPos->region will be different even though they land on the same point.
 // At the boundary, endPos->region is looking left/5' and startPos->region is looking right/3'.
 return (startPos->txOffset == endPos->txOffset &&
         ((startPos->gDistance == endPos->gDistance &&
           startPos->intron3TxOffset == endPos->intron3TxOffset &&
           startPos->intron3Distance == endPos->intron3Distance) ||
          // intron -> exon boundary: startPos is exonic (right), endPos is intronic (left),
          // but end->intron3Distance is 0
          (startPos->region == vpExon && endPos->region == vpIntron &&
           endPos->intron3Distance == 0) ||
          // exon -> intron boundary: startPos is intronic (right), endPos is exonic (left),
          // but start->gDistance is 0
          (startPos->region == vpIntron && endPos->region == vpExon &&
           startPos->gDistance == 0)));
 }
 
 static int limitToExon(struct vpTx *vpTx, uint gEnd, struct psl *txAli)
 /* If variant ends in an exon, return the max number of bases by which we can shift the variant
  * along the genome in the direction of transcription without running past the end of the exon
  * into a splice site.
  * See HGVS "exception 3' rule": http://varnomen.hgvs.org/bg-material/numbering/
  * If not applicable, return INDEL_SHIFT_NO_MAX. */
 {
 int maxShift = INDEL_SHIFT_NO_MAX;
 if (vpTx->end.region == vpExon)
     {
     int blkIx = vpTx->end.aliBlkIx;
     if (txAli->strand[0] == '-')
         {
         while (blkIx > 0 && pslIntronTooShort(txAli, blkIx-1, MIN_INTRON))
             blkIx--;
         int tBlkStart = txAli->tStarts[blkIx];
         maxShift = gEnd - tBlkStart;
         }
     else
         {
         while (blkIx < txAli->blockCount - 1 && pslIntronTooShort(txAli, blkIx, MIN_INTRON))
             blkIx++;
         int tBlkEnd = txAli->tStarts[blkIx] + txAli->blockSizes[blkIx];
         maxShift = tBlkEnd - gEnd;
         }
     }
 return maxShift;
 }
 
 static boolean hasAnomalousGaps(struct psl *txAli, uint gStart, uint gEnd)
 /* Return TRUE if txAli has an indel between genomic start & end that is too short to be intron. */
 {
 int ix;
 for (ix = 0;  ix < txAli->blockCount - 1;  ix++)
     {
     uint intronStart = txAli->tStarts[ix] + txAli->blockSizes[ix];
     uint intronEnd = txAli->tStarts[ix+1];
     if (gStart <= intronEnd && gEnd >= intronStart && pslIntronTooShort(txAli, ix, MIN_INTRON))
         return TRUE;
     else if (intronStart > gEnd)
         break;
     }
 return FALSE;
 }
 
 static char *cloneMaybeRc(char *seq, boolean isRc)
 /* Clone a sequence string; if isRc, reverseComplement it. */
 {
 int len = strlen(seq);
 char *copy = cloneStringZ(seq, len);
 if (isRc)
     reverseComplement(copy, len);
 return copy;
 }
 
 static void vpTxSetTxAlt(struct vpTx *vpTx, struct seqWindow *gSeqWin, uint gStart, uint gEnd,
                          char *gAlt, boolean isRc)
 /* Set vpTx->txAlt to strand-corrected gAlt plus adjacent genomic insertion bases if any. */
 {
 if (vpTx->start.gInsBases || vpTx->end.gInsBases)
     {
     // variant overlaps genomic insertion; add genomic inserted bases to txAlt
     uint gibLeft = isRc ? vpTx->end.gInsBases : vpTx->start.gInsBases;
     uint gibRight = isRc ? vpTx->start.gInsBases : vpTx->end.gInsBases;
     uint altLen = strlen(gAlt);
     uint txAltLen = gibLeft + altLen + gibRight;
     char txAlt[txAltLen+1];
     if (gibLeft)
         seqWindowCopy(gSeqWin, gStart - gibLeft, gibLeft, txAlt, sizeof(txAlt));
     safecpy(txAlt+gibLeft, sizeof(txAlt)-gibLeft, gAlt);
     if (gibRight)
         seqWindowCopy(gSeqWin, gEnd, gibRight, txAlt+gibLeft+altLen, sizeof(txAlt)-gibLeft-altLen);
     if (isRc)
         reverseComplement(txAlt, txAltLen);
     vpTx->txAlt = cloneString(txAlt);
     vpTx->genomeMismatch = TRUE;
     int placeholder = 0;
     trimRefAlt(vpTx->txRef, vpTx->txAlt, &vpTx->start.txOffset, &vpTx->end.txOffset,
                &placeholder, &placeholder);
     }
 else
     vpTx->txAlt = cloneMaybeRc(gAlt, isRc);
 }
 
 static void spliceInBlk(struct seqWindow *gSeqWin, uint blkStart, uint blkEnd,
                         uint ambigStart, uint ambigEnd, uint varStart, uint varEnd, char *gAlt,
                         char *txAlt, size_t txAltSize, int *pSplicedLen, boolean *pAddedGAlt)
 /* If there is any overlap between [blkStart,blkEnd) and [ambigStart,ambigEnd) then append the
  * sequence to txAlt and update *pSplicedLen -- except for [varStart,varEnd) where we add
  * gAlt instead and update *pAddedGalt. */
 {
 if ((ambigStart < blkEnd && ambigEnd > blkStart) ||
     // both var and ambig are zero-length insertion points
     (ambigStart == ambigEnd && ambigStart <= blkEnd && ambigEnd >= blkStart))
     {
     // Add in blk's sequence between ambigStart and varStart, if any
     appendOverlap(gSeqWin, blkStart, blkEnd, ambigStart, varStart, txAlt, txAltSize, pSplicedLen);
     // If gAlt hasn't already been added, and blk overlaps [varStart,varEnd), add gAlt
     if (!*pAddedGAlt &&
         ((varStart < blkEnd && varEnd > blkStart) ||
          (varStart == varEnd && varStart <= blkEnd && varEnd >= blkStart)))
         {
         safecpy(txAlt+*pSplicedLen, txAltSize-*pSplicedLen, gAlt);
         *pSplicedLen += strlen(gAlt);
         *pAddedGAlt = TRUE;
         }
     // Add in blk's sequence between varEnd and ambigEnd, if any
     appendOverlap(gSeqWin, blkStart, blkEnd, varEnd, ambigEnd, txAlt, txAltSize, pSplicedLen);
     }
 }
 
 static char *spliceGenomicAndAlt(struct seqWindow *gSeqWin, uint ambigStart, uint ambigEnd,
                                  uint varStart, uint varEnd, struct psl *txAli, char *gAlt)
 /* Carefully construct the modified transcribed sequence by retaining non-intron genomic inserted
  * sequence and splicing in gAlt (which is on + strand of genome) in place of varStart..varEnd. */
 {
 assert(ambigStart <= varStart);
 assert(varStart <= varEnd);
 assert(varEnd <= ambigEnd);
 // This may count genomic inserted bases twice, but that's OK when allocating mem:
 uint leftLen = bufSizeForSplicedPlusGInsBases(txAli, ambigStart, varStart) - 1;
 uint rightLen = bufSizeForSplicedPlusGInsBases(txAli, varEnd, ambigEnd) - 1;
 uint gAltLen = strlen(gAlt);
 uint txAltSize = leftLen + gAltLen + rightLen + 1;
 char *txAlt = needMem(txAltSize);
 boolean addedGAlt = FALSE;
 int splicedLen = 0;
 int ix;
 for (ix = 0;  ix < txAli->blockCount;  ix++)
     {
     int tBlkStart = txAli->tStarts[ix];
     if (tBlkStart >= ambigEnd)
         break;
     int tBlkEnd = tBlkStart + txAli->blockSizes[ix];
     spliceInBlk(gSeqWin, tBlkStart, tBlkEnd, ambigStart, ambigEnd,
                 varStart, varEnd, gAlt, txAlt, txAltSize, &splicedLen, &addedGAlt);
     int tNextBlkStart = txAli->tStarts[ix+1];
     if (ix < txAli->blockCount - 1 && ambigEnd >= tBlkEnd && ambigStart <= tNextBlkStart &&
         pslIntronTooShort(txAli, ix, MIN_INTRON))
         {
         // Genomic insertion not intron -- splice in *all* inserted bases (except var bases).
         spliceInBlk(gSeqWin, tBlkEnd, tNextBlkStart,
                     min(tBlkEnd, ambigStart), max(ambigEnd, tNextBlkStart),
                     varStart, varEnd, gAlt, txAlt, txAltSize, &splicedLen, &addedGAlt);
         }
     }
 boolean isRc = (pslQStrand(txAli) == '-');
 if (isRc && splicedLen)
     reverseComplement(txAlt, splicedLen);
 return txAlt;
 }
 
 static void processIndels(struct vpTx *vpTx, struct seqWindow *gSeqWin,
                           uint gStart, uint gEnd, char *gAltCpy,
                           struct psl *txAli, struct dnaSeq *txSeq)
 /* If variant is an insertion or deletion, detect whether its placement is ambiguous, i.e. could
  * be shifted left or right with the same result.  Also detect whether txAli happens to have
  * a non-intronic indel in the same ambiguous region -- in that case, the genomic variant might
  * actually mean no change to the transcript so compare carefully.  If txAli is on '-' strand,
  * gTxStart can be greater than gTxEnd (i.e. they are swapped relative to the genome). */
 {
 int refLen = gEnd - gStart, altLen = strlen(gAltCpy);
 boolean isRc = (pslQStrand(txAli) == '-');
 if (! vpTx->genomeMismatch && indelShiftIsApplicable(refLen, altLen))
     {
     boolean isRc = (pslQStrand(txAli) == '-');
     // Genomic coords for transcript start and transcript end -- swapped if isRc!
     uint gTxStart = gStart, gTxEnd = gEnd;
     if (isRc)
         swapUint(&gTxStart, &gTxEnd);
     // Attempt to shift variants along the genome as far in the direction of transcription
     // as possible -- but not past an exon's 3' boundary into a splice site / intron.
     // shifting: http://andrewjesaitis.com/2017/03/the-state-of-variant-annotation-in-2017/
     // HGVS "exception 3' rule": http://varnomen.hgvs.org/bg-material/numbering/
     // Also find out how far we could shift 5' so we can detect genome/tx non-intron indels.
     uint gTxStart5 = gTxStart, gTxEnd5 = gTxEnd;
     char gAltCpy5[altLen+1];
     safecpy(gAltCpy5, sizeof(gAltCpy5), gAltCpy);
     uint ambigStart, ambigEnd;
     if (isRc)
         {
         indelShift(gSeqWin, &gTxEnd5, &gTxStart5, gAltCpy5, INDEL_SHIFT_NO_MAX, isdRight);
         int maxShift = limitToExon(vpTx, gTxEnd5, txAli);
         vpTx->basesShifted = indelShift(gSeqWin, &gTxEnd, &gTxStart, gAltCpy, maxShift, isdLeft);
         ambigStart = gTxEnd;
         ambigEnd = gTxStart5;
         }
     else
         {
         indelShift(gSeqWin, &gTxStart5, &gTxEnd5, gAltCpy5, INDEL_SHIFT_NO_MAX, isdLeft);
         int maxShift = limitToExon(vpTx, gTxEnd5, txAli);
         vpTx->basesShifted = indelShift(gSeqWin, &gTxStart, &gTxEnd, gAltCpy, maxShift, isdRight);
         ambigStart = gTxStart5;
         ambigEnd = gTxEnd;
         }
     if (vpTx->basesShifted)
         {
         // Variant was shifted on genome; re-project genomic coordinates to tx.
         vpPosGenoToTx(gTxStart, txAli, &vpTx->start, FALSE);
         vpPosGenoToTx(gTxEnd, txAli, &vpTx->end, TRUE);
         vpTxSetRef(vpTx, txSeq);
         }
     // Now that that's settled, set gRef and gAlt:
     gStart = isRc ? gTxEnd : gTxStart;
     gEnd = isRc ? gTxStart : gTxEnd;
     vpTx->gRef = needMem(refLen+1);
     seqWindowCopy(gSeqWin, gStart, refLen, vpTx->gRef, refLen+1);
     if (isRc)
         reverseComplement(vpTx->gRef, refLen);
     vpTx->gAlt = cloneMaybeRc(gAltCpy, isRc);
     vpTxSetTxAlt(vpTx, gSeqWin, gStart, gEnd, gAltCpy, isRc);
     if (hasAnomalousGaps(txAli, ambigStart, ambigEnd))
         {
         // The transcript and genome have a non-intron indel in this indel region,
         // so this variant could actually mean no change to the transcript.  We need to
         // carefully compare the transcript sequence to the genomic alt sequence over the
         // whole region.
         struct vpTxPosition ambigStartPos, ambigEndPos;
         vpPosGenoToTx(isRc ? ambigEnd : ambigStart, txAli, &ambigStartPos, FALSE);
         vpPosGenoToTx(isRc ? ambigStart : ambigEnd, txAli, &ambigEndPos, TRUE);
         char *ambigTxRef = getTxInRange(txSeq, &ambigStartPos, &ambigEndPos);
         // First get the reference according to the genome -- retaining genomic sequence from
         // the non-intron indel(s).
         size_t bufSize = bufSizeForSplicedPlusGInsBases(txAli, ambigStart, ambigEnd);
         char ambigGRef[bufSize];
         spliceGenomicInRange(gSeqWin, ambigStart, ambigEnd, txAli, TRUE,
                              ambigGRef, sizeof(ambigGRef));
         //#*** What if ambigStart.region != ambigEnd.region?
         if (differentString(ambigTxRef, ambigGRef))
             {
             // The ambiguous region contains a non-intron indel... construct ambiguous alt:
             char *ambigAlt = spliceGenomicAndAlt(gSeqWin, ambigStart, ambigEnd, gStart, gEnd, txAli,
                                                  gAltCpy);
             freeMem(vpTx->txAlt);
             if (sameString(ambigTxRef, ambigAlt))
                 {
                 // No change to the transcript; keep the right-shifted tx coords so that a
                 // would-have-been substitution doesn't become a confusing insertion point that
                 // makes it seem like the base to the right was somehow involved.
                 vpTx->txAlt = cloneString(vpTx->txRef);
                 }
             else
                 {
                 // There was some change to the transcript; trim redundant bases from the
                 // ambiguous region ref and alt to get the right-shifted change.
                 int placeholder = 0;
                 //#*** will vpTx->start/end.region ever change from this??
                 trimRefAlt(ambigTxRef, ambigAlt, &ambigStartPos.txOffset, &ambigEndPos.txOffset,
                            &placeholder, &placeholder);
                 vpTx->start.txOffset = ambigStartPos.txOffset;
                 vpTx->end.txOffset = ambigEndPos.txOffset;
                 freeMem(vpTx->txRef);
                 vpTx->txRef = ambigTxRef;
                 vpTx->txAlt = ambigAlt;
                 }
             vpTx->genomeMismatch = TRUE;
             }
         }
     }
 else if (hasAnomalousGaps(txAli, gStart, gEnd))
     {
     vpTx->gRef = needMem(refLen+1);
     seqWindowCopy(gSeqWin, gStart, refLen, vpTx->gRef, refLen+1);
     if (isRc)
         reverseComplement(vpTx->gRef, refLen);
     vpTx->gAlt = cloneMaybeRc(gAltCpy, isRc);
     if (differentString(vpTx->txRef, vpTx->gRef))
         {
         vpTx->txAlt = spliceGenomicAndAlt(gSeqWin, gStart, gEnd, gStart, gEnd, txAli, gAltCpy);
         if (differentString(vpTx->txRef, vpTx->txAlt))
             {
             // There was some change to the transcript; trim redundant bases (if any) from tx
             // ref and alt.
             int placeholder = 0;
             trimRefAlt(vpTx->txRef, vpTx->txAlt, &vpTx->start.txOffset, &vpTx->end.txOffset,
                        &placeholder, &placeholder);
             }
         vpTx->genomeMismatch = TRUE;
         }
     }
 }
 
 static void pslDeleteBlock(struct psl *psl, int ix)
 /* Remove block indexed by ix (in blockSizes, tStarts, qStarts) from psl. */
 {
 int jx;
 for (jx = ix+1;  jx < psl->blockCount;  jx++)
     {
     psl->blockSizes[jx-1] = psl->blockSizes[jx];
     psl->tStarts[jx-1] = psl->tStarts[jx];
     psl->qStarts[jx-1] = psl->qStarts[jx];
     }
 psl->blockCount--;
 pslRecalcBounds(psl);
 }
 
 static void pslUpdateGapCounts(struct psl *psl)
 /* After psl's gaps have been modified, update {q,t}{Num,Base}Insert and match statistics.
  * match and/or repMatch may be inaccurate after this. */
 {
 int qNumInsert = 0, tNumInsert = 0, qBaseInsert = 0, tBaseInsert = 0, aligned = 0;
 int ix;
 for (ix = 0;  ix < psl->blockCount;  ix++)
     {
     aligned += psl->blockSizes[ix];
     if (ix < psl->blockCount - 1)
         {
         int tInsLen = psl->tStarts[ix+1] - (psl->tStarts[ix] + psl->blockSizes[ix]);
         int qInsLen = psl->qStarts[ix+1] - (psl->qStarts[ix] + psl->blockSizes[ix]);
         if (tInsLen)
             {
             tNumInsert++;
             tBaseInsert += tInsLen;
             }
         if (qInsLen)
             {
             qNumInsert++;
             qBaseInsert += qInsLen;
             }
         }
     }
 psl->qNumInsert = qNumInsert;
 psl->qBaseInsert = qBaseInsert;
 psl->tNumInsert = tNumInsert;
 psl->tBaseInsert = tBaseInsert;
 int oldAligned = psl->match + psl->misMatch + psl->repMatch + psl->nCount;
 int gapified = oldAligned - aligned;
 // Count them against repMatch first, since shiftable gaps imply repetitive sequence.
 if (psl->repMatch >= gapified)
     psl->repMatch -= gapified;
 else
     {
     int extra = gapified - psl->repMatch;
     psl->repMatch = 0;
     if (psl->match >= extra)
         psl->match -= extra;
     else
         {
         // Maybe they were just left blank...
         psl->match = aligned;
         psl->misMatch = 0;
         psl->repMatch = 0;
         psl->nCount = 0;
         }
     }
 }
 
 static boolean pslExpandGapRight(struct psl *psl, int ix, uint shiftR, boolean fixBrokenExons)
 /* Expand the gap following block ix by shiftR bases on both t and q, making it a double-sided gap.
  * If shiftR is greater than or equal to the size of the following block then delete that block,
  * extending the double-sided gap into the next gap.  If fixBrokenExons is true then also check
  * to see if the following gap cancels out this gap (i.e. after sliding and considering the next
  * gap, the same number of bases are skipped on t and q); in that case, merge the following
  * block too because the two gaps should not have been introduced. */
 {
 boolean deletedBlocks = FALSE;
 if (ix < 0 || ix >= psl->blockCount - 1)
     errAbort("pslExpandGapRight: invalid ix %d for %s, expecting 0..%d",
              ix, psl->qName, psl->blockCount - 2);
 if (psl->blockSizes[ix+1] > shiftR)
     {
     // Expand gap to the right -- increase {t,q}Starts[ix+1], decrease blockSizes[ix+1]
     psl->tStarts[ix+1] += shiftR;
     psl->qStarts[ix+1] += shiftR;
     psl->blockSizes[ix+1] -= shiftR;
     }
 else
     {
     // The ambiguous region extends all the way through the next block, maybe past it!
     // Delete the block (i.e. merge the gaps).
     verbose(2, "%s gapIx %d slides past block %d; deleting block %d.\n",
             psl->qName, ix, ix+1, ix+1);
     // Calculate these before modifying psl:
     uint newTGapEnd = psl->tStarts[ix+1] + shiftR;
     pslDeleteBlock(psl, ix+1);
     // There should be at least one block left after deletion; check for overlap with it.
     if (ix > psl->blockCount - 2)
         errAbort("vpExpandIndelGaps: %s gapIx %d ambiguous region extended past final block. "
                  "(Do we need a fake 0-length block to end the double-sided gap?)  "
                  "Please report this case as a bug in the aligner.", psl->qName, ix);
     if (newTGapEnd > psl->tStarts[ix+1])
         {
         int overlap = newTGapEnd - psl->tStarts[ix+1];
         if (overlap > psl->blockSizes[ix+1])
             errAbort("vpExpandIndelGaps: %s gapIx %d ambiguous region extended past >1 blocks.  "
                      "Please report this case as a bug in the aligner.", psl->qName, ix);
         psl->tStarts[ix+1] += overlap;
         psl->qStarts[ix+1] += overlap;
         psl->blockSizes[ix+1] -= overlap;
         }
     if (fixBrokenExons && newTGapEnd == psl->tStarts[ix+1])
         {
         // NCBI has given us alignments that include unnecessary but compensating gaps. For example,
         // both galGal6 and NM_001030566.1 have a run of 15 A's in the middle of an aligning block.
         // For unknown reasons, NCBI's alignment introduces a 1-base gap on the target and then a
         // 1-base gap on the query that compensate for each other, but split up an exon into three
         // aligned blocks with unnecessary gaps.  Detect and fix that case:
         int newTInsLen = psl->tStarts[ix+1] - (psl->tStarts[ix] + psl->blockSizes[ix]);
         int newQInsLen = psl->qStarts[ix+1] - (psl->qStarts[ix] + psl->blockSizes[ix]);
         if (newTInsLen == newQInsLen)
             {
             // Gaps cancelled each other out; merge exons.
             verbose(2, "%s gapIx %d seems to cancel out gapIx %d; merging block formerly known "
                     "as %d\n",
                     psl->qName, ix+1, ix, ix+2);
             psl->blockSizes[ix] += newTInsLen + psl->blockSizes[ix+1];
             pslDeleteBlock(psl, ix+1);
             }
         }
     // Block ix has changed, so repeat with the same ix to see what happens to the next gap.
     deletedBlocks = TRUE;
     }
 return deletedBlocks;
 }
 
 void vpExpandIndelGaps(struct psl *txAli, struct seqWindow *gSeqWin, struct dnaSeq *txSeq)
 /* If txAli has any gaps that are too short to be introns, they are often indels that could be
  * shifted left and/or right.  If so, turn those gaps into double-sided gaps that span the
  * ambiguous region. This may change gSeqWin's range. */
 {
 boolean modified = FALSE;
 int ix;
 for (ix = 0;  ix < txAli->blockCount - 1;  ix++)
     {
     if (pslIntronTooShort(txAli, ix, MIN_INTRON))
         {
         gSeqWin->fetch(gSeqWin, txAli->tName, txAli->tStart, txAli->tEnd);
         // See if it is possible to shift the gap left and/or right on the genome.
         uint gStartL = txAli->tStarts[ix] + txAli->blockSizes[ix];
         uint gEndL = txAli->tStarts[ix+1];
         uint gStartR = gStartL, gEndR = gEndL;
         uint qLen = txAli->qStarts[ix+1] - txAli->qStarts[ix] - txAli->blockSizes[ix];
         char txCpyL[qLen+1], txCpyR[qLen+1];
         if (pslQStrand(txAli) == '-')
             {
             // Change query sequence to genome + strand
             uint qGapStart = txAli->qSize - txAli->qStarts[ix+1];
             safencpy(txCpyL, sizeof(txCpyL), txSeq->dna+qGapStart, qLen);
             reverseComplement(txCpyL, qLen);
             }
         else
             safencpy(txCpyL, sizeof(txCpyL), txSeq->dna+txAli->qStarts[ix+1]-qLen, qLen);
         safencpy(txCpyR, sizeof(txCpyR), txCpyL, qLen);
         uint shiftL = indelShift(gSeqWin, &gStartL, &gEndL, txCpyL, INDEL_SHIFT_NO_MAX, isdLeft);
         uint shiftR = indelShift(gSeqWin, &gStartR, &gEndR, txCpyR, INDEL_SHIFT_NO_MAX, isdRight);
         if (shiftL)
             {
             // Expand gap to the left -- decrease blockSizes[ix].
             if (txAli->blockSizes[ix] < shiftL)
                 {
                 if (ix == 0)
                     warn("vpExpandIndelGaps: %s gapIx %d slides left past start of first block.  "
                          "Skipping.  (Should we make a 0-length block at beginning?)",
                          txAli->qName, ix);
                 else
                     warn("vpExpandIndelGaps: %s gapIx %d slides left past the start of block %d, "
                          "but this should have already been taken care of when pslExpandGapRight "
                          "was called for gapIx %d.  Investigate...",
                          txAli->qName, ix, ix, ix-1);
                 continue;
                 }
             else
                 {
                 txAli->blockSizes[ix] -= shiftL;
                 }
             modified = TRUE;
             }
         if (shiftR)
             {
             boolean deletedBlocks = pslExpandGapRight(txAli, ix, shiftR, TRUE);
             if (deletedBlocks && ix < txAli->blockCount - 1 &&
                 pslIntronTooShort(txAli, ix, MIN_INTRON))
                 {
                 // Repeat w/same ix because block ix has expanded & gap to the right has changed.
                 // indelShift doesn't work on gaps that have already been expanded, though --
                 // so shrink the expanded gap back to single-sided.
                 int tInsLen = txAli->tStarts[ix+1] - (txAli->tStarts[ix] + txAli->blockSizes[ix]);
                 int qInsLen = txAli->qStarts[ix+1] - (txAli->qStarts[ix] + txAli->blockSizes[ix]);
                 int doubleLen = min(tInsLen, qInsLen);
                 txAli->blockSizes[ix] += doubleLen;
                 ix--;
                 }
             modified = TRUE;
             }
         }
     }
 if (modified)
     pslUpdateGapCounts(txAli);
 }
 
 struct vpTx *vpGenomicToTranscript(struct seqWindow *gSeqWin, struct bed3 *gBed3, char *gAlt,
                                    struct psl *txAli, struct dnaSeq *txSeq)
 /* Project a genomic variant onto a transcript, trimming identical bases at the beginning and/or
  * end of ref and alt alleles and shifting ambiguous indel placements in the direction of
  * transcription except across an exon-intron boundary.
  * Both ref and alt must be [ACGTN]-only (no symbolic alleles like "." or "-" or "<DUP>"
  * but "<DEL>" is OK).
  * Calling vpExpandIndelGaps on txAli before calling this will improve detection of variants
  * near ambiguously placed indels between genome and transcript.
  * This may change gSeqWin's range. */
 {
 if (sameString(gAlt, "<DEL>"))
     gAlt[0] = '\0';
 int altLen = strlen(gAlt);
 if (!isAllNt(gAlt, altLen) && differentString(gAlt, "*"))
     errAbort("vpGenomicToTranscript: alternate allele must be sequence of IUPAC DNA characters, "
              "'*', or '<DEL>' but is '%s'", gAlt);
 gSeqWin->fetch(gSeqWin, gBed3->chrom, gBed3->chromStart, gBed3->chromEnd);
 struct vpTx *vpTx;
 AllocVar(vpTx);
 vpTx->txName = cloneString(txSeq->name);
 boolean isRc = (pslQStrand(txAli) == '-');
 // Genomic coords and sequence for variant -- trim identical bases from ref and alt if any:
 uint gStart = gBed3->chromStart, gEnd = gBed3->chromEnd;
 int refLen = gEnd - gStart;
 char gRef[refLen+1];
 seqWindowCopy(gSeqWin, gStart, refLen, gRef, sizeof(gRef));
 // ALT='*' means the variant is irrelevant because it falls in a deleted region.  Treat as no-change.
 if (sameString(gAlt, "*"))
     altLen = refLen;
 char gAltCpy[altLen+1];
 safecpy(gAltCpy, sizeof(gAltCpy), (sameString(gAlt, "*") ? gRef: gAlt));
 if (differentString(gRef, gAltCpy))
     // Trim identical bases from start and end -- unless this is an assertion that there is
     // no change, in which case it's good to keep the range on which that assertion was made.
     trimRefAlt(gRef, gAltCpy, &gStart, &gEnd, &refLen, &altLen);
 // Even if we may later shift the variant position in the direction of transcription, first do
 // an initial projection to find exon boundaries and detect mismatch between genome and tx.
 vpPosGenoToTx(isRc ? gEnd : gStart, txAli, &vpTx->start, FALSE);
 vpPosGenoToTx(isRc ? gStart : gEnd, txAli, &vpTx->end, TRUE);
 // Compare genomic ref allele vs. txRef
 vpTxSetRef(vpTx, txSeq);
 vpTx->genomeMismatch = genomeTxMismatch(vpTx->txRef, gSeqWin, gStart, gEnd, txAli);
 processIndels(vpTx, gSeqWin, gStart, gEnd, gAltCpy, txAli, txSeq);
 // processIndels may or may not set vpTx->gAlt, vpTx->txAlt and vpTx->gRef
 if (vpTx->gAlt == NULL)
     vpTx->gAlt = cloneMaybeRc(gAltCpy, isRc);
 if (vpTx->txAlt == NULL)
     vpTxSetTxAlt(vpTx, gSeqWin, gStart, gEnd, gAltCpy, isRc);
 if (vpTx->gRef == NULL)
     vpTx->gRef = cloneMaybeRc(gRef, isRc);
 return vpTx;
 }
 
 void vpTxFree(struct vpTx **pVpTx)
 /* Free up a vpTx. */
 {
 if (pVpTx)
     {
     struct vpTx *vpTx = *pVpTx;
     freeMem(vpTx->txName);
     freeMem(vpTx->txRef);
     freeMem(vpTx->txAlt);
     freeMem(vpTx->gRef);
     freeMem(vpTx->gAlt);
     freez(pVpTx);
     }
 }
 
 void vpTxFreeList(struct vpTx **pList)
 /* Free up memory associated with list of vpTxs. */
 {
 if (pList == NULL)
     return;
 struct vpTx *el, *next;
 for (el = *pList; el != NULL; el = next)
     {
     next = el->next;
     vpTxFree(&el);
     }
 *pList = NULL;
 }
 
 char *translateString(char *codons)
 /* Translate a string of codon DNA into a string of peptide bases.  stop codon is 'X'. */
 {
 struct dnaSeq *codonSeq = newDnaSeq(cloneString(codons), strlen(codons), NULL);
 aaSeq *alt = translateSeq(codonSeq, 0, FALSE);
 aaSeqZToX(alt);
 dnaSeqFree(&codonSeq);
 return dnaSeqCannibalize(&alt);
 }
 
 struct vpPep *vpTranscriptToProtein(struct vpTx *vpTx, struct genbankCds *cds,
                                     struct dnaSeq *txSeq, struct dnaSeq *protSeq)
 /* Project a coding transcript variant onto a protein sequence, shifting position to the first
  * differing amino acid position.  Return NULL if no cds or incomplete cds. */
 //#*** This will produce incorrect results for the rare cds with join(...) unless we make a more
 //#*** complicated cds data structure to represent those (basically list of cds's) and use it here.
 {
 if (cds == NULL || cds->start == -1 || cds->end == -1 || !cds->startComplete)
     return NULL;
 if (txSeq == NULL)
     errAbort("vpTranscriptToProtein: txSeq must not be NULL");
 struct vpPep *vpPep = NULL;
 AllocVar(vpPep);
 vpPep->name = cloneString(protSeq->name);
 uint txStart = vpTx->start.txOffset;
 uint txEnd = vpTx->end.txOffset;
 // If the variant starts and ends within exon(s) and overlaps CDS then predict protein change.
 if (txStart >= cds->start && txStart < cds->end && txEnd > cds->start &&
     ((vpTx->start.region == vpExon && vpTx->end.region == vpExon) ||
      // Insertion at exon boundary -- it doesn't disrupt the splice site so assume its effect
      // is on the exon, in the spirit of HGVS's 3' exception rule
      (vpTxPosIsInsertion(&vpTx->start, &vpTx->end) &&
       (vpTx->start.region == vpExon || vpTx->end.region == vpExon)) ||
      (vpTx->start.region == vpUpstream && vpTx->end.region == vpDownstream && isEmpty(vpTx->txAlt))
      ))
     {
     uint startInCds = max(txStart, cds->start) - cds->start;
     uint endInCds = min(txEnd, cds->end) - cds->start;
     vpPep->start = startInCds / 3;
     vpPep->end = (endInCds + 2) / 3;
     uint codonStartInCds = vpPep->start * 3;
     uint codonEndInCds = vpPep->end * 3;
     uint codonLenInCds = codonEndInCds - codonStartInCds;
     aaSeq *txTrans = translateSeqN(txSeq, cds->start + codonStartInCds,
                                    codonLenInCds, FALSE);
     aaSeqZToX(txTrans);
     // We need pSeq to end with "X" because vpPep->start can be the stop codon / terminal
     char *pSeq = protSeq->dna;
     int pLen = protSeq->size;
     char pSeqWithX[pLen+2];
     if (pSeq[pLen-1] != 'X')
         {
         safencpy(pSeqWithX, sizeof(pSeqWithX), pSeq, pLen);
         safencpy(pSeqWithX+pLen, sizeof(pSeqWithX)-pLen, "X", 1);
         pSeq = pSeqWithX;
         }
     vpPep->txMismatch = !sameStringN(txTrans->dna, pSeq+vpPep->start,
                                      vpPep->end - vpPep->start);
     int startPadding = (startInCds - codonStartInCds);
     int endPadding = codonEndInCds - endInCds;
     int txAltLen = strlen(vpTx->txAlt);
     char altCodons[txSeq->size+txAltLen+1];
     altCodons[0] = '\0';
     if (startPadding > 0)
         // Copy the unchanged first base or two of ref codons
         safencpy(altCodons, sizeof(altCodons),
                  txSeq->dna + cds->start + codonStartInCds, startPadding);
     int txRefLen = txEnd - txStart;
     uint utr5Bases = (cds->start > txStart) ? cds->start - txStart : 0;
     uint utr3Bases = (txEnd > cds->end) ? txEnd - cds->end : 0;
     int cdsRefLen = txRefLen - utr5Bases - utr3Bases;
     int cdsAltLen = txAltLen - utr5Bases - utr3Bases;
     if (cdsAltLen < 0)
         cdsAltLen = 0;
     if (cdsAltLen > 0)
         // Copy in the alternate allele
         safencpy(altCodons+startPadding, sizeof(altCodons)-startPadding,
                  vpTx->txAlt + utr5Bases, cdsAltLen);
     int altCodonsEnd = strlen(altCodons);
     if ((cdsRefLen - cdsAltLen) % 3 != 0)
         {
         vpPep->frameshift = TRUE;
         // Extend ref to the end of the protein.
         vpPep->ref = cloneString(pSeq+vpPep->start);
         // Copy in all remaining tx sequence to see how soon we would hit a stop codon
         safecpy(altCodons+altCodonsEnd, sizeof(altCodons)-altCodonsEnd, txSeq->dna + txEnd);
         }
     else
         {
         vpPep->ref = cloneStringZ(pSeq+vpPep->start, vpPep->end - vpPep->start);
         if (endPadding > 0)
             // Copy the unchanged last base or two of ref codons
             safencpy(altCodons+altCodonsEnd, sizeof(altCodons)-altCodonsEnd,
                      txSeq->dna + cds->start + endInCds, endPadding);
         }
     char *alt = translateString(altCodons);
     if (endsWith(vpPep->ref, "X") && !endsWith(alt, "X"))
         {
         // Stop loss -- recompute alt
         freeMem(alt);
         safecpy(altCodons+altCodonsEnd, sizeof(altCodons)-altCodonsEnd, txSeq->dna + txEnd);
         alt = translateString(altCodons);
         }
     vpPep->alt = alt;
     int refLen = strlen(vpPep->ref), altLen = strlen(vpPep->alt);
     if (differentString(vpPep->ref, vpPep->alt))
         {
         // If alt has a stop codon, temporarily disguise it so it can't get trimmed
         char *altStop = strchr(vpPep->alt, 'X');
         if (altStop)
             *altStop = 'Z';
         trimRefAlt(vpPep->ref, vpPep->alt, &vpPep->start, &vpPep->end, &refLen, &altLen);
         if (altStop)
             *strchr(vpPep->alt, 'Z') = 'X';
         }
     if (indelShiftIsApplicable(refLen, altLen))
         {
         struct seqWindow *pSeqWin = memSeqWindowNew(vpPep->name, pSeq);
         vpPep->rightShiftedBases = indelShift(pSeqWin, &vpPep->start, &vpPep->end,
                                               vpPep->alt, INDEL_SHIFT_NO_MAX, isdRight);
         freeMem(vpPep->ref);
         vpPep->ref = cloneStringZ(pSeq+vpPep->start, vpPep->end - vpPep->start);
         memSeqWindowFree(&pSeqWin);
         }
     dnaSeqFree((struct dnaSeq **)&txTrans);
     }
 else
     {
     vpPep->cantPredict = TRUE;
     }
 return vpPep;
 }
 
 void vpPepFree(struct vpPep **pVp)
 /* Free up a vpPep. */
 {
 if (pVp && *pVp)
     {
     struct vpPep *vp = *pVp;
     freeMem(vp->name);
     freeMem(vp->ref);
     freeMem(vp->alt);
     freez(pVp);
     }
 }
 
 static char *regionStrings[] =
     { "vpUnknown",
       "vpUpstream",
       "vpDownstream",
       "vpExon",
       "vpIntron",
     };
 
 char *vpTxRegionToString(enum vpTxRegion region)
 /* Return a static string for region.  Do not free result! */
 {
 if (region > ArraySize(regionStrings))
     errAbort("vpTxRegionToString: invalid region %d", region);
 return regionStrings[region];
 }
 
 void vpTxPosSlideInSameRegion(struct vpTxPosition *txPos, int bases)
 /* Move txPos's region-appropriate offsets and distances by bases (can be negative).
  * *Caller must ensure that this will not slide us into another region!* */
 {
 if (txPos->region == vpIntron)
     {
     txPos->gDistance += bases;
     txPos->intron3Distance -= bases;
     }
 else if (txPos->region == vpExon)
     txPos->txOffset += bases;
 else if (txPos->region == vpUpstream)
     txPos->gDistance -= bases;
 else if (txPos->region == vpDownstream)
     txPos->gDistance += bases;
 else
     errAbort("vpTxPosSlideInSameRegion: unrecognized region %d", txPos->region);
 }
 
 boolean vpTxPosRangeIsSingleBase(struct vpTxPosition *startPos, struct vpTxPosition *endPos)
 /* Return true if [startPos, endPos) is a single-base region. */
 {
 if (startPos->region != endPos->region)
     return FALSE;
 if (startPos->region == vpUpstream && startPos->gDistance == endPos->gDistance + 1)
     return TRUE;
 if (startPos->region == vpExon && startPos->txOffset + 1 == endPos->txOffset)
     return TRUE;
 if (startPos->region == vpIntron &&
     startPos->txOffset == endPos->txOffset && startPos->gDistance + 1 == endPos->gDistance)
     return TRUE;
 if (startPos->region == vpDownstream && startPos->gDistance + 1 == endPos->gDistance)
     return TRUE;
 return FALSE;
 }
 
 char *vpTxGetRef(struct vpTx *vpTx)
 /* If vpTx->txRef is non-NULL and both start & end are exonic, return txRef;
  * otherwise return genomic.  For example, if a deletion spans exon/intron boundary, use genomic
  * ref because it includes the intron bases.  Do not free the returned value. */
 {
 if (vpTx->txRef != NULL &&
     vpTx->start.region == vpExon && vpTx->end.region == vpExon)
     return vpTx->txRef;
 return vpTx->gRef;
 }
 
 static char *getGAlt(struct vpTx *vpTx, struct psl *psl)
 /* Return a possibly strand-swapped copy of vpTx->gAlt.  Do not free returned value. */
 {
 static struct dyString *dy = NULL;
 if (dy == NULL)
     dy = dyStringNew(0);
 dyStringClear(dy);
 int altLen = strlen(vpTx->gAlt);
 dyStringAppendN(dy, vpTx->gAlt, altLen);
 if (pslQStrand(psl) == '-')
     reverseComplement(dy->string, altLen);
 return dy->string;
 }
 
 static void truncateAtStopCodon(char *codingSeq)
 /* If codingSeq contains a stop codon, truncate any sequence past that. */
 {
 if (codingSeq == NULL)
     errAbort("truncateAtStopCodon: null input");
 char *p = codingSeq;
 while (p[0] != '\0' && p[1] != '\0' && p[2] != '\0')
     {
     if (isStopCodon(p))
 	{
 	p[3] = '\0';
 	break;
 	}
     p += 3;
     }
 }
 
 static void getRefAltCodon(struct vpTx *vpTx, struct genbankCds *cds, struct dnaSeq *txSeq,
                            struct codingChange *cc, boolean isFrameshift, struct lm *lm)
 /* Make an in-frame representation of modified CDS sequence. */
 {
 char *txCdsSeq = txSeq->dna + cds->start;
 uint txStart = vpTx->start.txOffset;
 uint txEnd = vpTx->end.txOffset;
 uint utr5Bases = (cds->start > txStart) ? cds->start - txStart : 0;
 uint utr3Bases = (txEnd > cds->end) ? txEnd - cds->end : 0;
 int txAltLen = strlen(vpTx->txAlt);
 int cdsAltLen = (txAltLen > utr5Bases + utr3Bases) ? txAltLen - utr5Bases - utr3Bases : 0;
 int cdsStart = (txStart > cds->start) ? txStart - cds->start : 0;
 int basesBefore = cdsStart % 3;
 int codonStart = cdsStart - basesBefore;
 int cdsEnd = txEnd - cds->start - utr3Bases;
 int basesAfter = (3 - cdsEnd % 3) % 3;
 if (isFrameshift)
     {
     // Report the original remainder of CDS
     int restOfCds = cds->end - cds->start - codonStart;
     cc->codonOld = lmCloneStringZ(lm, txCdsSeq+codonStart, restOfCds);
     }
 else
     {
     int refCodonLen = cdsEnd + basesAfter - codonStart;
     cc->codonOld = lmCloneStringZ(lm, txCdsSeq + codonStart, refCodonLen);
     }
 if (isFrameshift)
     {
     // Include the rest of the transcript sequence so we can figure out where the new stop
     // codon will be.
     basesAfter = txSeq->size - txEnd;
     cdsAltLen += utr3Bases;
     }
 size_t codonNewSize = basesBefore + cdsAltLen + basesAfter + 1;
 char *codonNew = lmAlloc(lm, codonNewSize);
 if (basesBefore > 0)
     safencpy(codonNew, codonNewSize, txCdsSeq + codonStart, basesBefore);
 if (cdsAltLen > 0)
     safencpy(codonNew+basesBefore, codonNewSize-basesBefore, vpTx->txAlt + utr5Bases, cdsAltLen);
 if (basesAfter > 0)
     safencpy(codonNew + basesBefore + cdsAltLen, codonNewSize - basesBefore - cdsAltLen,
              txCdsSeq + cdsEnd, basesAfter);
 if (isFrameshift)
     truncateAtStopCodon(codonNew);
 cc->codonNew = codonNew;
 }
 
 static boolean pslNmdTarget(struct psl *psl, struct genbankCds *cds, int minIntronSize)
 /* Use psl and cds to determine whether a transcript is already subject to
  * nonsense-mediated decay (NMD), i.e. cds end is more than 50bp upstream of last intron.
  * If minIntronSize > 0, treat gaps between psl blocks as true introns only if they are
  * at least that many bases long.  If pslQStrand(psl) is '-', this calls pslRc twice. */
 {
 if (psl->blockCount < 2 || cds == NULL || cds->start == cds->end)
     return FALSE;
 int numIntronsPastCds = 0;
 int cdsEndToIntron = 0;
 boolean isRc = (pslQStrand(psl) == '-');
 boolean noTStrand = (psl->strand[1] == '\0');
 // Use pslRc so all math is on query + strand (cds->end is query +)
 if (isRc)
     pslRc(psl);
 int ix;
 for (ix = 0;  ix < psl->blockCount-1;  ix++)
     {
     int exonQStart = psl->qStarts[ix];
     int exonQEnd = exonQStart + psl->blockSizes[ix];
     if (exonQEnd >= cds->end)
         {
         // t coords are now on the reverse strand and in reverse order; intron size works the same
         int exonTEnd = psl->tStarts[ix] + psl->blockSizes[ix];
         int nextExonTStart = psl->tStarts[ix+1];
         int tGapLen = nextExonTStart - exonTEnd;
         int qGapLen = psl->qStarts[ix+1] - exonQEnd;
         int intronSize = tGapLen - qGapLen;
         if (intronSize > minIntronSize)
             {
             if (numIntronsPastCds == 0)
                 {
                 // First real intron following cdsEnd
                 cdsEndToIntron = exonQEnd - cds->end;
                 }
             numIntronsPastCds++;
             }
         }
     }
 if (isRc)
     {
     pslRc(psl);
     if (noTStrand)
         psl->strand[1] = '\0';
     }
 return (numIntronsPastCds > 1 || cdsEndToIntron > 50);
 }
 
 static void setCodingInfo(struct gpFx *fx, struct vpTx *vpTx, struct psl *psl,
                           struct genbankCds *cds, struct dnaSeq *txSeq,
                           struct vpPep *vpPep, struct lm *lm)
 /* Fill in the values of fx->details.codingChange. */
 {
 struct codingChange *cc = &fx->details.codingChange;
 uint txStart = vpTx->start.txOffset;
 cc->cDnaPosition = txStart;
 cc->txRef = lmCloneString(lm, vpTx->txRef);
 cc->txAlt = lmCloneString(lm, vpTx->txAlt);
 cc->cdsPosition = (txStart > cds->start) ? txStart - cds->start : 0;
 cc->exonNumber = pslBlkIxToExonIx(psl, vpTx->start.aliBlkIx, MIN_INTRON);
 cc->exonCount = pslCountExons(psl, MIN_INTRON);
 getRefAltCodon(vpTx, cds, txSeq, cc, (fx->soNumber == frameshift_variant), lm);
 if (vpPep->cantPredict)
     {
     cc->pepPosition = cc->cdsPosition / 3;
     cc->aaOld = lmCloneString(lm, "?");
     cc->aaNew = lmCloneString(lm, "?");
     }
 else
     {
     cc->pepPosition = vpPep->start;
     uint pepRefLen = strlen(vpPep->ref);
     uint pepAltLen = strlen(vpPep->alt);
     cc->aaOld = lmCloneStringZ(lm, vpPep->ref, pepRefLen);
     if (pepRefLen > 0 && cc->aaOld[pepRefLen-1] == 'X')
         cc->aaOld[pepRefLen-1] = '*';
     cc->aaNew = lmCloneStringZ(lm, vpPep->alt, pepAltLen);
     if (pepAltLen > 0 && cc->aaNew[pepAltLen-1] == 'X')
         cc->aaNew[pepAltLen-1] = '*';
     }
 }
 
 static struct gpFx *vpTxToFxCds(struct vpTx *vpTx, struct psl *psl, struct genbankCds *cds,
                                 struct dnaSeq *txSeq, struct vpPep *vpPep,
                                 struct dnaSeq *protSeq, struct lm *lm)
 /* Predict function for a variant in CDS. */
 {
 char *txName = txSeq->name;
 char *gAlt = getGAlt(vpTx, psl);
 struct gpFx *fx = gpFxNew(gAlt, txName, coding_sequence_variant, codingChange, lm);
 if (! vpPep->cantPredict)
     {
     uint pepRefLen = strlen(vpPep->ref);
     uint pepAltLen = strlen(vpPep->alt);
     char lastPepRef = (pepRefLen > 0) ? vpPep->ref[pepRefLen-1] : '\0';
     // Alt pep base at position of last ref pep base, if applicable:
     char lastPepRefAlt = (pepRefLen > 0 && pepRefLen <= pepAltLen) ? vpPep->alt[pepRefLen-1] : '\0';
     if (vpPep->start == 0 && vpPep->ref[0] == 'M')
         fx->soNumber = initiator_codon_variant;
     else if (sameString(vpPep->alt, "X") && differentString(vpPep->ref, "X"))
         fx->soNumber = stop_gained;
     else if (sameString(vpPep->alt, "X") && sameString(vpPep->ref, "X"))
         fx->soNumber = stop_retained_variant;
     else if (vpPep->frameshift)
         fx->soNumber = frameshift_variant;
     else if (endsWith(vpPep->alt, "X") && lastPepRef != 'X')
         fx->soNumber = stop_gained;
     else if (lastPepRef == 'X' && lastPepRefAlt != 0)
         {
         // Stop codon variant -- did it actually change?
         if (lastPepRefAlt != 'X')
             fx->soNumber = stop_lost;
         else
             fx->soNumber = stop_retained_variant;
         }
     else if (sameString(vpPep->ref, vpPep->alt))
         fx->soNumber = synonymous_variant;
     else if (pepRefLen > pepAltLen)
         fx->soNumber = inframe_deletion;
     else if (pepRefLen < pepAltLen)
         fx->soNumber = inframe_insertion;
     else
         fx->soNumber = missense_variant;
     }
 setCodingInfo(fx, vpTx, psl, cds, txSeq, vpPep, lm);
 return fx;
 }
 
 static struct gpFx *vpTxToFxUtrCds(struct vpTx *vpTx, struct psl *psl, struct genbankCds *cds,
                                    struct dnaSeq *txSeq, struct vpPep *vpPep,
                                    struct dnaSeq *protSeq, struct lm *lm)
 /* Predict function for a variant that spans UTR and CDS -- if it spans tx start, all we can
  * say is it's complicated. */
 {
 struct gpFx *fxList = NULL;
 char *gAlt = getGAlt(vpTx, psl);
 int startExonIx = pslBlkIxToExonIx(psl, vpTx->start.aliBlkIx, MIN_INTRON);
 int exonCount = pslCountExons(psl, MIN_INTRON);
 if (vpTx->start.txOffset < cds->start)
     {
     // _5_prime_UTR_variant
     slAddHead(&fxList, gpFxNew(gAlt, txSeq->name, _5_prime_UTR_variant, nonCodingExon, lm));
     gpFxSetNoncodingInfo(fxList, startExonIx, exonCount, vpTx->start.txOffset,
                          vpTx->txRef, vpTx->txAlt, lm);
     // initiator_codon_variant
     slAddHead(&fxList, vpTxToFxCds(vpTx, psl, cds, txSeq, vpPep, protSeq, lm));
     }
 else if (vpTx->end.txOffset > cds->end)
     {
     // _3_prime_UTR_variant
     slAddHead(&fxList, gpFxNew(gAlt, txSeq->name, _3_prime_UTR_variant, none, lm));
     gpFxSetNoncodingInfo(fxList, startExonIx, exonCount, vpTx->start.txOffset,
                          vpTx->txRef, vpTx->txAlt, lm);
     // Find out what happened to the stop codon
     slAddHead(&fxList, vpTxToFxCds(vpTx, psl, cds, txSeq, vpPep, protSeq, lm));
     }
 return fxList;
 }
 
 static struct gpFx *addLostExons(struct vpTx *vpTx, struct psl *psl, char *txName, char *gAlt,
                                  struct lm *lm)
 /* Call this only if vpTx spans at least one exon or intron.  This returns exon_loss_variant
  * effects if applicable. */
 {
 struct gpFx *fxList = NULL;
 int startBlkIx = vpTx->start.aliBlkIx, endBlkIx = vpTx->end.aliBlkIx;
 boolean isRc = pslQStrand(psl) == '-';
 if (isRc)
     {
     if (vpTx->start.region == vpIntron)
         startBlkIx++;
     if (vpTx->end.region == vpIntron)
         endBlkIx++;
     }
 int startIx = pslBlkIxToExonIx(psl, startBlkIx, MIN_INTRON) + 1;
 if (vpTx->start.region == vpUpstream)
     startIx--;
 int endIx = pslBlkIxToExonIx(psl, endBlkIx, MIN_INTRON);
 if (vpTx->end.region == vpIntron || vpTx->end.region == vpDownstream)
     endIx++;
 int exonCount = pslCountExons(psl, MIN_INTRON);
 int ix;
 for (ix = startIx;  ix < endIx ;  ix++)
     {
     // It would be better to compute nonCodingExon details: actual tx position and txRef of exon
     struct gpFx *fx = gpFxNew(gAlt, txName, exon_loss_variant, intron, lm);
     fx->details.intron.intronNumber = ix;
     fx->details.intron.intronCount = exonCount - 1;
     slAddHead(&fxList, fx);
     }
 slReverse(&fxList);
 return fxList;
 }
 
 static struct gpFx *fxIntronFromPsl(struct psl *psl, int aliBlkIx, char *txName, char *gAlt,
                                     enum soTerm soTerm, struct lm *lm)
 /* Return a gpFx with intron number/count corresponding to aliBlkIx */
 {
 struct gpFx *fx = gpFxNew(gAlt, txName, soTerm, intron, lm);
 int exonCount = pslCountExons(psl, MIN_INTRON);
 int exonStartBlkIx = aliBlkIx;
 boolean isRc = pslQStrand(psl) == '-';
 if (isRc)
     // The exon to the right on the genome is the exon before this intron
     exonStartBlkIx++;
 fx->details.intron.intronNumber = pslBlkIxToExonIx(psl, exonStartBlkIx, MIN_INTRON);
 fx->details.intron.intronCount = exonCount - 1;
 return fx;
 }
 
 static struct gpFx *vpTxToFxIntron(struct vpTx *vpTx, struct psl *psl, struct genbankCds *cds,
                                    char *txName, struct lm *lm)
 /* Return a list of gpFx for intronic variant (could include splice, exon_loss) */
 {
 // Is it also a splice donor/acceptor/region variant?
 uint intronStartDistance = vpTx->start.gDistance;
 uint intronEndDistance = vpTx->end.intron3Distance;
 enum soTerm soTerm = intron_variant;
 if (pslNmdTarget(psl, cds, MIN_INTRON))
     soTerm = NMD_transcript_variant;
 else if (intronStartDistance <= 1)
     soTerm = splice_donor_variant;
 else if (intronEndDistance <= 1)
     soTerm = splice_acceptor_variant;
 else if (intronStartDistance <= 7 || intronEndDistance <= 7)
     soTerm = splice_region_variant;
 char *gAlt = getGAlt(vpTx, psl);
 struct gpFx *fxList = fxIntronFromPsl(psl, vpTx->start.aliBlkIx, txName, gAlt, soTerm, lm);
 fxList = slCat(fxList, addLostExons(vpTx, psl, txName, gAlt, lm));
 return fxList;
 }
 
 static boolean txPosIsExonSpliceRegion(struct vpTxPosition *txPos, struct psl *psl, boolean isGEnd)
 /* Return TRUE if vpTxPos falls within 3bp of a true intron boundary.  If isGEnd, this is
  * the genomic end position (tx end for + strand, tx start for - strand). */
 {
 int leftBlkIx = txPos->aliBlkIx, rightBlkIx = txPos->aliBlkIx;
 while (leftBlkIx > 0 && pslIntronTooShort(psl, leftBlkIx-1, MIN_INTRON))
     leftBlkIx--;
 while (rightBlkIx < psl->blockCount - 1 && pslIntronTooShort(psl, rightBlkIx, MIN_INTRON))
     rightBlkIx++;
 if (leftBlkIx > 0)
     {
     // Check exon left edge
     int gLeft = psl->tStarts[leftBlkIx];
     int distance = txPos->gOffset - gLeft;
     if (isGEnd)
         distance--;
     if (distance < 3)
         return TRUE;
     }
 if (rightBlkIx < psl->blockCount - 1)
     {
     // Check exon right edge
     int gRight = psl->tStarts[rightBlkIx] + psl->blockSizes[rightBlkIx];
     int distance = gRight - txPos->gOffset;
     if (!isGEnd)
         distance++;
     if (distance < 3)
         return TRUE;
     }
 return FALSE;
 }
 
 static boolean vpTxIsExonSpliceRegion(struct vpTx *vpTx, struct psl *psl)
 /* Return TRUE if vpTx start or end falls within 3bp of a true intron boundary. */
 {
 boolean isRc = (pslQStrand(psl) == '-');
 return (txPosIsExonSpliceRegion(&vpTx->start, psl, isRc) ||
         txPosIsExonSpliceRegion(&vpTx->end, psl, !isRc));
 }
 
 static struct gpFx *vpTxToFxExon(struct vpTx *vpTx, struct psl *psl, struct genbankCds *cds,
                                  struct dnaSeq *txSeq, struct vpPep *vpPep,
                                  struct dnaSeq *protSeq, struct lm *lm)
 /* Variant's start and end are both exonic (possibly not the same exon). */
 {
 struct gpFx *fxList = NULL;
 int startExonIx = pslBlkIxToExonIx(psl, vpTx->start.aliBlkIx, MIN_INTRON);
 int exonCount = pslCountExons(psl, MIN_INTRON);
 char *txName = txSeq->name;
 char *gAlt = getGAlt(vpTx, psl);
 if (vpTx->start.gOffset == psl->tStart && vpTx->end.gOffset == psl->tEnd)
     {
     // Entire transcript is deleted -- no need to go into details.
     fxList = gpFxNew(gAlt, txName, transcript_ablation, none, lm);
     }
 else if (cds && cds->end > cds->start)
     {
     // coding transcript exon -- UTR, CDS or both?
     if (vpTx->start.txOffset < cds->start)
         {
         if (vpTx->end.txOffset <= cds->start)
             {
             fxList = gpFxNew(gAlt, txName, _5_prime_UTR_variant, nonCodingExon, lm);
             gpFxSetNoncodingInfo(fxList, startExonIx, exonCount, vpTx->start.txOffset,
                                  vpTx->txRef, vpTx->txAlt, lm);
             }
         else
             fxList = vpTxToFxUtrCds(vpTx, psl, cds, txSeq, vpPep, protSeq, lm);
         }
     else if (vpTx->end.txOffset > cds->end)
         {
         if (vpTx->start.txOffset >= cds->end)
             {
             fxList = gpFxNew(gAlt, txName, _3_prime_UTR_variant, nonCodingExon, lm);
             gpFxSetNoncodingInfo(fxList, startExonIx, exonCount, vpTx->start.txOffset,
                                  vpTx->txRef, vpTx->txAlt, lm);
             }
         else
             fxList = vpTxToFxUtrCds(vpTx, psl, cds, txSeq, vpPep, protSeq, lm);
         }
     else
         {
         fxList = vpTxToFxCds(vpTx, psl, cds, txSeq, vpPep, protSeq, lm);
         }
     if (pslNmdTarget(psl, cds, MIN_INTRON))
         fxList->soNumber = NMD_transcript_variant;
     }
 else
     {
     // non-coding exon
     fxList = gpFxNew(gAlt, txName, non_coding_transcript_exon_variant, nonCodingExon, lm);
     gpFxSetNoncodingInfo(fxList, startExonIx, exonCount, vpTx->start.txOffset,
                          vpTx->txRef, vpTx->txAlt, lm);
     }
 if (vpTxIsExonSpliceRegion(vpTx, psl))
     {
     struct gpFx *fx = gpFxNew(gAlt, txName, splice_region_variant, nonCodingExon, lm);
     gpFxSetNoncodingInfo(fx, startExonIx, exonCount, vpTx->start.txOffset,
                          vpTx->txRef, vpTx->txAlt, lm);
     fxList = slCat(fxList, fx);
     }
 fxList = slCat(fxList, addLostExons(vpTx, psl, txName, gAlt, lm));
 return fxList;
 }
 
 static struct gpFx *vpTxToFxSingleRegion(struct vpTx *vpTx, struct psl *psl, struct genbankCds *cds,
                                          struct dnaSeq *txSeq, struct vpPep *vpPep,
                                          struct dnaSeq *protSeq, struct lm *lm)
 /* A simple variant that doesn't straddle an exon edge (i.e. start & end have same region type). */
 {
 if (vpTx->start.region != vpTx->end.region)
     errAbort("vpTxToFxSingleRegion: call this only when start region == end region (got %s != %s)",
              vpTxRegionToString(vpTx->start.region), vpTxRegionToString(vpTx->end.region));
 struct gpFx *fxList = NULL;
 enum vpTxRegion region = vpTx->start.region;
 char *gAlt = getGAlt(vpTx, psl);
 char *txName = txSeq->name;
 char *txRef = vpTxGetRef(vpTx);
 if (sameString(txRef, vpTx->txAlt))
     fxList = gpFxNew(gAlt, txName, no_sequence_alteration, none, lm);
 else if (region == vpUpstream)
     fxList = gpFxNew(gAlt, txName, upstream_gene_variant, none, lm);
 else if (region == vpDownstream)
     fxList = gpFxNew(gAlt, txName, downstream_gene_variant, none, lm);
 else if (region == vpIntron)
     fxList = slCat(fxList, vpTxToFxIntron(vpTx, psl, cds, txName, lm));
 else if (region == vpExon)
     fxList = slCat(fxList, vpTxToFxExon(vpTx, psl, cds, txSeq, vpPep, protSeq, lm));
 else
     errAbort("vpTranscriptToGpFx: unrecognized region type %s (%d)",
              vpTxRegionToString(region), region);
 return fxList;
 }
 
 static struct vpTx *vpTxNewUpstreamPart(struct vpTx *vpTxIn, boolean isRc)
 /* vpTxIn starts upstream and ends in an exon or intron; return a new vpTx that contains
  * only the upstream portion. */
 {
 if (vpTxIn->start.region != vpUpstream)
     errAbort("vpTxNewUpstreamPart: vpTx input start is %s, should be vpUpstream",
              vpTxRegionToString(vpTxIn->start.region));
 if (vpTxIn->end.region != vpExon && vpTxIn->end.region != vpIntron)
     errAbort("vpTxNewUpstreamPart: unexpected end region %s, should be vpExon or vpIntron",
              vpTxRegionToString(vpTxIn->end.region));
 struct vpTx *vpTxUp;
 AllocVar(vpTxUp);
 vpTxUp->start = vpTxIn->start;
 // End at last upstream base (to the left of first exon base)
 vpTxUp->end.region = vpUpstream;
 vpTxUp->end.aliBlkIx = vpTxIn->start.aliBlkIx;
 if (isRc)
     vpTxUp->end.gOffset = vpTxIn->start.gOffset - vpTxIn->start.gDistance;
 else
     vpTxUp->end.gOffset = vpTxIn->start.gOffset + vpTxIn->start.gDistance;
 // The other fields of vpTxUp->end are all 0.
 vpTxUp->txName = cloneString(vpTxIn->txName);
 // Truncate alleles to just the upstream part.
 int upLen = vpTxIn->start.gDistance;
 vpTxUp->gRef = cloneStringZ(vpTxIn->gRef, upLen);
 vpTxUp->gAlt = cloneStringZ(vpTxIn->gAlt, upLen);
 vpTxUp->txRef = NULL;
 vpTxUp->txAlt = cloneString(vpTxUp->gAlt);
 vpTxUp->basesShifted = vpTxIn->basesShifted;
 vpTxUp->genomeMismatch = vpTxIn->genomeMismatch;
 return vpTxUp;
 }
 
 static char *cloneLastN(char *in, size_t lastN)
 /* Return a clone of only the last N bases of in (or all of in if N >= strlen(in)). */
 {
 if (in == NULL)
     return NULL;
 size_t inLen = strlen(in);
 if (inLen > lastN)
     return cloneString(in + inLen - lastN);
 else
     return cloneString(in);
 }
 
 static struct vpTx *vpTxNewExonPart(struct vpTx *vpTxIn, boolean isRc)
 /* vpTxIn overlaps at least one exon; return a new vpTx that contains only the exonic part. */
 {
 enum vpTxRegion startRegion = vpTxIn->start.region;
 if (startRegion != vpUpstream && startRegion != vpExon && startRegion != vpIntron)
     errAbort("vpTxNewExonPart: unexpected start region %s, should be upstream/exon/intron",
              vpTxRegionToString(startRegion));
 enum vpTxRegion endRegion = vpTxIn->end.region;
 if (endRegion != vpExon && endRegion != vpIntron && endRegion != vpDownstream)
     errAbort("vpTxNewExonPart: unexpected end region %s, should be exon/intron/downstream",
              vpTxRegionToString(endRegion));
 struct vpTx *vpTxEx;
 AllocVar(vpTxEx);
 vpTxEx->start.region = vpTxEx->end.region = vpExon;
 // Work forward from vpTxIn->start to first exon on or after that point.
 uint gSeqOffset = 0;
 if (startRegion == vpExon)
     vpTxEx->start = vpTxIn->start;
 else if (startRegion == vpUpstream)
     {
     // Starts at txStart; all vpTxEx->start values are 0 except possibly aliBlkIx
     vpTxEx->start.aliBlkIx = vpTxIn->start.aliBlkIx;
     if (isRc)
         vpTxEx->start.gOffset = vpTxIn->start.gOffset - vpTxIn->start.gDistance;
     else
         vpTxEx->start.gOffset = vpTxIn->start.gOffset + vpTxIn->start.gDistance;
     gSeqOffset = vpTxIn->start.gDistance;
     }
 else if (startRegion == vpIntron)
     {
     vpTxEx->start.txOffset = vpTxIn->start.intron3TxOffset;
     if (isRc)
         vpTxEx->start.gOffset = vpTxIn->start.gOffset - vpTxIn->start.intron3Distance;
     else
         vpTxEx->start.gOffset = vpTxIn->start.gOffset + vpTxIn->start.intron3Distance;
     vpTxEx->start.aliBlkIx = isRc ? vpTxIn->start.aliBlkIx : vpTxIn->start.aliBlkIx + 1;
     gSeqOffset = vpTxIn->start.intron3Distance;
     }
 // Work backward from vpTxIn->end to last exon on or before that point.
 uint gSeqTrim = 0;
 if (endRegion == vpExon)
     vpTxEx->end = vpTxIn->end;
 else if (endRegion == vpIntron || endRegion == vpDownstream)
     {
     vpTxEx->end.txOffset = vpTxIn->end.txOffset;
     if (isRc)
         vpTxEx->end.gOffset = vpTxIn->end.gOffset + vpTxIn->end.gDistance;
     else
         vpTxEx->end.gOffset = vpTxIn->end.gOffset - vpTxIn->end.gDistance;
     if (endRegion == vpIntron)
         vpTxEx->end.aliBlkIx = isRc ? vpTxIn->end.aliBlkIx + 1 : vpTxIn->end.aliBlkIx;
     else
         vpTxEx->end.aliBlkIx = vpTxIn->end.aliBlkIx;
     gSeqTrim = vpTxIn->end.gDistance;
     }
 vpTxEx->txName = cloneString(vpTxIn->txName);
 // Truncate alleles to just the exon part.
 int gRefLen = strlen(vpTxIn->gRef);
 if (gRefLen <= gSeqOffset)
     vpTxEx->gRef = cloneString("");
 else
     vpTxEx->gRef = cloneStringZ(vpTxIn->gRef + gSeqOffset, gRefLen - gSeqOffset - gSeqTrim);
 int gAltLen = strlen(vpTxIn->gAlt);
 if (gAltLen <= gSeqOffset)
     vpTxEx->gAlt = cloneString("");
 else
     vpTxEx->gAlt = cloneStringZ(vpTxIn->gAlt + gSeqOffset, gAltLen - gSeqOffset - gSeqTrim);
 vpTxEx->txRef = cloneString(vpTxIn->txRef);
 vpTxEx->txAlt = cloneString(vpTxIn->txAlt);
 vpTxEx->basesShifted = vpTxIn->basesShifted;
 vpTxEx->genomeMismatch = vpTxIn->genomeMismatch;
 return vpTxEx;
 }
 
 static struct vpTx *vpTxNewIntronPart(struct vpTx *vpTxIn, struct psl *psl)
 /* vpTxIn either starts or ends in an intron; return a new vpTx that contains only the
  * intronic part. */
 {
 enum vpTxRegion startRegion = vpTxIn->start.region;
 enum vpTxRegion endRegion = vpTxIn->end.region;
 if (startRegion != vpIntron && endRegion != vpIntron)
     errAbort("vpTxNewIntronPart: neither start (%s) nor end (%s) is intron",
              vpTxRegionToString(startRegion), vpTxRegionToString(endRegion));
 if (startRegion != vpUpstream && startRegion != vpExon && startRegion != vpIntron)
     errAbort("vpTxNewIntronPart: unexpected start region %s, should be upstream/exon/intron",
              vpTxRegionToString(startRegion));
 if (endRegion != vpExon && endRegion != vpIntron && endRegion != vpDownstream)
     errAbort("vpTxNewIntronPart: unexpected end region %s, should be exon/intron/downstream",
              vpTxRegionToString(endRegion));
 // Use vpTxPosRc for - strand, so we can do all the figuring on the + strand of the genome...
 // too complicated otherwise.
 boolean isRc = (pslQStrand(psl) == '-');
 struct vpTxPosition posLeft = isRc ? vpTxIn->end : vpTxIn->start;
 struct vpTxPosition posRight = isRc ? vpTxIn->start : vpTxIn->end;
 if (isRc)
     {
     vpTxPosRc(&posLeft, psl->qSize);
     vpTxPosRc(&posRight, psl->qSize);
     }
 uint gSeqOffset = 0, gSeqLen = 0;
 if (posLeft.region == vpIntron)
     {
     // posLeft is good to go; posRight needs to be the right edge of this intron, left of next exon
     int intronBlkIx = posLeft.aliBlkIx;
     int exonBlkIx = intronBlkIx+1;
     uint gLeft = psl->tStarts[intronBlkIx] + psl->blockSizes[intronBlkIx];
     uint gRight = psl->tStarts[exonBlkIx];
     gSeqOffset = isRc ? (posRight.gOffset - gRight) : 0;
     gSeqLen = gRight - posLeft.gOffset;
     posRight.region = vpIntron;
     posRight.txOffset = psl->qStarts[intronBlkIx] + psl->blockSizes[intronBlkIx];
     posRight.gDistance = (gRight - gLeft);
     posRight.intron3TxOffset = psl->qStarts[exonBlkIx];
     posRight.intron3Distance = 0;
     posRight.gOffset = gRight;
     // posRight's aliBlkIx is for the exon following the intron; decrement to get intron blkIx.
     posRight.aliBlkIx--;
     }
 else if (posRight.region == vpIntron)
     {
     // posRight is good to go; posLeft needs to be the left edge of this intron, right of prev exon
     int intronBlkIx = posRight.aliBlkIx;
     int exonBlkIx = intronBlkIx;
     uint gLeft = psl->tStarts[exonBlkIx] + psl->blockSizes[exonBlkIx];
     uint gRight = psl->tStarts[exonBlkIx+1];
     gSeqOffset = isRc ? 0 : (gLeft - posLeft.gOffset);
     gSeqLen = posRight.gOffset - gLeft;
     posLeft.region = vpIntron;
     posLeft.txOffset = psl->qStarts[exonBlkIx] + psl->blockSizes[exonBlkIx];
     posLeft.gDistance = 0;
     posLeft.intron3TxOffset = psl->qStarts[exonBlkIx+1];
     posLeft.intron3Distance = gRight - gLeft;
     posLeft.gOffset = gLeft;
     }
 else
     errAbort("vpTxNewIntronPart: neither posLeft (%s) nor posRight (%s) is intron.",
              vpTxRegionToString(posLeft.region), vpTxRegionToString(posRight.region));
 struct vpTx *vpTxIntron;
 AllocVar(vpTxIntron);
 if (isRc)
     {
     vpTxPosRc(&posLeft, psl->qSize);
     vpTxPosRc(&posRight, psl->qSize);
     vpTxIntron->start = posRight;
     vpTxIntron->end = posLeft;
     }
 else
     {
     vpTxIntron->start = posLeft;
     vpTxIntron->end = posRight;
     }
 vpTxIntron->txName = cloneString(vpTxIn->txName);
 // Truncate alleles to just the exon part.
 int gRefLen = strlen(vpTxIn->gRef);
 if (gRefLen <= gSeqOffset)
     vpTxIntron->gRef = cloneString("");
 else
     vpTxIntron->gRef = cloneStringZ(vpTxIn->gRef + gSeqOffset, gSeqLen);
 int gAltLen = strlen(vpTxIn->gAlt);
 if (gAltLen <= gSeqOffset)
     vpTxIntron->gAlt = cloneString("");
 else
     vpTxIntron->gAlt = cloneStringZ(vpTxIn->gAlt + gSeqOffset, gSeqLen);
 vpTxIntron->txRef = NULL;
 vpTxIntron->txAlt = cloneString(vpTxIntron->gAlt);
 vpTxIntron->basesShifted = vpTxIn->basesShifted;
 vpTxIntron->genomeMismatch = vpTxIn->genomeMismatch;
 return vpTxIntron;
 }
 
 static struct vpTx *vpTxNewDownstreamPart(struct vpTx *vpTxIn, boolean isRc)
 /* vpTxIn starts in an exon or intron and ends downstream; return a new vpTx that contains
  * only the downstream portion. */
 {
 if (vpTxIn->start.region != vpExon && vpTxIn->start.region != vpIntron)
     errAbort("vpTxNewDownstreamPart: unexpected start region %s, should be vpExon or vpIntron",
              vpTxRegionToString(vpTxIn->start.region));
 if (vpTxIn->end.region != vpDownstream)
     errAbort("vpTxNewDownstreamPart: vpTx input end is %s, should be vpDownstream",
              vpTxRegionToString(vpTxIn->end.region));
 struct vpTx *vpTxDown;
 AllocVar(vpTxDown);
 vpTxDown->end = vpTxIn->end;
 // Start at first downstream base (to the right of last exon base)
 vpTxDown->start.region = vpDownstream;
 vpTxDown->start.txOffset = vpTxIn->end.txOffset;
 vpTxDown->start.aliBlkIx = vpTxIn->end.aliBlkIx;
 if (isRc)
     vpTxDown->start.gOffset = vpTxIn->end.gOffset + vpTxIn->end.gDistance;
 else
     vpTxDown->start.gOffset = vpTxIn->end.gOffset - vpTxIn->end.gDistance;
 vpTxDown->txName = cloneString(vpTxIn->txName);
 // Truncate alleles to just the downstream part.
 size_t downLen = vpTxIn->end.gDistance;
 vpTxDown->gRef = cloneLastN(vpTxIn->gRef, downLen);
 vpTxDown->gAlt = cloneLastN(vpTxIn->gAlt, downLen);
 vpTxDown->txRef = NULL;
 vpTxDown->txAlt = cloneString(vpTxDown->gAlt);
 vpTxDown->basesShifted = vpTxIn->basesShifted;
 vpTxDown->genomeMismatch = vpTxIn->genomeMismatch;
 return vpTxDown;
 }
 
 static struct vpTx *vpTxSplitByRegion(struct vpTx *vpTx, struct psl *psl,
                                       struct genbankCds *cds, struct dnaSeq *txSeq,
                                       struct vpPep *vpPep, struct dnaSeq *protSeq)
 /* If vpTx doesn't delete the whole transcript but spans multiple regions *and* is
  * either a pure deletion or MNV then return a list of vpTx, one per region type,
  * so we can report functional effects of complex variants in more detail.
  * Otherwise return NULL to signify that we could not reliably split it up (e.g. when
  * nonzero but unequal numbers of bases are deleted and inserted across a boundary,
  * we don't know how to apportion the inserted bases).
  * Handle single-region vpTxs, insertions and transcript_ablation cases separately --
  * those do not need to be split up so don't call this on them. */
 {
 if (vpTx->start.region == vpTx->end.region ||
     vpTxPosIsInsertion(&vpTx->start, &vpTx->end) ||
     (vpTx->start.region == vpUpstream && vpTx->end.region == vpDownstream))
     errAbort("vpTxSplitByRegion: don't call this if start and end region (%s, %s) are the same, "
              "if vpTx is an insertion, or if vpTx deletes the entire transcript",
              vpTxRegionToString(vpTx->start.region), vpTxRegionToString(vpTx->end.region));
 // If start region and end region are different then at least one of them should be exon or
 // they should have at least one exon in the middle, so txRef should not be NULL.
 if (vpTx->txRef == NULL)
     errAbort("vpTxSplitByRegion: program error: how is txRef NULL if start region and end region "
              "are different (%s, %s)?",
              vpTxRegionToString(vpTx->start.region), vpTxRegionToString(vpTx->end.region));
 boolean isDeletion = (vpTx->txAlt[0] == '\0' && strlen(vpTx->txRef) > 0);
 boolean isMnv = (strlen(vpTx->txRef) == strlen(vpTx->txAlt));
 if (! (isDeletion || isMnv))
     return NULL;
 // Step through regions and add one vpTx per region type.
 boolean isRc = pslQStrand(psl) == '-';
 struct vpTx *vpTxList = NULL;
 if (vpTx->start.region == vpUpstream)
     {
     slAddHead(&vpTxList, vpTxNewUpstreamPart(vpTx, isRc));
     if (vpTx->end.region == vpExon)
         slAddHead(&vpTxList, vpTxNewExonPart(vpTx, isRc));
     else if (vpTx->end.region == vpIntron)
         {
         // At least one exon is deleted
         slAddHead(&vpTxList, vpTxNewExonPart(vpTx, isRc));
         slAddHead(&vpTxList, vpTxNewIntronPart(vpTx, psl));
         }
     // We won't see downstream here because it's excluded above.
     else
         errAbort("vpTxSplitByRegion: unexpected end region type %s after start==vpUpstream",
                  vpTxRegionToString(vpTx->end.region));
     }
 else if (vpTx->start.region == vpExon)
     {
     slAddHead(&vpTxList, vpTxNewExonPart(vpTx, isRc));
     if (vpTx->end.region == vpIntron)
         slAddHead(&vpTxList, vpTxNewIntronPart(vpTx, psl));
     else if (vpTx->end.region == vpDownstream)
         slAddHead(&vpTxList, vpTxNewDownstreamPart(vpTx, isRc));
     else if (vpTx->end.region != vpExon)
         errAbort("vpTxSplitByRegion: unexpected end region type %s after start==vpExon",
                  vpTxRegionToString(vpTx->end.region));
     }
 else if (vpTx->start.region == vpIntron)
     {
     slAddHead(&vpTxList, vpTxNewIntronPart(vpTx, psl));
     if (vpTx->end.region == vpExon)
         slAddHead(&vpTxList, vpTxNewExonPart(vpTx, isRc));
     else if (vpTx->end.region == vpIntron || vpTx->end.region == vpDownstream)
         {
         // At least one exon is deleted
         slAddHead(&vpTxList, vpTxNewExonPart(vpTx, isRc));
         if (vpTx->end.region == vpDownstream)
             slAddHead(&vpTxList, vpTxNewDownstreamPart(vpTx, isRc));
         else
             slAddHead(&vpTxList, vpTxNewIntronPart(vpTx, psl));
         }
     else
         errAbort("vpTxSplitByRegion: unexpected end region type %s after start==vpIntron",
                  vpTxRegionToString(vpTx->end.region));
     }
 else
     errAbort("vpTxSplitByRegion: unexpected start region %s",
              vpTxRegionToString(vpTx->start.region));
 slReverse(&vpTxList);
 return vpTxList;
 }
 
 static struct gpFx *vpTxToFxComplexIns(struct vpTx *vpTx, struct psl *psl,
                                        struct genbankCds *cds, struct dnaSeq *txSeq,
                                        struct vpPep *vpPep, struct dnaSeq *protSeq,
                                        struct lm *lm)
 /* Return consequence(s) of an insertion at the boundary between regions. */
 {
 // Insertions can have start regions and end regions that would normally be out of order,
 // e.g. start.region == vpExon and end.region == vpUpstream for an insertion before
 // the first tx base, so there's a different ordering to check here.
 struct gpFx *fxList = NULL;
 char *gAlt = getGAlt(vpTx, psl);
 char *txName = txSeq->name;
 if (vpTx->end.region == vpUpstream)
     fxList = gpFxNew(gAlt, txName, upstream_gene_variant, none, lm);
 if (vpTx->start.region == vpExon || vpTx->end.region == vpExon)
     fxList = slCat(fxList, vpTxToFxExon(vpTx, psl, cds, txSeq, vpPep, protSeq, lm));
 // Insertions at intron boundaries don't disrupt the splice site sequence.  So in
 // the spirit of HGVS's "3' exception rule", assume the change is to the exon and
 // don't report a splice hit, i.e. ignore vpIntron here and don't consider intron/exon
 // boundary insertions complex.  (Still note if the insertion could also be up/downstream.)
 if (vpTx->start.region == vpDownstream)
     fxList = slCat(fxList, gpFxNew(gAlt, txName, downstream_gene_variant, none, lm));
 if (vpTx->end.region == vpUpstream || vpTx->start.region == vpDownstream)
     fxList = slCat(fxList, gpFxNew(gAlt, txName, complex_transcript_variant, none, lm));
 if (vpTx->start.region == vpUpstream || vpTx->end.region == vpDownstream)
     errAbort("Unexpected combo of start and end region for insertion: "
              "start==%s, end==%s",
              vpTxRegionToString(vpTx->start.region), vpTxRegionToString(vpTx->end.region));
 return fxList;
 }
 
 static
 struct gpFx *vpTxToFxComplex(struct vpTx *vpTx, struct psl *psl, struct genbankCds *cds,
                              struct dnaSeq *txSeq, struct vpPep *vpPep, struct dnaSeq *protSeq,
                              struct lm *lm)
 /* Predict consequences (to the extent possible) of variants that begin and end in different
  * transcript regions. */
 {
 struct gpFx *fxList = NULL;
 char *gAlt = getGAlt(vpTx, psl);
 char *txName = txSeq->name;
 if (vpTx->start.region == vpUpstream && vpTx->end.region == vpDownstream)
     {
     // Entire transcript is deleted -- no need to go into details.
     fxList = gpFxNew(gAlt, txName, transcript_ablation, none, lm);
     }
 else if (vpTxPosIsInsertion(&vpTx->start, &vpTx->end))
     {
     fxList = vpTxToFxComplexIns(vpTx, psl, cds, txSeq, vpPep, protSeq, lm);
     }
 else
     {
     struct vpTx *vpTxList = vpTxSplitByRegion(vpTx, psl, cds, txSeq, vpPep, protSeq);
     struct vpTx *vpTxPart;
     for (vpTxPart = vpTxList;  vpTxPart != NULL;  vpTxPart = vpTxPart->next)
         fxList = slCat(fxList, vpTxToFxSingleRegion(vpTxPart, psl, cds, txSeq, vpPep, protSeq, lm));
 //#*** If it's too complicated for vpTxSplitByRegion, should we at least say whether it overlaps
 //#*** UTR/coding region/introns??
     fxList = slCat(fxList, addLostExons(vpTx, psl, txName, gAlt, lm));
     fxList = slCat(fxList, gpFxNew(gAlt, txName, complex_transcript_variant, none, lm));
     vpTxFreeList(&vpTxList);
     }
 return fxList;
 }
 
 struct gpFx *vpTranscriptToGpFx(struct vpTx *vpTx, struct psl *psl, struct genbankCds *cds,
                                 struct dnaSeq *txSeq, struct vpPep *vpPep, struct dnaSeq *protSeq,
                                 struct lm *lm)
 /* Make gpFx functional prediction(s) (SO term & additional data) from vpTx and sequence. */
 {
 if (vpTx->start.region == vpTx->end.region)
     return vpTxToFxSingleRegion(vpTx, psl, cds, txSeq, vpPep, protSeq, lm);
 else
     return vpTxToFxComplex(vpTx, psl, cds, txSeq, vpPep, protSeq, lm);
 }