b5b259b8314a0253ed8c26811bb8f9167f10829d
angie
  Mon Aug 28 14:11:56 2017 -0700
hgvs{C,N}FromVpTx: when variant spans exon/intron boundary, use genomic ref instead of transcript ref so that intron bases aren't omitted.  Thanks ChrisL for example in #19968-note17.  refs #19968

diff --git src/hg/lib/hgHgvs.c src/hg/lib/hgHgvs.c
index 2bed75a..f2f3b2d 100644
--- src/hg/lib/hgHgvs.c
+++ src/hg/lib/hgHgvs.c
@@ -2493,69 +2493,80 @@
     else
         {
         exonAnchor = txPos->intron3TxOffset;
         direction = '-';
         intronOffset = txPos->intron3Distance + closedEnd;
         anchorIsStart = TRUE;
         }
     exonAnchor = cds ? hgvsTxToCds(exonAnchor, cds, anchorIsStart, cdsPrefix) :
                        exonAnchor + (anchorIsStart ? oneBased : 0);
     dyStringPrintf(dy, "%s%u%c%u", cdsPrefix, exonAnchor, direction, intronOffset);
     }
 else
     errAbort("appendHgvsNucPos: unrecognized vpTxRegion value %d", txPos->region);
 }
 
+static char *refFromVpTx(struct vpTx *vpTx)
+/* Pick the reference allele to report: txRef when it is non-NULL and the variant's start and
+ * end are both in vpExon regions; gRef in every other case (e.g. a deletion spanning an
+ * exon/intron boundary, where gRef includes the intron bases).  Do not free the result. */
+{
+if (vpTx->txRef == NULL ||
+    vpTx->start.region != vpExon || vpTx->end.region != vpExon)
+    return vpTx->gRef;
+return vpTx->txRef;
+}
+
 char *hgvsNFromVpTx(struct vpTx *vpTx, struct seqWindow *gSeqWin, struct psl *txAli,
                     struct dnaSeq *txSeq, boolean breakDelIns)
 /* Return an HGVS n. (noncoding transcript) term for a variant projected onto a transcript.
  * gSeqWin must already have at least the correct seqName if not the surrounding sequence.
  * If breakDelIns, then show deleted bases (eg show 'delAGinsTT' instead of 'delinsTT'). */
 {
 struct dyString *dy = dyStringCreate("%s:n.", vpTx->txName);
 // Make local copies of vpTx->{start,end} -- we may need to modify them for HGVS ins/dup.
 struct vpTxPosition startPos = vpTx->start, endPos = vpTx->end;
 int dupLen = tweakInsDup(&startPos, &endPos, vpTx->txAlt, gSeqWin, txAli, txSeq);
 appendHgvsNucPos(dy, &startPos, TRUE, NULL);
 if (!vpTxPosRangeIsSingleBase(&startPos, &endPos))
     {
     dyStringAppendC(dy, '_');
     appendHgvsNucPos(dy, &endPos, FALSE, NULL);
     }
-char *ref = vpTx->txRef ? vpTx->txRef : vpTx->gRef;
+char *ref = refFromVpTx(vpTx);  // txRef only if non-NULL and both ends are exonic; else gRef
 hgvsAppendChangesFromNucRefAlt(dy, ref, vpTx->txAlt, dupLen, breakDelIns);
 return dyStringCannibalize(&dy);
 }
 
 
 char *hgvsCFromVpTx(struct vpTx *vpTx, struct seqWindow *gSeqWin, struct psl *txAli,
                     struct genbankCds *cds,  struct dnaSeq *txSeq, boolean breakDelIns)
 /* Return an HGVS c. (coding transcript) term for a variant projected onto a transcript w/cds.
  * gSeqWin must already have at least the correct seqName if not the surrounding sequence.
  * If breakDelIns, then show deleted bases (eg show 'delAGinsTT' instead of 'delinsTT'). */
 {
 struct dyString *dy = dyStringCreate("%s:c.", vpTx->txName);
 // Make local copies of vpTx->{start,end} -- we may need to modify them for HGVS ins/dup.
 struct vpTxPosition startPos = vpTx->start, endPos = vpTx->end;
 int dupLen = tweakInsDup(&startPos, &endPos, vpTx->txAlt, gSeqWin, txAli, txSeq);
 appendHgvsNucPos(dy, &startPos, TRUE, cds);
 if (!vpTxPosRangeIsSingleBase(&startPos, &endPos))
     {
     dyStringAppendC(dy, '_');
     appendHgvsNucPos(dy, &endPos, FALSE, cds);
     }
-char *ref = vpTx->txRef ? vpTx->txRef : vpTx->gRef;
+char *ref = refFromVpTx(vpTx);  // txRef only if non-NULL and both ends are exonic; else gRef
 hgvsAppendChangesFromNucRefAlt(dy, ref, vpTx->txAlt, dupLen, breakDelIns);
 return dyStringCannibalize(&dy);
 }
 
 char *hgvsPFromVpPep(struct vpPep *vpPep, struct dnaSeq *protSeq, boolean addParens)
 /* Return an HGVS p. (protein) term for a variant projected into protein space.
  * Strict HGVS compliance requires parentheses around predicted protein changes, but
  * nobody seems to do that in practice.
  * Return NULL if an input is NULL. */
 {
 if (vpPep == NULL || protSeq == NULL)
     return NULL;
 struct dyString *dy = dyStringCreate("%s:p.", vpPep->name);
 if (addParens)
     dyStringAppendC(dy, '(');