b5b259b8314a0253ed8c26811bb8f9167f10829d
angie
Mon Aug 28 14:11:56 2017 -0700
hgvs{C,N}FromVpTx: when variant spans exon/intron boundary, use genomic ref instead of transcript ref so that intron bases aren't omitted. Thanks ChrisL for example in #19968-note17. refs #19968
diff --git src/hg/lib/hgHgvs.c src/hg/lib/hgHgvs.c
index 2bed75a..f2f3b2d 100644
--- src/hg/lib/hgHgvs.c
+++ src/hg/lib/hgHgvs.c
@@ -2493,69 +2493,80 @@
else
{
exonAnchor = txPos->intron3TxOffset;
direction = '-';
intronOffset = txPos->intron3Distance + closedEnd;
anchorIsStart = TRUE;
}
exonAnchor = cds ? hgvsTxToCds(exonAnchor, cds, anchorIsStart, cdsPrefix) :
exonAnchor + (anchorIsStart ? oneBased : 0);
dyStringPrintf(dy, "%s%u%c%u", cdsPrefix, exonAnchor, direction, intronOffset);
}
else
errAbort("appendHgvsNucPos: unrecognized vpTxRegion value %d", txPos->region);
}
+static char *refFromVpTx(struct vpTx *vpTx)
+/* Return vpTx->txRef if it is non-NULL and both start & end are exonic (region == vpExon);
+ * otherwise return vpTx->gRef (genomic).  For example, if a deletion spans an exon/intron boundary,
+ * use the genomic ref because it includes the intron bases.  Do not free the returned value. */
+{
+if (vpTx->txRef != NULL &&
+ vpTx->start.region == vpExon && vpTx->end.region == vpExon)
+ return vpTx->txRef;
+return vpTx->gRef;  // txRef is NULL, or start and end are not both in exons
+}
+
char *hgvsNFromVpTx(struct vpTx *vpTx, struct seqWindow *gSeqWin, struct psl *txAli,
struct dnaSeq *txSeq, boolean breakDelIns)
/* Return an HGVS n. (noncoding transcript) term for a variant projected onto a transcript.
* gSeqWin must already have at least the correct seqName if not the surrounding sequence.
* If breakDelIns, then show deleted bases (eg show 'delAGinsTT' instead of 'delinsTT'). */
{
struct dyString *dy = dyStringCreate("%s:n.", vpTx->txName);
// Make local copies of vpTx->{start,end} -- we may need to modify them for HGVS ins/dup.
struct vpTxPosition startPos = vpTx->start, endPos = vpTx->end;
int dupLen = tweakInsDup(&startPos, &endPos, vpTx->txAlt, gSeqWin, txAli, txSeq);
appendHgvsNucPos(dy, &startPos, TRUE, NULL);
if (!vpTxPosRangeIsSingleBase(&startPos, &endPos))
{
dyStringAppendC(dy, '_');
appendHgvsNucPos(dy, &endPos, FALSE, NULL);
}
-char *ref = vpTx->txRef ? vpTx->txRef : vpTx->gRef;
+char *ref = refFromVpTx(vpTx);  // txRef if set and start & end are both exonic; else gRef
hgvsAppendChangesFromNucRefAlt(dy, ref, vpTx->txAlt, dupLen, breakDelIns);
return dyStringCannibalize(&dy);
}
char *hgvsCFromVpTx(struct vpTx *vpTx, struct seqWindow *gSeqWin, struct psl *txAli,
struct genbankCds *cds, struct dnaSeq *txSeq, boolean breakDelIns)
/* Return an HGVS c. (coding transcript) term for a variant projected onto a transcript w/cds.
* gSeqWin must already have at least the correct seqName if not the surrounding sequence.
* If breakDelIns, then show deleted bases (eg show 'delAGinsTT' instead of 'delinsTT'). */
{
struct dyString *dy = dyStringCreate("%s:c.", vpTx->txName);
// Make local copies of vpTx->{start,end} -- we may need to modify them for HGVS ins/dup.
struct vpTxPosition startPos = vpTx->start, endPos = vpTx->end;
int dupLen = tweakInsDup(&startPos, &endPos, vpTx->txAlt, gSeqWin, txAli, txSeq);
appendHgvsNucPos(dy, &startPos, TRUE, cds);
if (!vpTxPosRangeIsSingleBase(&startPos, &endPos))
{
dyStringAppendC(dy, '_');
appendHgvsNucPos(dy, &endPos, FALSE, cds);
}
-char *ref = vpTx->txRef ? vpTx->txRef : vpTx->gRef;
+char *ref = refFromVpTx(vpTx);  // txRef if set and start & end are both exonic; else gRef
hgvsAppendChangesFromNucRefAlt(dy, ref, vpTx->txAlt, dupLen, breakDelIns);
return dyStringCannibalize(&dy);
}
char *hgvsPFromVpPep(struct vpPep *vpPep, struct dnaSeq *protSeq, boolean addParens)
/* Return an HGVS p. (protein) term for a variant projected into protein space.
* Strict HGVS compliance requires parentheses around predicted protein changes, but
* nobody seems to do that in practice.
* Return NULL if an input is NULL. */
{
if (vpPep == NULL || protSeq == NULL)
return NULL;
struct dyString *dy = dyStringCreate("%s:p.", vpPep->name);
if (addParens)
dyStringAppendC(dy, '(');