b5b259b8314a0253ed8c26811bb8f9167f10829d angie Mon Aug 28 14:11:56 2017 -0700 hgvs{C,N}FromVpTx: when variant spans exon/intron boundary, use genomic ref instead of transcript ref so that intron bases aren't omitted. Thanks ChrisL for example in #19968-note17. refs #19968 diff --git src/hg/lib/hgHgvs.c src/hg/lib/hgHgvs.c index 2bed75a..f2f3b2d 100644 --- src/hg/lib/hgHgvs.c +++ src/hg/lib/hgHgvs.c @@ -2493,69 +2493,80 @@ else { exonAnchor = txPos->intron3TxOffset; direction = '-'; intronOffset = txPos->intron3Distance + closedEnd; anchorIsStart = TRUE; } exonAnchor = cds ? hgvsTxToCds(exonAnchor, cds, anchorIsStart, cdsPrefix) : exonAnchor + (anchorIsStart ? oneBased : 0); dyStringPrintf(dy, "%s%u%c%u", cdsPrefix, exonAnchor, direction, intronOffset); } else errAbort("appendHgvsNucPos: unrecognized vpTxRegion value %d", txPos->region); } +static char *refFromVpTx(struct vpTx *vpTx) +/* If vpTx->txRef is non-NULL and both start & end are exonic, return txRef; + * otherwise return genomic. For example, if a deletion spans exon/intron boundary, use genomic + * ref because it includes the intron bases. Do not free the returned value. */ +{ +if (vpTx->txRef != NULL && + vpTx->start.region == vpExon && vpTx->end.region == vpExon) + return vpTx->txRef; +return vpTx->gRef; +} + char *hgvsNFromVpTx(struct vpTx *vpTx, struct seqWindow *gSeqWin, struct psl *txAli, struct dnaSeq *txSeq, boolean breakDelIns) /* Return an HGVS n. (noncoding transcript) term for a variant projected onto a transcript. * gSeqWin must already have at least the correct seqName if not the surrounding sequence. * If breakDelIns, then show deleted bases (eg show 'delAGinsTT' instead of 'delinsTT'). */ { struct dyString *dy = dyStringCreate("%s:n.", vpTx->txName); // Make local copies of vpTx->{start,end} -- we may need to modify them for HGVS ins/dup. struct vpTxPosition startPos = vpTx->start, endPos = vpTx->end; int dupLen = tweakInsDup(&startPos, &endPos, vpTx->txAlt, gSeqWin, txAli, txSeq); appendHgvsNucPos(dy, &startPos, TRUE, NULL); if (!vpTxPosRangeIsSingleBase(&startPos, &endPos)) { dyStringAppendC(dy, '_'); appendHgvsNucPos(dy, &endPos, FALSE, NULL); } -char *ref = vpTx->txRef ? vpTx->txRef : vpTx->gRef; +char *ref = refFromVpTx(vpTx); hgvsAppendChangesFromNucRefAlt(dy, ref, vpTx->txAlt, dupLen, breakDelIns); return dyStringCannibalize(&dy); } char *hgvsCFromVpTx(struct vpTx *vpTx, struct seqWindow *gSeqWin, struct psl *txAli, struct genbankCds *cds, struct dnaSeq *txSeq, boolean breakDelIns) /* Return an HGVS c. (coding transcript) term for a variant projected onto a transcript w/cds. * gSeqWin must already have at least the correct seqName if not the surrounding sequence. * If breakDelIns, then show deleted bases (eg show 'delAGinsTT' instead of 'delinsTT'). */ { struct dyString *dy = dyStringCreate("%s:c.", vpTx->txName); // Make local copies of vpTx->{start,end} -- we may need to modify them for HGVS ins/dup. struct vpTxPosition startPos = vpTx->start, endPos = vpTx->end; int dupLen = tweakInsDup(&startPos, &endPos, vpTx->txAlt, gSeqWin, txAli, txSeq); appendHgvsNucPos(dy, &startPos, TRUE, cds); if (!vpTxPosRangeIsSingleBase(&startPos, &endPos)) { dyStringAppendC(dy, '_'); appendHgvsNucPos(dy, &endPos, FALSE, cds); } -char *ref = vpTx->txRef ? vpTx->txRef : vpTx->gRef; +char *ref = refFromVpTx(vpTx); hgvsAppendChangesFromNucRefAlt(dy, ref, vpTx->txAlt, dupLen, breakDelIns); return dyStringCannibalize(&dy); } char *hgvsPFromVpPep(struct vpPep *vpPep, struct dnaSeq *protSeq, boolean addParens) /* Return an HGVS p. (protein) term for a variant projected into protein space. * Strict HGVS compliance requires parentheses around predicted protein changes, but * nobody seems to do that in practice. * Return NULL if an input is NULL. */ { if (vpPep == NULL || protSeq == NULL) return NULL; struct dyString *dy = dyStringCreate("%s:p.", vpPep->name); if (addParens) dyStringAppendC(dy, '(');