1ffd9a045c83c4b4a90493323e4dc3d3ac677f31 angie Fri Sep 22 16:07:10 2017 -0700 When start codon is lost, HGVS p. should be p.? because there may or may not be something that rescues the protein like a downstream in-frame start codon. diff --git src/hg/lib/hgHgvs.c src/hg/lib/hgHgvs.c index 43fc4e5..ed54650 100644 --- src/hg/lib/hgHgvs.c +++ src/hg/lib/hgHgvs.c @@ -2547,55 +2547,63 @@ struct dyString *dy = dyStringCreate("%s:c.", vpTx->txName); // Make local copies of vpTx->{start,end} -- we may need to modify them for HGVS ins/dup. struct vpTxPosition startPos = vpTx->start, endPos = vpTx->end; int dupLen = tweakInsDup(&startPos, &endPos, vpTx->txAlt, gSeqWin, txAli, txSeq); appendHgvsNucPos(dy, &startPos, TRUE, cds); if (!vpTxPosRangeIsSingleBase(&startPos, &endPos)) { dyStringAppendC(dy, '_'); appendHgvsNucPos(dy, &endPos, FALSE, cds); } char *ref = refFromVpTx(vpTx); hgvsAppendChangesFromNucRefAlt(dy, ref, vpTx->txAlt, dupLen, breakDelIns); return dyStringCannibalize(&dy); } +static boolean isStartLoss(struct vpPep *vpPep) +/* Return TRUE if vpPep shows that the start codon has been lost. */ +{ +return (vpPep->start == 0 && + isNotEmpty(vpPep->ref) && vpPep->ref[0] == 'M' && + (isEmpty(vpPep->alt) || vpPep->alt[0] != 'M')); +} + char *hgvsPFromVpPep(struct vpPep *vpPep, struct dnaSeq *protSeq, boolean addParens) /* Return an HGVS p. (protein) term for a variant projected into protein space. * Strict HGVS compliance requires parentheses around predicted protein changes, but * nobody seems to do that in practice. * Return NULL if an input is NULL. */ { if (vpPep == NULL || protSeq == NULL) return NULL; struct dyString *dy = dyStringCreate("%s:p.", vpPep->name); if (addParens) dyStringAppendC(dy, '('); int refLen = vpPep->end - vpPep->start; // When predicting frameshift/extension, the length of ref may be different from refLen int refExtLen = vpPep->ref ? strlen(vpPep->ref) : refLen; int altLen = vpPep->alt ? strlen(vpPep->alt) : 0; char refStartAbbr[4]; if (vpPep->ref) aaToAbbr(vpPep->ref[0], refStartAbbr, sizeof(refStartAbbr)); else // If ref is null then we should be writing just '=' or '?' but prevent garbage just in case: safecpy(refStartAbbr, sizeof(refStartAbbr), "?"); // protSeq may or may not end with X, so treat protSeq->size accordingly boolean hitsStopCodon = (vpPep->end > protSeq->size || ((protSeq->dna[protSeq->size-1] == 'X') && vpPep->end == protSeq->size)); -if (vpPep->cantPredict || vpPep->spansUtrCds) +if (vpPep->cantPredict || vpPep->spansUtrCds || isStartLoss(vpPep)) dyStringAppend(dy, "?"); else if (vpPep->frameshift) { dyStringPrintf(dy, "%s%d", refStartAbbr, vpPep->start+1); if (altLen == 1) dyStringAppend(dy, "Ter"); else { char altStartAbbr[4]; aaToAbbr(vpPep->alt[0], altStartAbbr, sizeof(altStartAbbr)); // For stop-loss extension, make it "ext*" if (hitsStopCodon && altLen > refExtLen) dyStringPrintf(dy, "%sext*%d", altStartAbbr, altLen - refExtLen); else dyStringPrintf(dy, "%sfsTer%d", altStartAbbr, altLen);