43722932e2326cad75a374cc87eeca7a3cf62fc9 chmalee Thu Jan 5 16:25:52 2023 -0800 Make hgVai not crash on some problematic variants. When the cds start site of a transcript overlapping a variant is undefined, we can still annotate the variant as a coding variant, just not the standard frameshift,missense,etc. Related, prevent crashing when certain required structures are NULL, refs #29262 diff --git src/hg/lib/variantProjector.c src/hg/lib/variantProjector.c index df860f7..4e4ed61 100644 --- src/hg/lib/variantProjector.c +++ src/hg/lib/variantProjector.c @@ -953,48 +953,48 @@ { struct dnaSeq *codonSeq = newDnaSeq(cloneString(codons), strlen(codons), NULL); aaSeq *alt = translateSeq(codonSeq, 0, FALSE); aaSeqZToX(alt); dnaSeqFree(&codonSeq); return dnaSeqCannibalize(&alt); } struct vpPep *vpTranscriptToProtein(struct vpTx *vpTx, struct genbankCds *cds, struct dnaSeq *txSeq, struct dnaSeq *protSeq) /* Project a coding transcript variant onto a protein sequence, shifting position to the first * differing amino acid position. Return NULL if no cds or incomplete cds. */ //#*** This will produce incorrect results for the rare cds with join(...) unless we make a more //#*** complicated cds data structure to represent those (basically list of cds's) and use it here. { -if (cds == NULL || cds->start == -1 || cds->end == -1 || !cds->startComplete) +if (cds == NULL || cds->start == -1 || cds->end == -1) return NULL; if (txSeq == NULL) errAbort("vpTranscriptToProtein: txSeq must not be NULL"); struct vpPep *vpPep = NULL; AllocVar(vpPep); vpPep->name = cloneString(protSeq->name); uint txStart = vpTx->start.txOffset; uint txEnd = vpTx->end.txOffset; // If the variant starts and ends within exon(s) and overlaps CDS then predict protein change. -if (txStart >= cds->start && txStart < cds->end && txEnd > cds->start && +if (cds->startComplete && (txStart >= cds->start && txStart < cds->end && txEnd > cds->start && ((vpTx->start.region == vpExon && vpTx->end.region == vpExon) || // Insertion at exon boundary -- it doesn't disrupt the splice site so assume its effect // is on the exon, in the spirit of HGVS's 3' exception rule (vpTxPosIsInsertion(&vpTx->start, &vpTx->end) && (vpTx->start.region == vpExon || vpTx->end.region == vpExon)) || (vpTx->start.region == vpUpstream && vpTx->end.region == vpDownstream && isEmpty(vpTx->txAlt)) - )) + ))) { uint startInCds = max(txStart, cds->start) - cds->start; uint endInCds = min(txEnd, cds->end) - cds->start; vpPep->start = startInCds / 3; vpPep->end = (endInCds + 2) / 3; uint codonStartInCds = vpPep->start * 3; uint codonEndInCds = vpPep->end * 3; uint codonLenInCds = codonEndInCds - codonStartInCds; aaSeq *txTrans = translateSeqN(txSeq, cds->start + codonStartInCds, codonLenInCds, FALSE); aaSeqZToX(txTrans); // We need pSeq to end with "X" because vpPep->start can be the stop codon / terminal char *pSeq = protSeq->dna; int pLen = protSeq->size; char pSeqWithX[pLen+2]; @@ -1530,49 +1530,49 @@ { // Entire transcript is deleted -- no need to go into details. fxList = gpFxNew(gAlt, txName, transcript_ablation, none, lm); } else if (cds && cds->end > cds->start) { // coding transcript exon -- UTR, CDS or both? if (vpTx->start.txOffset < cds->start) { if (vpTx->end.txOffset <= cds->start) { fxList = gpFxNew(gAlt, txName, _5_prime_UTR_variant, nonCodingExon, lm); gpFxSetNoncodingInfo(fxList, startExonIx, exonCount, vpTx->start.txOffset, vpTx->txRef, vpTx->txAlt, lm); } - else + else if (vpPep) fxList = vpTxToFxUtrCds(vpTx, psl, cds, txSeq, vpPep, protSeq, lm); } else if (vpTx->end.txOffset > cds->end) { if (vpTx->start.txOffset >= cds->end) { fxList = gpFxNew(gAlt, txName, _3_prime_UTR_variant, nonCodingExon, lm); gpFxSetNoncodingInfo(fxList, startExonIx, exonCount, vpTx->start.txOffset, vpTx->txRef, vpTx->txAlt, lm); } - else + else if (vpPep) fxList = vpTxToFxUtrCds(vpTx, psl, cds, txSeq, vpPep, protSeq, lm); } - else + else if (vpPep) { fxList = vpTxToFxCds(vpTx, psl, cds, txSeq, vpPep, protSeq, lm); } - if (pslNmdTarget(psl, cds, MIN_INTRON)) + if (pslNmdTarget(psl, cds, MIN_INTRON) && fxList) fxList->soNumber = NMD_transcript_variant; } else { // non-coding exon fxList = gpFxNew(gAlt, txName, non_coding_transcript_exon_variant, nonCodingExon, lm); gpFxSetNoncodingInfo(fxList, startExonIx, exonCount, vpTx->start.txOffset, vpTx->txRef, vpTx->txAlt, lm); } if (vpTxIsExonSpliceRegion(vpTx, psl)) { struct gpFx *fx = gpFxNew(gAlt, txName, splice_region_variant, nonCodingExon, lm); gpFxSetNoncodingInfo(fx, startExonIx, exonCount, vpTx->start.txOffset, vpTx->txRef, vpTx->txAlt, lm); fxList = slCat(fxList, fx);