4a170c36598f984d9957b4a591e09c743b49821f angie Thu May 1 11:35:31 2025 -0700 In vpTranscriptToProtein, check for NULL protSeq which can happen when the transcript has a CDS but none of the CDS has been aligned to the genome. refs #35577 diff --git src/hg/lib/variantProjector.c src/hg/lib/variantProjector.c index 3eb731c39ec..82c386237f6 100644 --- src/hg/lib/variantProjector.c +++ src/hg/lib/variantProjector.c @@ -952,30 +952,33 @@ dnaSeqFree(&codonSeq); return dnaSeqCannibalize(&alt); } struct vpPep *vpTranscriptToProtein(struct vpTx *vpTx, struct genbankCds *cds, struct dnaSeq *txSeq, struct dnaSeq *protSeq) /* Project a coding transcript variant onto a protein sequence, shifting position to the first * differing amino acid position. Return NULL if no cds or incomplete cds. */ //#*** This will produce incorrect results for the rare cds with join(...) unless we make a more //#*** complicated cds data structure to represent those (basically list of cds's) and use it here. { if (cds == NULL || cds->start == -1 || cds->end == -1) return NULL; if (txSeq == NULL) errAbort("vpTranscriptToProtein: txSeq must not be NULL"); +if (protSeq == NULL) + // This can happen when none of the CDS is aligned to the genome. + return NULL; struct vpPep *vpPep = NULL; AllocVar(vpPep); vpPep->name = cloneString(protSeq->name); uint txStart = vpTx->start.txOffset; uint txEnd = vpTx->end.txOffset; // If the variant starts and ends within exon(s) and overlaps CDS then predict protein change. if (cds->startComplete && (txStart >= cds->start && txStart < cds->end && txEnd > cds->start && ((vpTx->start.region == vpExon && vpTx->end.region == vpExon) || // Insertion at exon boundary -- it doesn't disrupt the splice site so assume its effect // is on the exon, in the spirit of HGVS's 3' exception rule (vpTxPosIsInsertion(&vpTx->start, &vpTx->end) && (vpTx->start.region == vpExon || vpTx->end.region == vpExon)) || (vpTx->start.region == vpUpstream && vpTx->end.region == vpDownstream && isEmpty(vpTx->txAlt)) ))) {