43722932e2326cad75a374cc87eeca7a3cf62fc9
chmalee
  Thu Jan 5 16:25:52 2023 -0800
Make hgVai not crash on some problematic variants. When the cds start site of a transcript overlapping a variant is undefined, we can still annotate the variant as a coding variant, just not the standard frameshift,missense,etc. Related, prevent crashing when certain required structures are NULL, refs #29262

diff --git src/hg/lib/variantProjector.c src/hg/lib/variantProjector.c
index df860f7..4e4ed61 100644
--- src/hg/lib/variantProjector.c
+++ src/hg/lib/variantProjector.c
@@ -953,48 +953,48 @@
 {
 struct dnaSeq *codonSeq = newDnaSeq(cloneString(codons), strlen(codons), NULL);
 aaSeq *alt = translateSeq(codonSeq, 0, FALSE);
 aaSeqZToX(alt);
 dnaSeqFree(&codonSeq);
 return dnaSeqCannibalize(&alt);
 }
 
 struct vpPep *vpTranscriptToProtein(struct vpTx *vpTx, struct genbankCds *cds,
                                     struct dnaSeq *txSeq, struct dnaSeq *protSeq)
 /* Project a coding transcript variant onto a protein sequence, shifting position to the first
  * differing amino acid position.  Return NULL if no cds or incomplete cds. */
 //#*** This will produce incorrect results for the rare cds with join(...) unless we make a more
 //#*** complicated cds data structure to represent those (basically list of cds's) and use it here.
 {
-if (cds == NULL || cds->start == -1 || cds->end == -1 || !cds->startComplete)
+if (cds == NULL || cds->start == -1 || cds->end == -1)
     return NULL;
 if (txSeq == NULL)
     errAbort("vpTranscriptToProtein: txSeq must not be NULL");
 struct vpPep *vpPep = NULL;
 AllocVar(vpPep);
 vpPep->name = cloneString(protSeq->name);
 uint txStart = vpTx->start.txOffset;
 uint txEnd = vpTx->end.txOffset;
 // If the variant starts and ends within exon(s) and overlaps CDS then predict protein change.
-if (txStart >= cds->start && txStart < cds->end && txEnd > cds->start &&
+if (cds->startComplete && (txStart >= cds->start && txStart < cds->end && txEnd > cds->start &&
     ((vpTx->start.region == vpExon && vpTx->end.region == vpExon) ||
      // Insertion at exon boundary -- it doesn't disrupt the splice site so assume its effect
      // is on the exon, in the spirit of HGVS's 3' exception rule
      (vpTxPosIsInsertion(&vpTx->start, &vpTx->end) &&
       (vpTx->start.region == vpExon || vpTx->end.region == vpExon)) ||
      (vpTx->start.region == vpUpstream && vpTx->end.region == vpDownstream && isEmpty(vpTx->txAlt))
-     ))
+     )))
     {
     uint startInCds = max(txStart, cds->start) - cds->start;
     uint endInCds = min(txEnd, cds->end) - cds->start;
     vpPep->start = startInCds / 3;
     vpPep->end = (endInCds + 2) / 3;
     uint codonStartInCds = vpPep->start * 3;
     uint codonEndInCds = vpPep->end * 3;
     uint codonLenInCds = codonEndInCds - codonStartInCds;
     aaSeq *txTrans = translateSeqN(txSeq, cds->start + codonStartInCds,
                                    codonLenInCds, FALSE);
     aaSeqZToX(txTrans);
     // We need pSeq to end with "X" because vpPep->start can be the stop codon / terminal
     char *pSeq = protSeq->dna;
     int pLen = protSeq->size;
     char pSeqWithX[pLen+2];
@@ -1530,49 +1530,49 @@
     {
     // Entire transcript is deleted -- no need to go into details.
     fxList = gpFxNew(gAlt, txName, transcript_ablation, none, lm);
     }
 else if (cds && cds->end > cds->start)
     {
     // coding transcript exon -- UTR, CDS or both?
     if (vpTx->start.txOffset < cds->start)
         {
         if (vpTx->end.txOffset <= cds->start)
             {
             fxList = gpFxNew(gAlt, txName, _5_prime_UTR_variant, nonCodingExon, lm);
             gpFxSetNoncodingInfo(fxList, startExonIx, exonCount, vpTx->start.txOffset,
                                  vpTx->txRef, vpTx->txAlt, lm);
             }
-        else
+        else if (vpPep)
             fxList = vpTxToFxUtrCds(vpTx, psl, cds, txSeq, vpPep, protSeq, lm);
         }
     else if (vpTx->end.txOffset > cds->end)
         {
         if (vpTx->start.txOffset >= cds->end)
             {
             fxList = gpFxNew(gAlt, txName, _3_prime_UTR_variant, nonCodingExon, lm);
             gpFxSetNoncodingInfo(fxList, startExonIx, exonCount, vpTx->start.txOffset,
                                  vpTx->txRef, vpTx->txAlt, lm);
             }
-        else
+        else if (vpPep)
             fxList = vpTxToFxUtrCds(vpTx, psl, cds, txSeq, vpPep, protSeq, lm);
         }
-    else
+    else if (vpPep)
         {
         fxList = vpTxToFxCds(vpTx, psl, cds, txSeq, vpPep, protSeq, lm);
         }
-    if (pslNmdTarget(psl, cds, MIN_INTRON))
+    if (pslNmdTarget(psl, cds, MIN_INTRON) && fxList)
         fxList->soNumber = NMD_transcript_variant;
     }
 else
     {
     // non-coding exon
     fxList = gpFxNew(gAlt, txName, non_coding_transcript_exon_variant, nonCodingExon, lm);
     gpFxSetNoncodingInfo(fxList, startExonIx, exonCount, vpTx->start.txOffset,
                          vpTx->txRef, vpTx->txAlt, lm);
     }
 if (vpTxIsExonSpliceRegion(vpTx, psl))
     {
     struct gpFx *fx = gpFxNew(gAlt, txName, splice_region_variant, nonCodingExon, lm);
     gpFxSetNoncodingInfo(fx, startExonIx, exonCount, vpTx->start.txOffset,
                          vpTx->txRef, vpTx->txAlt, lm);
     fxList = slCat(fxList, fx);