1bb425f9fa71e6a0092954afb46e99886e9f3c08
angie
  Wed Mar 1 16:26:44 2023 -0800
Add a check for a boundary condition that so far has appeared only in zebrafish:
an insertion that lands (with HGVS right-shifting) at the boundary between an
alignment block and *double-sided* gap, i.e. it's not a clean intron gap but
instead has skipped over both genomic and transcript sequence.  Due to the
double-sided gap, vpTx start txOffset > end txOffset (not ==) so vpTxIsInsertion
returns false but it's still an insertion in the genome so there is no
vpTx->txRef.  This was triggering an errAbort; head that off by returning NULL
from vpTxSplitByRegion so we just call it complex_transcript_variant, which is
all we can say when the transcript aligner is perhaps trying too hard.  refs #29262

diff --git src/hg/lib/variantProjector.c src/hg/lib/variantProjector.c
index 4e4ed61..ac227e2 100644
--- src/hg/lib/variantProjector.c
+++ src/hg/lib/variantProjector.c
@@ -1864,30 +1864,37 @@
 /* If vpTx doesn't delete the whole transcript but spans multiple regions *and* is
  * either a pure deletion or MNV then return a list of vpTx, one per region type,
  * so we can report functional effects of complex variants in more detail.
  * Otherwise return NULL to signify that we could not reliably split it up (e.g. when
  * nonzero but unequal numbers of bases are deleted and inserted across a boundary,
  * we don't know how to apportion the inserted bases).
  * Handle single-region vpTxs, insertions and transcript_ablation cases separately --
  * those do not need to be split up so don't call this on them. */
 {
 if (vpTx->start.region == vpTx->end.region ||
     vpTxPosIsInsertion(&vpTx->start, &vpTx->end) ||
     (vpTx->start.region == vpUpstream && vpTx->end.region == vpDownstream))
     errAbort("vpTxSplitByRegion: don't call this if start and end region (%s, %s) are the same, "
              "if vpTx is an insertion, or if vpTx deletes the entire transcript",
              vpTxRegionToString(vpTx->start.region), vpTxRegionToString(vpTx->end.region));
+// Even if vpTxPosIsInsertion() is false because this is not an insertion into the transcript,
+// it might still be an insertion into the genome, e.g. danRer11's rs499587024 with NCBI's
+// alignment of NM_001002580.1 that places a 12-base "exon" after a double-sided gap "intron"
+// that skips 400-odd transcript bases, with rs499587024 at the "exon"/"intron" boundary.
+// In this case start > end in tx coords, not equal, so vpTxPosIsInsertion returns false.
+if (vpTx->start.gOffset == vpTx->end.gOffset)
+    return NULL;
 // If start region and end region are different then at least one of them should be exon or
 // they should have at least one exon in the middle, so txRef should not be NULL.
 if (vpTx->txRef == NULL)
     errAbort("vpTxSplitByRegion: program error: how is txRef NULL if start region and end region "
              "are different (%s, %s)?",
              vpTxRegionToString(vpTx->start.region), vpTxRegionToString(vpTx->end.region));
 boolean isDeletion = (vpTx->txAlt[0] == '\0' && strlen(vpTx->txRef) > 0);
 boolean isMnv = (strlen(vpTx->txRef) == strlen(vpTx->txAlt));
 if (! (isDeletion || isMnv))
     return NULL;
 // Step through regions and add one vpTx per region type.
 boolean isRc = pslQStrand(psl) == '-';
 struct vpTx *vpTxList = NULL;
 if (vpTx->start.region == vpUpstream)
     {