b1291b0928de2ce364c5335642acfc457110641e
angie
  Mon Apr 24 14:08:18 2017 -0700
When an HGVS term maps to a base that has been deleted from the reference genome, e.g. NM_020469.2(ABO):c.261= maps to a deleted base in hg19 & hg38, pslTransMap returns NULL, so we were failing to map it to any genomic location.  Added mapToDeletion for this special case, so we can map to the zero-length region where the reference genome has a deleted base.

diff --git src/hg/lib/hgHgvs.c src/hg/lib/hgHgvs.c
index 0d06599..ab6b363 100644
--- src/hg/lib/hgHgvs.c
+++ src/hg/lib/hgHgvs.c
@@ -1125,49 +1125,113 @@
 int altLen = refLen;
 psl->qName = cloneString(hgvs->changes);
 psl->qSize = altLen;
 psl->qStart = 0;
 psl->qEnd = altLen;
 psl->blockCount = 1;
 AllocArray(psl->blockSizes, psl->blockCount);
 AllocArray(psl->qStarts, psl->blockCount);
 AllocArray(psl->tStarts, psl->blockCount);
 psl->blockSizes[0] = refLen <= altLen ? refLen : altLen;
 psl->qStarts[0] = psl->qStart;
 psl->tStarts[0] = psl->tStart;
 return psl;
 }
 
+static struct psl *pslDelFromCoord(struct psl *txAli, int tStart, struct psl *variantPsl)
+/* Return a PSL with same target and query as txAli, but as a deletion at offset tStart:
+ * two zero-length blocks surrounding no target and query = variantPsl's target coords. */
+{
+struct psl *del;
+AllocVar(del);
+del->tName = cloneString(txAli->tName);
+del->tSize = txAli->tSize;
+safecpy(del->strand, sizeof(del->strand), txAli->strand);
+del->tStart = del->tEnd = tStart;
+del->qName = cloneString(txAli->qName);
+del->qStart = variantPsl->tStart;
+del->qEnd = variantPsl->tEnd;
+del->blockCount = 2;
+AllocArray(del->blockSizes, del->blockCount);
+AllocArray(del->qStarts, del->blockCount);
+AllocArray(del->tStarts, del->blockCount);
+// I wonder if zero-length blockSizes would trigger crashes somewhere...
+del->blockSizes[0] = del->blockSizes[1] = 0;
+del->tStarts[0] = del->tStarts[1] = del->tStart;
+del->qStarts[0] = del->qStart;
+del->qStarts[1] = del->qEnd;
+return del;
+}
+
+
+static struct psl *mapToDeletion(struct psl *variantPsl, struct psl *txAli)
+/* If the variant falls on a transcript base that is deleted in the reference genome,
+ * return the deletion coords (pslTransMap returns NULL), otherwise return NULL. */
+{
+// variant start and end coords, in transcript coords:
+int vStart = variantPsl->tStart;
+int vEnd = variantPsl->tEnd;
+// If txAli->strand is '-', reverse coords
+if (txAli->strand[0] == '-')
+    {
+    vStart = variantPsl->tSize - variantPsl->tEnd;
+    vEnd = variantPsl->tSize - variantPsl->tStart;
+    }
+if (vEnd < txAli->qStart)
+    return pslDelFromCoord(txAli, txAli->qStart, variantPsl);
+else if (vStart > txAli->qEnd)
+    return pslDelFromCoord(txAli, txAli->qEnd, variantPsl);
+int i;
+for (i = 0;  i < txAli->blockCount - 1;  i++)
+    {
+    int qBlockEnd = txAli->qStarts[i] + txAli->blockSizes[i];
+    int qNextBlockStart = txAli->qStarts[i+1];
+    int tBlockEnd = txAli->tStarts[i] + txAli->blockSizes[i];
+    int tNextBlockStart = txAli->tStarts[i+1];
+    if (vStart >= qBlockEnd && vEnd <= qNextBlockStart &&
+        tBlockEnd == tNextBlockStart)
+        return pslDelFromCoord(txAli, tBlockEnd, variantPsl);
+    }
+// Not contained in a deletion from reference genome (txAli target) -- return NULL.
+return NULL;
+}
+
+
 static struct psl *mapPsl(char *db, struct hgvsVariant *hgvs, char *pslTable, char *acc,
                           struct genbankCds *cds, int *retUpstream, int *retDownstream)
 /* If acc is found in pslTable, use pslTransMap to map hgvs onto the genome. */
 {
 struct psl *mappedToGenome = NULL;
 char query[2048];
 sqlSafef(query, sizeof(query), "select * from %s where qName = '%s'", pslTable, acc);
 struct sqlConnection *conn = hAllocConn(db);
 struct sqlResult *sr = sqlGetResult(conn, query);
 char **row;
 if (sr && (row = sqlNextRow(sr)))
     {
     int bin = 1; // All PSL tables used here, and any made in the future, use the bin column.
     struct psl *txAli = pslLoad(row+bin);
     // variantPsl contains the anchor if a non-cdsStart anchor is used because
     // the actual position might be outside the bounds of the transcript sequence (intron/UTR)
     struct psl *variantPsl = pslFromHgvsNuc(hgvs, acc, txAli->qSize, txAli->qEnd, cds,
                                             retUpstream, retDownstream);
     mappedToGenome = pslTransMap(pslTransMapNoOpts, variantPsl, txAli);
+    // If there is a deletion in the genome / insertion in the transcript then pslTransMap cannot
+    // map those bases to the genome.  In that case take a harder look and return the deletion
+    // coords.
+    if (mappedToGenome == NULL)
+        mappedToGenome = mapToDeletion(variantPsl, txAli);
     pslFree(&variantPsl);
     pslFree(&txAli);
     }
 sqlFreeResult(&sr);
 hFreeConn(&conn);
 return mappedToGenome;
 }
 
 static char *pslTableForAcc(char *db, char *acc)
 /* Based on acc (and whether db has NCBI RefSeq alignments), pick a PSL table where
  * acc should be found (table may or may not exist).  Don't free the returned string. */
 {
 char *pslTable = NULL;
 if (startsWith("LRG_", acc))
     pslTable = "lrgTranscriptAli";