1eb02994652b9a06a33024bc629520aeeeb0a2f1
chmalee
  Fri Aug 6 11:18:49 2021 -0700
When a user has searched for a pseudo-HGVS term and the term does not map to any transcripts, warn the user but go to the general location around where they meant to go, refs #15554, #27942

diff --git src/hg/lib/hgFind.c src/hg/lib/hgFind.c
index 2b9d417..424f138 100644
--- src/hg/lib/hgFind.c
+++ src/hg/lib/hgFind.c
@@ -2740,71 +2740,71 @@
     }
 return foundIt;
 }
 
 // a little data structure for combining multiple transcripts that resolve
 // to the same hgvs change. This struct can be used to fill out a struct hgPos
 struct hgvsHelper
     {
     struct hgvsHelper *next;
     char *chrom; // chromosome name of position
     int chromStart; // start of position
     int chromEnd; // end of position
     struct slName *validTranscripts; // valid transcripts/protein accessions for this position
     char *label; // corresponding hgvs term
     char *table; // type of match, LRG, NCBI, etc
+    boolean mapError; // does this hgvs mapping result in a map error?
     };
 
 static boolean matchesHgvs(struct cart *cart, char *db, char *term, struct hgPositions *hgp)
 /* Return TRUE if the search term looks like a variant encoded using the HGVS nomenclature
  * See http://varnomen.hgvs.org/
  * If search term is a pseudo hgvs term like GeneName AminoAcidPosition (RUNX2 Arg155) and
  * matches more than one transcript, fill out the hgp with the potential matches so the user
  * can choose where to go, otherwise return a singlePos */
 {
 boolean foundIt = FALSE;
 struct hgvsVariant *hgvsList = hgvsParseTerm(term);
 if (hgvsList == NULL)
     hgvsList = hgvsParsePseudoHgvs(db, term);
 if (hgvsList)
     {
     struct hgvsVariant *hgvs = NULL;
-    int hgvsListLen = slCount(hgvs);
+    int hgvsListLen = slCount(hgvsList);
     struct hgPosTable *table;
     AllocVar(table);
     table->description = "HGVS";
     int padding = 5;
+    int mapErrCnt = 0;
     struct dyString *dyWarn = dyStringNew(0);
     struct hgvsHelper *helper = NULL;
     struct hash *uniqHgvsPos = hashNew(0);
     struct dyString *chromPosIndex = dyStringNew(0);
+    struct dyString *allWarnings = dyStringNew(0);
     for (hgvs = hgvsList; hgvs != NULL; hgvs = hgvs->next)
         {
         dyStringClear(dyWarn);
         dyStringClear(chromPosIndex);
         char *pslTable = NULL;
         struct bed *mapping = hgvsValidateAndMap(hgvs, db, term, dyWarn, &pslTable);
         if (dyStringLen(dyWarn) > 0)
             {
+            mapErrCnt++;
             if (hgvsListLen == 1)
                 {
                 warn("%s", dyStringContents(dyWarn));
                 }
-            else
-                {
-                continue;
-                }
             }
         if (mapping)
             {
             char *trackTable;
             if (isEmpty(pslTable))
                 trackTable = "chromInfo";
             else if (startsWith("lrg", pslTable))
                 trackTable = "lrgTranscriptAli";
             else if (startsWith("wgEncodeGencode", pslTable))
                 trackTable = pslTable;
             else if (startsWith("ncbiRefSeqPsl", pslTable))
                 {
                 if (startsWith("NM_", hgvs->seqAcc) || startsWith("NR_", hgvs->seqAcc) ||
                     startsWith("NP_", hgvs->seqAcc) || startsWith("YP_", hgvs->seqAcc))
                     trackTable = "ncbiRefSeqCurated";
@@ -2821,54 +2821,101 @@
             if ((helper = hashFindVal(uniqHgvsPos, chromPosIndex->string)) != NULL)
                 {
                 slNameAddHead(&helper->validTranscripts, hgvs->seqAcc);
                 }
             else
                 {
                 AllocVar(helper);
                 helper->chrom = mapping->chrom;
                 helper->chromStart = mapping->chromStart;
                 helper->chromEnd = mapping->chromEnd;
                 helper->validTranscripts = slNameNew(hgvs->seqAcc);
                 helper->label = cloneString(term);
                 helper->table = trackTable;
                 hashAdd(uniqHgvsPos, chromPosIndex->string, helper);
                 }
+            if (dyStringLen(dyWarn) > 0)
+                {
+                helper->mapError = TRUE;
+                dyStringPrintf(allWarnings, "%s%s", dyStringLen(allWarnings) > 0 ? "\n" : "", dyStringContents(dyWarn));
+                }
             }
         }
+    if (mapErrCnt < hgvsListLen)
+        // at least one of the hgvs terms mapped sucessfully, so we can go to that spot
+        // or let the user pick a location
+        {
         struct hashEl *hel, *helList= hashElListHash(uniqHgvsPos);
         for (hel = helList; hel != NULL; hel = hel->next)
             {
+            helper = (struct hgvsHelper *)hel->val;
+            if (!helper->mapError)
+                {
                 if (hgp->tableList == NULL)
                     hgp->tableList = table;
-        helper = (struct hgvsHelper *)hel->val;
+                foundIt = TRUE;
                 table->name = helper->table;
                 struct hgPos *pos;
                 AllocVar(pos);
                 pos->chrom = helper->chrom;
                 pos->chromStart = helper->chromStart - padding;
                 pos->chromEnd = helper->chromEnd + padding;
                 pos->name = slNameListToString(helper->validTranscripts, '/');
                 pos->description = cloneString(helper->label);
                 pos->browserName = "";
                 slAddHead(&table->posList, pos);
                 // highlight the mapped bases to distinguish from padding
                 hgp->tableList->posList->highlight = addHighlight(db, helper->chrom,
                                                         helper->chromStart, helper->chromEnd);
+                }
+            }
+        }
+    else
+        // all of the positions mapped incorrectly, so the term was bad. However, we may
+        // be able to still go to a general area around the term, so build that, warn the
+        // user about their bad search term, and warn that this is not an exactly correct position
+        {
+        struct hashEl *hel, *helList= hashElListHash(uniqHgvsPos);
+        if (hgp->tableList == NULL)
+            hgp->tableList = table;
         foundIt = TRUE;
+        table->name = helper->table;
+        struct hgPos *pos;
+        AllocVar(pos);
+        char *chrom;
+        int spanStart = INT_MAX, spanEnd = 0;
+        for (hel = helList; hel != NULL; hel = hel->next)
+            {
+            helper = (struct hgvsHelper *)hel->val;
+            chrom = helper->chrom;
+            spanStart = helper->chromStart < spanStart ? helper->chromStart : spanStart;
+            spanEnd = helper->chromEnd > spanEnd ? helper->chromEnd : spanEnd;
+            }
+        pos->chrom = cloneString(chrom);
+        pos->chromStart = spanStart-padding;
+        pos->chromEnd = spanEnd + padding;
+        pos->name = "Approximate area";
+        pos->description = term;
+        pos->browserName = term;
+        slAddHead(&table->posList, pos);
+        // highlight the 'mapped' bases to distinguish from padding
+        hgp->tableList->posList->highlight = addHighlight(db, helper->chrom, spanStart, spanEnd);
+        warn("%s", dyStringContents(allWarnings));
+        warn("Sorry, couldn't locate %s, moving to general location", term);
         }
     dyStringFree(&dyWarn);
+    dyStringFree(&allWarnings);
     }
 return foundIt;
 }
 
 struct hgPositions *hgPositionsFind(char *db, char *term, char *extraCgi,
 	char *hgAppNameIn, struct cart *cart, boolean multiTerm)
 /* Return container of tracks and positions (if any) that match term. */
 {
 struct hgPositions *hgp = NULL, *hgpItem = NULL;
 regmatch_t substrs[4];
 boolean canonicalSpec = FALSE;
 boolean gbrowserSpec = FALSE;
 boolean lengthSpec = FALSE;
 boolean singleBaseSpec = FALSE;
 boolean relativeFlag = FALSE;