e275bb31c96733585d0a1d511fa992f10fa56b69
chmalee
  Fri Oct 31 16:13:13 2025 -0700
Support gnomAD variation and range specifiers like 4-1234-2234 and 5-12345-A-C, refs #36388

diff --git src/hg/lib/hgFind.c src/hg/lib/hgFind.c
index 6b2d76e4f27..31348eca7f4 100644
--- src/hg/lib/hgFind.c
+++ src/hg/lib/hgFind.c
@@ -2715,30 +2715,43 @@
 		     "([0-9,]+)"
 		     "[[:space:]]+"
 		     "([0-9,]+)$";
 char *sqlRangeExp = 
 		     "^([[:alnum:]._#\\-]+)"
 		     "[[:space:]]*\\|[[:space:]]*"
 		     "([0-9,]+)"
 		     "[[:space:]]*\\|[[:space:]]*"
 		     "([0-9,]+)$";
 
 char *singleBaseExp = 
 		     "^([[:alnum:]._#\\-]+)"
 		     "[[:space:]]*:[[:space:]]*"
 		     "([0-9,]+)$";
 
+// allow gnomAD variants, ex: 12-1234-A-C; chrom is group 1 (2 or 3 nested inside), pos is 4, alleles are 5 and 6
+char *gnomadVarExp =
+            "^(([0-9]+)|(X|Y|M|MT))-"
+            "([0-9]+)-"
+            "([A-Za-z]+)-"
+            "([A-Za-z]+)$";
+
+// allow gnomAD ranges, ex: 12-1234-11223344; chrom is group 1 (2 or 3 nested inside), start is 4, end is 5
+char *gnomadRangeExp =
+            "^(([0-9]+)|(X|Y|M|MT))-"
+            "([0-9]+)-"
+            "([0-9]+)$";
+
 static void collapseSamePos(struct hgPositions *hgp)
 /* If all positions in all tables in hgp are the same position, then 
  * trim all but the first table/pos. */
 {
 struct hgPosTable *firstTable = NULL, *table;
 struct hgPos *firstPos = NULL, *pos;
 char *chrom = NULL;
 int start=0, end=0;
 
 for (table = hgp->tableList; table != NULL; table = table->next)
     {
     for (pos = table->posList; pos != NULL; pos = pos->next)
         {
 	if (pos->chrom != NULL)
 	    {
@@ -3741,35 +3754,37 @@
             warn("%s", dyStringContents(dyWarn));
         }
     dyStringFree(&dyWarn);
     dyStringFree(&allWarnings);
     if (measureTiming && hgp && hgp->tableList)
         table->searchTime = clock1000() - startTime;
     }
 return foundIt;
 }
 
 struct hgPositions *hgPositionsFind(char *db, char *term, char *extraCgi,
 	char *hgAppNameIn, struct cart *cart, boolean multiTerm, boolean measureTiming, struct searchCategory *categories)
 /* Return container of tracks and positions (if any) that match term. */
 {
 struct hgPositions *hgp = NULL, *hgpItem = NULL;
-regmatch_t substrs[4];
+regmatch_t substrs[7];
 boolean canonicalSpec = FALSE;
 boolean gbrowserSpec = FALSE;
 boolean lengthSpec = FALSE;
 boolean singleBaseSpec = FALSE;
+boolean gnomadVarSpec = FALSE;
+boolean gnomadRangeSpec = FALSE;
 boolean relativeFlag = FALSE;
 int relStart = 0, relEnd = 0;
 
 hgAppName = hgAppNameIn;
 
 // Exhaustive searches can lead to timeouts on CGIs (#11626).
 // However, hgGetAnn requires exhaustive searches (#11665).
 // So... set a non-exhaustive search limit on all except hgGetAnn.
 // NOTE: currently non-exhaustive search limits are only applied to findMrnaKeys
 int limitResults = NONEXHAUSTIVE_SEARCH_LIMIT;
 if (sameString(hgAppNameIn,"hgGetAnn"))
     limitResults = EXHAUSTIVE_SEARCH_REQUIRED;
 
 AllocVar(hgp);
 hgp->useAlias = FALSE;
@@ -3825,40 +3840,61 @@
     }
 
 /* Allow any search term to end with a :Start-End range -- also support stuff 
  * pasted in from BED (chrom start end) or SQL query (chrom | start | end).  
  * If found, strip it off and remember the start and end. */
 char *originalTerm = term;
 if ((canonicalSpec = 
         regexMatchSubstrNoCase(term, canonicalRangeExp, substrs, ArraySize(substrs))) ||
     (gbrowserSpec = 
         regexMatchSubstrNoCase(term, gbrowserRangeExp, substrs, ArraySize(substrs))) ||
     (lengthSpec = 
         regexMatchSubstrNoCase(term, lengthRangeExp, substrs, ArraySize(substrs))) ||
     regexMatchSubstrNoCase(term, bedRangeExp, substrs, ArraySize(substrs)) ||
     (singleBaseSpec =
 	regexMatchSubstrNoCase(term, singleBaseExp, substrs, ArraySize(substrs))) ||
+    (gnomadVarSpec =
+	regexMatchSubstrNoCase(term, gnomadVarExp, substrs, ArraySize(substrs))) ||
+    (gnomadRangeSpec =
+	regexMatchSubstrNoCase(term, gnomadRangeExp, substrs, ArraySize(substrs))) ||
     regexMatchSubstrNoCase(term, sqlRangeExp, substrs, ArraySize(substrs)))
     {
     term = cloneString(term);
+    if (gnomadVarSpec || gnomadRangeSpec)
+        {
+        /* The gnomAD patterns nest the chrom alternatives as [2] and [3] inside [1], so
+         * [4] is relStart and [5] is relEnd or the first allele. ([0] is all.) */
+        term[substrs[1].rm_eo] = 0;
+        eraseTrailingSpaces(term);
+        term[substrs[4].rm_eo] = 0;
+        relStart = atoi(term+substrs[4].rm_so);
+        term[substrs[5].rm_eo] = 0;
+        if (gnomadVarSpec)
+            singleBaseSpec = TRUE; // relEnd = relStart, relStart -= 1
+        else
+            substrs[3] = substrs[5]; // relEnd is parsed from [3] below; here [3] is the X|Y|M|MT group (unmatched for numeric chroms)
+        }
+    else
+        {
         /* Since we got a match, substrs[1] is the chrom/term, [2] is relStart,
          * [3] is relEnd. ([0] is all.) */
         term[substrs[1].rm_eo] = 0;
         eraseTrailingSpaces(term);
         term[substrs[2].rm_eo] = 0;
         relStart = atoi(stripCommas(term+substrs[2].rm_so));
         term[substrs[3].rm_eo] = 0;
+        }
     if (singleBaseSpec)
         {
         relEnd   = relStart;
         relStart--;
         }
     else
         relEnd   = atoi(stripCommas(term+substrs[3].rm_so));
     if (lengthSpec)
         relEnd += relStart;
     if (relStart > relEnd)
         {
         int tmp  = relStart;
         relStart = relEnd;
         relEnd   = tmp;
         }