e275bb31c96733585d0a1d511fa992f10fa56b69 chmalee Fri Oct 31 16:13:13 2025 -0700 Support gnomAD variation and range specifiers like 4-1234-2234 and 5-12345-A-C, refs #36388 diff --git src/hg/lib/hgFind.c src/hg/lib/hgFind.c index 6b2d76e4f27..31348eca7f4 100644 --- src/hg/lib/hgFind.c +++ src/hg/lib/hgFind.c @@ -2715,30 +2715,43 @@ "([0-9,]+)" "[[:space:]]+" "([0-9,]+)$"; char *sqlRangeExp = "^([[:alnum:]._#\\-]+)" "[[:space:]]*\\|[[:space:]]*" "([0-9,]+)" "[[:space:]]*\\|[[:space:]]*" "([0-9,]+)$"; char *singleBaseExp = "^([[:alnum:]._#\\-]+)" "[[:space:]]*:[[:space:]]*" "([0-9,]+)$"; +// allow gnomAD variants, ex: 12-1234-A-C; groups: [1] chrom ([2] numeric or [3] X|Y|M|MT, only one participates), [4] position, [5] and [6] the two allele strings +char *gnomadVarExp = + "^(([0-9]+)|(X|Y|M|MT))-" + "([0-9]+)-" + "([A-Za-z]+)-" + "([A-Za-z]+)$"; + +// allow gnomAD ranges, ex: 12-1234-11223344; groups: [1] chrom ([2] numeric or [3] X|Y|M|MT, only one participates), [4] start, [5] end +char *gnomadRangeExp = + "^(([0-9]+)|(X|Y|M|MT))-" + "([0-9]+)-" + "([0-9]+)$"; + static void collapseSamePos(struct hgPositions *hgp) /* If all positions in all tables in hgp are the same position, then * trim all but the first table/pos. */ { struct hgPosTable *firstTable = NULL, *table; struct hgPos *firstPos = NULL, *pos; char *chrom = NULL; int start=0, end=0; for (table = hgp->tableList; table != NULL; table = table->next) { for (pos = table->posList; pos != NULL; pos = pos->next) { if (pos->chrom != NULL) { @@ -3741,35 +3754,37 @@ warn("%s", dyStringContents(dyWarn)); } dyStringFree(&dyWarn); dyStringFree(&allWarnings); if (measureTiming && hgp && hgp->tableList) table->searchTime = clock1000() - startTime; } return foundIt; } struct hgPositions *hgPositionsFind(char *db, char *term, char *extraCgi, char *hgAppNameIn, struct cart *cart, boolean multiTerm, boolean measureTiming, struct searchCategory *categories) /* Return container of tracks and positions (if any) that match term. 
*/ { struct hgPositions *hgp = NULL, *hgpItem = NULL; -regmatch_t substrs[4]; +regmatch_t substrs[7]; boolean canonicalSpec = FALSE; boolean gbrowserSpec = FALSE; boolean lengthSpec = FALSE; boolean singleBaseSpec = FALSE; +boolean gnomadVarSpec = FALSE; +boolean gnomadRangeSpec = FALSE; boolean relativeFlag = FALSE; int relStart = 0, relEnd = 0; hgAppName = hgAppNameIn; // Exhaustive searches can lead to timeouts on CGIs (#11626). // However, hgGetAnn requires exhaustive searches (#11665). // So... set a non-exhaustive search limit on all except hgGetAnn. // NOTE: currently non-exhaustive search limits are only applied to findMrnaKeys int limitResults = NONEXHAUSTIVE_SEARCH_LIMIT; if (sameString(hgAppNameIn,"hgGetAnn")) limitResults = EXHAUSTIVE_SEARCH_REQUIRED; AllocVar(hgp); hgp->useAlias = FALSE; @@ -3825,40 +3840,61 @@ } /* Allow any search term to end with a :Start-End range -- also support stuff * pasted in from BED (chrom start end) or SQL query (chrom | start | end). * If found, strip it off and remember the start and end. */ char *originalTerm = term; if ((canonicalSpec = regexMatchSubstrNoCase(term, canonicalRangeExp, substrs, ArraySize(substrs))) || (gbrowserSpec = regexMatchSubstrNoCase(term, gbrowserRangeExp, substrs, ArraySize(substrs))) || (lengthSpec = regexMatchSubstrNoCase(term, lengthRangeExp, substrs, ArraySize(substrs))) || regexMatchSubstrNoCase(term, bedRangeExp, substrs, ArraySize(substrs)) || (singleBaseSpec = regexMatchSubstrNoCase(term, singleBaseExp, substrs, ArraySize(substrs))) || + (gnomadVarSpec = + regexMatchSubstrNoCase(term, gnomadVarExp, substrs, ArraySize(substrs))) || + (gnomadRangeSpec = + regexMatchSubstrNoCase(term, gnomadRangeExp, substrs, ArraySize(substrs))) || regexMatchSubstrNoCase(term, sqlRangeExp, substrs, ArraySize(substrs))) { term = cloneString(term); + if (gnomadVarSpec || gnomadRangeSpec) + { + /* Since we got a gnomAD match, substrs[1] is the chrom/term, [4] is relStart, + * [5] is relEnd (range) or the first allele (variant). ([0] is all; [2] and [3] are the chrom alternatives, only one of which participates in a match.) 
*/ + term[substrs[1].rm_eo] = 0; + eraseTrailingSpaces(term); + term[substrs[4].rm_eo] = 0; + relStart = atoi(term+substrs[4].rm_so); + term[substrs[5].rm_eo] = 0; + if (gnomadVarSpec) + singleBaseSpec = TRUE; // relEnd = relStart, relStart -= 1 + else + substrs[3] = substrs[5]; /* gnomAD range: the shared code below parses relEnd from [3], which in this pattern is the X|Y|M|MT chrom alternative (possibly unmatched, rm_so == -1), so point it at the end coordinate */ + } + else + { /* Since we got a match, substrs[1] is the chrom/term, [2] is relStart, * [3] is relEnd. ([0] is all.) */ term[substrs[1].rm_eo] = 0; eraseTrailingSpaces(term); term[substrs[2].rm_eo] = 0; relStart = atoi(stripCommas(term+substrs[2].rm_so)); term[substrs[3].rm_eo] = 0; + } if (singleBaseSpec) { relEnd = relStart; relStart--; } else relEnd = atoi(stripCommas(term+substrs[3].rm_so)); if (lengthSpec) relEnd += relStart; if (relStart > relEnd) { int tmp = relStart; relStart = relEnd; relEnd = tmp; }