049e50b2293ce5e9102eef9177a7cb88b49d7a10 chmalee Wed Mar 4 14:14:03 2026 -0800 Move chromosome:start-end parsing into its own function in hgFind so we can call just that code from elsewhere. Add a defaultPosition check for assembly hub into hubCheck that verifies the requested chromosome exists for that assembly. Do not warn if the range is too large since we actually silently accept that anyway and clamp to the chromosome ends. refs #37126 diff --git src/hg/lib/hgFind.c src/hg/lib/hgFind.c index bd4a6d6e1bf..5772125d374 100644 --- src/hg/lib/hgFind.c +++ src/hg/lib/hgFind.c @@ -3762,206 +3762,221 @@ hgp->tableList->posList->highlight = addHighlight(db, helper->chrom, spanStart, spanEnd); warn("%s", dyStringContents(allWarnings)); warn("Sorry, couldn't locate %s, moving to general location", term); } else warn("%s", dyStringContents(dyWarn)); } dyStringFree(&dyWarn); dyStringFree(&allWarnings); if (measureTiming && hgp && hgp->tableList) table->searchTime = clock1000() - startTime; } return foundIt; } -struct hgPositions *hgPositionsFind(char *db, char *term, char *extraCgi, - char *hgAppNameIn, struct cart *cart, boolean multiTerm, boolean measureTiming, struct searchCategory *categories) -/* Return container of tracks and positions (if any) that match term. */ +boolean parseAndResolvePosition(char **inpPos, char *db, struct hgPositions *inpHgPos, + int *relStart, int *relEnd, boolean *relativeFlag, boolean *singleBaseSpec) +/* If inpPos is a valid chromosome position string for db, fill out the associated + * hgPos, relative start and stop, and return TRUE. Otherwise return FALSE so our regular + * search code can deal with item name */ { -struct hgPositions *hgp = NULL, *hgpItem = NULL; +/* Allow any search term to end with a :Start-End range -- also support stuff + * pasted in from BED (chrom start end) or SQL query (chrom | start | end). + * If found, strip it off and remember the start and end. 
*/ +char *originalTerm = cloneString(*inpPos); +char *term = cloneString(*inpPos); regmatch_t substrs[7]; boolean canonicalSpec = FALSE; boolean gbrowserSpec = FALSE; boolean lengthSpec = FALSE; -boolean singleBaseSpec = FALSE; boolean gnomadVarSpec = FALSE; boolean gnomadRangeSpec = FALSE; -boolean relativeFlag = FALSE; -int relStart = 0, relEnd = 0; - -hgAppName = hgAppNameIn; - -// Exhaustive searches can lead to timeouts on CGIs (#11626). -// However, hgGetAnn requires exhaustive searches (#11665). -// So... set a non-exhaustive search limit on all except hgGetAnn. -// NOTE: currently non-exhaustive search limits are only applied to findMrnaKeys -int limitResults = NONEXHAUSTIVE_SEARCH_LIMIT; -if (sameString(hgAppNameIn,"hgGetAnn")) - limitResults = EXHAUSTIVE_SEARCH_REQUIRED; - -AllocVar(hgp); -hgp->useAlias = FALSE; -term = trimSpaces(term); -if(isEmpty(term)) - return hgp; - -hgp->query = cloneString(term); -hgp->database = db; -if (extraCgi == NULL) - extraCgi = ""; -hgp->extraCgi = cloneString(extraCgi); - -if (singleSearch(db, term, limitResults, cart, hgp, measureTiming)) - return hgp; - -if (categories != NULL) - { - char *originalTerm = term; - if (hgOfficialChromName(db, term) != NULL) // this mangles the term - { - char *chrom; - int start, end; - - hgParseChromRange(db, term, &chrom, &start, &end); - if (relativeFlag) - { - int chromSize = end; - end = start + relEnd; - start = start + relStart; - if (end > chromSize) - end = chromSize; - if (start < 0) - start = 0; - } - singlePos(hgp, "Chromosome Range", NULL, "chromInfo", originalTerm, - "", chrom, start, end); - } - else if (!matchesHgvs(cart, db, term, hgp, measureTiming)) - userDefinedSearch(db, term, limitResults, cart, hgp, categories, multiTerm, measureTiming); - slReverse(&hgp->tableList); - if (multiTerm) - collapseSamePos(hgp); - fixSinglePos(hgp); - if (cart && hgp->singlePos && isNotEmpty(hgp->singlePos->highlight)) - cartSetString(cart, "addHighlight", hgp->singlePos->highlight); - if 
(hgp->posCount > 0) - return hgp; - else - // if categories was passed in we should explicitly return no results - // if there weren't any - return NULL; - } -/* Allow any search term to end with a :Start-End range -- also support stuff - * pasted in from BED (chrom start end) or SQL query (chrom | start | end). - * If found, strip it off and remember the start and end. */ -char *originalTerm = term; if ((canonicalSpec = regexMatchSubstrNoCase(term, canonicalRangeExp, substrs, ArraySize(substrs))) || (gbrowserSpec = regexMatchSubstrNoCase(term, gbrowserRangeExp, substrs, ArraySize(substrs))) || (lengthSpec = regexMatchSubstrNoCase(term, lengthRangeExp, substrs, ArraySize(substrs))) || regexMatchSubstrNoCase(term, bedRangeExp, substrs, ArraySize(substrs)) || - (singleBaseSpec = + (*singleBaseSpec = regexMatchSubstrNoCase(term, singleBaseExp, substrs, ArraySize(substrs))) || (gnomadVarSpec = regexMatchSubstrNoCase(term, gnomadVarExp, substrs, ArraySize(substrs))) || (gnomadRangeSpec = regexMatchSubstrNoCase(term, gnomadRangeExp, substrs, ArraySize(substrs))) || regexMatchSubstrNoCase(term, sqlRangeExp, substrs, ArraySize(substrs))) { - term = cloneString(term); if (gnomadVarSpec || gnomadRangeSpec) { /* Since we got a match, substrs[1] is the chrom/term, [4] is relStart, * [5] is relEnd or an allele. ([0] is all.) */ term[substrs[1].rm_eo] = 0; eraseTrailingSpaces(term); term[substrs[4].rm_eo] = 0; - relStart = atoi(term+substrs[4].rm_so); + *relStart = atoi(term+substrs[4].rm_so); term[substrs[5].rm_eo] = 0; if (gnomadVarSpec) - singleBaseSpec = TRUE; // relEnd = relStart, relStart -= 1 + *singleBaseSpec = TRUE; // relEnd = relStart, relStart -= 1 else - relEnd = atoi(term+substrs[5].rm_so); + *relEnd = atoi(term+substrs[5].rm_so); } else { /* Since we got a match, substrs[1] is the chrom/term, [2] is relStart, * [3] is relEnd. ([0] is all.) 
*/ term[substrs[1].rm_eo] = 0; eraseTrailingSpaces(term); term[substrs[2].rm_eo] = 0; - relStart = atoi(stripCommas(term+substrs[2].rm_so)); + *relStart = atoi(stripCommas(term+substrs[2].rm_so)); term[substrs[3].rm_eo] = 0; } - if (singleBaseSpec) + if (*singleBaseSpec) { - relEnd = relStart; - relStart--; + *relEnd = *relStart; + (*relStart)--; } - else - relEnd = atoi(stripCommas(term+substrs[3].rm_so)); + else if (!gnomadRangeSpec) + *relEnd = atoi(stripCommas(term+substrs[3].rm_so)); if (lengthSpec) - relEnd += relStart; - if (relStart > relEnd) + *relEnd += *relStart; + if (*relStart > *relEnd) { - int tmp = relStart; - relStart = relEnd; - relEnd = tmp; + int tmp = *relStart; + *relStart = *relEnd; + *relEnd = tmp; } if (canonicalSpec || gbrowserSpec || lengthSpec) - relStart--; - relativeFlag = TRUE; + (*relStart)--; + *relativeFlag = TRUE; } -term = cloneString(term); // because hgOfficialChromName mangles it -if (hgOfficialChromName(db, term) != NULL) // this mangles the term +// purposefully mangle the input term. We implicitly rely on this behavior of stripping +// ranges off of search terms when the first part of the range does not match a +// chromosome name +*inpPos = term; + +if (hgOfficialChromName(db, term) != NULL) { char *chrom; int start, end; hgParseChromRange(db, term, &chrom, &start, &end); - if (relativeFlag) + // NOTE that if the above regexes catch the term, we clamp a wildly large range to + // the chromosome size. This is great for users inputting search coordinates, but + // maybe we should have a way to not force this clamp in the future. 
+ if (*relativeFlag) { int chromSize = end; - end = start + relEnd; - start = start + relStart; + end = start + *relEnd; + start = start + *relStart; if (end > chromSize) end = chromSize; if (start < 0) start = 0; } - singlePos(hgp, "Chromosome Range", NULL, "chromInfo", originalTerm, + if (inpHgPos) + { + singlePos(inpHgPos, "Chromosome Range", NULL, "chromInfo", originalTerm, "", chrom, start, end); } -else if (!matchesHgvs(cart, db, term, hgp, measureTiming)) + return TRUE; + } +return FALSE; +} + +struct hgPositions *hgPositionsFind(char *db, char *term, char *extraCgi, + char *hgAppNameIn, struct cart *cart, boolean multiTerm, boolean measureTiming, struct searchCategory *categories) +/* Return container of tracks and positions (if any) that match term. */ { - struct hgFindSpec *shortList = NULL, *longList = NULL; - struct hgFindSpec *hfs; - boolean done = FALSE; +struct hgPositions *hgp = NULL, *hgpItem = NULL; +boolean singleBaseSpec = FALSE; +int relStart = 0, relEnd = 0; +boolean relativeFlag = FALSE; + +hgAppName = hgAppNameIn; + +// Exhaustive searches can lead to timeouts on CGIs (#11626). +// However, hgGetAnn requires exhaustive searches (#11665). +// So... set a non-exhaustive search limit on all except hgGetAnn. 
+// NOTE: currently non-exhaustive search limits are only applied to findMrnaKeys +int limitResults = NONEXHAUSTIVE_SEARCH_LIMIT; +if (sameString(hgAppNameIn,"hgGetAnn")) + limitResults = EXHAUSTIVE_SEARCH_REQUIRED; + +AllocVar(hgp); +hgp->useAlias = FALSE; +term = trimSpaces(term); +if(isEmpty(term)) + return hgp; + +hgp->query = cloneString(term); +hgp->database = db; +if (extraCgi == NULL) + extraCgi = ""; +hgp->extraCgi = cloneString(extraCgi); + +if (singleSearch(db, term, limitResults, cart, hgp, measureTiming)) + return hgp; + +char *originalTerm = cloneString(term); +if (categories != NULL) + { + if (!parseAndResolvePosition(&term, db, hgp, &relStart, &relEnd, &relativeFlag, &singleBaseSpec)) + { + // Disable singleBaseSpec for any term that is not hgOfficialChromName + // because that mangles legitimate IDs that are [A-Z]:[0-9]+. + if (singleBaseSpec) + { + singleBaseSpec = relativeFlag = FALSE; + term = cloneString(originalTerm); // restore original term + relStart = relEnd = 0; + } + if (!matchesHgvs(cart, db, term, hgp, measureTiming)) + userDefinedSearch(db, term, limitResults, cart, hgp, categories, multiTerm, measureTiming); + } + slReverse(&hgp->tableList); + if (multiTerm) + collapseSamePos(hgp); + fixSinglePos(hgp); + if (cart && hgp->singlePos && isNotEmpty(hgp->singlePos->highlight)) + cartSetString(cart, "addHighlight", hgp->singlePos->highlight); + if (hgp->posCount > 0) + return hgp; + else + // if categories was passed in we should explicitly return no results + // if there weren't any + return NULL; + } +// NOTE: parseAndResolvePosition mangles the term on purpose +if (!parseAndResolvePosition(&term, db, hgp, &relStart, &relEnd, &relativeFlag, &singleBaseSpec)) + { // Disable singleBaseSpec for any term that is not hgOfficialChromName // because that mangles legitimate IDs that are [A-Z]:[0-9]+. 
if (singleBaseSpec) { singleBaseSpec = relativeFlag = FALSE; term = cloneString(originalTerm); // restore original term relStart = relEnd = 0; } + if (!matchesHgvs(cart, db, term, hgp, measureTiming)) + { + struct hgFindSpec *shortList = NULL, *longList = NULL; + struct hgFindSpec *hfs; + boolean done = FALSE; if (!trackHubDatabase(db)) hgFindSpecGetAllSpecs(db, &shortList, &longList); if ((cart == NULL) || (cartOptionalString(cart, "noShort") == NULL)) { hgp->shortCircuited = TRUE; for (hfs = shortList; hfs != NULL; hfs = hfs->next) { if (hgFindUsingSpec(cart, db, hfs, term, limitResults, hgp, relativeFlag, relStart, relEnd, multiTerm, measureTiming)) { done = TRUE; if (! hgFindSpecSetting(hfs, "semiShortCircuit")) break; } @@ -3997,30 +4012,31 @@ struct hgPosTable *hpTable = NULL; for(hpTable = hgpItem->tableList; hpTable != NULL; hpTable = hpTable->next) { struct hgPos *pos = NULL; for(pos = hpTable->posList; pos != NULL; pos = pos->next) { if (limitResults != EXHAUSTIVE_SEARCH_REQUIRED && matchCount++ >= limitResults) break; dyStringPrintf(hgpMatchNames, "%s,", pos->browserName); } } } cartSetString(cart, "hgFind.matches", hgpMatchNames->string); } } + } slReverse(&hgp->tableList); if (multiTerm) collapseSamePos(hgp); fixSinglePos(hgp); if (cart && hgp->singlePos && isNotEmpty(hgp->singlePos->highlight)) cartSetString(cart, "addHighlight", hgp->singlePos->highlight); return hgp; } void hgPositionsHelpHtmlCart(struct cart *cart, char *organism, char *database) /* Display contents of dbDb.htmlPath for database, or print an HTML comment * explaining what's missing. */ { char *htmlPath = hHtmlPath(database);