c6ce277e36a537437a04146c8fa7adebb40428ff chmalee Wed May 15 12:10:42 2024 -0700 Libify some searching code so checkHgFindSpec can use it. Make checkHgFindSpec use the same code path as hgSearch so it can correctly test the search, refs #33731 diff --git src/hg/lib/hgFind.c src/hg/lib/hgFind.c index a29b858..52be58a 100644 --- src/hg/lib/hgFind.c +++ src/hg/lib/hgFind.c @@ -42,30 +42,33 @@ #include "bigBedFind.h" #include "genbank.h" #include "chromAlias.h" #include "cart.h" #include "cartTrackDb.h" #include "jsonParse.h" // Exhaustive searches can lead to timeouts on CGIs (#11626). // However, hgGetAnn requires exhaustive searches (#11665). #define NONEXHAUSTIVE_SEARCH_LIMIT 500 #define EXHAUSTIVE_SEARCH_REQUIRED -1 #define SNIPPET_LIMIT 100 char *hgAppName = ""; +/* Caches used by hgFind.c */ +struct hash *hgFindTrackHash = NULL; + /* alignment tables to check when looking for mrna alignments */ static char *estTables[] = { "intronEst", "all_est", "xenoEst", NULL }; static char *estLabels[] = { "Spliced ESTs", "ESTs", "Other ESTs", NULL }; static char *mrnaTables[] = { "all_mrna", "xenoMrna", NULL }; static char *mrnaLabels[] = { "mRNAs", "Other mRNAs", NULL }; static struct dyString *hgpMatchNames = NULL; void hgPosFree(struct hgPos **pEl) /* Free up hgPos. 
*/ { struct hgPos *el; if ((el = *pEl) != NULL) { freeMem(el->name); freeMem(el->description); @@ -2041,41 +2044,55 @@ struct trackDb *tdbList = NULL; // This used to be an argument, but only stdout was used: FILE *f = stdout; if (hgp->posCount == 0) { fprintf(f, "<div id='hgFindResults'>\n"); fprintf(f, "<p>No additional items found</p>"); fprintf(f, "</div>\n"); return; } for (table = hgp->tableList; table != NULL; table = table->next) { if (table->posList != NULL) { - char *tableName = table->name; - if (startsWith("all_", tableName)) - tableName += strlen("all_"); - + char *trackName = table->name, *tableName = table->name; + struct trackDb *tdb = NULL; // clear the tdb cache if this track is a hub track + if ((sameString("trackDb", tableName) || sameString("helpDocs", tableName) || + sameString("publicHubs", tableName))) + // not relevant for hgTables/hgVai/hgIntegrator + continue; + else + { if (isHubTrack(tableName)) tdbList = NULL; - struct trackDb *tdb = tdbForTrack(db, tableName, &tdbList); + tdb = tdbForTrack(db, tableName, &tdbList); + if (!tdb && startsWith("all_", tableName)) + tdb = tdbForTrack(db, tableName+strlen("all_"), &tdbList); + if (!tdb && startsWith("xeno", tableName)) + { + // due to genbank track changes over the years, sometimes tables + // get left on different servers when their trackDb entry was removed + // long ago. In that case skip those hits + continue; + } if (!tdb) errAbort("no track for table \"%s\" found via a findSpec", tableName); - char *trackName = tdb->track; + trackName = tdb->track; + } char *vis = hCarefulTrackOpenVisCart(cart, db, trackName); boolean excludeTable = FALSE; if(!containerDivPrinted) { fprintf(f, "<div id='hgFindResults'>\n"); if (hgp->singlePos == NULL) // we might be called with only one result fprintf(f, "<p>Your search resulted in multiple matches. 
" "Please select a position:</p>\n"); containerDivPrinted = TRUE; } if (table->htmlStart) table->htmlStart(table, f); else fprintf(f, "<H2>%s</H2><PRE>\n", table->description); for (pos = table->posList; pos != NULL; pos = pos->next) @@ -2145,57 +2162,57 @@ { char *queryString = getenv("QUERY_STRING"); char *addString = "&noShort=1"; if (isEmpty(queryString)) addString = "noShort=1"; fprintf(f, "<A HREF=\"%s?%s%s\"> More results...</A>", hgAppName, queryString, addString); } fprintf(f, "</div>\n"); } } static struct hgPositions *hgPositionsSearch(char *db, char *spec, char **retChromName, int *retWinStart, int *retWinEnd, boolean *retIsMultiTerm, struct cart *cart, char *hgAppName, char **retMultiChrom, - struct dyString *dyWarn) + struct dyString *dyWarn, struct searchCategory *categories) /* Search for positions that match spec (possibly ;-separated in which case *retIsMultiTerm is set). * Return a container of tracks and positions (if any) that match term. If different components * of a multi-term search land on different chromosomes then *retMultiChrom will be set. 
*/ { struct hgPositions *hgp = NULL; char *chrom = NULL; int start = INT_MAX; int end = 0; char *terms[16]; int termCount = chopByChar(cloneString(spec), ';', terms, ArraySize(terms)); boolean multiTerm = (termCount > 1); boolean measureTiming = cartUsualBoolean(cart, "measureTiming", FALSE); if (retIsMultiTerm) *retIsMultiTerm = multiTerm; if (retMultiChrom) *retMultiChrom = NULL; int i; for (i = 0; i < termCount; i++) { trimSpaces(terms[i]); if (isEmpty(terms[i])) continue; // Append warning messages to dyWarn, but allow errAborts to continue struct errCatch *errCatch = errCatchNew(); if (errCatchStart(errCatch)) - hgp = hgPositionsFind(db, terms[i], "", hgAppName, cart, multiTerm, measureTiming, NULL); + hgp = hgPositionsFind(db, terms[i], "", hgAppName, cart, multiTerm, measureTiming, categories); errCatchEnd(errCatch); if (errCatch->gotError) errAbort("%s", errCatch->message->string); else if (isNotEmpty(errCatch->message->string)) dyStringAppend(dyWarn, errCatch->message->string); errCatchFree(&errCatch); if (hgp->singlePos != NULL) { if (retMultiChrom && chrom != NULL && differentString(chrom, hgp->singlePos->chrom)) *retMultiChrom = cloneString(chrom); chrom = hgp->singlePos->chrom; if (hgp->singlePos->chromStart < start) start = hgp->singlePos->chromStart; if (hgp->singlePos->chromEnd > end) end = hgp->singlePos->chromEnd; @@ -2219,82 +2236,82 @@ * resolved position. Append warnings to dyWarn, errAbort if defaultPos doesn't work. 
*/ { struct hgPositions *hgp = NULL; boolean isMultiTerm = FALSE; char *multiDiffChrom = NULL; char *db = cartString(cart, "db"); char *lastPosition = cartOptionalString(cart, "lastPosition"); if (isNotEmpty(lastPosition) && !IS_CART_VAR_EMPTY(lastPosition)) { if (startsWith(MULTI_REGION_CHROM, lastPosition) || startsWith(OLD_MULTI_REGION_CHROM, lastPosition)) { lastPosition = cartUsualString(cart, "nonVirtPosition", hDefaultPos(db)); } hgp = hgPositionsSearch(db, lastPosition, retChrom, retStart, retEnd, &isMultiTerm, - cart, hgAppName, &multiDiffChrom, dyWarn); + cart, hgAppName, &multiDiffChrom, dyWarn, NULL); if (hgp->singlePos && !(isMultiTerm && isNotEmpty(multiDiffChrom))) { freez(pPosition); *pPosition = cloneString(lastPosition); return hgp; } else dyStringPrintf(dyWarn, " Unable to resolve lastPosition '%s'; " "reverting to default position.", lastPosition); } char *defaultPosition = hDefaultPos(db); hgp = hgPositionsSearch(db, defaultPosition, retChrom, retStart, retEnd, &isMultiTerm, - cart, hgAppName, &multiDiffChrom, dyWarn); + cart, hgAppName, &multiDiffChrom, dyWarn, NULL); if (hgp->singlePos && !(isMultiTerm && isNotEmpty(multiDiffChrom))) { freez(pPosition); *pPosition = cloneString(defaultPosition); } else errAbort("Unable to resolve default position '%s' for database '%s'.", defaultPosition, db); return hgp; } static boolean posIsObsolete(char *pos) /* Return TRUE if pos is genome (or other obsolete keyword). Once upon a time position=genome * was used to indicate genome-wide search, but now we have an independent option. */ { pos = trimSpaces(pos); return(sameWord(pos, "genome") || sameWord(pos, "hgBatch")); } struct hgPositions *hgFindSearch(struct cart *cart, char **pPosition, char **retChrom, int *retStart, int *retEnd, - char *hgAppName, struct dyString *dyWarn) + char *hgAppName, struct dyString *dyWarn, struct searchCategory *categories) /* If *pPosition is a search term, then try to resolve it to genomic position(s). 
* If unable to find a unique position then revert pPosition to lastPosition (or default position). * Return a container of matching tables and positions. Warnings/errors are appended to dyWarn. */ { struct hgPositions *hgp = NULL; if (posIsObsolete(*pPosition)) { hgp = revertPosition(cart, pPosition, retChrom, retStart, retEnd, hgAppName, dyWarn); } else { boolean isMultiTerm = FALSE; char *multiDiffChrom = NULL; char *db = cartString(cart, "db"); hgp = hgPositionsSearch(db, *pPosition, retChrom, retStart, retEnd, - &isMultiTerm, cart, hgAppName, &multiDiffChrom, dyWarn); + &isMultiTerm, cart, hgAppName, &multiDiffChrom, dyWarn, categories); if (isMultiTerm && isNotEmpty(multiDiffChrom)) { dyStringPrintf(dyWarn, "Sites occur on different chromosomes: %s, %s.", multiDiffChrom, hgp->singlePos->chrom); hgp = revertPosition(cart, pPosition, retChrom, retStart, retEnd, hgAppName, dyWarn); } else if (hgp->posCount > 1 || // In weird cases it's possible to get a single result that does not have coords, but // leads to another search a la multiple results! That happened with genbank keyword // search ("elmer" in hg19, hg38 Feb. '19). I fixed it but there could be other cases. 
(hgp->posCount == 1 && !hgp->singlePos)) { if (isMultiTerm) dyStringPrintf(dyWarn, "%s not uniquely determined (%d locations) -- " "can't do multi-position search.", @@ -2885,33 +2902,30 @@ { boolean isVisible = FALSE; if (tdb->parent == NULL) { char *cartVis = cartOptionalString(cart, tdb->track); if (cartVis == NULL) isVisible = tdb->visibility != tvHide; else isVisible = differentString(cartVis, "hide"); } else if (isParentVisible(cart, tdb) && isSubtrackVisible(cart, tdb)) isVisible = TRUE; return isVisible; } -struct hash *hgFindTrackHash = NULL; -struct hash *hgFindGroupHash = NULL; - int cmpCategories(const void *a, const void *b) /* Compare two categories for uniquifying */ { struct searchCategory *categA = *(struct searchCategory **)a; struct searchCategory *categB = *(struct searchCategory **)b; return strcmp(categA->id, categB->id); } static struct searchableTrack *getSearchableTracks(struct cart *cart, char *database) /* Return the list of all tracks with an hgFindSpec available */ { if (trackHubDatabase(database)) return NULL; struct searchableTrack *ret = NULL; struct sqlConnection *conn = hAllocConn(database);