c6ce277e36a537437a04146c8fa7adebb40428ff
chmalee
Wed May 15 12:10:42 2024 -0700
Libify some searching code so checkHgFindSpec can use it. Make checkHgFindSpec use the same code path as hgSearch so it can test the search correctly, refs #33731
diff --git src/hg/lib/hgFind.c src/hg/lib/hgFind.c
index a29b858..52be58a 100644
--- src/hg/lib/hgFind.c
+++ src/hg/lib/hgFind.c
@@ -42,30 +42,33 @@
#include "bigBedFind.h"
#include "genbank.h"
#include "chromAlias.h"
#include "cart.h"
#include "cartTrackDb.h"
#include "jsonParse.h"
// Exhaustive searches can lead to timeouts on CGIs (#11626).
// However, hgGetAnn requires exhaustive searches (#11665).
// NOTE(review): presumably the result cap applied to non-exhaustive searches -- confirm at use sites.
#define NONEXHAUSTIVE_SEARCH_LIMIT 500
// NOTE(review): looks like a sentinel limit value requesting an exhaustive search -- confirm.
#define EXHAUSTIVE_SEARCH_REQUIRED -1
// NOTE(review): presumably a limit related to search-result snippets; what it counts is not visible here.
#define SNIPPET_LIMIT 100
// Global application name; empty string by default.  Several functions in this file
// (e.g. hgPositionsSearch, hgFindSearch) take a parameter with this same name, which
// shadows the global inside those functions.
char *hgAppName = "";
+/* Caches used by hgFind.c */
+struct hash *hgFindTrackHash = NULL;
+
/* alignment tables to check when looking for mrna alignments */
static char *estTables[] = { "intronEst", "all_est", "xenoEst", NULL };
static char *estLabels[] = { "Spliced ESTs", "ESTs", "Other ESTs", NULL };
static char *mrnaTables[] = { "all_mrna", "xenoMrna", NULL };
static char *mrnaLabels[] = { "mRNAs", "Other mRNAs", NULL };
static struct dyString *hgpMatchNames = NULL;
void hgPosFree(struct hgPos **pEl)
/* Free up hgPos. */
{
struct hgPos *el;
if ((el = *pEl) != NULL)
{
freeMem(el->name);
freeMem(el->description);
@@ -2041,41 +2044,55 @@
struct trackDb *tdbList = NULL;
// This used to be an argument, but only stdout was used:
FILE *f = stdout;
if (hgp->posCount == 0)
{
fprintf(f, "
\n");
fprintf(f, "
No additional items found
");
fprintf(f, "
\n");
return;
}
for (table = hgp->tableList; table != NULL; table = table->next)
{
if (table->posList != NULL)
{
- char *tableName = table->name;
- if (startsWith("all_", tableName))
- tableName += strlen("all_");
-
+ char *trackName = table->name, *tableName = table->name;
+ struct trackDb *tdb = NULL;
// clear the tdb cache if this track is a hub track
+ if ((sameString("trackDb", tableName) || sameString("helpDocs", tableName) ||
+ sameString("publicHubs", tableName)))
+ // not relevant for hgTables/hgVai/hgIntegrator
+ continue;
+ else
+ {
if (isHubTrack(tableName))
tdbList = NULL;
- struct trackDb *tdb = tdbForTrack(db, tableName, &tdbList);
+ tdb = tdbForTrack(db, tableName, &tdbList);
+ if (!tdb && startsWith("all_", tableName))
+ tdb = tdbForTrack(db, tableName+strlen("all_"), &tdbList);
+ if (!tdb && startsWith("xeno", tableName))
+ {
+ // due to genbank track changes over the years, sometimes tables
+ // get left on different servers when their trackDb entry was removed
+ // long ago. In that case skip those hits
+ continue;
+ }
if (!tdb)
errAbort("no track for table \"%s\" found via a findSpec", tableName);
- char *trackName = tdb->track;
+ trackName = tdb->track;
+ }
char *vis = hCarefulTrackOpenVisCart(cart, db, trackName);
boolean excludeTable = FALSE;
if(!containerDivPrinted)
{
fprintf(f, "\n");
if (hgp->singlePos == NULL) // we might be called with only one result
fprintf(f, "
Your search resulted in multiple matches. "
"Please select a position:
\n");
containerDivPrinted = TRUE;
}
if (table->htmlStart)
table->htmlStart(table, f);
else
fprintf(f, "
%s
\n", table->description);
for (pos = table->posList; pos != NULL; pos = pos->next)
@@ -2145,57 +2162,57 @@
{
char *queryString = getenv("QUERY_STRING");
char *addString = "&noShort=1";
if (isEmpty(queryString))
addString = "noShort=1";
fprintf(f, " More results...", hgAppName, queryString, addString);
}
fprintf(f, "
\n");
}
}
static struct hgPositions *hgPositionsSearch(char *db, char *spec,
char **retChromName, int *retWinStart, int *retWinEnd,
boolean *retIsMultiTerm, struct cart *cart,
char *hgAppName, char **retMultiChrom,
- struct dyString *dyWarn)
+ struct dyString *dyWarn, struct searchCategory *categories)
/* Search for positions that match spec (possibly ;-separated in which case *retIsMultiTerm is set).
* Return a container of tracks and positions (if any) that match term. If different components
* of a multi-term search land on different chromosomes then *retMultiChrom will be set. */
{
struct hgPositions *hgp = NULL;
char *chrom = NULL;
int start = INT_MAX;
int end = 0;
char *terms[16];
int termCount = chopByChar(cloneString(spec), ';', terms, ArraySize(terms));
boolean multiTerm = (termCount > 1);
boolean measureTiming = cartUsualBoolean(cart, "measureTiming", FALSE);
if (retIsMultiTerm)
*retIsMultiTerm = multiTerm;
if (retMultiChrom)
*retMultiChrom = NULL;
int i;
for (i = 0; i < termCount; i++)
{
trimSpaces(terms[i]);
if (isEmpty(terms[i]))
continue;
// Append warning messages to dyWarn, but allow errAborts to continue
struct errCatch *errCatch = errCatchNew();
if (errCatchStart(errCatch))
- hgp = hgPositionsFind(db, terms[i], "", hgAppName, cart, multiTerm, measureTiming, NULL);
+ hgp = hgPositionsFind(db, terms[i], "", hgAppName, cart, multiTerm, measureTiming, categories);
errCatchEnd(errCatch);
if (errCatch->gotError)
errAbort("%s", errCatch->message->string);
else if (isNotEmpty(errCatch->message->string))
dyStringAppend(dyWarn, errCatch->message->string);
errCatchFree(&errCatch);
if (hgp->singlePos != NULL)
{
if (retMultiChrom && chrom != NULL && differentString(chrom, hgp->singlePos->chrom))
*retMultiChrom = cloneString(chrom);
chrom = hgp->singlePos->chrom;
if (hgp->singlePos->chromStart < start)
start = hgp->singlePos->chromStart;
if (hgp->singlePos->chromEnd > end)
end = hgp->singlePos->chromEnd;
@@ -2219,82 +2236,82 @@
* resolved position. Append warnings to dyWarn, errAbort if defaultPos doesn't work. */
{
struct hgPositions *hgp = NULL;
boolean isMultiTerm = FALSE;
char *multiDiffChrom = NULL;
char *db = cartString(cart, "db");
char *lastPosition = cartOptionalString(cart, "lastPosition");
if (isNotEmpty(lastPosition) && !IS_CART_VAR_EMPTY(lastPosition))
{
if (startsWith(MULTI_REGION_CHROM, lastPosition) ||
startsWith(OLD_MULTI_REGION_CHROM, lastPosition))
{
lastPosition = cartUsualString(cart, "nonVirtPosition", hDefaultPos(db));
}
hgp = hgPositionsSearch(db, lastPosition, retChrom, retStart, retEnd, &isMultiTerm,
- cart, hgAppName, &multiDiffChrom, dyWarn);
+ cart, hgAppName, &multiDiffChrom, dyWarn, NULL);
if (hgp->singlePos && !(isMultiTerm && isNotEmpty(multiDiffChrom)))
{
freez(pPosition);
*pPosition = cloneString(lastPosition);
return hgp;
}
else
dyStringPrintf(dyWarn, " Unable to resolve lastPosition '%s'; "
"reverting to default position.", lastPosition);
}
char *defaultPosition = hDefaultPos(db);
hgp = hgPositionsSearch(db, defaultPosition, retChrom, retStart, retEnd, &isMultiTerm,
- cart, hgAppName, &multiDiffChrom, dyWarn);
+ cart, hgAppName, &multiDiffChrom, dyWarn, NULL);
if (hgp->singlePos && !(isMultiTerm && isNotEmpty(multiDiffChrom)))
{
freez(pPosition);
*pPosition = cloneString(defaultPosition);
}
else
errAbort("Unable to resolve default position '%s' for database '%s'.",
defaultPosition, db);
return hgp;
}
static boolean posIsObsolete(char *pos)
/* Return TRUE if pos is genome (or other obsolete keyword). Once upon a time position=genome
* was used to indicate genome-wide search, but now we have an independent option. */
{
pos = trimSpaces(pos);
return(sameWord(pos, "genome") || sameWord(pos, "hgBatch"));
}
struct hgPositions *hgFindSearch(struct cart *cart, char **pPosition,
char **retChrom, int *retStart, int *retEnd,
- char *hgAppName, struct dyString *dyWarn)
+ char *hgAppName, struct dyString *dyWarn, struct searchCategory *categories)
/* If *pPosition is a search term, then try to resolve it to genomic position(s).
* If unable to find a unique position then revert pPosition to lastPosition (or default position).
* Return a container of matching tables and positions. Warnings/errors are appended to dyWarn. */
{
struct hgPositions *hgp = NULL;
if (posIsObsolete(*pPosition))
{
hgp = revertPosition(cart, pPosition, retChrom, retStart, retEnd, hgAppName, dyWarn);
}
else
{
boolean isMultiTerm = FALSE;
char *multiDiffChrom = NULL;
char *db = cartString(cart, "db");
hgp = hgPositionsSearch(db, *pPosition, retChrom, retStart, retEnd,
- &isMultiTerm, cart, hgAppName, &multiDiffChrom, dyWarn);
+ &isMultiTerm, cart, hgAppName, &multiDiffChrom, dyWarn, categories);
if (isMultiTerm && isNotEmpty(multiDiffChrom))
{
dyStringPrintf(dyWarn, "Sites occur on different chromosomes: %s, %s.",
multiDiffChrom, hgp->singlePos->chrom);
hgp = revertPosition(cart, pPosition, retChrom, retStart, retEnd, hgAppName, dyWarn);
}
else if (hgp->posCount > 1 ||
// In weird cases it's possible to get a single result that does not have coords, but
// leads to another search a la multiple results! That happened with genbank keyword
// search ("elmer" in hg19, hg38 Feb. '19). I fixed it but there could be other cases.
(hgp->posCount == 1 && !hgp->singlePos))
{
if (isMultiTerm)
dyStringPrintf(dyWarn, "%s not uniquely determined (%d locations) -- "
"can't do multi-position search.",
@@ -2885,33 +2902,30 @@
{
boolean isVisible = FALSE;
if (tdb->parent == NULL)
{
char *cartVis = cartOptionalString(cart, tdb->track);
if (cartVis == NULL)
isVisible = tdb->visibility != tvHide;
else
isVisible = differentString(cartVis, "hide");
}
else if (isParentVisible(cart, tdb) && isSubtrackVisible(cart, tdb))
isVisible = TRUE;
return isVisible;
}
-struct hash *hgFindTrackHash = NULL;
-struct hash *hgFindGroupHash = NULL;
-
int cmpCategories(const void *a, const void *b)
/* Comparator used for uniquifying categories: a and b each point to a
 * (struct searchCategory *); the result is strcmp() of the two id strings. */
{
struct searchCategory *const *pLeft = a;
struct searchCategory *const *pRight = b;
const char *leftId = (*pLeft)->id;
const char *rightId = (*pRight)->id;
return strcmp(leftId, rightId);
}
static struct searchableTrack *getSearchableTracks(struct cart *cart, char *database)
/* Return the list of all tracks with an hgFindSpec available */
{
if (trackHubDatabase(database))
return NULL;
struct searchableTrack *ret = NULL;
struct sqlConnection *conn = hAllocConn(database);