c6ce277e36a537437a04146c8fa7adebb40428ff
chmalee
  Wed May 15 12:10:42 2024 -0700
Libify some searching code so checkHgFindSpec can use it. Make checkHgFindSpec use the same code path as hgSearch so it can test the search correctly, refs #33731

diff --git src/hg/lib/hgFind.c src/hg/lib/hgFind.c
index a29b858..52be58a 100644
--- src/hg/lib/hgFind.c
+++ src/hg/lib/hgFind.c
@@ -42,30 +42,33 @@
 #include "bigBedFind.h"
 #include "genbank.h"
 #include "chromAlias.h"
 #include "cart.h"
 #include "cartTrackDb.h"
 #include "jsonParse.h"
 
 // Exhaustive searches can lead to timeouts on CGIs (#11626).
 // However, hgGetAnn requires exhaustive searches (#11665).
 #define NONEXHAUSTIVE_SEARCH_LIMIT 500
 #define EXHAUSTIVE_SEARCH_REQUIRED  -1
 #define SNIPPET_LIMIT 100
 
 char *hgAppName = "";
 
+/* Caches used by hgFind.c */
+struct hash *hgFindTrackHash = NULL;
+
 /* alignment tables to check when looking for mrna alignments */
 static char *estTables[] = { "intronEst", "all_est", "xenoEst", NULL };
 static char *estLabels[] = { "Spliced ESTs", "ESTs", "Other ESTs", NULL };
 static char *mrnaTables[] = { "all_mrna", "xenoMrna", NULL };
 static char *mrnaLabels[] = { "mRNAs", "Other mRNAs", NULL };
 static struct dyString *hgpMatchNames = NULL;
 
 void hgPosFree(struct hgPos **pEl)
 /* Free up hgPos. */
 {
 struct hgPos *el;
 if ((el = *pEl) != NULL)
     {
     freeMem(el->name);
     freeMem(el->description);
@@ -2041,41 +2044,55 @@
 struct trackDb *tdbList = NULL;
 // This used to be an argument, but only stdout was used:
 FILE *f = stdout;
 
 if (hgp->posCount == 0)
     {
     fprintf(f, "<div id='hgFindResults'>\n");
     fprintf(f, "<p>No additional items found</p>");
     fprintf(f, "</div>\n");
     return;
     }
 for (table = hgp->tableList; table != NULL; table = table->next)
     {
     if (table->posList != NULL)
 	{
-	char *tableName = table->name;
-	if (startsWith("all_", tableName))
-	    tableName += strlen("all_");
-
+    char *trackName = table->name, *tableName = table->name;
+    struct trackDb *tdb = NULL;
     // clear the tdb cache if this track is a hub track
+    if ((sameString("trackDb", tableName) || sameString("helpDocs", tableName) ||
+            sameString("publicHubs", tableName)))
+        // not relevant for hgTables/hgVai/hgIntegrator
+        continue;
+    else
+        {
         if (isHubTrack(tableName))
             tdbList = NULL;
-	struct trackDb *tdb = tdbForTrack(db, tableName, &tdbList);
+        tdb = tdbForTrack(db, tableName, &tdbList);
+        if (!tdb && startsWith("all_", tableName))
+            tdb = tdbForTrack(db, tableName+strlen("all_"), &tdbList);
+        if (!tdb && startsWith("xeno", tableName))
+            {
+            // due to genbank track changes over the years, sometimes tables
+            // get left on different servers when their trackDb entry was removed
+            // long ago. In that case skip those hits
+            continue;
+            }
         if (!tdb)
             errAbort("no track for table \"%s\" found via a findSpec", tableName);
-	char *trackName = tdb->track;
+        trackName = tdb->track;
+        }
     char *vis = hCarefulTrackOpenVisCart(cart, db, trackName);
     boolean excludeTable = FALSE;
     if(!containerDivPrinted)
         {
         fprintf(f, "<div id='hgFindResults'>\n");
         if (hgp->singlePos == NULL) // we might be called with only one result
             fprintf(f, "<p>Your search resulted in multiple matches.  "
                     "Please select a position:</p>\n");
         containerDivPrinted = TRUE;
         }
     if (table->htmlStart) 
         table->htmlStart(table, f);
     else
         fprintf(f, "<H2>%s</H2><PRE>\n", table->description);
     for (pos = table->posList; pos != NULL; pos = pos->next)
@@ -2145,57 +2162,57 @@
         {
         char *queryString = getenv("QUERY_STRING");
         char *addString = "&noShort=1";
         if (isEmpty(queryString))
             addString = "noShort=1";
         fprintf(f, "<A HREF=\"%s?%s%s\"> More results...</A>", hgAppName, queryString, addString);
         }
     fprintf(f, "</div>\n");
     }
 }
 
 static struct hgPositions *hgPositionsSearch(char *db, char *spec,
                                              char **retChromName, int *retWinStart, int *retWinEnd,
                                              boolean *retIsMultiTerm, struct cart *cart,
                                              char *hgAppName, char **retMultiChrom,
-                                             struct dyString *dyWarn)
+                                             struct dyString *dyWarn, struct searchCategory *categories)
 /* Search for positions that match spec (possibly ;-separated in which case *retIsMultiTerm is set).
  * Return a container of tracks and positions (if any) that match term.  If different components
  * of a multi-term search land on different chromosomes then *retMultiChrom will be set. */
 {
 struct hgPositions *hgp = NULL;
 char *chrom = NULL;
 int start = INT_MAX;
 int end = 0;
 char *terms[16];
 int termCount = chopByChar(cloneString(spec), ';', terms, ArraySize(terms));
 boolean multiTerm = (termCount > 1);
 boolean measureTiming = cartUsualBoolean(cart, "measureTiming", FALSE);
 if (retIsMultiTerm)
     *retIsMultiTerm = multiTerm;
 if (retMultiChrom)
     *retMultiChrom = NULL;
 int i;
 for (i = 0;  i < termCount;  i++)
     {
     trimSpaces(terms[i]);
     if (isEmpty(terms[i]))
 	continue;
     // Append warning messages to dyWarn, but allow errAborts to continue
     struct errCatch *errCatch = errCatchNew();
     if (errCatchStart(errCatch))
-        hgp = hgPositionsFind(db, terms[i], "", hgAppName, cart, multiTerm, measureTiming, NULL);
+        hgp = hgPositionsFind(db, terms[i], "", hgAppName, cart, multiTerm, measureTiming, categories);
     errCatchEnd(errCatch);
     if (errCatch->gotError)
         errAbort("%s", errCatch->message->string);
     else if (isNotEmpty(errCatch->message->string))
         dyStringAppend(dyWarn, errCatch->message->string);
     errCatchFree(&errCatch);
     if (hgp->singlePos != NULL)
 	{
 	if (retMultiChrom && chrom != NULL && differentString(chrom, hgp->singlePos->chrom))
             *retMultiChrom = cloneString(chrom);
 	chrom = hgp->singlePos->chrom;
 	if (hgp->singlePos->chromStart < start)
 	    start = hgp->singlePos->chromStart;
 	if (hgp->singlePos->chromEnd > end)
 	    end = hgp->singlePos->chromEnd;
@@ -2219,82 +2236,82 @@
  * resolved position.  Append warnings to dyWarn, errAbort if defaultPos doesn't work.  */
 {
 struct hgPositions *hgp = NULL;
 boolean isMultiTerm = FALSE;
 char *multiDiffChrom = NULL;
 char *db = cartString(cart, "db");
 char *lastPosition = cartOptionalString(cart, "lastPosition");
 if (isNotEmpty(lastPosition) && !IS_CART_VAR_EMPTY(lastPosition))
     {
     if (startsWith(MULTI_REGION_CHROM, lastPosition) || 
         startsWith(OLD_MULTI_REGION_CHROM, lastPosition))
         {
         lastPosition = cartUsualString(cart, "nonVirtPosition", hDefaultPos(db));
         }
     hgp = hgPositionsSearch(db, lastPosition, retChrom, retStart, retEnd, &isMultiTerm,
-                            cart, hgAppName, &multiDiffChrom, dyWarn);
+                            cart, hgAppName, &multiDiffChrom, dyWarn, NULL);
     if (hgp->singlePos && !(isMultiTerm && isNotEmpty(multiDiffChrom)))
         {
         freez(pPosition);
         *pPosition = cloneString(lastPosition);
         return hgp;
         }
     else
         dyStringPrintf(dyWarn, "  Unable to resolve lastPosition '%s'; "
                        "reverting to default position.", lastPosition);
     }
 char *defaultPosition = hDefaultPos(db);
 hgp = hgPositionsSearch(db, defaultPosition, retChrom, retStart, retEnd, &isMultiTerm,
-                        cart, hgAppName, &multiDiffChrom, dyWarn);
+                        cart, hgAppName, &multiDiffChrom, dyWarn, NULL);
 if (hgp->singlePos && !(isMultiTerm && isNotEmpty(multiDiffChrom)))
     {
     freez(pPosition);
     *pPosition = cloneString(defaultPosition);
     }
 else
     errAbort("Unable to resolve default position '%s' for database '%s'.",
              defaultPosition, db);
 return hgp;
 }
 
 static boolean posIsObsolete(char *pos)
 /* Return TRUE if pos, after going through trimSpaces(), is sameWord() as "genome" or "hgBatch".
  * Once, position=genome indicated a genome-wide search, but now we have an independent option. */
 {
 pos = trimSpaces(pos);
 return(sameWord(pos, "genome") || sameWord(pos, "hgBatch"));
 }
 
 struct hgPositions *hgFindSearch(struct cart *cart, char **pPosition,
                                  char **retChrom, int *retStart, int *retEnd,
-                                 char *hgAppName, struct dyString *dyWarn)
+                                 char *hgAppName, struct dyString *dyWarn, struct searchCategory *categories)
 /* If *pPosition is a search term, then try to resolve it to genomic position(s).
  * If unable to find a unique position then revert pPosition to lastPosition (or default position).
  * Return a container of matching tables and positions.  Warnings/errors are appended to dyWarn. */
 {
 struct hgPositions *hgp = NULL;
 if (posIsObsolete(*pPosition))
     {
     hgp = revertPosition(cart, pPosition, retChrom, retStart, retEnd, hgAppName, dyWarn);
     }
 else
     {
     boolean isMultiTerm = FALSE;
     char *multiDiffChrom = NULL;
     char *db = cartString(cart, "db");
     hgp = hgPositionsSearch(db, *pPosition, retChrom, retStart, retEnd,
-                            &isMultiTerm, cart, hgAppName, &multiDiffChrom, dyWarn);
+                            &isMultiTerm, cart, hgAppName, &multiDiffChrom, dyWarn, categories);
     if (isMultiTerm && isNotEmpty(multiDiffChrom))
         {
         dyStringPrintf(dyWarn, "Sites occur on different chromosomes: %s, %s.",
                        multiDiffChrom, hgp->singlePos->chrom);
         hgp = revertPosition(cart, pPosition, retChrom, retStart, retEnd, hgAppName, dyWarn);
         }
     else if (hgp->posCount > 1 ||
              // In weird cases it's possible to get a single result that does not have coords, but
              // leads to another search a la multiple results!  That happened with genbank keyword
              // search ("elmer" in hg19, hg38 Feb. '19).  I fixed it but there could be other cases.
              (hgp->posCount == 1 && !hgp->singlePos))
         {
         if (isMultiTerm)
             dyStringPrintf(dyWarn, "%s not uniquely determined (%d locations) -- "
                            "can't do multi-position search.",
@@ -2885,33 +2902,30 @@
 {
 boolean isVisible = FALSE;
 if (tdb->parent == NULL)
     {
     char *cartVis = cartOptionalString(cart, tdb->track);
     if (cartVis == NULL)
         isVisible =  tdb->visibility != tvHide;
     else
         isVisible =  differentString(cartVis, "hide");
     }
 else if (isParentVisible(cart, tdb) &&  isSubtrackVisible(cart, tdb))
     isVisible = TRUE;
 return isVisible;
 }
 
-struct hash *hgFindTrackHash = NULL;
-struct hash *hgFindGroupHash = NULL;
-
 int cmpCategories(const void *a, const void *b)
 /* Compare two categories by strcmp() of their id fields, for uniquifying; a and b point to element pointers. */
 {
 struct searchCategory *categA = *(struct searchCategory **)a;
 struct searchCategory *categB = *(struct searchCategory **)b;
 return strcmp(categA->id, categB->id);
 }
 
 static struct searchableTrack *getSearchableTracks(struct cart *cart, char *database)
 /* Return the list of all tracks with an hgFindSpec available */
 {
 if (trackHubDatabase(database))
     return NULL;
 struct searchableTrack *ret = NULL;
 struct sqlConnection *conn = hAllocConn(database);