03803eba90389888a329f1c2ddb16b61fdaac4b3
angie
  Wed Jan 10 17:06:08 2018 -0800
In hgGateway's autocomplete search for assembly hubs, use the new & improved hubSearchText table instead of the no-longer-updated trix search files.  refs #20694

diff --git src/hg/hgGateway/hgGateway.c src/hg/hgGateway/hgGateway.c
index d030c8d..0109356 100644
--- src/hg/hgGateway/hgGateway.c
+++ src/hg/hgGateway/hgGateway.c
@@ -15,31 +15,30 @@
 #include "cheapcgi.h"
 #include "errCatch.h"
 #include "googleAnalytics.h"
 #include "hCommon.h"
 #include "hgConfig.h"
 #include "hdb.h"
 #include "htmshell.h"
 #include "hubConnect.h"
 #include "hui.h"
 #include "jsHelper.h"
 #include "jsonParse.h"
 #include "obscure.h"  // for readInGulp
 #include "regexHelper.h"
 #include "suggest.h"
 #include "trackHub.h"
-#include "trix.h"
 #include "web.h"
 
 /* Global Variables */
 struct cart *cart = NULL;             /* CGI and other variables */
 struct hash *oldVars = NULL;          /* Old contents of cart before it was updated by CGI */
 
 #define SEARCH_TERM "hggw_term"
 
 static char *maybeGetDescriptionText(char *db)
 /* Slurp the description.html file for db into a string (if possible, don't die if
  * we can't read it) and return it. */
 {
 struct errCatch *errCatch = errCatchNew();
 char *descText = NULL;
 if (errCatchStart(errCatch))
@@ -680,67 +679,107 @@
     char *hubUrl;              // hub url
     char *aDb;                 // assembly db hosted by hub
     };
 
 static struct aHubMatch *aHubMatchNew(char *shortLabel, char *hubUrl, char *aDb)
 /* Allocate and return a description of an assembly hub db. */
 {
 struct aHubMatch *match;
 AllocVar(match);
 match->shortLabel = cloneString(shortLabel);
 match->hubUrl = cloneString(hubUrl);
 match->aDb = cloneString(aDb);
 return match;
 }
 
-static struct aHubMatch *filterTrixSearchMatches(struct dbDb *dbDbList,
-                                                 struct trixSearchResult *tsrList)
-/* Collect the assembly hub matches (not track hub matches) from a search in hub trix files. */
+static struct hash *unpackHubDbUrlList(struct slName *hubDbUrlList)
+/* hubDbUrlList contains strings like "db,hubUrl" -- split each at its first comma (overwriting
+ * the comma in place) and return a hash of hubUrl to an slName list of one or more dbs. */
 {
-if (tsrList == NULL)
+struct hash *hubToDb = hashNew(0);
+struct slName *hubDbUrl;
+for (hubDbUrl = hubDbUrlList;  hubDbUrl != NULL;  hubDbUrl = hubDbUrl->next)
+    {
+    char *comma = strchr(hubDbUrl->name, ',');
+    if (comma)
+        {
+        char *db = hubDbUrl->name;
+        *comma = '\0';
+        char *hubUrl = comma+1;
+        struct hashEl *hel = hashLookup(hubToDb, hubUrl);
+        struct slName *dbList = hel ? hel->val : NULL;
+        slAddHead(&dbList, slNameNew(db));
+        if (hel == NULL)
+            hashAdd(hubToDb, hubUrl, dbList);
+        else
+            hel->val = dbList;
+        }
+    }
+return hubToDb;
+}
+
+static struct aHubMatch *filterHubSearchTextMatches(struct dbDb *dbDbList,
+                                                    struct slName *hubDbUrlList)
+/* Collect the assembly hub matches (not track hub matches) from a search in hubSearchText. */
+{
+if (hubDbUrlList == NULL)
     return NULL;
 struct aHubMatch *aHubMatchList = NULL;
 // Make a hash of local dbs so we can tell which hub dbs must be assembly hubs
 // not track hubs.
 struct hash *localDbs = hashNew(0);
 struct dbDb *dbDb;
 for (dbDb = dbDbList;  dbDb != NULL;  dbDb = dbDb->next)
     hashStore(localDbs, dbDb->name);
-
-// tsrList gives hub URLs which we can then look up in hubPublic.
+struct hash *hubToDb = unpackHubDbUrlList(hubDbUrlList);
+// Build up a query to find shortLabel and dbList for each hubUrl.
 struct dyString *query = sqlDyStringCreate("select shortLabel,hubUrl,dbList from %s "
                                            "where hubUrl in (",
                                            hubPublicTableName());
-struct trixSearchResult *tsr;
-for (tsr = tsrList;  tsr != NULL; tsr = tsr->next)
+struct hashEl *hel;
+struct hashCookie cookie = hashFirst(hubToDb);
+boolean isFirst = TRUE;
+while ((hel = hashNext(&cookie)) != NULL)
     {
-    if (tsr != tsrList)
+    if (isFirst)
+        isFirst = FALSE;
+    else
         dyStringAppend(query, ", ");
-    dyStringPrintf(query, "'%s'", tsr->itemId);
+    dyStringPrintf(query, "'%s'", hel->name);
     }
 dyStringAppendC(query, ')');
 struct sqlConnection *conn = hConnectCentral();
 struct sqlResult *sr = sqlGetResult(conn, query->string);
 char **row;
 while ((row = sqlNextRow(sr)) != NULL)
     {
     char *shortLabel = row[0];
     char *hubUrl = row[1];
-    struct slName *dbName, *dbList = slNameListFromComma(row[2]);
-    for (dbName = dbList;  dbName != NULL;  dbName = dbName->next)
+    struct slName *dbName, *matchDbList = hashFindVal(hubToDb, hubUrl);
+    struct slName *hubDbList = slNameListFromComma(row[2]);
+    if (slCount(matchDbList) == 1 && isEmpty(matchDbList->name))
+        {
+        // top-level hub match, no specific db match; add all of hub's assembly dbs
+        for (dbName = hubDbList;  dbName != NULL;  dbName = dbName->next)
             if (! hashLookup(localDbs, dbName->name))
+                slAddHead(&aHubMatchList, aHubMatchNew(shortLabel, hubUrl, dbName->name));
+        }
+    else
         {
+        // Add matching assembly dbs that are found in hubDbList
+        for (dbName = matchDbList;  dbName != NULL;  dbName = dbName->next)
+            if (! hashLookup(localDbs, dbName->name) && slNameInList(hubDbList, dbName->name))
                 slAddHead(&aHubMatchList, aHubMatchNew(shortLabel, hubUrl, dbName->name));
         }
     }
 slReverse(&aHubMatchList);
 hDisconnectCentral(&conn);
 return aHubMatchList;
 }
 
 static void writeAssemblyHubMatches(struct jsonWrite *jw, struct aHubMatch *aHubMatchList)
 /* Write out JSON for each assembly in each assembly hub that matched the search term. */
 {
 struct aHubMatch *aHubMatch;
 for (aHubMatch = aHubMatchList;  aHubMatch != NULL;  aHubMatch = aHubMatch->next)
     {
     jsonWriteObjectStart(jw, NULL);
@@ -748,48 +787,56 @@
     jsonWriteString(jw, "db", aHubMatch->aDb);
     jsonWriteString(jw, "hubUrl", aHubMatch->hubUrl);
     jsonWriteString(jw, "hubName", hubNameFromUrl(aHubMatch->hubUrl));
     // Add a category label for customized autocomplete-with-categories.
     char category[PATH_LEN*4];
     safef(category, sizeof(category), "Assembly Hub: %s", aHubMatch->shortLabel);
     jsonWriteString(jw, "category", category);
     jsonWriteString(jw, "value", aHubMatch->aDb);
     // Use just the db as label, since shortLabel is included in the category label.
     jsonWriteString(jw, "label", aHubMatch->aDb);
     jsonWriteObjectEnd(jw);
     }
 }
 
 static struct aHubMatch *searchPublicHubs(struct dbDb *dbDbList, char *term)
-/* Search for term in public hub trix files -- return a list of matches to assembly hubs
+/* Search for term in public hubs -- return a list of matches to assembly hubs
  * (i.e. hubs that host an assembly with 2bit etc as opposed to only providing tracks.) */
 {
 struct aHubMatch *aHubMatchList = NULL;
-char *trixFile = cfgOptionEnvDefault("HUBSEARCHTRIXFILE", "hubSearchTrixFile",
-                                     hReplaceGbdb("/gbdb/hubs/public.ix"));
-if (fileExists(trixFile))
-    {
-    struct trix *trix = trixOpen(trixFile);
-    char termCopy[strlen(term)+1];
-    safecpy(termCopy, sizeof(termCopy), term);
-    tolowers(termCopy);
-    char *words[512];
-    int wordCount = chopByWhite(termCopy, words, ArraySize(words));
-    struct trixSearchResult *tsrList = trixSearch(trix, wordCount, words, tsmFirstFive);
-    aHubMatchList = filterTrixSearchMatches(dbDbList, tsrList);
-    trixClose(&trix);
+char *hubSearchTableName = cfgOptionDefault("hubSearchTextTable", "hubSearchText");
+struct sqlConnection *conn = hConnectCentral();
+if (sqlTableExists(conn, hubSearchTableName))
+    {
+    char query[1024];
+    sqlSafef(query, sizeof(query), "select distinct(concat(db, concat(',', hubUrl))) from %s "
+             "where track = '' and "
+             "(db like '%s%%' or label like '%%%s%%' or text like '%s%%')",
+             hubSearchTableName, term, term, term);
+    struct slName *hubDbUrlList = sqlQuickList(conn, query);
+    aHubMatchList = filterHubSearchTextMatches(dbDbList, hubDbUrlList);
+    if (aHubMatchList == NULL)
+        {
+        // Try a looser query: term preceded by a space anywhere in text
+        sqlSafef(query, sizeof(query), "select distinct(concat(db, concat(',', hubUrl))) from %s "
+                 "where track = '' and text like '%% %s%%'",
+                 hubSearchTableName, term);
+        hubDbUrlList = sqlQuickList(conn, query);
+        aHubMatchList = filterHubSearchTextMatches(dbDbList, hubDbUrlList);
         }
+    }
+hDisconnectCentral(&conn);
 return aHubMatchList;
 }
 
 static char *getSearchTermUpperCase()
 /* If we don't have the SEARCH_TERM cgi param, exit with an HTTP Bad Request response.
  * If we do, convert it to upper case for case-insensitive matching and return it. */
 {
 pushAbortHandler(htmlVaBadRequestAbort);
 char *term = cgiOptionalString(SEARCH_TERM);
 touppers(term);
 if (isEmpty(term))
     errAbort("Missing required CGI parameter %s", SEARCH_TERM);
 popAbortHandler();
 return term;
 }