03803eba90389888a329f1c2ddb16b61fdaac4b3 angie Wed Jan 10 17:06:08 2018 -0800 In hgGateway's autocomplete search for assembly hubs, use the new & improved hubSearchText table instead of the no-longer-updated trix search files. refs #20694 diff --git src/hg/hgGateway/hgGateway.c src/hg/hgGateway/hgGateway.c index d030c8d..0109356 100644 --- src/hg/hgGateway/hgGateway.c +++ src/hg/hgGateway/hgGateway.c @@ -15,31 +15,30 @@ #include "cheapcgi.h" #include "errCatch.h" #include "googleAnalytics.h" #include "hCommon.h" #include "hgConfig.h" #include "hdb.h" #include "htmshell.h" #include "hubConnect.h" #include "hui.h" #include "jsHelper.h" #include "jsonParse.h" #include "obscure.h" // for readInGulp #include "regexHelper.h" #include "suggest.h" #include "trackHub.h" -#include "trix.h" #include "web.h" /* Global Variables */ struct cart *cart = NULL; /* CGI and other variables */ struct hash *oldVars = NULL; /* Old contents of cart before it was updated by CGI */ #define SEARCH_TERM "hggw_term" static char *maybeGetDescriptionText(char *db) /* Slurp the description.html file for db into a string (if possible, don't die if * we can't read it) and return it. */ { struct errCatch *errCatch = errCatchNew(); char *descText = NULL; if (errCatchStart(errCatch)) @@ -680,67 +679,107 @@ char *hubUrl; // hub url char *aDb; // assembly db hosted by hub }; static struct aHubMatch *aHubMatchNew(char *shortLabel, char *hubUrl, char *aDb) /* Allocate and return a description of an assembly hub db. */ { struct aHubMatch *match; AllocVar(match); match->shortLabel = cloneString(shortLabel); match->hubUrl = cloneString(hubUrl); match->aDb = cloneString(aDb); return match; } -static struct aHubMatch *filterTrixSearchMatches(struct dbDb *dbDbList, - struct trixSearchResult *tsrList) -/* Collect the assembly hub matches (not track hub matches) from a search in hub trix files. */ +static struct hash *unpackHubDbUrlList(struct slName *hubDbUrlList) +/* hubDbUrlList contains strings like "db,hubUrl" -- split on comma and return a hash of + * hubUrl to one or more dbs. */ { -if (tsrList == NULL) +struct hash *hubToDb = hashNew(0); +struct slName *hubDbUrl; +for (hubDbUrl = hubDbUrlList; hubDbUrl != NULL; hubDbUrl = hubDbUrl->next) + { + char *comma = strchr(hubDbUrl->name, ','); + if (comma) + { + char *db = hubDbUrl->name; + *comma = '\0'; + char *hubUrl = comma+1; + struct hashEl *hel = hashLookup(hubToDb, hubUrl); + struct slName *dbList = hel ? hel->val : NULL; + slAddHead(&dbList, slNameNew(db)); + if (hel == NULL) + hashAdd(hubToDb, hubUrl, dbList); + else + hel->val = dbList; + } + } +return hubToDb; +} + +static struct aHubMatch *filterHubSearchTextMatches(struct dbDb *dbDbList, + struct slName *hubDbUrlList) +/* Collect the assembly hub matches (not track hub matches) from a search in hubSearchText. */ +{ +if (hubDbUrlList == NULL) return NULL; struct aHubMatch *aHubMatchList = NULL; // Make a hash of local dbs so we can tell which hub dbs must be assembly hubs // not track hubs. struct hash *localDbs = hashNew(0); struct dbDb *dbDb; for (dbDb = dbDbList; dbDb != NULL; dbDb = dbDb->next) hashStore(localDbs, dbDb->name); - -// tsrList gives hub URLs which we can then look up in hubPublic. +struct hash *hubToDb = unpackHubDbUrlList(hubDbUrlList); +// Build up a query to find shortLabel and dbList for each hubUrl. struct dyString *query = sqlDyStringCreate("select shortLabel,hubUrl,dbList from %s " "where hubUrl in (", hubPublicTableName()); -struct trixSearchResult *tsr; -for (tsr = tsrList; tsr != NULL; tsr = tsr->next) +struct hashEl *hel; +struct hashCookie cookie = hashFirst(hubToDb); +boolean isFirst = TRUE; +while ((hel = hashNext(&cookie)) != NULL) { - if (tsr != tsrList) + if (isFirst) + isFirst = FALSE; + else dyStringAppend(query, ", "); - dyStringPrintf(query, "'%s'", tsr->itemId); + dyStringPrintf(query, "'%s'", hel->name); } dyStringAppendC(query, ')'); struct sqlConnection *conn = hConnectCentral(); struct sqlResult *sr = sqlGetResult(conn, query->string); char **row; while ((row = sqlNextRow(sr)) != NULL) { char *shortLabel = row[0]; char *hubUrl = row[1]; - struct slName *dbName, *dbList = slNameListFromComma(row[2]); - for (dbName = dbList; dbName != NULL; dbName = dbName->next) + struct slName *dbName, *matchDbList = hashFindVal(hubToDb, hubUrl); + struct slName *hubDbList = slNameListFromComma(row[2]); + if (slCount(matchDbList) == 1 && isEmpty(matchDbList->name)) + { + // top-level hub match, no specific db match; add all of hub's assembly dbs + for (dbName = hubDbList; dbName != NULL; dbName = dbName->next) if (! hashLookup(localDbs, dbName->name)) + slAddHead(&aHubMatchList, aHubMatchNew(shortLabel, hubUrl, dbName->name)); + } + else { + // Add matching assembly dbs that are found in hubDbList + for (dbName = matchDbList; dbName != NULL; dbName = dbName->next) + if (! hashLookup(localDbs, dbName->name) && slNameInList(hubDbList, dbName->name)) slAddHead(&aHubMatchList, aHubMatchNew(shortLabel, hubUrl, dbName->name)); } } slReverse(&aHubMatchList); hDisconnectCentral(&conn); return aHubMatchList; } static void writeAssemblyHubMatches(struct jsonWrite *jw, struct aHubMatch *aHubMatchList) /* Write out JSON for each assembly in each assembly hub that matched the search term. */ { struct aHubMatch *aHubMatch; for (aHubMatch = aHubMatchList; aHubMatch != NULL; aHubMatch = aHubMatch->next) { jsonWriteObjectStart(jw, NULL); @@ -748,48 +787,56 @@ jsonWriteString(jw, "db", aHubMatch->aDb); jsonWriteString(jw, "hubUrl", aHubMatch->hubUrl); jsonWriteString(jw, "hubName", hubNameFromUrl(aHubMatch->hubUrl)); // Add a category label for customized autocomplete-with-categories. char category[PATH_LEN*4]; safef(category, sizeof(category), "Assembly Hub: %s", aHubMatch->shortLabel); jsonWriteString(jw, "category", category); jsonWriteString(jw, "value", aHubMatch->aDb); // Use just the db as label, since shortLabel is included in the category label. jsonWriteString(jw, "label", aHubMatch->aDb); jsonWriteObjectEnd(jw); } } static struct aHubMatch *searchPublicHubs(struct dbDb *dbDbList, char *term) -/* Search for term in public hub trix files -- return a list of matches to assembly hubs +/* Search for term in public hubs -- return a list of matches to assembly hubs * (i.e. hubs that host an assembly with 2bit etc as opposed to only providing tracks.) */ { struct aHubMatch *aHubMatchList = NULL; -char *trixFile = cfgOptionEnvDefault("HUBSEARCHTRIXFILE", "hubSearchTrixFile", - hReplaceGbdb("/gbdb/hubs/public.ix")); -if (fileExists(trixFile)) - { - struct trix *trix = trixOpen(trixFile); - char termCopy[strlen(term)+1]; - safecpy(termCopy, sizeof(termCopy), term); - tolowers(termCopy); - char *words[512]; - int wordCount = chopByWhite(termCopy, words, ArraySize(words)); - struct trixSearchResult *tsrList = trixSearch(trix, wordCount, words, tsmFirstFive); - aHubMatchList = filterTrixSearchMatches(dbDbList, tsrList); - trixClose(&trix); +char *hubSearchTableName = cfgOptionDefault("hubSearchTextTable", "hubSearchText"); +struct sqlConnection *conn = hConnectCentral(); +if (sqlTableExists(conn, hubSearchTableName)) + { + char query[1024]; + sqlSafef(query, sizeof(query), "select distinct(concat(db, concat(',', hubUrl))) from %s " + "where track = '' and " + "(db like '%s%%' or label like '%%%s%%' or text like '%s%%')", + hubSearchTableName, term, term, term); + struct slName *hubDbUrlList = sqlQuickList(conn, query); + aHubMatchList = filterHubSearchTextMatches(dbDbList, hubDbUrlList); + if (aHubMatchList == NULL) + { + // Try a looser query + sqlSafef(query, sizeof(query), "select distinct(concat(db, concat(',', hubUrl))) from %s " + "where track = '' and text like '%% %s%%'", + hubSearchTableName, term); + hubDbUrlList = sqlQuickList(conn, query); + aHubMatchList = filterHubSearchTextMatches(dbDbList, hubDbUrlList); } + } +hDisconnectCentral(&conn); return aHubMatchList; } static char *getSearchTermUpperCase() /* If we don't have the SEARCH_TERM cgi param, exit with an HTTP Bad Request response. * If we do, convert it to upper case for case-insensitive matching and return it. */ { pushAbortHandler(htmlVaBadRequestAbort); char *term = cgiOptionalString(SEARCH_TERM); touppers(term); if (isEmpty(term)) errAbort("Missing required CGI parameter %s", SEARCH_TERM); popAbortHandler(); return term; }