b62fb9699fbc8ae0ef6ac9bf8e8760831e6f5ba2 chmalee Tue Apr 30 13:24:22 2024 -0700 Add genark assemblies to species search on hgGateway, refs #33572 diff --git src/hg/hgGateway/hgGateway.c src/hg/hgGateway/hgGateway.c index 2df826f..6da5f8a 100644 --- src/hg/hgGateway/hgGateway.c +++ src/hg/hgGateway/hgGateway.c @@ -17,30 +17,31 @@ #include "googleAnalytics.h" #include "hCommon.h" #include "hgConfig.h" #include "hdb.h" #include "htmshell.h" #include "hubConnect.h" #include "hui.h" #include "jsHelper.h" #include "jsonParse.h" #include "obscure.h" // for readInGulp #include "regexHelper.h" #include "suggest.h" #include "trackHub.h" #include "web.h" #include "botDelay.h" +#include "genark.h" /* Global Variables */ struct cart *cart = NULL; /* CGI and other variables */ struct hash *oldVars = NULL; /* Old contents of cart before it was updated by CGI */ static boolean issueBotWarning = FALSE; static int measureTiming = 0; static long enteredMainTime = 0; #define SEARCH_TERM "hggw_term" static char *maybeGetDescriptionText(char *db) /* Slurp the description.html file for db into a string (if possible, don't die if * we can't read it) and return it. */ { @@ -714,53 +715,79 @@ if (! (startsWith(term, "BRO") || startsWith(term, "WU") || startsWith(term, "BAY") || startsWith(term, "AGE"))) { // dbDb.description also starts with dates followed by actual description in parentheses, // so search only the part in parentheses to avoid month prefix matches. char *leftP = strchr(dbDb->description, '('); char *toSearch = leftP ? 
leftP+1 : dbDb->description; checkTerm(term, toSearch, ddmtDescription, dbDb, matchHash, &matchList); } } slSort(&matchList, dbDbMatchCmp); return matchList; } // Assembly hub match: - struct aHubMatch // description of an assembly hub db { struct aHubMatch *next; char *shortLabel; // hub shortLabel char *hubUrl; // hub url char *aDb; // assembly db hosted by hub char *label; // label for this db }; static struct aHubMatch *aHubMatchNew(char *shortLabel, char *hubUrl, char *aDb, char *label) /* Allocate and return a description of an assembly hub db. */ { struct aHubMatch *match; AllocVar(match); match->shortLabel = cloneString(shortLabel); match->hubUrl = cloneString(hubUrl); match->aDb = cloneString(aDb); match->label = cloneString(label); return match; } +// Genark hub match: +struct gHubMatch + // description of a genark hub db + { + struct gHubMatch *next; + char *gcAccession; + char *hubUrl; + char *asmName; + char *scientificName; + char *commonName; + int priority; // reserved for later ranking, currently unused + }; + +static struct gHubMatch *gHubMatchNew(char *acc, char *hubUrl, char *asmName, char *scientificName, char *commonName, int priority) +/* Allocate and return a description of a genark hub db; each string argument is cloned. */ +{ +struct gHubMatch *match; +AllocVar(match); +match->gcAccession = cloneString(acc); +match->hubUrl = cloneString(hubUrl); +match->asmName = cloneString(asmName); +match->scientificName = cloneString(scientificName); +match->commonName = cloneString(commonName); +match->priority = priority; +return match; +} + static struct hash *unpackHubDbUrlList(struct slName *hubDbUrlList, struct hash **labelHash) /* hubDbUrlList contains strings like "db\tlabel\thubUrl" -- split on tab and return a hash of * hubUrl to one or more dbs. 
*/ { struct hash *hubToDb = hashNew(0); struct hash *dbToLabel = hashNew(0); struct slName *hubDbUrl; for (hubDbUrl = hubDbUrlList; hubDbUrl != NULL; hubDbUrl = hubDbUrl->next) { char *tab = strchr(hubDbUrl->name, '\t'); if (tab) { char *db = hubDbUrl->name; *tab = '\0'; char *label = tab+1; @@ -892,67 +919,127 @@ aHubMatchList = filterHubSearchTextMatches(dbDbList, hubDbUrlList); if (aHubMatchList == NULL) { // Try a looser query sqlSafef(query, sizeof(query), "select distinct(concat(db, concat(concat('\t', label), concat('\t', hubUrl)))) from %s " "where track = '' and text like '%% %s%%'", hubSearchTableName, term); hubDbUrlList = sqlQuickList(conn, query); aHubMatchList = filterHubSearchTextMatches(dbDbList, hubDbUrlList); } } hDisconnectCentral(&conn); return aHubMatchList; } +static void writeGenarkMatches(struct jsonWrite *jw, struct gHubMatch *gHubMatchList) +/* Write out JSON for each genark hub that matched the user's term */ +{ +struct gHubMatch *gHubMatch; +for (gHubMatch = gHubMatchList; gHubMatch != NULL; gHubMatch = gHubMatch->next) + { + jsonWriteObjectStart(jw, NULL); + jsonWriteString(jw, "genome", gHubMatch->gcAccession); + jsonWriteString(jw, "db", gHubMatch->asmName); + jsonWriteString(jw, "hubUrl", gHubMatch->hubUrl); + // Add a category label for customized autocomplete-with-categories. + jsonWriteString(jw, "category", "GenArk"); + jsonWriteString(jw, "value", gHubMatch->asmName); + // Label shown for the entry is "commonName - scientificName". 
+ jsonWriteStringf(jw, "label", "%s - %s", gHubMatch->commonName, gHubMatch->scientificName); + jsonWriteObjectEnd(jw); + } +} + +static struct gHubMatch *filterGenarkMatches(char *genarkHubUrl, struct genark *matchList) +/* Turn the sql results into a struct gHubMatch list */ +{ +struct genark *match; +struct gHubMatch *ret = NULL; + +for (match = matchList; match != NULL; match = match->next) + { + // prepend genarkHubUrl to the row's hubUrl; NOTE(review): gHubMatchNew clones its string arguments, so this temporary looks like it is never freed -- confirm + char *hubUrl = catTwoStrings(genarkHubUrl, match->hubUrl); + slAddHead(&ret, gHubMatchNew(match->gcAccession, hubUrl, match->asmName, match->scientificName, match->commonName, -1)); + } +return ret; +} + +static struct gHubMatch *searchGenark(char *term) +/* Search the genark table for hubs matching term; returns NULL if genarkHubPrefix is not configured or the table does not exist */ +{ +char *genarkPrefix = cfgOption("genarkHubPrefix"); +if (genarkPrefix == NULL) + return NULL; + +struct gHubMatch *gHubMatchList = NULL; +char *genarkTbl = genarkTableName(); +struct sqlConnection *conn = hConnectCentral(); +if (sqlTableExists(conn, genarkTbl)) + { + char query[1024]; + sqlSafef(query, sizeof(query), "select * from %s where " + "(gcAccession like '%%%s%%' or scientificName like '%%%s%%' or commonName like '%%%s%%' or asmName like '%%%s%%')", + genarkTbl, term, term, term, term); + struct genark *matchList = genarkLoadByQuery(conn, query); + gHubMatchList = filterGenarkMatches(genarkPrefix, matchList); + } +hDisconnectCentral(&conn); +return gHubMatchList; +} + static char *getSearchTermUpperCase() /* If we don't have the SEARCH_TERM cgi param, exit with an HTTP Bad Request response. * If we do, convert it to upper case for case-insensitive matching and return it. 
*/ { pushWarnHandler(htmlVaBadRequestAbort); pushAbortHandler(htmlVaBadRequestAbort); char *cgiTerm = cgiOptionalString(SEARCH_TERM); char *term = skipLeadingSpaces(cgiTerm); eraseTrailingSpaces(term); touppers(term); if (isEmpty(term)) errAbort("Missing required CGI parameter %s", SEARCH_TERM); popWarnHandler(); popAbortHandler(); return term; } static void lookupTerm() /* Look for matches to term in hgcentral and print as JSON for autocomplete if found. */ { char *term = getSearchTermUpperCase(); // Write JSON response with list of matches puts("Content-Type:text/javascript\n"); // Before accessing hubs, intialize udc cache location from hg.conf: setUdcCacheDir(); struct dbDb *dbDbList = hDbDbList(); struct dbDbMatch *matchList = searchDbDb(dbDbList, term); +struct gHubMatch *gHubMatchList = searchGenark(term); struct aHubMatch *aHubMatchList = searchPublicHubs(dbDbList, term); struct jsonWrite *jw = jsonWriteNew(); jsonWriteListStart(jw, NULL); // Write out JSON for dbDb matches, if any; add category if we found assembly hub matches too. char *category = aHubMatchList ? "UCSC databases" : NULL; struct dbDbMatch *match; for (match = matchList; match != NULL; match = match->next) writeDbDbMatch(jw, match, term, category); +// Write out genark matches, if any. NOTE(review): category above depends only on aHubMatchList, so dbDb entries get no category when only genark matches exist -- confirm intended +writeGenarkMatches(jw, gHubMatchList); // Write out assembly hub matches, if any. writeAssemblyHubMatches(jw, aHubMatchList); jsonWriteListEnd(jw); puts(jw->dy->string); jsonWriteFree(&jw); } int main(int argc, char *argv[]) /* Process CGI / command line. */ { /* Null terminated list of CGI Variables we don't want to save * permanently. */ char *excludeVars[] = {SEARCH_TERM, CARTJSON_COMMAND, NULL,}; cgiSpoof(&argc, argv); measureTiming = cgiOptionalInt("measureTiming", 0);