621822a450d38d641f59df03aa461fa7dba64da6 hiram Fri Sep 27 15:24:08 2024 -0700 now with one word searches on the assemblyList table refs #32596 diff --git src/hg/hgGateway/hgGateway.c src/hg/hgGateway/hgGateway.c index f262d8e..ca8f0f5 100644 --- src/hg/hgGateway/hgGateway.c +++ src/hg/hgGateway/hgGateway.c @@ -19,30 +19,31 @@ #include "hgConfig.h" #include "hdb.h" #include "htmshell.h" #include "hubConnect.h" #include "hui.h" #include "jsHelper.h" #include "jsonParse.h" #include "obscure.h" // for readInGulp #include "regexHelper.h" #include "suggest.h" #include "trackHub.h" #include "web.h" #include "botDelay.h" #include "genark.h" #include "assemblyList.h" +#include <limits.h> /* Global Variables */ struct cart *cart = NULL; /* CGI and other variables */ struct hash *oldVars = NULL; /* Old contents of cart before it was updated by CGI */ static boolean issueBotWarning = FALSE; static int measureTiming = 0; static long enteredMainTime = 0; #define SEARCH_TERM "hggw_term" static char *maybeGetDescriptionText(char *db) /* Slurp the description.html file for db into a string (if possible, don't die if * we can't read it) and return it. */ { @@ -748,34 +749,34 @@ match->aDb = cloneString(aDb); match->label = cloneString(label); return match; } // Genark hub match: struct gHubMatch // description of an genark hub db { struct gHubMatch *next; char *gcAccession; char *hubUrl; char *asmName; char *scientificName; char *commonName; - int priority; // reserver for later ranking, currently unused + unsigned priority; // for ranking, currently unused }; -static struct gHubMatch *gHubMatchNew(char *acc, char *hubUrl, char *asmName, char *scientificName, char *commonName, int priority) +static struct gHubMatch *gHubMatchNew(char *acc, char *hubUrl, char *asmName, char *scientificName, char *commonName, unsigned priority) /* Allocate and return a description of an assembly hub db. 
*/ { struct gHubMatch *match; AllocVar(match); match->gcAccession = cloneString(acc); match->hubUrl = cloneString(hubUrl); match->asmName = cloneString(asmName); match->scientificName = cloneString(scientificName); match->commonName = cloneString(commonName); match->priority = priority; return match; } static struct hash *unpackHubDbUrlList(struct slName *hubDbUrlList, struct hash **labelHash) /* hubDbUrlList contains strings like "db\tlabel\thubUrl" -- split on tab and return a hash of @@ -958,42 +959,107 @@ struct genark *match; struct gHubMatch *ret = NULL; for (match = matchList; match != NULL; match = match->next) { // the match contains tab-sep accession, hubUrl, asmName, scientificName, commonName char hubUrl[PATH_LEN+1]; safef(hubUrl, sizeof(hubUrl), "%s/%s", genarkHubUrl, match->hubUrl); slAddHead(&ret, gHubMatchNew(match->gcAccession, hubUrl, match->asmName, match->scientificName, match->commonName, -1)); } if (ret) slReverse(&ret); return ret; } +static struct gHubMatch *filterAssemblyListMatches(struct sqlConnection *conn, + char *asmListTable, char *term, char *genarkPrefix, boolean wildCard) +{ +struct gHubMatch *ret = NULL; +struct dyString *query = dyStringNew(64); +/* LIMIT of 100 will allow enough results to include some genArk assemblies */ +if (wildCard) + sqlDyStringPrintf(query, "SELECT * FROM %s WHERE MATCH(name, commonName, scientificName, clade, description, refSeqCategory, versionStatus, assemblyLevel) AGAINST ('%s*' IN BOOLEAN MODE) AND browserExists=1 LIMIT 100", asmListTable, term); +else + sqlDyStringPrintf(query, "SELECT * FROM %s WHERE MATCH(name, commonName, scientificName, clade, description, refSeqCategory, versionStatus, assemblyLevel) AGAINST ('%s' IN BOOLEAN MODE) AND browserExists=1 LIMIT 100", asmListTable, term); + +struct sqlResult *sr = sqlGetResult(conn, query->string); +dyStringFree(&query); +char **row; +int c = 0; +while ((row = sqlNextRow(sr)) != NULL) + { + struct assemblyList *el = assemblyListLoadWithNull(row); + 
if (isGenArk(el->name)) + { + ++c; + char genarkUrl[PATH_MAX]; + safef(genarkUrl, sizeof(genarkUrl), "%s/%s", genarkPrefix, el->hubUrl); + slAddHead(&ret, gHubMatchNew(el->name, genarkUrl, NULL, el->scientificName, el->commonName, *el->priority)); + } + if ( c > 20 ) /* allow only 20 genArk returns */ + break; + } +sqlFreeResult(&sr); + +if (ret) + slReverse(&ret); +return ret; +} + static struct gHubMatch *searchGenark(char *term) -/* Search through the genark table for hubs matches term */ +/* Search through the genark table (or assemblyList table) for hubs + matches term */ { char *genarkPrefix = cfgOption("genarkHubPrefix"); if (genarkPrefix == NULL) return NULL; +struct sqlConnection *conn = hConnectCentral(); struct gHubMatch *gHubMatchList = NULL; char *genarkTbl = genarkTableName(); int colCount = genArkColumnCount(); -struct sqlConnection *conn = hConnectCentral(); -if (sqlTableExists(conn, genarkTbl)) +int termLength = strlen(term); + +char *asmListTable = assemblyListTableName(); +/* only allow the asmList query when the search term is more than 2 letters */ +if ((termLength > 2) && sqlTableExists(conn, asmListTable)) + { + int wordCount = chopByWhite(term, NULL, 0); + if (1 == wordCount) + { + struct dyString *query = dyStringNew(64); + sqlDyStringPrintf(query, "SELECT COUNT(*) FROM %s WHERE MATCH(name, commonName, scientificName, clade, description, refSeqCategory, versionStatus, assemblyLevel) AGAINST ('%s' IN BOOLEAN MODE) AND browserExists=1", asmListTable, term); + + long long matchCount = sqlQuickLongLong(conn, query->string); + + dyStringFree(&query); + boolean wildCard = FALSE; + if (0 == matchCount) /* try prefix search */ + { + query = dyStringNew(64); + sqlDyStringPrintf(query, "SELECT COUNT(*) FROM %s WHERE MATCH(name, commonName, scientificName, clade, description, refSeqCategory, versionStatus, assemblyLevel) AGAINST ('%s*' IN BOOLEAN MODE) AND browserExists=1", asmListTable, term); + matchCount = sqlQuickLongLong(conn, query->string); + 
dyStringFree(&query); + if (matchCount > 0) + wildCard = TRUE; + } + if (matchCount > 0) + gHubMatchList = filterAssemblyListMatches(conn, asmListTable, term, genarkPrefix, wildCard); + } /* 1 == wordCount */ + } /* termLength > 2 */ +else if (sqlTableExists(conn, genarkTbl)) { char query[1024]; if (colCount > 6) { sqlSafef(query, sizeof(query), "select * from %s where " "(gcAccession like '%%%s%%' or scientificName like '%%%s%%' or commonName like '%%%s%%' or asmName like '%%%s%%') order by priority", genarkTbl, term, term, term, term); } else { sqlSafef(query, sizeof(query), "select * from %s where " "(gcAccession like '%%%s%%' or scientificName like '%%%s%%' or commonName like '%%%s%%' or asmName like '%%%s%%') order by taxId ASC, commonName DESC", genarkTbl, term, term, term, term); } struct genark *matchList = genarkLoadByQuery(conn, query);