b62fb9699fbc8ae0ef6ac9bf8e8760831e6f5ba2
chmalee
  Tue Apr 30 13:24:22 2024 -0700
Add genark assemblies to species search on hgGateway, refs #33572

diff --git src/hg/hgGateway/hgGateway.c src/hg/hgGateway/hgGateway.c
index 2df826f..6da5f8a 100644
--- src/hg/hgGateway/hgGateway.c
+++ src/hg/hgGateway/hgGateway.c
@@ -17,30 +17,31 @@
 #include "googleAnalytics.h"
 #include "hCommon.h"
 #include "hgConfig.h"
 #include "hdb.h"
 #include "htmshell.h"
 #include "hubConnect.h"
 #include "hui.h"
 #include "jsHelper.h"
 #include "jsonParse.h"
 #include "obscure.h"  // for readInGulp
 #include "regexHelper.h"
 #include "suggest.h"
 #include "trackHub.h"
 #include "web.h"
 #include "botDelay.h"
+#include "genark.h"
 
 /* Global Variables */
 struct cart *cart = NULL;             /* CGI and other variables */
 struct hash *oldVars = NULL;          /* Old contents of cart before it was updated by CGI */
 
 static boolean issueBotWarning = FALSE;
 static int measureTiming = 0;
 static long enteredMainTime = 0;
 
 #define SEARCH_TERM "hggw_term"
 
 static char *maybeGetDescriptionText(char *db)
 /* Slurp the description.html file for db into a string (if possible, don't die if
  * we can't read it) and return it. */
 {
@@ -714,53 +715,79 @@
     if (! (startsWith(term, "BRO") || startsWith(term, "WU") || startsWith(term, "BAY") ||
            startsWith(term, "AGE")))
         {
         // dbDb.description also starts with dates followed by actual description in parentheses,
         // so search only the part in parentheses to avoid month prefix matches.
         char *leftP = strchr(dbDb->description, '(');
         char *toSearch = leftP ? leftP+1 : dbDb->description;
         checkTerm(term, toSearch, ddmtDescription, dbDb, matchHash, &matchList);
         }
     }
 slSort(&matchList, dbDbMatchCmp);
 return matchList;
 }
 
 // Assembly hub match:
-
 struct aHubMatch
     // description of an assembly hub db
     {
     struct aHubMatch *next;
     char *shortLabel;          // hub shortLabel
     char *hubUrl;              // hub url
     char *aDb;                 // assembly db hosted by hub
     char *label;               // label for this db
     };
 
 static struct aHubMatch *aHubMatchNew(char *shortLabel, char *hubUrl, char *aDb, char *label)
 /* Allocate and return a description of an assembly hub db. */
 {
 struct aHubMatch *match;
 AllocVar(match);
 match->shortLabel = cloneString(shortLabel);
 match->hubUrl = cloneString(hubUrl);
 match->aDb = cloneString(aDb);
 match->label = cloneString(label);
 return match;
 }
 
+// GenArk hub match:
+struct gHubMatch
+    // description of a GenArk hub assembly that matched the search term
+    {
+    struct gHubMatch *next;    // next match in the list
+    char *gcAccession;         // accession from the genark table row; written as JSON "genome"
+    char *hubUrl;              // hub url: genarkHubPrefix config value + the row's hubUrl
+    char *asmName;             // assembly name; written as JSON "db" and "value"
+    char *scientificName;      // scientific name; second half of the JSON "label"
+    char *commonName;          // common name; first half of the JSON "label"
+    int priority;              // reserved for later ranking, currently unused
+    };
+
+static struct gHubMatch *gHubMatchNew(char *acc, char *hubUrl, char *asmName, char *scientificName, char *commonName, int priority)
+/* Allocate and return a description of a GenArk hub db; each string is copied with cloneString. */
+{
+struct gHubMatch *match;
+AllocVar(match);
+match->gcAccession = cloneString(acc);
+match->hubUrl = cloneString(hubUrl);
+match->asmName = cloneString(asmName);
+match->scientificName = cloneString(scientificName);
+match->commonName = cloneString(commonName);
+match->priority = priority;
+return match;
+}
+
 static struct hash *unpackHubDbUrlList(struct slName *hubDbUrlList, struct hash **labelHash)
 /* hubDbUrlList contains strings like "db\tlabel\thubUrl" -- split on tab and return a hash of
  * hubUrl to one or more dbs. */
 {
 struct hash *hubToDb = hashNew(0);
 struct hash *dbToLabel = hashNew(0);
 struct slName *hubDbUrl;
 for (hubDbUrl = hubDbUrlList;  hubDbUrl != NULL;  hubDbUrl = hubDbUrl->next)
     {
     char *tab = strchr(hubDbUrl->name, '\t');
     if (tab)
         {
         char *db = hubDbUrl->name;
         *tab = '\0';
         char *label = tab+1;
@@ -892,67 +919,127 @@
     aHubMatchList = filterHubSearchTextMatches(dbDbList, hubDbUrlList);
     if (aHubMatchList == NULL)
         {
         // Try a looser query
         sqlSafef(query, sizeof(query), "select distinct(concat(db, concat(concat('\t', label), concat('\t', hubUrl)))) from %s "
                  "where track = '' and text like '%% %s%%'",
                  hubSearchTableName, term);
         hubDbUrlList = sqlQuickList(conn, query);
         aHubMatchList = filterHubSearchTextMatches(dbDbList, hubDbUrlList);
         }
     }
 hDisconnectCentral(&conn);
 return aHubMatchList;
 }
 
+static void writeGenarkMatches(struct jsonWrite *jw, struct gHubMatch *gHubMatchList)
+/* Write out a JSON object for each genark hub that matched the user's term */
+{
+struct gHubMatch *gHubMatch;
+for (gHubMatch = gHubMatchList;  gHubMatch != NULL;  gHubMatch = gHubMatch->next)
+    {
+    jsonWriteObjectStart(jw, NULL);
+    jsonWriteString(jw, "genome", gHubMatch->gcAccession);
+    jsonWriteString(jw, "db", gHubMatch->asmName);
+    jsonWriteString(jw, "hubUrl", gHubMatch->hubUrl);
+    // Add a category label for customized autocomplete-with-categories.
+    jsonWriteString(jw, "category", "GenArk");
+    jsonWriteString(jw, "value", gHubMatch->asmName);
+    // The label is "commonName - scientificName"; the assembly name is carried in "value".
+    jsonWriteStringf(jw, "label", "%s - %s", gHubMatch->commonName, gHubMatch->scientificName);
+    jsonWriteObjectEnd(jw);
+    }
+}
+
+static struct gHubMatch *filterGenarkMatches(char *genarkHubUrl, struct genark *matchList)
+/* Turn the sql results into a struct gHubMatch list; each hubUrl is prefixed with genarkHubUrl and priority is -1. */
+{
+struct genark *match;
+struct gHubMatch *ret = NULL;
+
+for (match = matchList; match != NULL; match = match->next)
+    {
+    // NOTE(review): gHubMatchNew clones hubUrl, so if catTwoStrings allocates (TODO confirm) this copy is never freed.
+    char *hubUrl = catTwoStrings(genarkHubUrl, match->hubUrl);
+    slAddHead(&ret, gHubMatchNew(match->gcAccession, hubUrl, match->asmName, match->scientificName, match->commonName, -1));
+    }
+return ret;
+}
+
+static struct gHubMatch *searchGenark(char *term)
+/* Search the genark table for rows whose accession, names or assembly name contain term; NULL if genarkHubPrefix is unset or the table is missing. */
+{
+char *genarkPrefix = cfgOption("genarkHubPrefix");
+if (genarkPrefix == NULL)
+    return NULL;
+
+struct gHubMatch *gHubMatchList = NULL;
+char *genarkTbl = genarkTableName();
+struct sqlConnection *conn = hConnectCentral();
+if (sqlTableExists(conn, genarkTbl))
+    {
+    char query[1024];
+    sqlSafef(query, sizeof(query), "select * from %s where "
+             "(gcAccession like '%%%s%%' or scientificName like '%%%s%%' or commonName like '%%%s%%' or asmName like '%%%s%%')",
+             genarkTbl, term, term, term, term);
+    struct genark *matchList = genarkLoadByQuery(conn, query);
+    gHubMatchList = filterGenarkMatches(genarkPrefix, matchList);
+    }
+hDisconnectCentral(&conn);
+return gHubMatchList;
+}
+
 static char *getSearchTermUpperCase()
 /* If we don't have the SEARCH_TERM cgi param, exit with an HTTP Bad Request response.
  * If we do, convert it to upper case for case-insensitive matching and return it. */
 {
 pushWarnHandler(htmlVaBadRequestAbort);
 pushAbortHandler(htmlVaBadRequestAbort);
 char *cgiTerm = cgiOptionalString(SEARCH_TERM);
 char *term = skipLeadingSpaces(cgiTerm);
 eraseTrailingSpaces(term);
 touppers(term);
 if (isEmpty(term))
     errAbort("Missing required CGI parameter %s", SEARCH_TERM);
 popWarnHandler();
 popAbortHandler();
 return term;
 }
 
 static void lookupTerm()
 /* Look for matches to term in hgcentral and print as JSON for autocomplete if found. */
 {
 char *term = getSearchTermUpperCase();
 
 // Write JSON response with list of matches
 puts("Content-Type:text/javascript\n");
 
 // Before accessing hubs, intialize udc cache location from hg.conf:
 setUdcCacheDir();
 struct dbDb *dbDbList = hDbDbList();
 struct dbDbMatch *matchList = searchDbDb(dbDbList, term);
+struct gHubMatch *gHubMatchList = searchGenark(term);
 struct aHubMatch *aHubMatchList = searchPublicHubs(dbDbList, term);
 struct jsonWrite *jw = jsonWriteNew();
 jsonWriteListStart(jw, NULL);
 // Write out JSON for dbDb matches, if any; add category if we found assembly hub matches too.
 char *category = aHubMatchList ? "UCSC databases" : NULL;
 struct dbDbMatch *match;
 for (match = matchList;  match != NULL;  match = match->next)
     writeDbDbMatch(jw, match, term, category);
+// Write out genark matches, if any.  NOTE(review): category above depends only on aHubMatchList, so dbDb matches are uncategorized when only GenArk matches follow -- confirm intended.
+writeGenarkMatches(jw, gHubMatchList);
 // Write out assembly hub matches, if any.
 writeAssemblyHubMatches(jw, aHubMatchList);
 jsonWriteListEnd(jw);
 puts(jw->dy->string);
 jsonWriteFree(&jw);
 }
 
 int main(int argc, char *argv[])
 /* Process CGI / command line. */
 {
 /* Null terminated list of CGI Variables we don't want to save
  * permanently. */
 char *excludeVars[] = {SEARCH_TERM, CARTJSON_COMMAND, NULL,};
 cgiSpoof(&argc, argv);
 measureTiming = cgiOptionalInt("measureTiming", 0);