621822a450d38d641f59df03aa461fa7dba64da6
hiram
  Fri Sep 27 15:24:08 2024 -0700
now with one word searches on the assemblyList table refs #32596

diff --git src/hg/hgGateway/hgGateway.c src/hg/hgGateway/hgGateway.c
index f262d8e..ca8f0f5 100644
--- src/hg/hgGateway/hgGateway.c
+++ src/hg/hgGateway/hgGateway.c
@@ -19,30 +19,31 @@
 #include "hgConfig.h"
 #include "hdb.h"
 #include "htmshell.h"
 #include "hubConnect.h"
 #include "hui.h"
 #include "jsHelper.h"
 #include "jsonParse.h"
 #include "obscure.h"  // for readInGulp
 #include "regexHelper.h"
 #include "suggest.h"
 #include "trackHub.h"
 #include "web.h"
 #include "botDelay.h"
 #include "genark.h"
 #include "assemblyList.h"
+#include <limits.h>
 
 /* Global Variables */
 struct cart *cart = NULL;             /* CGI and other variables */
 struct hash *oldVars = NULL;          /* Old contents of cart before it was updated by CGI */
 
 static boolean issueBotWarning = FALSE;
 static int measureTiming = 0;
 static long enteredMainTime = 0;
 
 #define SEARCH_TERM "hggw_term"
 
 static char *maybeGetDescriptionText(char *db)
 /* Slurp the description.html file for db into a string (if possible, don't die if
  * we can't read it) and return it. */
 {
@@ -748,34 +749,34 @@
 match->aDb = cloneString(aDb);
 match->label = cloneString(label);
 return match;
 }
 
 // Genark  hub match:
 struct gHubMatch
     // description of an genark hub db
     {
     struct gHubMatch *next;
     char *gcAccession;
     char *hubUrl;
     char *asmName;
     char *scientificName;
     char *commonName;
-    int priority; // reserver for later ranking, currently unused
+    unsigned priority; // ranking value, currently unused; a caller passing -1 stores UINT_MAX
     };
 
-static struct gHubMatch *gHubMatchNew(char *acc, char *hubUrl, char *asmName, char *scientificName, char *commonName, int priority)
+static struct gHubMatch *gHubMatchNew(char *acc, char *hubUrl, char *asmName, char *scientificName, char *commonName, unsigned priority)
 /* Allocate and return a description of an assembly hub db. */
+/* Every string argument is copied with cloneString(); priority is stored as passed. */
 {
 struct gHubMatch *match;
 AllocVar(match);
 match->gcAccession = cloneString(acc);
 match->hubUrl = cloneString(hubUrl);
 match->asmName = cloneString(asmName);
 match->scientificName = cloneString(scientificName);
 match->commonName = cloneString(commonName);
 match->priority = priority;
 return match;
 }
 
 static struct hash *unpackHubDbUrlList(struct slName *hubDbUrlList, struct hash **labelHash)
 /* hubDbUrlList contains strings like "db\tlabel\thubUrl" -- split on tab and return a hash of
@@ -958,42 +959,107 @@
 struct genark *match;
 struct gHubMatch *ret = NULL;
 
 for (match = matchList; match != NULL; match = match->next)
     {
     // the match contains tab-sep accession, hubUrl, asmName, scientificName, commonName
     char hubUrl[PATH_LEN+1];
     safef(hubUrl, sizeof(hubUrl), "%s/%s", genarkHubUrl, match->hubUrl);
     slAddHead(&ret, gHubMatchNew(match->gcAccession, hubUrl, match->asmName, match->scientificName, match->commonName, -1));
     }
 if (ret)
     slReverse(&ret);
 return ret;
 }
 
+static struct gHubMatch *filterAssemblyListMatches(struct sqlConnection *conn,
+   char *asmListTable, char *term, char *genarkPrefix, boolean wildCard)
+/* Run a full-text (BOOLEAN MODE) search for term on asmListTable, restricted
+ * to rows with browserExists=1, and return the GenArk assemblies among the
+ * hits as a gHubMatch list in result order (NULL when there are none).
+ * When wildCard is TRUE the term is searched as a prefix (term*).
+ * Each returned hubUrl is "genarkPrefix/hubUrl".  At most maxGenArk
+ * matches are returned. */
+{
+const int maxGenArk = 20;	/* allow only 20 genArk returns */
+struct gHubMatch *ret = NULL;
+struct dyString *query = dyStringNew(64);
+/* LIMIT of 100 will allow enough results to include some genArk assemblies */
+if (wildCard)
+    sqlDyStringPrintf(query, "SELECT * FROM %s WHERE MATCH(name, commonName, scientificName, clade, description, refSeqCategory, versionStatus, assemblyLevel) AGAINST ('%s*' IN BOOLEAN MODE) AND browserExists=1 LIMIT 100", asmListTable, term);
+else
+    sqlDyStringPrintf(query, "SELECT * FROM %s WHERE MATCH(name, commonName, scientificName, clade, description, refSeqCategory, versionStatus, assemblyLevel) AGAINST ('%s' IN BOOLEAN MODE) AND browserExists=1 LIMIT 100", asmListTable, term);
+
+struct sqlResult *sr = sqlGetResult(conn, query->string);
+dyStringFree(&query);
+char **row;
+int genArkCount = 0;
+/* test the count first so no row is fetched once the cap is reached */
+while (genArkCount < maxGenArk && (row = sqlNextRow(sr)) != NULL)
+    {
+    struct assemblyList *el = assemblyListLoadWithNull(row);
+    /* a row without a hubUrl cannot be turned into a usable hub link */
+    if (el->hubUrl != NULL && isGenArk(el->name))
+	{
+	char genarkUrl[PATH_MAX];
+	safef(genarkUrl, sizeof(genarkUrl), "%s/%s", genarkPrefix, el->hubUrl);
+	/* priority is a pointer that may be NULL when loaded WithNull;
+	 * UINT_MAX is the same value a -1 argument becomes in gHubMatchNew() */
+	unsigned priority = (el->priority != NULL) ? *el->priority : UINT_MAX;
+	slAddHead(&ret, gHubMatchNew(el->name, genarkUrl, NULL, el->scientificName, el->commonName, priority));
+	++genArkCount;
+	}
+    /* gHubMatchNew() cloned every string it keeps, so release the row */
+    assemblyListFree(&el);
+    }
+sqlFreeResult(&sr);
+
+if (ret)
+    slReverse(&ret);
+return ret;
+}
+
 static struct gHubMatch *searchGenark(char *term)
-/* Search through the genark table for hubs matches term */
+/* Search through the genark table (or assemblyList table) for hubs
+   that match term */
 {
 char *genarkPrefix = cfgOption("genarkHubPrefix");
 if (genarkPrefix == NULL)
     return NULL;
 
+struct sqlConnection *conn = hConnectCentral();
 struct gHubMatch *gHubMatchList = NULL;
 char *genarkTbl = genarkTableName();
 int colCount = genArkColumnCount();
-struct sqlConnection *conn = hConnectCentral();
-if (sqlTableExists(conn, genarkTbl))
+int termLength = strlen(term);
+
+char *asmListTable = assemblyListTableName();
+/* only allow the asmList query when the search term is more than 2 letters */
+if ((termLength > 2) && sqlTableExists(conn, asmListTable))
+    {
+    int wordCount = chopByWhite(term, NULL, 0);
+    if (1 == wordCount)
+	{
+	struct dyString *query = dyStringNew(64);
+	sqlDyStringPrintf(query, "SELECT COUNT(*) FROM %s WHERE MATCH(name, commonName, scientificName, clade, description, refSeqCategory, versionStatus, assemblyLevel) AGAINST ('%s' IN BOOLEAN MODE) AND browserExists=1", asmListTable, term);
+
+	long long matchCount = sqlQuickLongLong(conn, query->string);
+
+	dyStringFree(&query);
+        boolean wildCard = FALSE;
+        if (0 == matchCount)	/* try prefix search */
+	    {
+	    query = dyStringNew(64);
+	    sqlDyStringPrintf(query, "SELECT COUNT(*) FROM %s WHERE MATCH(name, commonName, scientificName, clade, description, refSeqCategory, versionStatus, assemblyLevel) AGAINST ('%s*' IN BOOLEAN MODE) AND browserExists=1", asmListTable, term);
+	    matchCount = sqlQuickLongLong(conn, query->string);
+	    dyStringFree(&query);
+            if (matchCount > 0)
+		wildCard = TRUE;
+	    }
+        if (matchCount > 0)
+            gHubMatchList = filterAssemblyListMatches(conn, asmListTable, term, genarkPrefix, wildCard);
+	}	/* 1 == wordCount  */
+    }	/* termLength > 2	*/
+else if (sqlTableExists(conn, genarkTbl))
     {
     char query[1024];
     if (colCount > 6)
 	{
 	sqlSafef(query, sizeof(query), "select * from %s where "
              "(gcAccession like '%%%s%%' or scientificName like '%%%s%%' or commonName like '%%%s%%' or asmName like '%%%s%%') order by priority",
              genarkTbl, term, term, term, term);
 	}
     else
 	{
 	sqlSafef(query, sizeof(query), "select * from %s where "
              "(gcAccession like '%%%s%%' or scientificName like '%%%s%%' or commonName like '%%%s%%' or asmName like '%%%s%%') order by taxId ASC, commonName DESC",
              genarkTbl, term, term, term, term);
 	}
     struct genark *matchList = genarkLoadByQuery(conn, query);