bffbb000a3df4b88a8117ea5f852ab3abeea6e17
angie
  Mon Sep 24 14:42:01 2018 -0700
Added a new mode to hgSuggest: altOrPatch, to match alt haplo or fix patch sequence names.  Use it in hgTracks' multi-region dialog.  Make the dialog inputs activate their respective radio buttons.  refs #18854

diff --git src/hg/hgSuggest/hgSuggest.c src/hg/hgSuggest/hgSuggest.c
index 08f258c..2f740f4 100644
--- src/hg/hgSuggest/hgSuggest.c
+++ src/hg/hgSuggest/hgSuggest.c
@@ -1,107 +1,192 @@
 /* hgGeneSuggest - suggest a gene. */
 
 /* Copyright (C) 2013 The Regents of the University of California 
  * See README in this or parent directory for licensing information. */
 
 #include "common.h"
 #include "jksql.h"
 #include "hdb.h"
 #include "cheapcgi.h"
 #include "htmshell.h"
 #include "dystring.h"
 #include "jsonParse.h"
+#include "jsonWrite.h"
 #include "suggest.h"
 #include "genbank.h"
 
+// Optional CGI param type can specify what kind of thing to suggest (default: gene)
+#define ALT_OR_PATCH "altOrPatch"
 
-int main(int argc, char *argv[])
+void suggestGene(char *database, char *table, char *prefix)
+/* Print a JSON list of genes whose symbol starts with prefix (equals it if CGI exact != 0). */
 {
-long enteredMainTime = clock1000();
-
-cgiSpoof(&argc, argv);
-char *prefix = cgiOptionalString("prefix");
-char *database = cgiOptionalString("db");
-int exact = cgiOptionalInt("exact", 0);
-char query[2048];
-char **row;
-struct sqlResult *sr;
-int count = 0;
-boolean hasKnownCanonical;
 struct dyString *str = newDyString(10000);
-char *table;
-
-pushWarnHandler(htmlVaBadRequestAbort);
-pushAbortHandler(htmlVaBadRequestAbort);
-if(prefix == NULL || database == NULL)
-    errAbort("%s", "Missing prefix and/or db CGI parameter");
-
-initGenbankTableNames(database);
-struct sqlConnection *conn = hAllocConn(database);
-table = connGeneSuggestTable(conn);
-if(table == NULL)
-    errAbort("gene autosuggest is not supported for db '%s'", database);
-popWarnHandler();
-popAbortHandler();
-
-hasKnownCanonical = sameString(table, "knownCanonical");
-
-puts("Content-Type:text/plain");
-puts("\n");
-
 dyStringPrintf(str, "[\n");
 
+int exact = cgiOptionalInt("exact", 0);
+boolean hasKnownCanonical = sameString(table, "knownCanonical");
+initGenbankTableNames(database);
+char query[2048];
 if(exact)
     {
-    // NOTE that exact is no longer used by the UI as of v271, but there are still some robots using it so we still support it.
+    // NOTE that exact is no longer used by the UI as of v271, but there are still some robots
+    // using it so we still support it.
     if(hasKnownCanonical)
-        sqlSafef(query, sizeof(query), "select x.geneSymbol, k.chrom, kg.txStart, kg.txEnd, x.kgID, x.description "
-              "from knownCanonical k, knownGene kg, kgXref x where k.transcript = x.kgID and k.transcript = kg.name "
-              "and x.geneSymbol = '%s' order by x.geneSymbol, k.chrom, kg.txEnd - kg.txStart desc", prefix);
+        sqlSafef(query, sizeof(query),
+                 "select x.geneSymbol, k.chrom, kg.txStart, kg.txEnd, x.kgID, x.description "
+                 "from knownCanonical k, knownGene kg, kgXref x "
+                 "where k.transcript = x.kgID and k.transcript = kg.name and x.geneSymbol = '%s' "
+                 "order by x.geneSymbol, k.chrom, kg.txEnd - kg.txStart desc", prefix);
     else
-        sqlSafef(query, sizeof(query), "select r.name2, r.chrom, r.txStart, r.txEnd, r.name, d.name "
-              "from %s r, %s g, %s d where r.name2 = '%s' and g.acc = r.name "
-              "and g.description = d.id order by r.name2, r.chrom, r.txEnd - r.txStart desc", table, gbCdnaInfoTable, descriptionTable, prefix);
+        sqlSafef(query, sizeof(query),
+                 "select r.name2, r.chrom, r.txStart, r.txEnd, r.name, d.name "
+                 "from %s r, %s g, %s d "
+                 "where r.name2 = '%s' and g.acc = r.name and g.description = d.id "
+                 "order by r.name2, r.chrom, r.txEnd - r.txStart desc",
+                 table, gbCdnaInfoTable, descriptionTable, prefix);
     }
 else
     {
     // We use a LIKE query b/c it uses the geneSymbol index (substr queries do not use indices in mysql).
     // Also note that we take advantage of the fact that searches are case-insensitive in mysql.
     // Unfortunately, knownCanonical sometimes has multiple entries for a given gene (e.g. 2 TTn's in mm9 knownCanonical;
     // 3 POU5F1's in hg19); we return all of them (#5962).
     if(hasKnownCanonical)
-        sqlSafef(query, sizeof(query), "select x.geneSymbol, k.chrom, kg.txStart, kg.txEnd, x.kgID, x.description "
-              "from knownCanonical k, knownGene kg, kgXref x where k.transcript = x.kgID and k.transcript = kg.name "
-              "and x.geneSymbol LIKE '%s%%' order by x.geneSymbol, k.chrom, kg.txStart", prefix);
+        sqlSafef(query, sizeof(query),
+                 "select x.geneSymbol, k.chrom, kg.txStart, kg.txEnd, x.kgID, x.description "
+                 "from knownCanonical k, knownGene kg, kgXref x "
+                 "where k.transcript = x.kgID and k.transcript = kg.name "
+                 "and x.geneSymbol LIKE '%s%%' "
+                 "order by x.geneSymbol, k.chrom, kg.txStart", prefix);
     else
         sqlSafef(query, sizeof(query), "select r.name2, r.chrom, r.txStart, r.txEnd, r.name, d.name "
-              "from %s r, %s g, %s d where r.name2 LIKE '%s%%' and g.acc = r.name "
-              "and g.description = d.id order by r.name2, r.chrom, r.txStart", table, gbCdnaInfoTable, descriptionTable, prefix);
+                 "from %s r, %s g, %s d "
+                 "where r.name2 LIKE '%s%%' and g.acc = r.name and g.description = d.id "
+                 "order by r.name2, r.chrom, r.txStart",
+                 table, gbCdnaInfoTable, descriptionTable, prefix);
     }
-sr = sqlGetResult(conn, query);
+struct sqlConnection *conn = hAllocConn(database);
+struct sqlResult *sr = sqlGetResult(conn, query);
+char **row;
+int count = 0;
 while ((row = sqlNextRow(sr)) != NULL)
     {
     // ignore funny chroms (e.g. _hap chroms. See redmine #4257.
     if(!strchr(row[1], '_'))
         {
         // We have some very long descriptions, e.g. 4277 chars for hg38 CLOCK, so truncate:
         const int maxDesc = 120;
         char *description = row[5];
         if (strlen(description) > maxDesc + 4)
             strcpy(description + maxDesc, "...");
         count++;
         dyStringPrintf(str, "%s{\"value\": \"%s (%s)\", "
                        "\"id\": \"%s:%d-%s\", "
                        "\"geneSymbol\": \"%s\", "
                        "\"internalId\": \"%s\"}",
                        count == 1 ? "" : ",\n", row[0], jsonStringEscape(description),
                        row[1], atoi(row[2])+1, row[3],
                        jsonStringEscape(row[0]),
                        jsonStringEscape(row[4]));
         }
     }
-
+hFreeConn(&conn);
 dyStringPrintf(str, "\n]\n");
 puts(dyStringContents(str));
+}
+
+struct slName *queryQNames(struct sqlConnection *conn, char *table, char *prefix)
+/* Return the distinct qNames in table that match prefix as a SQL LIKE prefix pattern,
+ * ordered by qName.  Return NULL if table does not exist. */
+{
+if (!sqlTableExists(conn, table))
+    return NULL;
+char *likePrefix = sqlLikeFromWild(prefix);
+char sql[2048];
+sqlSafef(sql, sizeof sql,
+         "select distinct(qName) from %s where qName like '%s%%' order by qName",
+         table, likePrefix);
+return sqlQuickList(conn, sql);
+}
+
+void writeAltFixMatches(struct jsonWrite *jw, struct slName *matches, char *category)
+/* For each name in matches that contains '_', append to jw a JSON object holding the name as
+ * "value" and, when category is non-empty, a "category" string as well. */
+{
+struct slName *el;
+for (el = matches;  el != NULL;  el = el->next)
+    {
+    if (strchr(el->name, '_') == NULL)
+        continue;
+    jsonWriteObjectStart(jw, NULL);
+    jsonWriteString(jw, "value", el->name);
+    if (isNotEmpty(category))
+        jsonWriteString(jw, "category", category);
+    jsonWriteObjectEnd(jw);
+    }
+}
+
+void suggestAltOrPatch(char *database, char *prefix)
+/* Print to stdout a JSON list of objects naming the fix patch and alternate haplotype
+ * sequences in database whose names match prefix. */
+{
+struct sqlConnection *conn = hAllocConn(database);
+struct slName *fixNames = queryQNames(conn, "fixSeqLiftOverPsl", prefix);
+struct slName *altNames = queryQNames(conn, "altSeqLiftOverPsl", prefix);
+hFreeConn(&conn);
+struct jsonWrite *jw = jsonWriteNew();
+jsonWriteListStart(jw, NULL);
+// Label each group with a category only when both kinds of match were found.
+writeAltFixMatches(jw, fixNames, (altNames != NULL) ? "Fix Patches" : "");
+writeAltFixMatches(jw, altNames, (fixNames != NULL) ? "Alt Patches" : "");
+jsonWriteListEnd(jw);
+puts(dyStringContents(jw->dy));
+jsonWriteFree(&jw);
+}
+
+char *checkParams(char *database, char *prefix, char *type)
+/* Quit with HTTP Bad Request if CGI params are invalid; return gene table, or NULL for altOrPatch. */
+{
+pushWarnHandler(htmlVaBadRequestAbort);
+pushAbortHandler(htmlVaBadRequestAbort);
+if(prefix == NULL || database == NULL)
+    errAbort("%s", "Missing prefix and/or db CGI parameter");
+if (! hDbIsActive(database))
+    errAbort("'%s' is not a valid, active database", htmlEncode(database));
+if (isNotEmpty(type) && differentString(type, ALT_OR_PATCH))
+    errAbort("'%s' is not a valid type", htmlEncode(type)); // raw user input, same as database
+char *table = NULL;
+if (! sameOk(type, ALT_OR_PATCH))
+    {
+    struct sqlConnection *conn = hAllocConn(database);
+    table = connGeneSuggestTable(conn);
+    hFreeConn(&conn);
+    if(table == NULL)
+        errAbort("gene autosuggest is not supported for db '%s'", database);
+    }
+popWarnHandler();
+popAbortHandler();
+return table;
+}
+
+int main(int argc, char *argv[])
+/* CGI entry point: check params, print the header, then gene or alt/patch suggestions. */
+{
+long enteredMainTime = clock1000();
+cgiSpoof(&argc, argv);
+char *prefix = cgiOptionalString("prefix");
+char *type = cgiOptionalString("type");
+char *database = cgiOptionalString("db");
+char *table = checkParams(database, prefix, type);
+
+puts("Content-Type:text/plain");
+puts("\n");
+
+if (! sameOk(type, ALT_OR_PATCH))
+    suggestGene(database, table, prefix);
+else
+    suggestAltOrPatch(database, prefix);
+
 cgiExitTime("hgSuggest", enteredMainTime);
 return 0;
 }