aa59ba153582b1c462f61272c1689f7c6f8398f9
larrym
  Tue Jun 26 16:22:13 2012 -0700
Include description in gene suggest list to accommodate ambiguous entries (#5962)
diff --git src/hg/hgSuggest/hgSuggest.c src/hg/hgSuggest/hgSuggest.c
index a0092bb..a031430 100644
--- src/hg/hgSuggest/hgSuggest.c
+++ src/hg/hgSuggest/hgSuggest.c
@@ -1,81 +1,79 @@
 /* hgGeneSuggest - suggest a gene. */
 
 #include "common.h"
 #include "jksql.h"
 #include "hdb.h"
 #include "cheapcgi.h"
 #include "dystring.h"
 #include "suggest.h"
 
-
 static void fail(char *msg)
 {
 puts("Status: 400\n\n");
 puts(msg);
 exit(-1);
 }
 
 int main(int argc, char *argv[])
 {
 char *prefix = sqlEscapeString(cgiOptionalString("prefix"));
 char *database = sqlEscapeString(cgiOptionalString("db"));
 int exact = cgiOptionalInt("exact", 0);
 struct sqlConnection *conn;
 char query[2048];
 char **row;
 struct sqlResult *sr;
 int count = 0;
 boolean hasKnownCanonical;
 struct dyString *str = newDyString(10000);
-char *table, previous[256];
+char *table;
 
 if(prefix == NULL || database == NULL)
     fail("Missing prefix or database parameter");
 
 conn = hAllocConn(database);
 table = connGeneSuggestTable(conn);
 if(table == NULL)
     fail("gene autosuggest is not supported for this assembly");
 
 hasKnownCanonical = sameString(table, "knownCanonical");
 
 puts("Content-Type:text/plain");
 puts("\n");
 
 dyStringPrintf(str, "[\n");
 
-// We have to choose one isoform, so we choose the largest one; we order by chrom to make sure we choose normal chromosomes 
-// over _random and _hap chromosomes (see redmine #4257).
 if(exact)
     {
     if(hasKnownCanonical)
-        safef(query, sizeof(query), "select x.geneSymbol, k.chrom, k2.txStart, k2.txEnd from knownCanonical k, knownGene k2, kgXref x where k.transcript = x.kgID and k.transcript = k2.name and x.geneSymbol = '%s' order by x.geneSymbol, k.chrom, k2.txEnd - k2.txStart desc", prefix);
+        safef(query, sizeof(query), "select x.geneSymbol, k.chrom, kg.txStart, kg.txEnd, x.kgID, x.description from knownCanonical k, knownGene kg, kgXref x where k.transcript = x.kgID and k.transcript = kg.name and x.geneSymbol = '%s' order by x.geneSymbol, k.chrom, kg.txEnd - kg.txStart desc", prefix);
     else
-        safef(query, sizeof(query), "select r.name2, r.chrom, r.txStart, r.txEnd from %s r where r.name2 = '%s' order by r.name2, r.chrom, r.txEnd - r.txStart desc", table, prefix);
+        safef(query, sizeof(query), "select r.name2, r.chrom, r.txStart, r.txEnd, r.name, description.name from %s r, gbCdnaInfo, description where r.name2 = '%s' and gbCdnaInfo.description = description.id order by r.name2, r.chrom, r.txEnd - r.txStart desc", table, prefix);
     }
 else
     {
     // We use a LIKE query b/c it uses the geneSymbol index (substr queries do not use indices in mysql).
     // Also note that we take advantage of the fact that searches are case-insensitive in mysql.
-    // Some tables have duplicates (e.g. 2 TTn's in mm9 knownCanonical); currently the larger one wins.
+    // Unfortunately, knownCanonical sometimes has multiple entries for a given gene (e.g. 2 TTn's in mm9 knownCanonical;
+    // 3 POU5F1's in hg19); we return all of them (#5962).
     if(hasKnownCanonical)
-        safef(query, sizeof(query), "select x.geneSymbol, k.chrom, k2.txStart, k2.txEnd from knownCanonical k, knownGene k2, kgXref x where k.transcript = x.kgID and k.transcript = k2.name and x.geneSymbol LIKE '%s%%' order by x.geneSymbol, k.chrom, k2.txEnd - k2.txStart desc", prefix);
+        safef(query, sizeof(query), "select x.geneSymbol, k.chrom, kg.txStart, kg.txEnd, x.kgID, x.description from knownCanonical k, knownGene kg, kgXref x where k.transcript = x.kgID and k.transcript = kg.name and x.geneSymbol LIKE '%s%%' order by x.geneSymbol, k.chrom, kg.txStart", prefix);
     else
-        safef(query, sizeof(query), "select r.name2, r.chrom, r.txStart, r.txEnd from %s r where r.name2 LIKE '%s%%' order by r.name2, r.chrom, r.txEnd - r.txStart desc", table, prefix);
+        safef(query, sizeof(query), "select r.name2, r.chrom, r.txStart, r.txEnd, r.name2, description.name from %s r, gbCdnaInfo, description where r.name2 LIKE '%s%%' and acc = r.name and gbCdnaInfo.description = description.id order by r.name2, r.chrom, r.txStart", table, prefix);
     }
 sr = sqlGetResult(conn, query);
-previous[0] = 0;
 while ((row = sqlNextRow(sr)) != NULL)
     {
-    if(!previous[0] || !sameString(previous, row[0]))
+    // ignore funny chroms (e.g. _hap chroms). See redmine #4257.
+    if(!strchr(row[1], '_'))
         {
         count++;
-        dyStringPrintf(str, "%s{\"value\": \"%s\", \"id\": \"%s:%d-%s\"}", count == 1 ? "" : ",\n",
-                       row[0], row[1], atoi(row[2])+1, row[3]);
-        safecpy(previous, sizeof(previous), row[0]);
+        dyStringPrintf(str, "%s{\"value\": \"%s (%s)\", \"id\": \"%s:%d-%s\", \"internalId\": \"%s\"}", count == 1 ? "" : ",\n",
+                       row[0], javaScriptLiteralEncode(row[5]), row[1], atoi(row[2])+1, row[3], javaScriptLiteralEncode(row[4]));
         }
     }
+
 dyStringPrintf(str, "\n]\n");
 puts(dyStringContents(str));
 return 0;
 }