aa59ba153582b1c462f61272c1689f7c6f8398f9 larrym Tue Jun 26 16:22:13 2012 -0700 Include description in gene suggest list to accomodate ambiguous entries (#5962) diff --git src/hg/hgSuggest/hgSuggest.c src/hg/hgSuggest/hgSuggest.c index a0092bb..a031430 100644 --- src/hg/hgSuggest/hgSuggest.c +++ src/hg/hgSuggest/hgSuggest.c @@ -1,81 +1,79 @@ /* hgGeneSuggest - suggest a gene. */ #include "common.h" #include "jksql.h" #include "hdb.h" #include "cheapcgi.h" #include "dystring.h" #include "suggest.h" - static void fail(char *msg) { puts("Status: 400\n\n"); puts(msg); exit(-1); } int main(int argc, char *argv[]) { char *prefix = sqlEscapeString(cgiOptionalString("prefix")); char *database = sqlEscapeString(cgiOptionalString("db")); int exact = cgiOptionalInt("exact", 0); struct sqlConnection *conn; char query[2048]; char **row; struct sqlResult *sr; int count = 0; boolean hasKnownCanonical; struct dyString *str = newDyString(10000); -char *table, previous[256]; +char *table; if(prefix == NULL || database == NULL) fail("Missing prefix or database parameter"); conn = hAllocConn(database); table = connGeneSuggestTable(conn); if(table == NULL) fail("gene autosuggest is not supported for this assembly"); hasKnownCanonical = sameString(table, "knownCanonical"); puts("Content-Type:text/plain"); puts("\n"); dyStringPrintf(str, "[\n"); -// We have to choose one isoform, so we choose the largest one; we order by chrom to make sure we choose normal chromosomes -// over _random and _hap chromosomes (see redmine #4257). if(exact) { if(hasKnownCanonical) - safef(query, sizeof(query), "select x.geneSymbol, k.chrom, k2.txStart, k2.txEnd from knownCanonical k, knownGene k2, kgXref x where k.transcript = x.kgID and k.transcript = k2.name and x.geneSymbol = '%s' order by x.geneSymbol, k.chrom, k2.txEnd - k2.txStart desc", prefix); + safef(query, sizeof(query), "select x.geneSymbol, k.chrom, kg.txStart, kg.txEnd, x.kgID, x.description from knownCanonical k, knownGene kg, kgXref x where k.transcript = x.kgID and k.transcript = kg.name and x.geneSymbol = '%s' order by x.geneSymbol, k.chrom, kg.txEnd - kg.txStart desc", prefix); else - safef(query, sizeof(query), "select r.name2, r.chrom, r.txStart, r.txEnd from %s r where r.name2 = '%s' order by r.name2, r.chrom, r.txEnd - r.txStart desc", table, prefix); + safef(query, sizeof(query), "select r.name2, r.chrom, r.txStart, r.txEnd, r.name, description.name from %s r, gbCdnaInfo, description where r.name2 = '%s' and gbCdnaInfo.description = description.id order by r.name2, r.chrom, r.txEnd - r.txStart desc", table, prefix); } else { // We use a LIKE query b/c it uses the geneSymbol index (substr queries do not use indices in mysql). // Also note that we take advantage of the fact that searches are case-insensitive in mysql. - // Some tables have duplicates (e.g. 2 TTn's in mm9 knownCanonical); currently the larger one wins. + // Unfortunately, knownCanonical sometimes has multiple entries for a given gene (e.g. 2 TTn's in mm9 knownCanonical; + // 3 POU5F1's in hg19); we return all of them (#5962). if(hasKnownCanonical) - safef(query, sizeof(query), "select x.geneSymbol, k.chrom, k2.txStart, k2.txEnd from knownCanonical k, knownGene k2, kgXref x where k.transcript = x.kgID and k.transcript = k2.name and x.geneSymbol LIKE '%s%%' order by x.geneSymbol, k.chrom, k2.txEnd - k2.txStart desc", prefix); + safef(query, sizeof(query), "select x.geneSymbol, k.chrom, kg.txStart, kg.txEnd, x.kgID, x.description from knownCanonical k, knownGene kg, kgXref x where k.transcript = x.kgID and k.transcript = kg.name and x.geneSymbol LIKE '%s%%' order by x.geneSymbol, k.chrom, kg.txStart", prefix); else - safef(query, sizeof(query), "select r.name2, r.chrom, r.txStart, r.txEnd from %s r where r.name2 LIKE '%s%%' order by r.name2, r.chrom, r.txEnd - r.txStart desc", table, prefix); + safef(query, sizeof(query), "select r.name2, r.chrom, r.txStart, r.txEnd, r.name2, description.name from %s r, gbCdnaInfo, description where r.name2 LIKE '%s%%' and acc = r.name and gbCdnaInfo.description = description.id order by r.name2, r.chrom, r.txStart", table, prefix); } sr = sqlGetResult(conn, query); -previous[0] = 0; while ((row = sqlNextRow(sr)) != NULL) { - if(!previous[0] || !sameString(previous, row[0])) + // ignore funny chroms (e.g. _hap chroms. See redmine #4257. + if(!strchr(row[1], '_')) { count++; - dyStringPrintf(str, "%s{\"value\": \"%s\", \"id\": \"%s:%d-%s\"}", count == 1 ? "" : ",\n", - row[0], row[1], atoi(row[2])+1, row[3]); - safecpy(previous, sizeof(previous), row[0]); + dyStringPrintf(str, "%s{\"value\": \"%s (%s)\", \"id\": \"%s:%d-%s\", \"internalId\": \"%s\"}", count == 1 ? "" : ",\n", + row[0], javaScriptLiteralEncode(row[5]), row[1], atoi(row[2])+1, row[3], javaScriptLiteralEncode(row[4])); } } + dyStringPrintf(str, "\n]\n"); puts(dyStringContents(str)); return 0; }