105a881599567f756a6cddba548f4496edb54144 chmalee Wed Jul 5 17:00:30 2023 -0700 Make hgSuggest use distinct mysql keyword to prevent bugs when tables contain duplicate rows, refs #31705 diff --git src/hg/hgSuggest/hgSuggest.c src/hg/hgSuggest/hgSuggest.c index 8085dd7..c90dcf3 100644 --- src/hg/hgSuggest/hgSuggest.c +++ src/hg/hgSuggest/hgSuggest.c @@ -24,57 +24,57 @@ /* Print out a Javascript list of objects describing genes that start with prefix. */ { struct dyString *str = dyStringNew(10000); dyStringPrintf(str, "[\n"); int exact = cgiOptionalInt("exact", 0); boolean hasKnownCanonical = sameString(table, "knownCanonical"); initGenbankTableNames(database); char query[2048]; if(exact) { // NOTE that exact is no longer used by the UI as of v271, but there are still some robots // using it so we still support it. if(hasKnownCanonical) sqlSafef(query, sizeof(query), - "select x.geneSymbol, k.chrom, kg.txStart, kg.txEnd, x.kgID, x.description " + "select distinct x.geneSymbol, k.chrom, kg.txStart, kg.txEnd, x.kgID, x.description " "from knownCanonical k, knownGene kg, kgXref x " "where k.transcript = x.kgID and k.transcript = kg.name and x.geneSymbol = '%s' " "order by x.geneSymbol, k.chrom, kg.txEnd - kg.txStart desc", prefix); else sqlSafef(query, sizeof(query), - "select r.name2, r.chrom, r.txStart, r.txEnd, r.name, d.name " + "select distinct r.name2, r.chrom, r.txStart, r.txEnd, r.name, d.name " "from %s r, %s g, %s d " "where r.name2 = '%s' and g.acc = r.name and g.description = d.id " "order by r.name2, r.chrom, r.txEnd - r.txStart desc", table, gbCdnaInfoTable, descriptionTable, prefix); } else { // We use a LIKE query b/c it uses the geneSymbol index (substr queries do not use indices in mysql). // Also note that we take advantage of the fact that searches are case-insensitive in mysql. // Unfortunately, knownCanonical sometimes has multiple entries for a given gene (e.g. 2 TTn's in mm9 knownCanonical; // 3 POU5F1's in hg19); we return all of them (#5962). if(hasKnownCanonical) sqlSafef(query, sizeof(query), - "select x.geneSymbol, k.chrom, kg.txStart, kg.txEnd, x.kgID, x.description " + "select distinct x.geneSymbol, k.chrom, kg.txStart, kg.txEnd, x.kgID, x.description " "from knownCanonical k, knownGene kg, kgXref x " "where k.transcript = x.kgID and k.transcript = kg.name " "and x.geneSymbol LIKE '%s%%' " "order by x.geneSymbol, k.chrom, kg.txStart", prefix); else - sqlSafef(query, sizeof(query), "select r.name2, r.chrom, r.txStart, r.txEnd, r.name, d.name " + sqlSafef(query, sizeof(query), "select distinct r.name2, r.chrom, r.txStart, r.txEnd, r.name, d.name " "from %s r, %s g, %s d " "where r.name2 LIKE '%s%%' and g.acc = r.name and g.description = d.id " "order by r.name2, r.chrom, r.txStart", table, gbCdnaInfoTable, descriptionTable, prefix); } char *knownDatabase = hdbDefaultKnownDb(database); struct sqlConnection *conn = hAllocConn(knownDatabase); struct sqlResult *sr = sqlGetResult(conn, query); char **row; int count = 0; while ((row = sqlNextRow(sr)) != NULL) { // ignore funny chroms (e.g. _hap chroms. See redmine #4257. if(!strchr(row[1], '_')) {