105a881599567f756a6cddba548f4496edb54144
chmalee
  Wed Jul 5 17:00:30 2023 -0700
Make hgSuggest use distinct mysql keyword to prevent bugs when tables contain duplicate rows, refs #31705

diff --git src/hg/hgSuggest/hgSuggest.c src/hg/hgSuggest/hgSuggest.c
index 8085dd7..c90dcf3 100644
--- src/hg/hgSuggest/hgSuggest.c
+++ src/hg/hgSuggest/hgSuggest.c
@@ -24,57 +24,57 @@
 /* Print out a Javascript list of objects describing genes that start with prefix. */
 {
 struct dyString *str = dyStringNew(10000);
 dyStringPrintf(str, "[\n");
 
 int exact = cgiOptionalInt("exact", 0);
 boolean hasKnownCanonical = sameString(table, "knownCanonical");
 initGenbankTableNames(database);
 char query[2048];
 if(exact)
     {
     // NOTE that exact is no longer used by the UI as of v271, but there are still some robots
     // using it so we still support it.
     if(hasKnownCanonical)
         sqlSafef(query, sizeof(query),
-                 "select x.geneSymbol, k.chrom, kg.txStart, kg.txEnd, x.kgID, x.description "
+                 "select distinct x.geneSymbol, k.chrom, kg.txStart, kg.txEnd, x.kgID, x.description "
                  "from knownCanonical k, knownGene kg, kgXref x "
                  "where k.transcript = x.kgID and k.transcript = kg.name and x.geneSymbol = '%s' "
                  "order by x.geneSymbol, k.chrom, kg.txEnd - kg.txStart desc", prefix);
     else
         sqlSafef(query, sizeof(query),
-                 "select r.name2, r.chrom, r.txStart, r.txEnd, r.name, d.name "
+                 "select distinct r.name2, r.chrom, r.txStart, r.txEnd, r.name, d.name "
                  "from %s r, %s g, %s d "
                  "where r.name2 = '%s' and g.acc = r.name and g.description = d.id "
                  "order by r.name2, r.chrom, r.txEnd - r.txStart desc",
                  table, gbCdnaInfoTable, descriptionTable, prefix);
     }
 else
     {
     // We use a LIKE query b/c it uses the geneSymbol index (substr queries do not use indices in mysql).
     // Also note that we take advantage of the fact that searches are case-insensitive in mysql.
     // Unfortunately, knownCanonical sometimes has multiple entries for a given gene (e.g. 2 TTn's in mm9 knownCanonical;
     // 3 POU5F1's in hg19); we return all of them (#5962).
     if(hasKnownCanonical)
         sqlSafef(query, sizeof(query),
-                 "select x.geneSymbol, k.chrom, kg.txStart, kg.txEnd, x.kgID, x.description "
+                 "select distinct x.geneSymbol, k.chrom, kg.txStart, kg.txEnd, x.kgID, x.description "
                  "from knownCanonical k, knownGene kg, kgXref x "
                  "where k.transcript = x.kgID and k.transcript = kg.name "
                  "and x.geneSymbol LIKE '%s%%' "
                  "order by x.geneSymbol, k.chrom, kg.txStart", prefix);
     else
-        sqlSafef(query, sizeof(query), "select r.name2, r.chrom, r.txStart, r.txEnd, r.name, d.name "
+        sqlSafef(query, sizeof(query), "select distinct r.name2, r.chrom, r.txStart, r.txEnd, r.name, d.name "
                  "from %s r, %s g, %s d "
                  "where r.name2 LIKE '%s%%' and g.acc = r.name and g.description = d.id "
                  "order by r.name2, r.chrom, r.txStart",
                  table, gbCdnaInfoTable, descriptionTable, prefix);
     }
 char *knownDatabase = hdbDefaultKnownDb(database);
 struct sqlConnection *conn = hAllocConn(knownDatabase);
 struct sqlResult *sr = sqlGetResult(conn, query);
 char **row;
 int count = 0;
 while ((row = sqlNextRow(sr)) != NULL)
     {
     // ignore funny chroms (e.g. _hap chroms. See redmine #4257.
     if(!strchr(row[1], '_'))
         {