dc2fd8586c12ac58d0f1ff98118ef67781fda9eb
tdreszer
  Tue Aug 27 12:54:11 2013 -0700
Searches on excessively broad terms (like 'test') were leading to CGI 504 timeouts. This was tracked to almost 5000 queries to gbCdnaInfo, a 93 million row table.  Since the page presented to the user only shows the first 500 results, this one mRNA search will now be curtailed after the first 500 results.  Redmine #11626.
diff --git src/hg/lib/hgFind.c src/hg/lib/hgFind.c
index 7989c2a..af9f23e 100644
--- src/hg/lib/hgFind.c
+++ src/hg/lib/hgFind.c
@@ -1427,116 +1427,120 @@
     }
 return NULL;
 }
 
 
 static struct slName *genbankGrepQuery(char *indexFile, char *table, char *key)
 /* grep -i key indexFile, return a list of ids (first word of each line). */
 {
 char *extraOptions = "";
 if (sameString(table, "author"))
     extraOptions = "-w";
 return doGrepQuery(indexFile, table, key, extraOptions);
 }
 
 static struct slName *genbankSqlFuzzyQuery(struct sqlConnection *conn,
-					   char *table, char *key)
+					   char *table, char *key, int limit) /* limit: max rows the SQL query may return */
 /* Perform a fuzzy sql search for %key% in table.name; return list of 
  * corresponding table.id's.  */
 {
 struct slName *idList = NULL, *idEl = NULL;
 if (!isTooCommon(table, key))
     {
     struct sqlResult *sr;
     char **row;
     char query[256];
     sqlSafef(query, sizeof(query),
-	  "select id,name from %s where name like '%%%s%%'", table, key);  
+	  "select id,name from %s where name like '%%%s%%' limit %d", table, key, limit); /* NOTE(review): LIMIT applies before the keyIsPrefix() filter below, so fewer than limit ids may be returned */
     sr = sqlGetResult(conn, query);
     while ((row = sqlNextRow(sr)) != NULL)
 	{
 	touppers(row[1]);
 	if (keyIsPrefix(key, row[1]))
 	    {
 	    idEl = newSlName(row[0]);
 	    slAddHead(&idList, idEl);
 	    }
 	}
     sqlFreeResult(&sr);
     }
 return idList;
 }
 
 static boolean gotAllGenbankGrepIndexFiles(char *db, struct hgFindSpec *hfs,
 					   char *tables[], int tableCount)
 /* Return TRUE if all tables have a readable genbank index file. */
 {
 int i;
 for (i=0;  i < tableCount;  i++)
     if (! getGenbankGrepIndex(db, hfs, tables[i], "idName"))
 	return FALSE;
 return TRUE;;
 }
 
 static void findHitsToTables(char *db, struct hgFindSpec *hfs,
 			     char *key, char *tables[], int tableCount, 
 			     struct hash **retHash, struct slName **retList)
 /* Return all unique accessions that match any table. */
+/* Stops once 500 gbCdnaInfo rows have been read: excessively broad searches caused CGI timeouts. */
 {
 struct slName *list = NULL, *el;
 struct hash *hash = newHash(0);
 struct sqlConnection *conn = hAllocConn(db);
 struct sqlResult *sr;
 char **row;
 char query[256];
 char *field;
 int i;
 
-for (i = 0; i<tableCount; ++i)
+int rowCount = 0, limit = 500; // rowCount: gbCdnaInfo rows read so far; limit: hard cap on that count
+for (i = 0; i<tableCount && rowCount < limit; ++i)
     {
     struct slName *idList = NULL, *idEl;
     char *grepIndexFile = NULL;
     
     /* I'm doing this query in two steps in C rather than
      * in one step in SQL just because it somehow is much
      * faster this way (like 100x faster) when using mySQL. */
     field = tables[i];
     if (!hTableExists(db, field))
 	continue;
     if ((grepIndexFile = getGenbankGrepIndex(db, hfs, field, "idName")) != NULL)
 	idList = genbankGrepQuery(grepIndexFile, field, key);
     else
-	idList = genbankSqlFuzzyQuery(conn, field, key);
-    for (idEl = idList; idEl != NULL; idEl = idEl->next)
+	idList = genbankSqlFuzzyQuery(conn, field, key, limit);
+    for (idEl = idList; idEl != NULL && rowCount < limit; idEl = idEl->next)
         {
         /* don't check srcDb to exclude refseq for compat with older tables */
         sqlSafef(query, sizeof(query),
               "select acc, organism from gbCdnaInfo where %s = %s "
-	      " and type = 'mRNA'",
-	      field, idEl->name);
+              " and type = 'mRNA' limit %d",
+              field, idEl->name, limit - rowCount); // loop guard keeps this >= 1: request only the rows still allowed
 	sr = sqlGetResult(conn, query);
 	while ((row = sqlNextRow(sr)) != NULL)
 	    {
 	    char *acc = row[0];
             /* will use this later to distinguish xeno mrna */
 	    int organismID = sqlUnsigned(row[1]);
 	    if (!isRefSeqAcc(acc) && !hashLookup(hash, acc))
 		{
 		el = newSlName(acc);
                 slAddHead(&list, el);
                 hashAddInt(hash, acc, organismID);
 		}
+            if (++rowCount >= limit) // counts every row read (not just new hits); stop exactly at the cap
+                break;
 	    }
 	sqlFreeResult(&sr);
         }
     slFreeList(&idList);
     }
 hFreeConn(&conn);
 slReverse(&list);
 *retList = list;
 *retHash = hash;
 }
 
 
 static void andHits(struct hash *aHash, struct slName *aList, 
 	struct hash *bHash, struct slName *bList,
 	struct hash **retHash, struct slName **retList)