dc2fd8586c12ac58d0f1ff98118ef67781fda9eb tdreszer Tue Aug 27 12:54:11 2013 -0700 Searches on excessively broad terms (like 'test') were leading to CGI 504 timeouts. This was tracked to almost 5000 queries to gbCdnaInfo, a 93 million row table. Since the page presented to the user only shows the first 500 results, this one mRNA search will now be curtailed after the first 500 results. Redmine #11626. diff --git src/hg/lib/hgFind.c src/hg/lib/hgFind.c index 7989c2a..af9f23e 100644 --- src/hg/lib/hgFind.c +++ src/hg/lib/hgFind.c @@ -1427,116 +1427,120 @@ } return NULL; } static struct slName *genbankGrepQuery(char *indexFile, char *table, char *key) /* grep -i key indexFile, return a list of ids (first word of each line). */ { char *extraOptions = ""; if (sameString(table, "author")) extraOptions = "-w"; return doGrepQuery(indexFile, table, key, extraOptions); } static struct slName *genbankSqlFuzzyQuery(struct sqlConnection *conn, - char *table, char *key) + char *table, char *key, int limit) /* Perform a fuzzy sql search for %key% in table.name; return list of * corresponding table.id's. */ { struct slName *idList = NULL, *idEl = NULL; if (!isTooCommon(table, key)) { struct sqlResult *sr; char **row; char query[256]; sqlSafef(query, sizeof(query), - "select id,name from %s where name like '%%%s%%'", table, key); + "select id,name from %s where name like '%%%s%%' limit %d", table, key, limit); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { touppers(row[1]); if (keyIsPrefix(key, row[1])) { idEl = newSlName(row[0]); slAddHead(&idList, idEl); } } sqlFreeResult(&sr); } return idList; } static boolean gotAllGenbankGrepIndexFiles(char *db, struct hgFindSpec *hfs, char *tables[], int tableCount) /* Return TRUE if all tables have a readable genbank index file. */ { int i; for (i=0; i < tableCount; i++) if (! getGenbankGrepIndex(db, hfs, tables[i], "idName")) return FALSE; return TRUE;; } static void findHitsToTables(char *db, struct hgFindSpec *hfs, char *key, char *tables[], int tableCount, struct hash **retHash, struct slName **retList) /* Return all unique accessions that match any table. */ +// Modified to return only the first 500 hits because of CGI timeouts { struct slName *list = NULL, *el; struct hash *hash = newHash(0); struct sqlConnection *conn = hAllocConn(db); struct sqlResult *sr; char **row; char query[256]; char *field; int i; -for (i = 0; inext) + idList = genbankSqlFuzzyQuery(conn, field, key, limit); + for (idEl = idList; idEl != NULL && rowCount <= limit; idEl = idEl->next) { /* don't check srcDb to exclude refseq for compat with older tables */ sqlSafef(query, sizeof(query), "select acc, organism from gbCdnaInfo where %s = %s " - " and type = 'mRNA'", - field, idEl->name); + " and type = 'mRNA' limit %d", + field, idEl->name, limit); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { char *acc = row[0]; /* will use this later to distinguish xeno mrna */ int organismID = sqlUnsigned(row[1]); if (!isRefSeqAcc(acc) && !hashLookup(hash, acc)) { el = newSlName(acc); slAddHead(&list, el); hashAddInt(hash, acc, organismID); } + if (rowCount++ > limit) + break; } sqlFreeResult(&sr); } slFreeList(&idList); } hFreeConn(&conn); slReverse(&list); *retList = list; *retHash = hash; } static void andHits(struct hash *aHash, struct slName *aList, struct hash *bHash, struct slName *bList, struct hash **retHash, struct slName **retList)