ff74889ad772c653a5a127e1c8a340870b17e3b1 braney Thu Mar 24 17:33:01 2016 -0700 fix up hgFind issues with genbank meta tables #16809 diff --git src/hg/lib/hgFind.c src/hg/lib/hgFind.c index 774b06f..eee779e 100644 --- src/hg/lib/hgFind.c +++ src/hg/lib/hgFind.c @@ -1333,32 +1333,42 @@ * grepIndex setting, and we can access the index file for table, then * return the filename; else return NULL. */ /* Special case for genbank: Mark completely specifies the root in * hg.conf, so hfs's grepIndex setting value is ignored -- it is used * only to enable grep indexing. So we have multiple ways to turn this * off if necessary: remove hg.conf setting (takes out all dbs), * remove hgFindSpec setting (takes out one db at a time), or remove * a file (takes out one table at a time). */ { char *grepIndexRoot = cfgOption("grepIndex.genbank"); char *hfsSetting = hgFindSpecSetting(hfs, "grepIndex"); if (grepIndexRoot != NULL && hfsSetting != NULL) { char buf[1024]; + char *dot; + // check to see if table name has database in it + if ((dot = strchr(table, '.')) != NULL) + { + *dot = 0; + db = table; + table = dot + 1; + } safef(buf, sizeof(buf), "%s/%s/%s.%s", grepIndexRoot, db, table, suffix); + if (dot) + *dot = '.'; if (fileExists(buf)) return cloneString(buf); } return NULL; } static struct slName *genbankGrepQuery(char *indexFile, char *table, char *key) /* grep -i key indexFile, return a list of ids (first word of each line). 
*/ { char *extraOptions = ""; if (sameString(table, "author")) extraOptions = "-w"; return doGrepQuery(indexFile, table, key, extraOptions); } @@ -1383,30 +1393,41 @@ sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { touppers(row[1]); if (keyIsPrefix(key, row[1])) { idEl = newSlName(row[0]); slAddHead(&idList, idEl); } } sqlFreeResult(&sr); } return idList; } +static char *skipDb(char *tableName) +/* return a pointer past the database part of the table name (if any) */ +{ +char *dot = tableName; + +if ((dot = strchr(tableName, '.')) == NULL) + return tableName; + +return dot + 1; +} + static boolean gotAllGenbankGrepIndexFiles(char *db, struct hgFindSpec *hfs, char *tables[], int tableCount) /* Return TRUE if all tables have a readable genbank index file. */ { int i; for (i=0; i < tableCount; i++) if (! getGenbankGrepIndex(db, hfs, tables[i], "idName")) return FALSE; return TRUE;; } static void findHitsToTables(char *db, struct hgFindSpec *hfs, char *key, int limitResults, char *tables[], int tableCount, struct hash **retHash, struct slName **retList) /* Return all unique accessions that match any table. */ @@ -1421,44 +1442,44 @@ char *field; int i; int rowCount = 0; // Excessively broad searches were leading to CGI timeouts (#11626). for (i = 0; i<tableCount && (limitResults == EXHAUSTIVE_SEARCH_REQUIRED || rowCount < limitResults); ++i) { struct slName *idList = NULL, *idEl; char *grepIndexFile = NULL; /* I'm doing this query in two steps in C rather than * in one step in SQL just because it somehow is much * faster this way (like 100x faster) when using mySQL. 
*/ field = tables[i]; - if (!hTableExists(db, field)) + if (!sqlTableExists(conn, field)) continue; if ((grepIndexFile = getGenbankGrepIndex(db, hfs, field, "idName")) != NULL) idList = genbankGrepQuery(grepIndexFile, field, key); else idList = genbankSqlFuzzyQuery(conn, field, key, limitResults); for (idEl = idList; idEl != NULL && (limitResults == EXHAUSTIVE_SEARCH_REQUIRED || rowCount < limitResults); idEl = idEl->next) { /* don't check srcDb to exclude refseq for compat with older tables */ sqlSafef(query, sizeof(query), "select acc, organism from %s where %s = '%s' " - " and type = 'mRNA'", gbCdnaInfoTable, field, idEl->name); + " and type = 'mRNA'", gbCdnaInfoTable, skipDb(field), idEl->name); // limit results to avoid CGI timeouts (#11626). if (limitResults != EXHAUSTIVE_SEARCH_REQUIRED) sqlSafefAppend(query, sizeof(query), " limit %d", limitResults); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { char *acc = row[0]; /* will use this later to distinguish xeno mrna */ int organismID = sqlUnsigned(row[1]); if (!isRefSeqAcc(acc) && !hashLookup(hash, acc)) { el = newSlName(acc); slAddHead(&list, el); hashAddInt(hash, acc, organismID); // limit results to avoid CGI timeouts (#11626). @@ -1645,33 +1666,33 @@ freeMem(organism); table->description = cloneString(title); table->name = cloneString(mrnaTable); table->htmlOnePos = mrnaKeysHtmlOnePos; slAddHead(&hgp->tableList, table); } freeDyString(&dy); return alignCount; } static boolean findMrnaKeys(char *db, struct hgFindSpec *hfs, char *keys, int limitResults, struct hgPositions *hgp) /* Find mRNA that has keyword in one of its fields. 
*/ { int alignCount; -static char *tables[] = { - "productName", "geneName", - "author", "tissue", "cell", "description", "development", +char *tables[] = { + productNameTable, geneNameTable, + authorTable, tissueTable, cellTable, descriptionTable, developmentTable, }; struct hash *allKeysHash = NULL; struct slName *allKeysList = NULL; struct sqlConnection *conn = hAllocConn(db); boolean found = FALSE; /* If we can use grep to search all tables, then use piped grep to * implement implicit "AND" of multiple keys. */ if (gotAllGenbankGrepIndexFiles(db, hfs, tables, ArraySize(tables))) { findHitsToTables(db, hfs, keys, limitResults, tables, ArraySize(tables), &allKeysHash, &allKeysList); } else { @@ -2799,32 +2820,35 @@ { struct slPair *xrefList = NULL, *xrefPtr = NULL; boolean found = FALSE; if (hfs == NULL || term == NULL || hgp == NULL) errAbort("NULL passed to hgFindUsingSpec.\n"); if (strlen(term)<2 && ! (sameString(hfs->searchName, "knownGene") || sameString(hfs->searchName, "flyBaseGeneSymbolOneLetter"))) return FALSE; if (isNotEmpty(hfs->termRegex) && ! regexMatchNoCase(term, hfs->termRegex)) return(FALSE); +if (!(sameString(hfs->searchType, "mrnaKeyword") || sameString(hfs->searchType, "mrnaAcc") )) + { if (! hTableOrSplitExists(db, hfs->searchTable)) return(FALSE); + } if (isNotEmpty(hfs->searchType) && searchSpecial(db, hfs, term, limitResults, hgp, relativeFlag, relStart, relEnd, &found)) return(found); if (isNotEmpty(hfs->xrefTable)) { struct sqlConnection *conn = hAllocConn(db); // NOTE hfs->xrefTable can sometimes contain a comma-separated table list, // rather than just a single table. char *tables = replaceChars(hfs->xrefTable, ",", " "); boolean exists = sqlTablesExist(conn, tables); hFreeConn(&conn); freeMem(tables); if (! exists)