aba8125cb532df17beb7c7c9bc8467a43d09e3d6 braney Wed Feb 10 13:39:27 2016 -0800 changes to allow for GenBank metadata to be held in a common table. #16809 diff --git src/hg/lib/hgFind.c src/hg/lib/hgFind.c index f887c1f..fc2a080 100644 --- src/hg/lib/hgFind.c +++ src/hg/lib/hgFind.c @@ -26,31 +26,31 @@ #include "snp.h" #include "refLink.h" #include "kgAlias.h" #include "kgProtAlias.h" #include "findKGAlias.h" #include "findKGProtAlias.h" #include "tigrCmrGene.h" #include "minGeneInfo.h" #include "pipeline.h" #include "hgConfig.h" #include "trix.h" #include "trackHub.h" #include "udc.h" #include "hubConnect.h" #include "bigBedFind.h" - +#include "genbank.h" // Exhaustive searches can lead to timeouts on CGIs (#11626). // However, hgGetAnn requires exhaustive searches (#11665). #define NONEXHAUSTIVE_SEARCH_LIMIT 500 #define EXHAUSTIVE_SEARCH_REQUIRED -1 extern struct cart *cart; char *hgAppName = ""; /* alignment tables to check when looking for mrna alignments */ static char *estTables[] = { "intronEst", "all_est", "xenoEst", NULL }; static char *estLabels[] = { "Spliced ESTs", "ESTs", "Other ESTs", NULL }; static char *mrnaTables[] = { "all_mrna", "xenoMrna", NULL }; static char *mrnaLabels[] = { "mRNAs", "Other mRNAs", NULL }; static struct dyString *hgpMatchNames = NULL; @@ -422,34 +422,34 @@ freeMem(escapedKey); return idList; } static char *MrnaIDforGeneName(char *db, char *geneName) /* return mRNA ID for a gene name */ { struct sqlConnection *conn; struct sqlResult *sr = NULL; char query[256]; char **row; char *result = NULL; conn = hAllocConn(db); -if (sqlTableExists(conn, "refLink")) +if (sqlTableExists(conn, refLinkTable)) { - sqlSafef(query, sizeof(query), "SELECT mrnaAcc FROM refLink WHERE name='%s'", - geneName); + sqlSafef(query, sizeof(query), "SELECT mrnaAcc FROM %s WHERE name='%s'", + refLinkTable, geneName); sr = sqlGetResult(conn, query); if ((row = sqlNextRow(sr)) != NULL) { result = cloneString(row[0]); } else { result = NULL; } sqlFreeResult(&sr); } hFreeConn(&conn); return result; } @@ -1224,31 +1224,31 @@ #endif static boolean mrnaInfo(char *acc, struct sqlConnection *conn, char **mrnaType) /* Sets *mrnaType to mrna/est type for the accession */ /* Ignores returned values if parameters are NULL */ /* Return TRUE if search succeeded, else FALSE */ /* NOTE: caller must free mrnaType */ { char query[256]; struct sqlResult *sr; char **row; int ret; sqlSafef(query, sizeof(query), - "select type from gbCdnaInfo where acc = '%s'", acc); + "select type from %s where acc = '%s'", gbCdnaInfoTable, acc); sr = sqlGetResult(conn, query); if ((row = sqlNextRow(sr)) != NULL) { if (mrnaType != NULL) *mrnaType = cloneString(row[0]); ret = TRUE; } else ret = FALSE; sqlFreeResult(&sr); return ret; } boolean isRefSeqAcc(char *acc) /* Return TRUE if acc looks like a RefSeq acc. */ @@ -1381,56 +1381,65 @@ dyStringPrintf(dy, "\n"); pos->description = cloneString(dy->string); slAddHead(&table->posList, pos); } slReverse(&table->posList); freeDyString(&dy); } static boolean findMrnaPos(char *db, char *acc, struct hgPositions *hgp) /* Find MRNA or EST position(s) from accession number. * Look to see if it's an mRNA or EST. Fill in hgp and return * TRUE if it is, otherwise return FALSE. */ /* NOTE: this excludes RefSeq mrna's, as they are currently * handled in findRefGenes(), which is called later in the main function */ { -if (!hTableExists(db, "gbCdnaInfo")) +struct sqlConnection *conn = hAllocConn(db); +if (!sqlTableExists(conn, gbCdnaInfoTable)) + { + hFreeConn(&conn); return FALSE; + } char *type = mrnaType(db, acc); if (isEmpty(type)) + { + hFreeConn(&conn); /* this excludes refseq mrna's, and accessions with * invalid column type in mrna table (refseq's and ests) */ return FALSE; + } char lowerType[16]; -struct sqlConnection *conn = hAllocConn(db); char **tables, **labels, *tableName; boolean gotResults = FALSE; safecpy(lowerType, sizeof(lowerType), type); tolowers(lowerType); if (sameWord(lowerType, "mrna")) { tables = mrnaTables; labels = mrnaLabels; } else if (sameWord(lowerType, "est")) { tables = estTables; labels = estLabels; } else + { + hFreeConn(&conn); return FALSE; + } while ((tableName = *tables++) != NULL) { char *label = *labels++; struct psl *pslList = NULL; if (sameString(tableName, "intronEst") && !sqlTableExists(conn, tableName)) { struct slName *c, *chromList = hChromList(db); char splitTable[HDB_MAX_TABLE_STRING]; for (c = chromList; c != NULL; c = c->next) { safef(splitTable, sizeof(splitTable), "%s_%s", c->name, tableName); struct psl *chrPslList = getPslFromTable(conn, db, splitTable, acc); if (pslList == NULL) pslList = chrPslList; @@ -1558,32 +1567,32 @@ * in one step in SQL just because it somehow is much * faster this way (like 100x faster) when using mySQL. */ field = tables[i]; if (!hTableExists(db, field)) continue; if ((grepIndexFile = getGenbankGrepIndex(db, hfs, field, "idName")) != NULL) idList = genbankGrepQuery(grepIndexFile, field, key); else idList = genbankSqlFuzzyQuery(conn, field, key, limitResults); for (idEl = idList; idEl != NULL && (limitResults == EXHAUSTIVE_SEARCH_REQUIRED || rowCount < limitResults); idEl = idEl->next) { /* don't check srcDb to exclude refseq for compat with older tables */ sqlSafef(query, sizeof(query), - "select acc, organism from gbCdnaInfo where %s = '%s' " - " and type = 'mRNA'", field, idEl->name); + "select acc, organism from %s where %s = '%s' " + " and type = 'mRNA'", gbCdnaInfoTable, field, idEl->name); // limit results to avoid CGI timeouts (#11626). if (limitResults != EXHAUSTIVE_SEARCH_REQUIRED) sqlSafefAppend(query, sizeof(query), " limit %d", limitResults); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { char *acc = row[0]; /* will use this later to distinguish xeno mrna */ int organismID = sqlUnsigned(row[1]); if (!isRefSeqAcc(acc) && !hashLookup(hash, acc)) { el = newSlName(acc); slAddHead(&list, el); hashAddInt(hash, acc, organismID); // limit results to avoid CGI timeouts (#11626). @@ -1709,47 +1718,47 @@ else { /* display mRNA details page -- need to add dummy CGI variables*/ dyStringPrintf(dy, "<A HREF=\"%s%cg=%s&i=%s&c=0&o=0&l=0&r=0", hgcName(), hgAppCombiner, mrnaTable, acc); } if (ui != NULL) dyStringPrintf(dy, "&%s", ui); dyStringPrintf(dy, "%s\">", hgp->extraCgi); dyStringPrintf(dy, "%s</A>", acc); /* print description for item, or lacking that, the product name */ safef(description, sizeof(description), "%s", "n/a"); sqlSafef(query, sizeof(query), - "select description.name from gbCdnaInfo,description" - " where gbCdnaInfo.acc = '%s' and gbCdnaInfo.description = description.id", acc); + "select d.name from %s g,%s d" + " where g.acc = '%s' and g.description = d.id", gbCdnaInfoTable, descriptionTable, acc); sqlQuickQuery(conn, query, description, sizeof(description)); if (sameString(description, "n/a")) { /* look for product name */ sqlSafef(query, sizeof(query), - "select productName.name from gbCdnaInfo,productName" - " where gbCdnaInfo.acc = '%s' and gbCdnaInfo.productName = productName.id", - acc); + "select p.name from %s g,%s p" + " where g.acc = '%s' and g.productName = p.id", + gbCdnaInfoTable, productNameTable, acc); sqlQuickQuery(conn, query, product, sizeof(product)); if (!sameString(product, "n/a")) { /* get organism name */ sqlSafef(query, sizeof(query), - "select organism.name from gbCdnaInfo,organism" - " where gbCdnaInfo.acc = '%s' and gbCdnaInfo.organism = organism.id", acc); + "select o.name from %s g,%s o" + " where g.acc = '%s' and g.organism = o.id", gbCdnaInfoTable, organismTable, acc); *organism = 0; sqlQuickQuery(conn, query, organism, sizeof(organism)); safef(description, sizeof(description), "%s%s%s", *organism ? organism : "", *organism ? ", " : "", product); } } if (!sameString(description, "n/a")) /* print description if it has been loaded */ dyStringPrintf(dy, " - %s", description); dyStringPrintf(dy, "\n"); pos->description = cloneString(dy->string); /* remove processed element from accession list */ @@ -2071,93 +2080,93 @@ } sqlFreeResult(&sr); } static void addRefLinkAccs(struct sqlConnection *conn, struct slName *accList, struct refLink **pList) /* Query database and add returned refLinks to head of list. */ { struct slName *accEl = NULL; struct sqlResult *sr = NULL; char **row = NULL; char query[256]; for (accEl = accList; accEl != NULL; accEl = accEl->next) { - sqlSafef(query, sizeof(query), "select * from refLink where mrnaAcc = '%s'", - accEl->name); + sqlSafef(query, sizeof(query), "select * from %s where mrnaAcc = '%s'", + refLinkTable, accEl->name); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { struct refLink *rl = refLinkLoad(row); slAddHead(pList, rl); } sqlFreeResult(&sr); } } static boolean findRefGenes(char *db, struct hgFindSpec *hfs, char *spec, struct hgPositions *hgp) /* Look up refSeq genes in table. */ { struct sqlConnection *conn = hAllocConn(db); struct dyString *ds = newDyString(256); struct refLink *rlList = NULL, *rl; -boolean gotRefLink = hTableExists(db, "refLink"); +boolean gotRefLink = sqlTableExists(conn, refLinkTable); boolean found = FALSE; char *specNoVersion = cloneString(spec); // chop off the version number, e.g. "NM_000454.4 ", // but if spec starts with "." like ".stuff" then specNoVersion is entirely empty. (void) chopPrefix(specNoVersion); if (gotRefLink && isNotEmpty(specNoVersion)) { if (startsWith("NM_", specNoVersion) || startsWith("NR_", specNoVersion) || startsWith("XM_", specNoVersion)) { - sqlDyStringPrintf(ds, "select * from refLink where mrnaAcc = '%s'", specNoVersion); + sqlDyStringPrintf(ds, "select * from %s where mrnaAcc = '%s'", refLinkTable, specNoVersion); addRefLinks(conn, ds, &rlList); } else if (startsWith("NP_", specNoVersion) || startsWith("XP_", specNoVersion)) { - sqlDyStringPrintf(ds, "select * from refLink where protAcc = '%s'", specNoVersion); + sqlDyStringPrintf(ds, "select * from %s where protAcc = '%s'", refLinkTable, specNoVersion); addRefLinks(conn, ds, &rlList); } else if (isUnsignedInt(specNoVersion)) { - sqlDyStringPrintf(ds, "select * from refLink where locusLinkId = '%s'", - specNoVersion); + sqlDyStringPrintf(ds, "select * from %s where locusLinkId = '%s'", + refLinkTable, specNoVersion); addRefLinks(conn, ds, &rlList); dyStringClear(ds); - sqlDyStringPrintf(ds, "select * from refLink where omimId = '%s'", specNoVersion); + sqlDyStringPrintf(ds, "select * from %s where omimId = '%s'", refLinkTable,specNoVersion); addRefLinks(conn, ds, &rlList); } else { - char *indexFile = getGenbankGrepIndex(db, hfs, "refLink", "mrnaAccProduct"); - sqlDyStringPrintf(ds, "select * from refLink where name like '%s%%'", - specNoVersion); + char *indexFile = getGenbankGrepIndex(db, hfs, refLinkTable, "mrnaAccProduct"); + sqlDyStringPrintf(ds, "select * from %s where name like '%s%%'", + refLinkTable, specNoVersion); addRefLinks(conn, ds, &rlList); if (indexFile != NULL) { - struct slName *accList = doGrepQuery(indexFile, "refLink", specNoVersion, + struct slName *accList = doGrepQuery(indexFile, refLinkTable, specNoVersion, NULL); addRefLinkAccs(conn, accList, &rlList); } else { dyStringClear(ds); - sqlDyStringPrintf(ds, "select * from refLink where product like '%%%s%%'", - specNoVersion); + sqlDyStringPrintf(ds, "select * from %s where product like '%%%s%%'", + refLinkTable, specNoVersion); addRefLinks(conn, ds, &rlList); } } } if (rlList != NULL) { struct hgPosTable *table = NULL; struct hash *hash = newHash(8); for (rl = rlList; rl != NULL; rl = rl->next) { char where[64]; struct genePredReader *gpr; struct genePred *gp; /* Don't return duplicate mrna accessions */