aba8125cb532df17beb7c7c9bc8467a43d09e3d6
braney
  Wed Feb 10 13:39:27 2016 -0800
changes to allow for GenBank metadata to be held in a common table.  #16809

diff --git src/hg/lib/hgFind.c src/hg/lib/hgFind.c
index f887c1f..fc2a080 100644
--- src/hg/lib/hgFind.c
+++ src/hg/lib/hgFind.c
@@ -26,31 +26,31 @@
 #include "snp.h"
 #include "refLink.h"
 #include "kgAlias.h"
 #include "kgProtAlias.h"
 #include "findKGAlias.h"
 #include "findKGProtAlias.h"
 #include "tigrCmrGene.h"
 #include "minGeneInfo.h"
 #include "pipeline.h"
 #include "hgConfig.h"
 #include "trix.h"
 #include "trackHub.h"
 #include "udc.h"
 #include "hubConnect.h"
 #include "bigBedFind.h"
-
+#include "genbank.h"
 
 // Exhaustive searches can lead to timeouts on CGIs (#11626).
 // However, hgGetAnn requires exhaustive searches (#11665).
 #define NONEXHAUSTIVE_SEARCH_LIMIT 500
 #define EXHAUSTIVE_SEARCH_REQUIRED  -1
 
 extern struct cart *cart;
 char *hgAppName = "";
 
 /* alignment tables to check when looking for mrna alignments */
 static char *estTables[] = { "intronEst", "all_est", "xenoEst", NULL };
 static char *estLabels[] = { "Spliced ESTs", "ESTs", "Other ESTs", NULL };
 static char *mrnaTables[] = { "all_mrna", "xenoMrna", NULL };
 static char *mrnaLabels[] = { "mRNAs", "Other mRNAs", NULL };
 static struct dyString *hgpMatchNames = NULL;
@@ -422,34 +422,34 @@
 freeMem(escapedKey);
 return idList;
 }
 
 
 static char *MrnaIDforGeneName(char *db, char *geneName)
 /* return mRNA ID for a gene name */
 {
 struct sqlConnection *conn;
 struct sqlResult *sr = NULL;
 char query[256];
 char **row;
 char *result = NULL;
 
 conn = hAllocConn(db);
-if (sqlTableExists(conn, "refLink"))
+if (sqlTableExists(conn, refLinkTable))
     {
-    sqlSafef(query, sizeof(query), "SELECT mrnaAcc FROM refLink WHERE name='%s'",
-          geneName);
+    sqlSafef(query, sizeof(query), "SELECT mrnaAcc FROM %s WHERE name='%s'",
+          refLinkTable, geneName);
     sr = sqlGetResult(conn, query);
     if ((row = sqlNextRow(sr)) != NULL)
         {
         result = cloneString(row[0]);
         }
     else
         {
         result = NULL;
         }
 
     sqlFreeResult(&sr);
     }
 hFreeConn(&conn);
 return result;
 }
@@ -1224,31 +1224,31 @@
 #endif
 
 static boolean mrnaInfo(char *acc, struct sqlConnection *conn, 
                                 char **mrnaType)
 /* Sets *mrnaType to mrna/est type for the accession */
 /* Ignores returned values if parameters are NULL */
 /* Return TRUE if search succeeded, else FALSE */
 /* NOTE: caller must free mrnaType */
 {
 char query[256];
 struct sqlResult *sr;
 char **row;
 int ret;
 
 sqlSafef(query, sizeof(query),
-      "select type from gbCdnaInfo where acc = '%s'", acc);
+      "select type from %s where acc = '%s'", gbCdnaInfoTable, acc);
 sr = sqlGetResult(conn, query);
 if ((row = sqlNextRow(sr)) != NULL)
     {
     if (mrnaType != NULL)
         *mrnaType = cloneString(row[0]);
     ret = TRUE;
     }
 else
     ret = FALSE;
 sqlFreeResult(&sr);
 return ret;
 }
 
 boolean isRefSeqAcc(char *acc)
 /* Return TRUE if acc looks like a RefSeq acc. */
@@ -1381,56 +1381,65 @@
     dyStringPrintf(dy, "\n");
     pos->description = cloneString(dy->string);
     slAddHead(&table->posList, pos);
     }
 slReverse(&table->posList);
 freeDyString(&dy);
 }
 
 static boolean findMrnaPos(char *db, char *acc,  struct hgPositions *hgp)
 /* Find MRNA or EST position(s) from accession number.
  * Look to see if it's an mRNA or EST.  Fill in hgp and return
  * TRUE if it is, otherwise return FALSE. */
 /* NOTE: this excludes RefSeq mrna's, as they are currently
  * handled in findRefGenes(), which is called later in the main function */
 {
-if (!hTableExists(db, "gbCdnaInfo"))
+struct sqlConnection *conn = hAllocConn(db);
+if (!sqlTableExists(conn, gbCdnaInfoTable))
+    {
+    hFreeConn(&conn);
     return FALSE;
+    }
 char *type = mrnaType(db, acc); 
 if (isEmpty(type))
+    {
+    hFreeConn(&conn);
     /* this excludes refseq mrna's, and accessions with
      * invalid column type in mrna table (refseq's and ests) */
     return FALSE;
+    }
 char lowerType[16];
-struct sqlConnection *conn = hAllocConn(db);
 char **tables, **labels, *tableName;
 boolean gotResults = FALSE;
 
 safecpy(lowerType, sizeof(lowerType), type);
 tolowers(lowerType);
 if (sameWord(lowerType, "mrna"))
     {
     tables = mrnaTables;
     labels = mrnaLabels;
     }
 else if (sameWord(lowerType, "est"))
     {
     tables = estTables;
     labels = estLabels;
     }
 else
+    {
+    hFreeConn(&conn);
     return FALSE;
+    }
 
 while ((tableName = *tables++) != NULL)
     {
     char *label = *labels++;
     struct psl *pslList = NULL;
     if (sameString(tableName, "intronEst") && !sqlTableExists(conn, tableName))
 	{
 	struct slName *c, *chromList = hChromList(db);
 	char splitTable[HDB_MAX_TABLE_STRING];
 	for (c = chromList;  c != NULL;  c = c->next)
 	    {
 	    safef(splitTable, sizeof(splitTable), "%s_%s", c->name, tableName);
 	    struct psl *chrPslList = getPslFromTable(conn, db, splitTable, acc);
 	    if (pslList == NULL)
 		pslList = chrPslList;
@@ -1558,32 +1567,32 @@
      * in one step in SQL just because it somehow is much
      * faster this way (like 100x faster) when using mySQL. */
     field = tables[i];
     if (!hTableExists(db, field))
 	continue;
     if ((grepIndexFile = getGenbankGrepIndex(db, hfs, field, "idName")) != NULL)
 	idList = genbankGrepQuery(grepIndexFile, field, key);
     else
         idList = genbankSqlFuzzyQuery(conn, field, key, limitResults);
     for (idEl = idList;
          idEl != NULL && (limitResults == EXHAUSTIVE_SEARCH_REQUIRED || rowCount < limitResults);
          idEl = idEl->next)
         {
         /* don't check srcDb to exclude refseq for compat with older tables */
 	sqlSafef(query, sizeof(query),
-	      "select acc, organism from gbCdnaInfo where %s = '%s' "
-	      " and type = 'mRNA'", field, idEl->name);
+	      "select acc, organism from %s where %s = '%s' "
+	      " and type = 'mRNA'", gbCdnaInfoTable, field, idEl->name);
         // limit results to avoid CGI timeouts (#11626).
         if (limitResults != EXHAUSTIVE_SEARCH_REQUIRED)
             sqlSafefAppend(query, sizeof(query), " limit %d", limitResults);
 	sr = sqlGetResult(conn, query);
 	while ((row = sqlNextRow(sr)) != NULL)
 	    {
 	    char *acc = row[0];
             /* will use this later to distinguish xeno mrna */
 	    int organismID = sqlUnsigned(row[1]);
 	    if (!isRefSeqAcc(acc) && !hashLookup(hash, acc))
 		{
 		el = newSlName(acc);
                 slAddHead(&list, el);
                 hashAddInt(hash, acc, organismID);
                 // limit results to avoid CGI timeouts (#11626).
@@ -1709,47 +1718,47 @@
     else
         {
         /* display mRNA details page -- need to add dummy CGI variables*/
         dyStringPrintf(dy, "<A HREF=\"%s%cg=%s&i=%s&c=0&o=0&l=0&r=0",
 		       hgcName(), hgAppCombiner, mrnaTable, acc);
         }
     if (ui != NULL)
         dyStringPrintf(dy, "&%s", ui);
     dyStringPrintf(dy, "%s\">", 
                hgp->extraCgi);
     dyStringPrintf(dy, "%s</A>", acc);
 
     /* print description for item, or lacking that, the product name */
     safef(description, sizeof(description), "%s", "n/a"); 
     sqlSafef(query, sizeof(query), 
-        "select description.name from gbCdnaInfo,description"
-        " where gbCdnaInfo.acc = '%s' and gbCdnaInfo.description = description.id", acc);
+        "select d.name from %s g,%s d"
+        " where g.acc = '%s' and g.description = d.id", gbCdnaInfoTable, descriptionTable, acc);
     sqlQuickQuery(conn, query, description, sizeof(description));
     if (sameString(description, "n/a"))
         {
         /* look for product name */
         sqlSafef(query, sizeof(query), 
-            "select productName.name from gbCdnaInfo,productName"
-            " where gbCdnaInfo.acc = '%s' and gbCdnaInfo.productName = productName.id",
-                 acc);
+            "select p.name from %s g,%s p"
+            " where g.acc = '%s' and g.productName = p.id",
+                 gbCdnaInfoTable, productNameTable, acc);
         sqlQuickQuery(conn, query, product, sizeof(product));
         if (!sameString(product, "n/a"))
             {
             /* get organism name */
             sqlSafef(query, sizeof(query), 
-                "select organism.name from gbCdnaInfo,organism"
-                " where gbCdnaInfo.acc = '%s' and gbCdnaInfo.organism = organism.id", acc);
+                "select o.name from %s g,%s o"
+                " where g.acc = '%s' and g.organism = o.id", gbCdnaInfoTable, organismTable, acc);
             *organism = 0;
             sqlQuickQuery(conn, query, organism, sizeof(organism));
             safef(description, sizeof(description), "%s%s%s",
                     *organism ? organism : "",
                     *organism ? ", " : "",
                     product);
             }
         }
     if (!sameString(description, "n/a"))
         /* print description if it has been loaded */
         dyStringPrintf(dy, " - %s", description);
     dyStringPrintf(dy, "\n");
     pos->description = cloneString(dy->string);
 
     /* remove processed element from accession list */
@@ -2071,93 +2080,93 @@
     }
 sqlFreeResult(&sr);
 }
 
 static void addRefLinkAccs(struct sqlConnection *conn, struct slName *accList,
 			   struct refLink **pList)
 /* Query database and add returned refLinks to head of list. */
 {
 struct slName *accEl = NULL;
 struct sqlResult *sr = NULL;
 char **row = NULL;
 char query[256];
 
 for (accEl = accList;  accEl != NULL;  accEl = accEl->next)
     {
-    sqlSafef(query, sizeof(query), "select * from refLink where mrnaAcc = '%s'",
-	  accEl->name);
+    sqlSafef(query, sizeof(query), "select * from %s where mrnaAcc = '%s'",
+	  refLinkTable, accEl->name);
     sr = sqlGetResult(conn, query);
     while ((row = sqlNextRow(sr)) != NULL)
 	{
 	struct refLink *rl = refLinkLoad(row);
 	slAddHead(pList, rl);
 	}
     sqlFreeResult(&sr);
     }
 }
 
 static boolean findRefGenes(char *db, struct hgFindSpec *hfs, char *spec,
 			    struct hgPositions *hgp)
 /* Look up refSeq genes in table. */
 {
 struct sqlConnection *conn = hAllocConn(db);
 struct dyString *ds = newDyString(256);
 struct refLink *rlList = NULL, *rl;
-boolean gotRefLink = hTableExists(db, "refLink");
+boolean gotRefLink = sqlTableExists(conn, refLinkTable);
 boolean found = FALSE;
 char *specNoVersion = cloneString(spec);
 // chop off the version number, e.g. "NM_000454.4 ", 
 //  but if spec starts with "." like ".stuff" then specNoVersion is entirely empty.
 (void) chopPrefix(specNoVersion);  
 if (gotRefLink && isNotEmpty(specNoVersion))
     {
     if (startsWith("NM_", specNoVersion) || startsWith("NR_", specNoVersion) || startsWith("XM_", specNoVersion))
 	{
-	sqlDyStringPrintf(ds, "select * from refLink where mrnaAcc = '%s'", specNoVersion);
+	sqlDyStringPrintf(ds, "select * from %s where mrnaAcc = '%s'", refLinkTable, specNoVersion);
 	addRefLinks(conn, ds, &rlList);
 	}
     else if (startsWith("NP_", specNoVersion) || startsWith("XP_", specNoVersion))
         {
-	sqlDyStringPrintf(ds, "select * from refLink where protAcc = '%s'", specNoVersion);
+	sqlDyStringPrintf(ds, "select * from %s where protAcc = '%s'", refLinkTable, specNoVersion);
 	addRefLinks(conn, ds, &rlList);
 	}
     else if (isUnsignedInt(specNoVersion))
         {
-	sqlDyStringPrintf(ds, "select * from refLink where locusLinkId = '%s'",
-		       specNoVersion);
+	sqlDyStringPrintf(ds, "select * from %s where locusLinkId = '%s'",
+		       refLinkTable, specNoVersion);
 	addRefLinks(conn, ds, &rlList);
 	dyStringClear(ds);
-	sqlDyStringPrintf(ds, "select * from refLink where omimId = '%s'", specNoVersion);
+	sqlDyStringPrintf(ds, "select * from %s where omimId = '%s'", refLinkTable,specNoVersion);
 	addRefLinks(conn, ds, &rlList);
 	}
     else 
 	{
-	char *indexFile = getGenbankGrepIndex(db, hfs, "refLink", "mrnaAccProduct");
-	sqlDyStringPrintf(ds, "select * from refLink where name like '%s%%'",
-		       specNoVersion);
+	char *indexFile = getGenbankGrepIndex(db, hfs, refLinkTable, "mrnaAccProduct");
+	sqlDyStringPrintf(ds, "select * from %s where name like '%s%%'",
+		       refLinkTable, specNoVersion);
 	addRefLinks(conn, ds, &rlList);
 	if (indexFile != NULL)
 	    {
-	    struct slName *accList = doGrepQuery(indexFile, "refLink", specNoVersion,
+	    struct slName *accList = doGrepQuery(indexFile, refLinkTable, specNoVersion,
 						 NULL);
 	    addRefLinkAccs(conn, accList, &rlList);
 	    }
 	else
 	    {
 	    dyStringClear(ds);
-	    sqlDyStringPrintf(ds, "select * from refLink where product like '%%%s%%'",
-			   specNoVersion);
+	    sqlDyStringPrintf(ds, "select * from %s where product like '%%%s%%'",
+			   refLinkTable, specNoVersion);
 	    addRefLinks(conn, ds, &rlList);
 	    }
 	}
     }
 if (rlList != NULL)
     {
     struct hgPosTable *table = NULL;
     struct hash *hash = newHash(8);
     for (rl = rlList; rl != NULL; rl = rl->next)
         {
         char where[64];
         struct genePredReader *gpr;
         struct genePred *gp;
 
         /* Don't return duplicate mrna accessions */