d185918b081b11e5e66133f6d147cf6220b54f81
angie
  Fri Aug 12 14:52:31 2016 -0700
Make hDbForTaxon more discerning about extra fluff in defaultDb and dbDb: order multiple matches in defaultDb by dbDb.orderKey, require active=1 and check database existence.  refs #17886

diff --git src/hg/lib/hdb.c src/hg/lib/hdb.c
index 00ddd70..646913f 100644
--- src/hg/lib/hdb.c
+++ src/hg/lib/hdb.c
@@ -460,53 +460,72 @@
 {
 char *db = NULL;
 char query[256];
 struct sqlConnection *centralConn = hConnectCentral();
 
 sqlSafef(query, sizeof(query),
     "select f.name from %s d,%s f "
     "where d.scientificName='%s' "
     "and d.name = f.name ", dbDbTable(), defaultDbTable(), sciName);
 db = sqlQuickString(centralConn, query);
 hDisconnectCentral(&centralConn);
 
 return db;
 }
 
+static char *firstExistingDbFromQuery(struct sqlConnection *conn, char *query)
+/* Perform query; result is a list of database names.  Clone and return the first database
+ * that exists, or NULL if the query has no results or none of the databases exist. */
+{
+char *db = NULL;
+struct slName *sl, *list = sqlQuickList(conn, query);
+// Walk results in query order; keep going until a name passes the existence check.
+for (sl = list;  sl != NULL && db == NULL;  sl = sl->next)
+    {
+    if (sqlDatabaseExists(sl->name))
+        db = cloneString(sl->name);
+    }
+slFreeList(&list);
+return db;
+}
+
 char *hDbForTaxon(int taxon)
-/* Get defaultDb database associated with NCBI taxon number if any. */
+/* Get default database associated with NCBI taxon number, or NULL if not found. */
 {
 char *db = NULL;
 if (taxon != 0)
     {
     struct sqlConnection *centralConn = hConnectCentral();
     char query[512];
+    // First try defaultDb.  Watch out for taxIds with multiple genomes (and hence multiple
+    // defaultDb matches).  For example, 9606 (human) has patch databases, each with a different
+    // genome.  Favor the "real" genome using orderKey and make sure databases are active in dbDb.
     sqlSafef(query, sizeof(query),
              "select d.name from %s d, %s f "
-             "where d.taxId = %d "
-             "and d.name not like 'zoo%%' "
-             "and d.name = f.name ", dbDbTable(), defaultDbTable(), taxon);
-    db = sqlQuickString(centralConn, query);
+             "where d.taxId = %d and d.name = f.name "
+             "and active = 1 order by orderKey",
+             dbDbTable(), defaultDbTable(), taxon);
+    db = firstExistingDbFromQuery(centralConn, query);
     // Rarely, we have one genome (like Baboon) that actually encompasses different species
     // and taxons (P. anubis and P. hamadryas).  defaultDb only has one (P. anubis), so the
     // query comes up empty for the other.  If so, try again using orderKey instead of defaultDb:
     if (isEmpty(db))
         {
         sqlSafef(query, sizeof(query),
-                 "select name from %s where taxId = %d order by orderKey limit 1",
+                 "select name from %s where taxId = %d and active = 1 order by orderKey",
                  dbDbTable(), taxon);
-        db = sqlQuickString(centralConn, query);
+        db = firstExistingDbFromQuery(centralConn, query);
         }
     hDisconnectCentral(&centralConn);
     }
 return db;
 }
 
 char *hDefaultDb()
 /* Return the default db if all else fails */
 {
 char *genome = cfgOptionDefault("defaultGenome", DEFAULT_GENOME);
 return hDefaultDbForGenome(genome);
 }
 
 char *hDefaultChrom(char *db)
 /* Return some sequence named in chromInfo from the given db, or NULL if db