d185918b081b11e5e66133f6d147cf6220b54f81
angie
  Fri Aug 12 14:52:31 2016 -0700
Make hDbForTaxon more discerning about extra fluff in defaultDb and dbDb: order multiple matches in defaultDb by dbDb.orderKey, require active=1 and check database existence. refs #17886

diff --git src/hg/lib/hdb.c src/hg/lib/hdb.c
index 00ddd70..646913f 100644
--- src/hg/lib/hdb.c
+++ src/hg/lib/hdb.c
@@ -460,53 +460,74 @@
 {
 char *db = NULL;
 char query[256];
 struct sqlConnection *centralConn = hConnectCentral();
 
 sqlSafef(query, sizeof(query),
     "select f.name from %s d,%s f "
     "where d.scientificName='%s' "
     "and d.name = f.name ", dbDbTable(), defaultDbTable(), sciName);
 db = sqlQuickString(centralConn, query);
 hDisconnectCentral(&centralConn);
 
 return db;
 }
 
+static char *firstExistingDbFromQuery(struct sqlConnection *conn, char *query)
+/* Perform query; result is a list of database names.  Walk the names in query order, then clone
+ * and return the first database that exists, or NULL if there are no results or none exist. */
+{
+char *db = NULL;
+struct slName *sl, *list = sqlQuickList(conn, query);
+for (sl = list;  sl != NULL;  sl = sl->next)
+    {
+    if (sqlDatabaseExists(sl->name))
+        {
+        db = cloneString(sl->name);
+        break;
+        }
+    }
+slFreeList(&list);
+return db;
+}
+
 char *hDbForTaxon(int taxon)
-/* Get defaultDb database associated with NCBI taxon number if any. */
+/* Get default database associated with NCBI taxon number, or NULL if not found. */
 {
 char *db = NULL;
 if (taxon != 0)
     {
     struct sqlConnection *centralConn = hConnectCentral();
     char query[512];
+    // First try defaultDb.  Watch out for taxIds with multiple genomes (and hence multiple
+    // defaultDb matches).  For example, 9606 (human) has patch databases, each with a different
+    // genome.  Favor the "real" genome using orderKey and make sure databases are active in dbDb.
     sqlSafef(query, sizeof(query),
              "select d.name from %s d, %s f "
-             "where d.taxId = %d "
-             "and d.name not like 'zoo%%' "
-             "and d.name = f.name ", dbDbTable(), defaultDbTable(), taxon);
-    db = sqlQuickString(centralConn, query);
+             "where d.taxId = %d and d.name = f.name "
+             "and active = 1 order by orderKey",
+             dbDbTable(), defaultDbTable(), taxon);
+    db = firstExistingDbFromQuery(centralConn, query);
     // Rarely, we have one genome (like Baboon) that actually encompasses different species
     // and taxons (P. anubis and P. hamadryas).  defaultDb only has one (P. anubis), so the
     // query comes up empty for the other.  If so, try again using orderKey instead of defaultDb:
     if (isEmpty(db))
         {
         sqlSafef(query, sizeof(query),
-                 "select name from %s where taxId = %d order by orderKey limit 1",
+                 "select name from %s where taxId = %d and active = 1 order by orderKey",
                  dbDbTable(), taxon);
-        db = sqlQuickString(centralConn, query);
+        db = firstExistingDbFromQuery(centralConn, query);
         }
     hDisconnectCentral(&centralConn);
     }
 return db;
 }
 
 char *hDefaultDb()
 /* Return the default db if all else fails */
 {
 char *genome = cfgOptionDefault("defaultGenome", DEFAULT_GENOME);
 return hDefaultDbForGenome(genome);
 }
 
 char *hDefaultChrom(char *db)
 /* Return some sequence named in chromInfo from the given db, or NULL if db