9364b996a1edba88386fa5919e6d5a24a1ff910d angie Tue Feb 2 11:34:23 2016 -0800 Now that dbDb has a taxId field, update hDbForTaxon to use it instead of going through UniProt. Also added hTaxId() for looking up a db's taxId. diff --git src/hg/lib/hdb.c src/hg/lib/hdb.c index cbde9ad..3f66fdb 100644 --- src/hg/lib/hdb.c +++ src/hg/lib/hdb.c @@ -460,57 +460,56 @@ { char *db = NULL; char query[256]; struct sqlConnection *centralConn = hConnectCentral(); sqlSafef(query, sizeof(query), "select f.name from %s d,%s f " "where d.scientificName='%s' " "and d.name = f.name ", dbDbTable(), defaultDbTable(), sciName); db = sqlQuickString(centralConn, query); hDisconnectCentral(&centralConn); return db; } -char *hDbForTaxon(struct sqlConnection *conn, int taxon) -/* Get database associated with NCBI taxon number if any. */ +char *hDbForTaxon(int taxon) +/* Get defaultDb database associated with NCBI taxon number if any. */ { char *db = NULL; -char *binomial = NULL; -char query[256]; - -/* Figure out scientific name. */ if (taxon != 0) { - sqlSafef(query, sizeof(query), - "select binomial from uniProt.taxon where id=%d", taxon); - binomial = sqlQuickString(conn, query); - } -/* Get default database for that organism. */ -if (binomial != NULL) - { struct sqlConnection *centralConn = hConnectCentral(); + char query[512]; sqlSafef(query, sizeof(query), - "select f.name from %s d,%s f " - "where d.scientificName='%s' " + "select d.name from %s d, %s f " + "where d.taxId = %d " "and d.name not like 'zoo%%' " - "and d.name = f.name ", dbDbTable(), defaultDbTable(), binomial); + "and d.name = f.name ", dbDbTable(), defaultDbTable(), taxon); db = sqlQuickString(centralConn, query); + // Rarely, we have one genome (like Baboon) that actually encompasses different species + // and taxons (P. anubis and P. hamadryas). defaultDb only has one (P. anubis), so the + // query comes up empty for the other. 
If so, try again using orderKey instead of defaultDb: + if (isEmpty(db)) + { + sqlSafef(query, sizeof(query), + "select name from %s where taxId = %d order by orderKey limit 1", + dbDbTable(), taxon); + db = sqlQuickString(centralConn, query); + } hDisconnectCentral(&centralConn); } -freeMem(binomial); return db; } char *hDefaultDb() /* Return the default db if all else fails */ { char *genome = cfgOptionDefault("defaultGenome", DEFAULT_GENOME); return hDefaultDbForGenome(genome); } char *hDefaultChrom(char *db) /* Return some sequence named in chromInfo from the given db, or NULL if db * has no chromInfo. */ { if (trackHubDatabase(db)) @@ -2470,30 +2469,39 @@ } char *hFreezeDate(char *database) /* Return freeze date of database. Use freeMem when done. */ { return hDbDbField(database, "description"); } char *hFreezeDateOpt(char *database) /* Return freeze date of database or NULL if unknown database * Use freeMem when done. */ { return hDbDbOptionalField(database, "description"); } +int hTaxId(char *database) +/* Return taxId (NCBI Taxonomy ID) associated with database. */ +{ +char *taxIdStr = hDbDbOptionalField(database, "taxId"); +if (isNotEmpty(taxIdStr)) + return atoi(taxIdStr); +return 0; +} + int hOrganismID(char *database) /* Get organism ID from relational organism table */ /* Return 0 if not found. */ { char query[256]; struct sqlConnection *conn = hAllocConn(database); int ret; sqlSafef(query, sizeof(query), "select id from %s where name = '%s'", organismTable, hScientificName(database)); ret = sqlQuickNum(conn, query); hFreeConn(&conn); return ret; }