3846f517009c43abc65d227a4695645c9b5f3e8a braney Fri Feb 15 18:31:21 2013 -0800 changes necessary to support assembly hubs (#8072) diff --git src/hg/lib/hdb.c src/hg/lib/hdb.c index fc6fe7d..97ebc03 100644 --- src/hg/lib/hdb.c +++ src/hg/lib/hdb.c @@ -25,30 +25,31 @@ #include "liftOver.h" #include "liftOverChain.h" #include "grp.h" #include "twoBit.h" #include "ra.h" #include "genbank.h" #include "chromInfo.h" #ifndef GBROWSE #include "axtInfo.h" #include "ctgPos.h" #include "hubConnect.h" #include "customTrack.h" #include "hgFind.h" #endif /* GBROWSE */ #include "hui.h" +#include "trackHub.h" #ifdef LOWELAB #define DEFAULT_PROTEINS "proteins060115" #define DEFAULT_GENOME "Pyrobaculum aerophilum" #else #define DEFAULT_PROTEINS "proteins" #define DEFAULT_GENOME "Human" #endif static struct sqlConnCache *hdbCc = NULL; /* cache for primary database connection */ static struct sqlConnCache *centralCc = NULL; static char *centralDb = NULL; static struct sqlConnCache *centralArchiveCc = NULL; @@ -58,30 +59,34 @@ static char *hdbTrackDb = NULL; /* cached list of tables in databases. This keeps a hash of databases to * hashes of track/table name to slName list of actual table names, which * might be split. Since individual tables can be mapped to different * profiles, and this information is only available when processing trackDb, * another table is kept to determine if these need to be checked. */ static struct hash *tableList = NULL; // db to track to tables static struct hash *tableListProfChecked = NULL; // profile:db that have been check static struct chromInfo *lookupChromInfo(char *db, char *chrom) /* Query db.chromInfo for the first entry matching chrom. */ { struct chromInfo *ci = NULL; + +if (trackHubDatabase(db) && ((ci = trackHubChromInfo(db, chrom)) != NULL)) + return ci; + struct sqlConnection *conn = hAllocConn(db); struct sqlResult *sr = NULL; char **row = NULL; char query[256]; safef(query, sizeof(query), "select * from chromInfo where chrom = '%s'", chrom); sr = sqlGetResult(conn, query); if ((row = sqlNextRow(sr)) != NULL) { ci = chromInfoLoad(row); } sqlFreeResult(&sr); hFreeConn(&conn); return ci; } @@ -270,30 +275,36 @@ /* Function to check if this is a valid db name */ { static struct hash *dbsChecked = NULL; if (dbsChecked) { struct hashEl *hel = hashLookup(dbsChecked, database); if (hel != NULL) return ptToInt(hel->val); } else dbsChecked = newHash(0); +if (trackHubDatabase(database)) + { + hashAddInt(dbsChecked, database, TRUE); + return TRUE; + } + struct sqlConnection *conn = hConnectCentral(); char buf[128]; char query[256]; char *escaped = sqlEscapeString(database); safef(query, sizeof(query), "select name from dbDb where name = '%s'", escaped); freez(&escaped); boolean res = (sqlQuickQuery(conn, query, buf, sizeof(buf)) != NULL); if (res) { // this is done instead of sqlDatabaseExists() since it uses the cache, // which will recycle free connections for new databases struct sqlConnection *conn2 = hAllocConnMaybe(database); res = (conn2 != NULL); hFreeConn(&conn2); } @@ -304,49 +315,60 @@ boolean hDbIsActive(char *database) /* Function to check if this is a valid and active db name */ { static struct hash *dbsChecked = NULL; if (dbsChecked) { void *hashDb = hashFindVal(dbsChecked, database); if (hashDb) return(hashIntVal(dbsChecked, database)); } else dbsChecked = newHash(0); +if (trackHubDatabase(database)) + { + hashAddInt(dbsChecked, database, TRUE); + return TRUE; + } + struct sqlConnection *conn = hConnectCentral(); char buf[128]; char query[256]; boolean res = FALSE; safef(query, sizeof(query), "select name from dbDb where name = '%s' and active = 1", database); res = (sqlQuickQuery(conn, query, buf, sizeof(buf)) != NULL); hDisconnectCentral(&conn); hashAddInt(dbsChecked, database, res); return res; } char *hDefaultDbForGenome(char *genome) /* Purpose: Return the default database matching the Genome. * param Genome - The Genome for which we are trying to get the * default database. * return - The default database name for this Genome * Free the returned database name. */ { +char *dbName; + +if ((dbName = trackHubGenomeNameToDb(genome)) != NULL) + return dbName; + struct sqlConnection *conn = hConnectCentral(); struct sqlResult *sr = NULL; char **row; struct defaultDb *db = NULL; char query [256]; char *result = NULL; if (NULL == genome) { genome = cfgOptionDefault("defaultGenome", DEFAULT_GENOME); } /* Get proper default from defaultDb table */ safef(query, sizeof(query), "select * from defaultDb where genome = '%s'", genome); @@ -370,33 +392,36 @@ if (db == NULL) errAbort("Can't find genome \"%s\" in central database table defaultDb.\n", genome); } sqlFreeResult(&sr); hDisconnectCentral(&conn); AllocArray(result, strlen(db->name) + 1); strcpy(result, db->name); defaultDbFree(&db); return result; } char *hDefaultGenomeForClade(char *clade) /* Return highest relative priority genome for clade. */ { +char *genome = NULL; +if ((genome = trackHubCladeToGenome(clade)) != NULL) + return genome; + struct sqlConnection *conn = hConnectCentral(); char query[512]; -char *genome = NULL; /* Get the top-priority genome *with an active database* so if genomeClade * gets pushed from hgwdev to hgwbeta/RR with genomes whose dbs haven't been * pushed yet, they'll be ignored. */ safef(query, sizeof(query), "select genomeClade.genome from genomeClade,dbDb " "where genomeClade.clade = '%s' and genomeClade.genome = dbDb.genome " "and dbDb.active = 1 " "order by genomeClade.priority limit 1", clade); genome = sqlQuickString(conn, query); hDisconnectCentral(&conn); return genome; } char *hDbForSciName(char *sciName) @@ -445,49 +470,54 @@ freeMem(binomial); return db; } char *hDefaultDb() /* Return the default db if all else fails */ { char *genome = cfgOptionDefault("defaultGenome", DEFAULT_GENOME); return hDefaultDbForGenome(genome); } char *hDefaultChrom(char *db) /* Return some sequence named in chromInfo from the given db, or NULL if db * has no chromInfo. */ { +if (trackHubDatabase(db)) + return trackHubDefaultChrom(db); + static struct hash *hash = NULL; struct hashEl *hel = NULL; if (hash == NULL) hash = hashNew(0); hel = hashStore(hash, db); if (hel->val == NULL) { struct sqlConnection *conn = hAllocConn(db); if (sqlTableExists(conn, "chromInfo")) hel->val = sqlQuickString(conn, "select chrom from chromInfo limit 1"); hFreeConn(&conn); } return hel->val; } int hChromCount(char *db) /* Return the number of chromosomes (scaffolds etc.) in the given db. */ { +if (trackHubDatabase(db)) + return trackHubChromCount(db); struct sqlConnection *conn = hAllocConn(db); int count = sqlQuickNum(conn, "select count(*) from chromInfo"); hFreeConn(&conn); return count; } struct sqlConnection *hAllocConn(char *db) /* Get free connection if possible. If not allocate a new one. */ { if (hdbCc == NULL) hdbCc = sqlConnCacheNew(); return sqlConnCacheAlloc(hdbCc, db); } struct sqlConnection *hAllocConnMaybe(char *db) @@ -961,88 +991,107 @@ safef(retNibName, HDB_MAX_PATH_STRING, "%s/%s.2bit", nibPath, db); if (!fileExists(retNibName)) { /* if 2bit file isn't there, try up one directory */ safef(retNibName, HDB_MAX_PATH_STRING, "%s/../%s.2bit", nibPath, db); if (!fileExists(retNibName)) { /* still no 2bit, let's just try to find a nib */ safef(retNibName, HDB_MAX_PATH_STRING, "%s/%s.nib", nibPath, chromName); } } } -static struct dnaSeq *fetchTwoBitSeq(char *fileName, char *seqName, int start, int end) + +static struct dnaSeq *fetchTwoBitSeqExt(char *fileName, char *seqName, int start, int end, boolean useUdc) /* fetch a sequence from a 2bit, caching open of the file */ { static struct twoBitFile *tbf = NULL; // cache of open file if ((tbf == NULL) || !sameString(fileName, tbf->fileName)) { twoBitClose(&tbf); - tbf = twoBitOpen(fileName); + tbf = twoBitOpenExt(fileName, useUdc); } struct dnaSeq *seq = twoBitReadSeqFrag(tbf, seqName, start, end); return seq; } +static struct dnaSeq *fetchTwoBitSeq(char *fileName, char *seqName, int start, int end) +/* fetch a sequence from a 2bit, caching open of the file */ +{ +return fetchTwoBitSeqExt(fileName, seqName, start, end, FALSE); +} + struct dnaSeq *hFetchSeqMixed(char *fileName, char *seqName, int start, int end) /* Fetch mixed case sequence. */ { if (twoBitIsFile(fileName)) return fetchTwoBitSeq(fileName, seqName, start, end); else return nibLoadPartMasked(NIB_MASK_MIXED, fileName, start, end-start); } struct dnaSeq *hFetchSeq(char *fileName, char *seqName, int start, int end) /* Fetch sequence from file. If it is a .2bit file then fetch the named sequence. If it is .nib then just ignore seqName. */ { if (twoBitIsFile(fileName)) { struct dnaSeq *seq = fetchTwoBitSeq(fileName, seqName, start, end); tolowers(seq->dna); return seq; } return nibLoadPart(fileName, start, end-start); } struct dnaSeq *hChromSeqMixed(char *db, char *chrom, int start, int end) /* Return mixed case (repeats in lower case) DNA from chromosome. */ { char fileName[HDB_MAX_PATH_STRING]; hNibForChrom(db, chrom, fileName); +if(trackHubDatabase(db)) + { + struct dnaSeq *seq = fetchTwoBitSeqExt(fileName, chrom, start, end, TRUE); + return seq; + } return hFetchSeqMixed(fileName, chrom, start, end); } struct dnaSeq *hChromSeqMixedFromPath(char *nibPath, char *db, char *chrom, int start, int end) /* Return mixed case (repeats in lower case) DNA from chromosome, given an * input nib path. */ { char fileName[HDB_MAX_PATH_STRING]; hNibForChromFromPath(nibPath, db, chrom, fileName); return hFetchSeqMixed(fileName, chrom, start, end); } struct dnaSeq *hChromSeq(char *db, char *chrom, int start, int end) /* Return lower case DNA from chromosome. */ { char fileName[HDB_MAX_PATH_STRING]; hNibForChrom(db, chrom, fileName); +if(trackHubDatabase(db)) + { + struct dnaSeq *seq = fetchTwoBitSeqExt(fileName, chrom, start, end, TRUE); + tolowers(seq->dna); + return seq; + } + return hFetchSeq(fileName, chrom, start, end); } struct dnaSeq *hChromSeqFromPath(char *nibPath, char *db, char *chrom, int start, int end) /* Return lower case DNA from chromosome. */ { char fileName[HDB_MAX_PATH_STRING]; hNibForChromFromPath(nibPath, db, chrom, fileName); return hFetchSeq(fileName, chrom, start, end); } struct dnaSeq *hSeqForBed(char *db, struct bed *bed) /* Get the sequence associated with a particular bed concatenated together. */ { @@ -1099,31 +1148,31 @@ s = sqlQuickQuery(conn, query, buf, sizeof(buf)); if (s == NULL) { s = ""; ok = FALSE; } safef(retBand, HDB_MAX_BAND_STRING, "%s%s", (isDmel ? "" : skipChr(chrom)), buf); return ok; } boolean hChromBand(char *db, char *chrom, int pos, char retBand[HDB_MAX_BAND_STRING]) /* Return text string that says what band pos is on. * Return FALSE if not on any band, or table missing. */ { -if (!hTableExists(db, "cytoBand")) +if (trackHubDatabase(db) || !hTableExists(db, "cytoBand")) return FALSE; else { struct sqlConnection *conn = hAllocConn(db); boolean ok = hChromBandConn(conn, chrom, pos, retBand); hFreeConn(&conn); return ok; } } boolean hScaffoldPos(char *db, char *chrom, int start, int end, char **retScaffold, int *retStart, int *retEnd) /* Return the scaffold, and start end coordinates on a scaffold, for * a chromosome range. If the range extends past end of a scaffold, * it is truncated to the scaffold end. @@ -1192,30 +1241,32 @@ touppers(seq->dna); } return seq; } struct dnaSeq *hLoadChrom(char *db, char *chromName) /* Fetch entire chromosome into memory. */ { int size = hChromSize(db, chromName); return hDnaFromSeq(db, chromName, 0, size, dnaLower); } struct slName *hAllChromNames(char *db) /* Get list of all chromosome names in database. */ { +if (trackHubDatabase(db)) + return trackHubAllChromNames(db); struct slName *list = NULL; struct sqlConnection *conn = hAllocConn(db); struct sqlResult *sr; char **row; sr = sqlGetResult(conn, "select chrom from chromInfo"); while ((row = sqlNextRow(sr)) != NULL) { struct slName *el = slNameNew(row[0]); slAddHead(&list, el); } sqlFreeResult(&sr); hFreeConn(&conn); return list; } @@ -2103,30 +2154,35 @@ } char *hPdbFromGdb(char *genomeDb) /* Find proteome database name given genome database name */ /* With the retirement of the proteome browser, we always use the most * recent version of the database which is called "proteome" */ { return "proteome"; } static char *hFreezeDbConversion(char *database, char *freeze) /* Find freeze given database or vice versa. Pass in NULL * for parameter that is unknown and it will be returned * as a result. This result can be freeMem'd when done. */ { +if ((database != NULL) && trackHubDatabase(database)) + { + return trackHubAssemblyField(database, "description"); + } + struct sqlConnection *conn = hConnectCentral(); struct sqlResult *sr; char **row; char *ret = NULL; struct dyString *dy = newDyString(128); if (database != NULL) dyStringPrintf(dy, "select description from dbDb where name = '%s'", database); else if (freeze != NULL) dyStringPrintf(dy, "select name from dbDb where description = '%s'", freeze); else internalErr(); sr = sqlGetResult(conn, dy->string); if ((row = sqlNextRow(sr)) != NULL) ret = cloneString(row[0]); @@ -2204,31 +2260,40 @@ hDisconnectCentral(&conn); return res; } char *hArchiveDbDbOptionalField(char *database, char *field) /* Wrapper for hArchiveOrCentralDbDbOptionalField to * look up in the archive database. */ { return hArchiveOrCentralDbDbOptionalField(database, field, TRUE); } char *hDbDbOptionalField(char *database, char *field) /* Wrapper for hArchiveOrCentralDbDbOptionalField to * look up in the regular central database. */ { -return hArchiveOrCentralDbDbOptionalField(database, field, FALSE); +if (trackHubDatabase(database)) + { + if (sameString(field, "genome")) + field = "organism"; + return trackHubAssemblyField(database, field); + } + +char *res = hArchiveOrCentralDbDbOptionalField(database, field, FALSE); + +return res; } char *hDbDbField(char *database, char *field) /* Look up field in dbDb table keyed by database. * Free this string when you are done. */ { char *res = hDbDbOptionalField(database, field); if (res == NULL) errAbort("Can't find %s for %s", field, database); return res; } char *hDefaultPos(char *database) /* Return default chromosome position for the organism associated with database. use freeMem on @@ -2328,58 +2393,64 @@ } boolean hGotClade() /* Return TRUE if central db contains clade info tables. */ { struct sqlConnection *conn = hConnectCentral(); boolean gotClade = hGotCladeConn(conn); hDisconnectCentral(&conn); return gotClade; } char *hClade(char *genome) /* If central database has clade tables, return the clade for the * given genome; otherwise return NULL. */ { +char *clade; +if ((clade = trackHubAssemblyClade(genome)) != NULL) + return clade; + struct sqlConnection *conn = hConnectCentral(); if (hGotCladeConn(conn)) { char query[512]; - char *clade; safef(query, sizeof(query), "select clade from genomeClade where genome = '%s'", genome); clade = sqlQuickString(conn, query); hDisconnectCentral(&conn); if (clade == NULL) { warn("Warning: central database genomeClade doesn't contain " "genome \"%s\"", genome); return cloneString("other"); } else return clade; } else { hDisconnectCentral(&conn); return NULL; } } struct dbDb *hDbDb(char *database) /* Return dbDb entry for a database */ { +if (trackHubDatabase(database)) + return trackHubDbDbFromAssemblyDb(database); + struct sqlConnection *conn = hConnectCentral(); struct sqlResult *sr; char **row; struct dbDb *db = NULL; struct dyString *ds = dyStringNew(0); dyStringPrintf(ds, "select * from dbDb where name='%s'", database); sr = sqlGetResult(conn, ds->string); if ((row = sqlNextRow(sr)) != NULL) db = dbDbLoad(row); sqlFreeResult(&sr); hDisconnectCentral(&conn); dyStringFree(&ds); return db; } @@ -3448,30 +3519,33 @@ else { trackDbFree(&oneRow); } } } hFreeConn(&conn); return exists; } static struct trackDb *loadTrackDb(char *db, char *where) /* Load each trackDb table. Will put supertracks in parent field of given tracks but * these are still in track list. */ { +if (trackHubDatabase(db)) + return NULL; + struct trackDb *tdbList = NULL; struct slName *tableList = hTrackDbList(), *one; boolean foundOne = FALSE; struct hash *loaded = hashNew(0); for (one = tableList; one != NULL; one = one->next) { if (loadOneTrackDb(db, where, one->name, &tdbList, loaded)) foundOne = TRUE; } if (!foundOne) errAbort("can not find any trackDb tables for %s, check db.trackDb specification in hg.conf", db); slNameFreeList(&tableList); hashFree(&loaded); @@ -3993,31 +4067,33 @@ trackDbFree(&tdb); } hFreeConn(&conn); return ret; } static struct hash *makeTrackSettingsHash(char *db) /* Create a hash of hashes with all track settings for database. * The returned hash is keyed by track. The contained hashes * are keyed by tags and contain generic text values, corresponding * to the trackDb.ra settings for that track. Generally you want to * call the version that caches results below instead. */ { struct hash *hash = hashNew(0); struct slName *trackTable, *trackTableList = hTrackDbList(); -struct sqlConnection *conn = hAllocConn(db); +struct sqlConnection *conn =NULL; +if (!trackHubDatabase(db)) + conn = hAllocConn(db); for (trackTable = trackTableList; trackTable != NULL; trackTable = trackTable->next) { if (hTableExists(db, trackTable->name)) { char query[512]; safef(query, sizeof(query), "select tableName,settings from %s", trackTable->name); struct sqlResult *sr = sqlGetResult(conn, query); char **row; while ((row = sqlNextRow(sr)) != NULL) { struct hash *settings = trackDbSettingsFromString(row[1]); hashAdd(hash, row[0], settings); } sqlFreeResult(&sr); } @@ -4102,30 +4178,32 @@ if (table == NULL) table = track; return table; } static struct dbDb *hGetIndexedDbsMaybeClade(char *theDb) /* Get list of active databases, in theDb's clade if theDb is not NULL. * Dispose of this with dbDbFreeList. */ { char *theClade = theDb ? hClade(hGenome(theDb)) : NULL; struct sqlConnection *conn = hConnectCentral(); // after hClade, since it access hgcentral too struct sqlResult *sr = NULL; char **row; struct dbDb *dbList = NULL, *db; +dbList = trackHubGetDbDbs(theClade); + /* Scan through dbDb table, loading into list */ if (theClade != NULL) { char query[1024]; safef(query, sizeof(query), "select dbDb.* from dbDb,genomeClade where dbDb.active = 1 and " "dbDb.genome = genomeClade.genome and genomeClade.clade = \"%s\" " "order by dbDb.orderKey,dbDb.name desc", theClade); sr = sqlGetResult(conn, query); } else sr = sqlGetResult(conn, "select * from dbDb where active = 1 order by orderKey,name desc"); while ((row = sqlNextRow(sr)) != NULL) { @@ -4740,32 +4818,35 @@ if (!isAllDigits(startStr)) return FALSE; if (!isAllDigits(endStr)) return FALSE; *retChrom = chrom; *retStart = sqlUnsigned(startStr) - 1; *retEnd = sqlUnsigned(endStr); return TRUE; } static struct grp* loadGrps(char *db, char *confName, char *defaultTbl) /* load all of the grp rows from a table. The table name is first looked up * in hg.conf with confName. If not there, use defaultTbl. If the table * doesn't exist, return NULL */ { -char query[128]; +if (trackHubDatabase(db)) + return trackHubLoadGroups(db); + struct grp *grps = NULL; +char query[128]; char *tbl = cfgOption(confName); struct slName *tables = NULL, *table; if (tbl == NULL) tbl = defaultTbl; tables = slNameListFromComma(tbl); slReverse(&tables); for (table = tables; table != NULL; table = table->next) { struct grp *oneTable = NULL; char *actualTableName = NULL; struct sqlConnection *conn = hAllocConnProfileTbl(db, table->name, &actualTableName); if (sqlTableExists(conn, actualTableName)) {