3846f517009c43abc65d227a4695645c9b5f3e8a
braney
  Fri Feb 15 18:31:21 2013 -0800
changes necessary to support assembly hubs (#8072)
diff --git src/hg/lib/hdb.c src/hg/lib/hdb.c
index fc6fe7d..97ebc03 100644
--- src/hg/lib/hdb.c
+++ src/hg/lib/hdb.c
@@ -25,30 +25,31 @@
 #include "liftOver.h"
 #include "liftOverChain.h"
 #include "grp.h"
 #include "twoBit.h"
 #include "ra.h"
 #include "genbank.h"
 #include "chromInfo.h"
 #ifndef GBROWSE
 #include "axtInfo.h"
 #include "ctgPos.h"
 #include "hubConnect.h"
 #include "customTrack.h"
 #include "hgFind.h"
 #endif /* GBROWSE */
 #include "hui.h"
+#include "trackHub.h"
 
 
 #ifdef LOWELAB
 #define DEFAULT_PROTEINS "proteins060115"
 #define DEFAULT_GENOME "Pyrobaculum aerophilum"
 #else
 #define DEFAULT_PROTEINS "proteins"
 #define DEFAULT_GENOME "Human"
 #endif
 
 
 static struct sqlConnCache *hdbCc = NULL;  /* cache for primary database connection */
 static struct sqlConnCache *centralCc = NULL;
 static char *centralDb = NULL;
 static struct sqlConnCache *centralArchiveCc = NULL;
@@ -58,30 +59,34 @@
 static char *hdbTrackDb = NULL;
 
 /* cached list of tables in databases.  This keeps a hash of databases to
  * hashes of track/table name to slName list of actual table names, which
  * might be split.  Since individual tables can be mapped to different
  * profiles, and this information is only available when processing trackDb,
  * another table is kept to determine if these need to be checked.
  */
 static struct hash *tableList = NULL; // db to track to tables
 static struct hash *tableListProfChecked = NULL;  // profile:db that have been check
 
 static struct chromInfo *lookupChromInfo(char *db, char *chrom)
 /* Query db.chromInfo for the first entry matching chrom. */
 {
 struct chromInfo *ci = NULL;
+// hub assemblies: ask the track hub first; fall through to SQL when it returns NULL
+if (trackHubDatabase(db) && ((ci = trackHubChromInfo(db, chrom)) != NULL))
+    return ci;
+
 struct sqlConnection *conn = hAllocConn(db);
 struct sqlResult *sr = NULL;
 char **row = NULL;
 char query[256];
 safef(query, sizeof(query), "select * from chromInfo where chrom = '%s'",
       chrom);
 sr = sqlGetResult(conn, query);
 if ((row = sqlNextRow(sr)) != NULL)
     {
     ci = chromInfoLoad(row);
     }
 sqlFreeResult(&sr);
 hFreeConn(&conn);
 return ci;
 }
@@ -270,30 +275,36 @@
 /*
   Function to check if this is a valid db name
 */
 {
 static struct hash *dbsChecked = NULL;
 
 if (dbsChecked)
     {
     struct hashEl *hel = hashLookup(dbsChecked, database);
     if (hel != NULL)
 	return ptToInt(hel->val);
     }
 else
     dbsChecked = newHash(0);
 
+if (trackHubDatabase(database))
+    {
+    hashAddInt(dbsChecked, database, TRUE);
+    return TRUE;
+    }
+
 struct sqlConnection *conn = hConnectCentral();
 char buf[128];
 char query[256];
 char *escaped = sqlEscapeString(database);
 safef(query, sizeof(query), "select name from dbDb where name = '%s'", escaped);
 freez(&escaped);
 boolean res = (sqlQuickQuery(conn, query, buf, sizeof(buf)) != NULL);
 if (res)
     {
     // this is done instead of sqlDatabaseExists() since it uses the cache,
     // which will recycle free connections for new databases
     struct sqlConnection *conn2 = hAllocConnMaybe(database);
     res = (conn2 != NULL);
     hFreeConn(&conn2);
     }
@@ -304,49 +315,60 @@
 
 boolean hDbIsActive(char *database)
 /* Function to check if this is a valid and active db name */
 {
 static struct hash *dbsChecked = NULL;
 
 if (dbsChecked)
     {
-    void *hashDb = hashFindVal(dbsChecked, database);
-    if (hashDb)
-	return(hashIntVal(dbsChecked, database));
+    struct hashEl *hel = hashLookup(dbsChecked, database);  // test the entry, not its value: a cached FALSE is stored as 0
+    if (hel != NULL)
+	return ptToInt(hel->val);
     }
 else
     dbsChecked = newHash(0);
 
+if (trackHubDatabase(database))  // hub assemblies count as active without consulting dbDb
+    {
+    hashAddInt(dbsChecked, database, TRUE);
+    return TRUE;
+    }
+
 struct sqlConnection *conn = hConnectCentral();
 char buf[128];
 char query[256];
 boolean res = FALSE;
 safef(query, sizeof(query),
       "select name from dbDb where name = '%s' and active = 1", database);
 res = (sqlQuickQuery(conn, query, buf, sizeof(buf)) != NULL);
 hDisconnectCentral(&conn);
 hashAddInt(dbsChecked, database, res);
 return res;
 }
 
 char *hDefaultDbForGenome(char *genome)
 /* Purpose: Return the default database matching the Genome.
  * param Genome - The Genome for which we are trying to get the
  *    default database.
  * return - The default database name for this Genome
  * Free the returned database name. */
 {
+char *dbName;
+
+if ((dbName = trackHubGenomeNameToDb(genome)) != NULL)
+    return dbName;
+
 struct sqlConnection *conn = hConnectCentral();
 struct sqlResult *sr = NULL;
 char **row;
 struct defaultDb *db = NULL;
 char query [256];
 char *result = NULL;
 
 if (NULL == genome)
     {
     genome = cfgOptionDefault("defaultGenome", DEFAULT_GENOME);
     }
 
 /* Get proper default from defaultDb table */
 safef(query, sizeof(query), "select * from defaultDb where genome = '%s'",
       genome);
@@ -370,33 +392,36 @@
     if (db == NULL)
 	errAbort("Can't find genome \"%s\" in central database table defaultDb.\n", genome);
     }
 
 sqlFreeResult(&sr);
 hDisconnectCentral(&conn);
 AllocArray(result, strlen(db->name) + 1);
 strcpy(result, db->name);
 defaultDbFree(&db);
 return result;
 }
 
 char *hDefaultGenomeForClade(char *clade)
 /* Return highest relative priority genome for clade. */
 {
+char *genome = trackHubCladeToGenome(clade);
+if (genome != NULL)
+    return genome;
+
 struct sqlConnection *conn = hConnectCentral();
 char query[512];
-char *genome = NULL;
 /* Get the top-priority genome *with an active database* so if genomeClade
  * gets pushed from hgwdev to hgwbeta/RR with genomes whose dbs haven't been
  * pushed yet, they'll be ignored. */
 safef(query, sizeof(query),
       "select genomeClade.genome from genomeClade,dbDb "
       "where genomeClade.clade = '%s' and genomeClade.genome = dbDb.genome "
       "and dbDb.active = 1 "
       "order by genomeClade.priority limit 1",
       clade);
 genome = sqlQuickString(conn, query);
 hDisconnectCentral(&conn);
 return genome;
 }
 
 char *hDbForSciName(char *sciName)
@@ -445,49 +470,54 @@
 freeMem(binomial);
 return db;
 }
 
 char *hDefaultDb()
 /* Return the default db if all else fails */
 {
 char *genome = cfgOptionDefault("defaultGenome", DEFAULT_GENOME);
 return hDefaultDbForGenome(genome);
 }
 
 char *hDefaultChrom(char *db)
 /* Return some sequence named in chromInfo from the given db, or NULL if db
  * has no chromInfo. */
 {
+if (trackHubDatabase(db))  // hub assemblies: the track hub picks the chrom; the SQL cache below is bypassed
+    return trackHubDefaultChrom(db);
+
 static struct hash *hash = NULL;
 struct hashEl *hel = NULL;
 
 if (hash == NULL)
     hash = hashNew(0);
 hel = hashStore(hash, db);
 if (hel->val == NULL)
     {
     struct sqlConnection *conn = hAllocConn(db);
     if (sqlTableExists(conn, "chromInfo"))
 	hel->val = sqlQuickString(conn, "select chrom from chromInfo limit 1");
     hFreeConn(&conn);
     }
 return hel->val;
 }
 
 int hChromCount(char *db)
 /* Return the number of chromosomes (scaffolds etc.) in the given db. */
 {
+if (trackHubDatabase(db))  // hub assemblies: the count comes from the track hub, no SQL query
+    return trackHubChromCount(db);
 struct sqlConnection *conn = hAllocConn(db);
 int count = sqlQuickNum(conn, "select count(*) from chromInfo");
 hFreeConn(&conn);
 return count;
 }
 
 struct sqlConnection *hAllocConn(char *db)
 /* Get free connection if possible. If not allocate a new one. */
 {
 if (hdbCc == NULL)
     hdbCc = sqlConnCacheNew();
 return sqlConnCacheAlloc(hdbCc, db);
 }
 
 struct sqlConnection *hAllocConnMaybe(char *db)
@@ -961,88 +991,107 @@
 safef(retNibName, HDB_MAX_PATH_STRING, "%s/%s.2bit", nibPath, db);
 if (!fileExists(retNibName))
     {
     /* if 2bit file isn't there, try up one directory */
     safef(retNibName, HDB_MAX_PATH_STRING, "%s/../%s.2bit",
 	  nibPath, db);
     if (!fileExists(retNibName))
 	{
 	/* still no 2bit, let's just try to find a nib */
 	safef(retNibName, HDB_MAX_PATH_STRING, "%s/%s.nib",
 	      nibPath, chromName);
 	}
     }
 }
 
-static struct dnaSeq *fetchTwoBitSeq(char *fileName, char *seqName, int start, int end)
+
+static struct dnaSeq *fetchTwoBitSeqExt(char *fileName, char *seqName, int start, int end, boolean useUdc)
 /* fetch a sequence from a 2bit, caching open of the file */
 {
 static struct twoBitFile *tbf = NULL;  // cache of open file
 if ((tbf == NULL) || !sameString(fileName, tbf->fileName))
     {
     twoBitClose(&tbf);
+    tbf = twoBitOpenExt(fileName, useUdc);  // NOTE: the cache is keyed on fileName only, not on useUdc
     }
 struct dnaSeq *seq = twoBitReadSeqFrag(tbf, seqName, start, end);
 return seq;
 }
 
+static struct dnaSeq *fetchTwoBitSeq(char *fileName, char *seqName, int start, int end)
+/* fetch a sequence from a 2bit via fetchTwoBitSeqExt with useUdc FALSE (shares its open-file cache) */
+{
+return fetchTwoBitSeqExt(fileName, seqName, start, end, FALSE);
+}
+
 struct dnaSeq *hFetchSeqMixed(char *fileName, char *seqName, int start, int end)
 /* Fetch mixed case sequence. */
 {
 if (twoBitIsFile(fileName))
     return fetchTwoBitSeq(fileName, seqName, start, end);
 else
     return nibLoadPartMasked(NIB_MASK_MIXED, fileName, start, end-start);
 }
 
 struct dnaSeq *hFetchSeq(char *fileName, char *seqName, int start, int end)
 /* Fetch sequence from file.  If it is a .2bit file then fetch the named sequence.
    If it is .nib then just ignore seqName. */
 {
 if (twoBitIsFile(fileName))
     {
     struct dnaSeq *seq = fetchTwoBitSeq(fileName, seqName, start, end);
     tolowers(seq->dna);
     return seq;
     }
 return nibLoadPart(fileName, start, end-start);
 }
 
 struct dnaSeq *hChromSeqMixed(char *db, char *chrom, int start, int end)
 /* Return mixed case (repeats in lower case) DNA from chromosome. */
 {
 char fileName[HDB_MAX_PATH_STRING];
 hNibForChrom(db, chrom, fileName);
+if (trackHubDatabase(db))
+    {
+    // hub assembly: read the 2bit with useUdc TRUE rather than via hFetchSeqMixed
+    return fetchTwoBitSeqExt(fileName, chrom, start, end, TRUE);
+    }
 return hFetchSeqMixed(fileName, chrom, start, end);
 }
 
 struct dnaSeq *hChromSeqMixedFromPath(char *nibPath, char *db, char *chrom,
 				      int start, int end)
 /* Return mixed case (repeats in lower case) DNA from chromosome, given an
  * input nib path. */
 {
 char fileName[HDB_MAX_PATH_STRING];
 hNibForChromFromPath(nibPath, db, chrom, fileName);
 return hFetchSeqMixed(fileName, chrom, start, end);
 }
 
 struct dnaSeq *hChromSeq(char *db, char *chrom, int start, int end)
 /* Return lower case DNA from chromosome. */
 {
 char fileName[HDB_MAX_PATH_STRING];
 hNibForChrom(db, chrom, fileName);
+if (trackHubDatabase(db))
+    {
+    struct dnaSeq *hubSeq = fetchTwoBitSeqExt(fileName, chrom, start, end, TRUE);
+    tolowers(hubSeq->dna);  // same lower-casing hFetchSeq applies to 2bit reads
+    return hubSeq;
+    }
+
 return hFetchSeq(fileName, chrom, start, end);
 }
 
 struct dnaSeq *hChromSeqFromPath(char *nibPath, char *db, char *chrom,
 				 int start, int end)
 /* Return lower case DNA from chromosome. */
 {
 char fileName[HDB_MAX_PATH_STRING];
 hNibForChromFromPath(nibPath, db, chrom, fileName);
 return hFetchSeq(fileName, chrom, start, end);
 }
 
 struct dnaSeq *hSeqForBed(char *db, struct bed *bed)
 /* Get the sequence associated with a particular bed concatenated together. */
 {
@@ -1099,31 +1148,31 @@
 s = sqlQuickQuery(conn, query, buf, sizeof(buf));
 if (s == NULL)
    {
    s = "";
    ok = FALSE;
    }
 safef(retBand, HDB_MAX_BAND_STRING, "%s%s",
       (isDmel ? "" : skipChr(chrom)), buf);
 return ok;
 }
 
 boolean hChromBand(char *db, char *chrom, int pos, char retBand[HDB_MAX_BAND_STRING])
 /* Return text string that says what band pos is on.
  * Return FALSE if not on any band, or table missing. */
 {
-if (!hTableExists(db, "cytoBand"))
+if (trackHubDatabase(db) || !hTableExists(db, "cytoBand"))  // hub check first: hTableExists is never called for a hub assembly
     return FALSE;
 else
     {
     struct sqlConnection *conn = hAllocConn(db);
     boolean ok = hChromBandConn(conn, chrom, pos, retBand);
     hFreeConn(&conn);
     return ok;
     }
 }
 
 boolean hScaffoldPos(char *db, char *chrom, int start, int end,
                      char **retScaffold, int *retStart, int *retEnd)
 /* Return the scaffold, and start end coordinates on a scaffold, for
  * a chromosome range.  If the range extends past end of a scaffold,
  * it is truncated to the scaffold end.
@@ -1192,30 +1241,32 @@
 	  touppers(seq->dna);
 	}
 return seq;
 }
 
 struct dnaSeq *hLoadChrom(char *db, char *chromName)
 /* Fetch entire chromosome into memory. */
 {
 int size = hChromSize(db, chromName);
 return hDnaFromSeq(db, chromName, 0, size, dnaLower);
 }
 
 struct slName *hAllChromNames(char *db)
 /* Get list of all chromosome names in database. */
 {
+if (trackHubDatabase(db))  // hub assemblies: names come from the track hub, no SQL query
+    return trackHubAllChromNames(db);
 struct slName *list = NULL;
 struct sqlConnection *conn = hAllocConn(db);
 struct sqlResult *sr;
 char **row;
 
 sr = sqlGetResult(conn, "select chrom from chromInfo");
 while ((row = sqlNextRow(sr)) != NULL)
     {
     struct slName *el = slNameNew(row[0]);
     slAddHead(&list, el);
     }
 sqlFreeResult(&sr);
 hFreeConn(&conn);
 return list;
 }
@@ -2103,30 +2154,35 @@
 }
 
 char *hPdbFromGdb(char *genomeDb)
 /* Find proteome database name given genome database name */
 /* With the retirement of the proteome browser, we always use the most
  * recent version of the database which is called "proteome" */
 {
 return "proteome";
 }
 
 static char *hFreezeDbConversion(char *database, char *freeze)
 /* Find freeze given database or vice versa.  Pass in NULL
  * for parameter that is unknown and it will be returned
  * as a result.  This result can be freeMem'd when done. */
 {
+if ((database != NULL) && trackHubDatabase(database))
+    {
+    return trackHubAssemblyField(database, "description");
+    }
+
 struct sqlConnection *conn = hConnectCentral();
 struct sqlResult *sr;
 char **row;
 char *ret = NULL;
 struct dyString *dy = newDyString(128);
 
 if (database != NULL)
     dyStringPrintf(dy, "select description from dbDb where name = '%s'", database);
 else if (freeze != NULL)
     dyStringPrintf(dy, "select name from dbDb where description = '%s'", freeze);
 else
     internalErr();
 sr = sqlGetResult(conn, dy->string);
 if ((row = sqlNextRow(sr)) != NULL)
     ret = cloneString(row[0]);
@@ -2204,31 +2260,40 @@
 hDisconnectCentral(&conn);
 return res;
 }
 
 char *hArchiveDbDbOptionalField(char *database, char *field)
 /* Wrapper for hArchiveOrCentralDbDbOptionalField to
  * look up in the archive database. */
 {
 return hArchiveOrCentralDbDbOptionalField(database, field, TRUE);
 }
 
 char *hDbDbOptionalField(char *database, char *field)
 /* Wrapper for hArchiveOrCentralDbDbOptionalField to
  * look up in the regular central database. */
 {
-return hArchiveOrCentralDbDbOptionalField(database, field, FALSE);
+if (trackHubDatabase(database))
+    {
+    // the dbDb "genome" column is looked up as "organism" in a hub assembly
+    char *hubField = sameString(field, "genome") ? "organism" : field;
+    return trackHubAssemblyField(database, hubField);
+    }
+
+// not a hub assembly: consult dbDb in the regular (non-archive) central
+// database
+return hArchiveOrCentralDbDbOptionalField(database, field, FALSE);
 }
 
 char *hDbDbField(char *database, char *field)
 /* Look up field in dbDb table keyed by database.
  * Free this string when you are done. */
 {
 char *res = hDbDbOptionalField(database, field);
 if (res == NULL)
     errAbort("Can't find %s for %s", field, database);
 return res;
 }
 
 char *hDefaultPos(char *database)
 /* Return default chromosome position for the
   organism associated with database.   use freeMem on
@@ -2328,58 +2393,64 @@
 }
 
 boolean hGotClade()
 /* Return TRUE if central db contains clade info tables. */
 {
 struct sqlConnection *conn = hConnectCentral();
 boolean gotClade = hGotCladeConn(conn);
 hDisconnectCentral(&conn);
 return gotClade;
 }
 
 char *hClade(char *genome)
 /* If central database has clade tables, return the clade for the
  * given genome; otherwise return NULL. */
 {
+char *clade = trackHubAssemblyClade(genome);
+if (clade != NULL)
+    return clade;
+
 struct sqlConnection *conn = hConnectCentral();
 if (hGotCladeConn(conn))
     {
     char query[512];
-    char *clade;
     safef(query, sizeof(query),
 	  "select clade from genomeClade where genome = '%s'", genome);
     clade = sqlQuickString(conn, query);
     hDisconnectCentral(&conn);
     if (clade == NULL)
 	{
 	warn("Warning: central database genomeClade doesn't contain "
 	     "genome \"%s\"", genome);
 	return cloneString("other");
 	}
     else
 	return clade;
     }
 else
     {
     hDisconnectCentral(&conn);
     return NULL;
     }
 }
 
 struct dbDb *hDbDb(char *database)
 /* Return dbDb entry for a database */
 {
+if (trackHubDatabase(database))  // hub assemblies: the dbDb record comes from the track hub, not the central database
+    return trackHubDbDbFromAssemblyDb(database);
+
 struct sqlConnection *conn = hConnectCentral();
 struct sqlResult *sr;
 char **row;
 struct dbDb *db = NULL;
 
 struct dyString *ds = dyStringNew(0);
 dyStringPrintf(ds, "select * from dbDb where name='%s'", database);
 sr = sqlGetResult(conn, ds->string);
 if ((row = sqlNextRow(sr)) != NULL)
     db = dbDbLoad(row);
 sqlFreeResult(&sr);
 hDisconnectCentral(&conn);
 dyStringFree(&ds);
 return db;
 }
@@ -3448,30 +3519,33 @@
         else
 	    {
             trackDbFree(&oneRow);
 	    }
         }
     }
 
 hFreeConn(&conn);
 return exists;
 }
 
 static struct trackDb *loadTrackDb(char *db, char *where)
 /* Load each trackDb table.  Will put supertracks in parent field of given tracks but
  * these are still in track list. */
 {
+if (trackHubDatabase(db))
+    return NULL;
+
 struct trackDb *tdbList = NULL;
 struct slName *tableList = hTrackDbList(), *one;
 boolean foundOne = FALSE;
 struct hash *loaded = hashNew(0);
 for (one = tableList; one != NULL; one = one->next)
     {
     if (loadOneTrackDb(db, where, one->name, &tdbList, loaded))
         foundOne = TRUE;
     }
 if (!foundOne)
     errAbort("can not find any trackDb tables for %s, check db.trackDb specification in hg.conf",
              db);
 slNameFreeList(&tableList);
 hashFree(&loaded);
 
@@ -3993,31 +4067,33 @@
     trackDbFree(&tdb);
     }
 hFreeConn(&conn);
 return ret;
 }
 
 static struct hash *makeTrackSettingsHash(char *db)
 /* Create  a hash of hashes with all track settings for database.
  * The returned hash is keyed by track.   The contained hashes
  * are keyed by tags and contain generic text values, corresponding
  * to the trackDb.ra settings for that track. Generally you want to
  * call the version that caches results below instead. */
 {
 struct hash *hash = hashNew(0);
 struct slName *trackTable, *trackTableList = hTrackDbList();
-struct sqlConnection *conn = hAllocConn(db);
+struct sqlConnection *conn =NULL;
+if (!trackHubDatabase(db))
+    conn = hAllocConn(db);
 for (trackTable = trackTableList; trackTable != NULL; trackTable = trackTable->next)
     {
     if (hTableExists(db, trackTable->name))
         {
 	char query[512];
 	safef(query, sizeof(query), "select tableName,settings from %s", trackTable->name);
 	struct sqlResult *sr = sqlGetResult(conn, query);
 	char **row;
 	while ((row = sqlNextRow(sr)) != NULL)
 	    {
 	    struct hash *settings = trackDbSettingsFromString(row[1]);
 	    hashAdd(hash, row[0], settings);
 	    }
 	sqlFreeResult(&sr);
 	}
@@ -4102,30 +4178,32 @@
 if (table == NULL)
     table = track;
 return table;
 }
 
 static struct dbDb *hGetIndexedDbsMaybeClade(char *theDb)
 /* Get list of active databases, in theDb's clade if theDb is not NULL.
  * Dispose of this with dbDbFreeList. */
 {
 char *theClade = theDb ? hClade(hGenome(theDb)) : NULL;
 struct sqlConnection *conn = hConnectCentral(); // after hClade, since it access hgcentral too
 struct sqlResult *sr = NULL;
 char **row;
 struct dbDb *dbList = NULL, *db;
 
+dbList = trackHubGetDbDbs(theClade);
+
 /* Scan through dbDb table, loading into list */
 if (theClade != NULL)
     {
     char query[1024];
     safef(query, sizeof(query),
 	  "select dbDb.* from dbDb,genomeClade where dbDb.active = 1 and "
 	  "dbDb.genome = genomeClade.genome and genomeClade.clade = \"%s\" "
 	  "order by dbDb.orderKey,dbDb.name desc", theClade);
     sr = sqlGetResult(conn, query);
     }
     else
 	sr = sqlGetResult(conn,
 	   "select * from dbDb where active = 1 order by orderKey,name desc");
 while ((row = sqlNextRow(sr)) != NULL)
     {
@@ -4740,32 +4818,35 @@
 if (!isAllDigits(startStr))
     return FALSE;
 if (!isAllDigits(endStr))
     return FALSE;
 *retChrom = chrom;
 *retStart = sqlUnsigned(startStr) - 1;
 *retEnd = sqlUnsigned(endStr);
 return TRUE;
 }
 
 static struct grp* loadGrps(char *db, char *confName, char *defaultTbl)
 /* load all of the grp rows from a table.  The table name is first looked up
  * in hg.conf with confName. If not there, use defaultTbl.  If the table
  * doesn't exist, return NULL */
 {
-char query[128];
+if (trackHubDatabase(db))
+    return trackHubLoadGroups(db);
+
 struct grp *grps = NULL;
+char query[128];
 char *tbl = cfgOption(confName);
 struct slName *tables = NULL, *table;
 
 if (tbl == NULL)
     tbl = defaultTbl;
 tables = slNameListFromComma(tbl);
 slReverse(&tables);
 
 for (table = tables; table != NULL; table = table->next)
     {
     struct grp *oneTable = NULL;
     char *actualTableName = NULL;
     struct sqlConnection *conn = hAllocConnProfileTbl(db, table->name, &actualTableName);
     if (sqlTableExists(conn, actualTableName))
 	{