a345d17f7912a5f0e121951f6d261b496cce608b
braney
  Fri May 10 09:25:54 2013 -0700
first cut at track item search in track hubs that have bigBed files with a searchIndex field refs #10426
diff --git src/hg/lib/trackHub.c src/hg/lib/trackHub.c
index 3407631..de1b095 100644
--- src/hg/lib/trackHub.c
+++ src/hg/lib/trackHub.c
@@ -23,30 +23,34 @@
 #include "ra.h"
 #include "filePath.h"
 #include "htmlPage.h"
 #include "trackDb.h"
 #include "trackHub.h"
 #include "errCatch.h"
 #include "hgBam.h"
 #include "bigWig.h"
 #include "bigBed.h"
 #include "hdb.h"
 #include "chromInfo.h"
 #include "grp.h"
 #include "twoBit.h"
 #include "dbDb.h"
 #include "net.h"
+#include "bbiFile.h"
+#include "bPlusTree.h"
+#include "hgFind.h"
+#include "hubConnect.h"
 
 static struct hash *hubCladeHash;  // mapping of clade name to hub pointer
 static struct hash *hubAssemblyHash; // mapping of assembly name to genome struct
 static struct hash *hubOrgHash;   // mapping from organism name to hub pointer
 struct trackHub *globalAssemblyHubList; // list of trackHubs in the user's cart
 
 char *trackHubRelativeUrl(char *hubUrl, char *path)
 /* Return full path (in URL form if it's a remote hub) given
  * path possibly relative to hubUrl. Do a freeMem of result
  * when done. */
 {
 /* If path itself is a URL then just return a copy of it. */
 if (hasProtocol(path))
     return cloneString(path);
 
@@ -70,82 +74,92 @@
 /* Given a track hub clade(hub name) return the default genome. */
 {
 if (hubCladeHash == NULL)
     return FALSE;
 struct hashEl *hel = hashLookup(hubCladeHash, clade);
 if (hel == NULL)
     return FALSE;
 struct trackHub *trackHub = hel->val;
 struct trackHubGenome *hubGenome = trackHub->genomeList;
 for(; hubGenome; hubGenome=hubGenome->next)
     if (hubGenome->twoBitPath != NULL)
 	return hubGenome->organism ;
 return NULL;
 }
 
-boolean trackHubDatabase(char *database)
-/* Is this an assembly from an Assembly Data hub? */
+struct trackHubGenome *trackHubGetGenome(char *database)
+/* Return the hub genome registered under this assembly name, or NULL if the name
+ * is not in the assembly hash.  errAborts if the assembly hash was never created. */
 {
 if (hubAssemblyHash == NULL)
-    return FALSE;
+    errAbort("requesting hub genome with no hubs loaded");
+
 struct hashEl *hel = hashLookup(hubAssemblyHash, database);
+
 if (hel == NULL)
+    return NULL;
+
+return (struct trackHubGenome *)hel->val;
+}
+
+boolean trackHubDatabase(char *database)
+/* Is this an assembly from an Assembly Data hub? */
+{
+/* Check here first: trackHubGetGenome() aborts instead of returning NULL in this case. */
+if (hubAssemblyHash == NULL)
     return FALSE;
-return TRUE;
+
+return trackHubGetGenome(database) != NULL;
 }
 
 char *trackHubAssemblyField(char *database, char *field)
 /* Get data field from a assembly data hub. */
 {
-if (hubAssemblyHash == NULL)
-    errAbort("requesting hub assembly field with no hubs loaded");
-struct hashEl *hel = hashLookup(hubAssemblyHash, database);
-if (hel == NULL)
+struct trackHubGenome *genome = trackHubGetGenome(database);
+
+if (genome == NULL)
     return NULL;
 
-struct trackHubGenome *genome = hel->val;
 char *ret = hashFindVal(genome->settingsHash, field);
 
 return cloneString(ret);
 }
 
 static struct dbDb *makeDbDbFromAssemblyGenome(struct trackHubGenome *hubGenome)
 /* Make a dbdb struture from a single assembly hub database. */
 {
 struct dbDb *db;
 
 AllocVar(db);
 db->genome = cloneString(hubGenome->organism);
 db->organism = cloneString(hubGenome->organism);
 db->name = cloneString(hubGenome->name);
 db->active = TRUE;
 db->description = cloneString(hubGenome->description);
 db->orderKey = sqlUnsigned(hashFindVal(hubGenome->settingsHash, "orderKey"));
 
 return db;
 }
 
 struct dbDb *trackHubDbDbFromAssemblyDb(char *database)
 /* Return a dbDb structure for just this database. */
 {
-if (hubAssemblyHash == NULL)
-    errAbort("requesting hub assembly dbDb with no hubs loaded");
-struct hashEl *hel = hashLookup(hubAssemblyHash, database);
-if (hel == NULL)
+struct trackHubGenome *genome = trackHubGetGenome(database);
+
+if (genome == NULL)
     return NULL;
 
-struct trackHubGenome *genome = hel->val;
 return makeDbDbFromAssemblyGenome(genome);
 }
 
 struct slPair *trackHubGetCladeLabels()
 /* Get a list of labels describing the loaded assembly data hubs. */
 {
 if (globalAssemblyHubList == NULL)
     return NULL;
 
 struct slPair *clade, *cladeList = NULL;
 
 struct trackHub *trackHub = globalAssemblyHubList;
 
 for(;trackHub; trackHub = trackHub->next)
     {
@@ -177,106 +188,107 @@
 	    {
 	    if (hubGenome->twoBitPath != NULL)
 		{
 		db = makeDbDbFromAssemblyGenome(hubGenome);
 		slAddHead(&dbList, db);
 		}
 	    }
 	}
     }
 
 slSort(&dbList, hDbDbCmpOrderKey);
 slReverse(&dbList);
 return dbList;
 }
 
-int trackHubChromCount(char *database)
-/* Return number of chromosomes in a assembly data hub. */
-{
-struct hashEl *hel = hashLookup(hubAssemblyHash, database);
-if (hel == NULL)
-    return 0;
-
-struct trackHubGenome *genome = hel->val;
-struct slName *chromList = twoBitSeqNames(genome->twoBitPath);
-
-int num = slCount(chromList);
-slFreeList(&chromList);
-return  num;
-}
-
 struct slName *trackHubAllChromNames(char *database)
 /* Return a list of all the chrom names in this assembly hub database. */
 /* Free with slFreeList. */
 {
-struct hashEl *hel = hashLookup(hubAssemblyHash, database);
-if (hel == NULL)
-    return 0;
+struct trackHubGenome *genome = trackHubGetGenome(database);
+if (genome == NULL)
+    return NULL;
 
-struct trackHubGenome *genome = hel->val;
 struct slName *chromList = twoBitSeqNames(genome->twoBitPath);
 
 return chromList;
 }
 
+int trackHubChromCount(char *database)
+/* Return number of chromosomes in a assembly data hub. */
+{
+struct slName *chromList = trackHubAllChromNames(database);
+
+int num = slCount(chromList);
+slFreeList(&chromList);
+return  num;
+}
+
 char *trackHubDefaultChrom(char *database)
 /* Return the default chromosome for this track hub assembly. */
 {
-struct hashEl *hel = hashLookup(hubAssemblyHash, database);
-if (hel == NULL)
-    return NULL;
+struct slName *chromList = trackHubAllChromNames(database);
 
-struct trackHubGenome *genome = hel->val;
-struct slName *chromList = twoBitSeqNames(genome->twoBitPath);
+if (chromList == NULL)
+    return NULL;
 
 char *defaultName = cloneString( chromList->name);
 slFreeList(&chromList);
 
 return defaultName;
 }
 
-struct chromInfo *trackHubChromInfo(char *database, char *chrom)
-/* Return a chromInfo structure for just this chrom in this database. */
+struct chromInfo *trackHubMaybeChromInfo(char *database, char *chrom)
+/* Return a chromInfo structure for just this chrom in this database.
+ * Return NULL if database is not a hub assembly or chrom doesn't exist. */
 {
-if (hubAssemblyHash == NULL)
+struct trackHubGenome *genome = trackHubGetGenome(database);
+if (genome == NULL)
     return NULL;
 
-struct hashEl *hel = hashLookup(hubAssemblyHash, database);
-
-if (hel == NULL)
+if (!twoBitIsSequence(genome->tbf, chrom))
     return NULL;
 
-struct trackHubGenome *genome = hel->val;
 struct chromInfo *ci;
-
 AllocVar(ci);
 ci->chrom = cloneString(chrom);
 ci->fileName = genome->twoBitPath;
 ci->size = twoBitSeqSize(genome->tbf, chrom);
 
 return ci;
 }
 
-struct chromInfo *trackHubAllChromInfo(char *db)
-/* Return a chromInfo structure for all the chroms in this database. */
+struct chromInfo *trackHubChromInfo(char *database, char *chrom)
+/* Return a chromInfo structure for just this chrom in this database.
+ * errAbort if trackHubMaybeChromInfo() finds nothing for chrom in database. */
 {
-struct hashEl *hel = hashLookup(hubAssemblyHash, db);
+struct chromInfo *ci = trackHubMaybeChromInfo(database, chrom);
 
-if (hel == NULL)
+if (ci == NULL)
+    errAbort("%s is not in %s", chrom, database);
+
+return ci;
+}
+
+struct chromInfo *trackHubAllChromInfo(char *database)
+/* Return a chromInfo structure for all the chroms in this database. */
+{
+struct trackHubGenome *genome = trackHubGetGenome(database);
+if (genome == NULL)
     return NULL;
 
-struct trackHubGenome *genome = hel->val;
 struct chromInfo *ci, *ciList = NULL;
-struct slName *chromList = twoBitSeqNames(genome->twoBitPath);
+struct slName *chrom, *chromList = twoBitSeqNames(genome->twoBitPath);
 
-for(; chromList; chromList = chromList->next)
+/* Walk with a separate cursor so chromList still points at the head when it is freed. */
+for(chrom = chromList; chrom; chrom = chrom->next)
     {
     AllocVar(ci);
-    ci->chrom = cloneString(chromList->name);
+    ci->chrom = cloneString(chrom->name);
     ci->fileName = genome->twoBitPath;
-    ci->size = twoBitSeqSize(genome->tbf, chromList->name);
+    ci->size = twoBitSeqSize(genome->tbf, chrom->name);
     slAddHead(&ciList, ci);
     }
 slFreeList(&chromList);
 return ciList;
 }
 
@@ -308,39 +319,33 @@
     grp->label = cloneString(getRequiredGrpSetting(ra, "label", lf));
     grp->priority = atof(getRequiredGrpSetting(ra, "priority", lf));
     grp->defaultIsClosed = sqlUnsigned(getRequiredGrpSetting(ra,"defaultIsClosed",lf));
     hashFree(&ra);
     }
 if (list)
     slReverse(&list);
 lineFileClose(&lf);
 
 return list;
 }
 
 struct grp *trackHubLoadGroups(char *database)
 /* Load the grp structures for this track hub database. */
 {
-if (hubAssemblyHash == NULL)
-    return NULL;
-
-struct hashEl *hel = hashLookup(hubAssemblyHash, database);
-
-if (hel == NULL)
+struct trackHubGenome *genome = trackHubGetGenome(database);
+if (genome == NULL)
     return NULL;
-
-struct trackHubGenome *genome = hel->val;
 struct grp *list = readGroupRa(genome->groups);
 return list;
 }
 
 char *trackHubGenomeNameToDb(char *genome)
 /* Return assembly name given a genome name if one exists, otherwise NULL. */
 {
 struct hashEl *hel;
 if ((hubOrgHash != NULL) && (hel = hashLookup(hubOrgHash, genome)) != NULL)
     {
     struct trackHub *hub = hel->val;
     struct trackHubGenome *genomeList = hub->genomeList;
 
     for(; genomeList; genomeList=genomeList->next)
 	if ((genomeList->organism != NULL ) && 
@@ -943,15 +948,115 @@
 
 if (hub == NULL)
     return 1;
 
 verbose(2, "hub %s\nshortLabel %s\nlongLabel %s\n", hubUrl, hub->shortLabel, hub->longLabel);
 verbose(2, "%s has %d elements\n", hub->genomesFile, slCount(hub->genomeList));
 struct trackHubGenome *genome;
 for (genome = hub->genomeList; genome != NULL; genome = genome->next)
     {
     retVal |= hubCheckGenome(hub, genome, errors, checkTracks);
     }
 trackHubClose(&hub);
 
 return retVal;
 }
+
+
+static struct hgPos *bigBedIntervalListToHgPositions(struct bbiFile *bbi, char *term, struct bigBedInterval *intervalList)
+/* Given an open bigBed file, and an interval list, return a pointer to a list of hgPos structures.
+ * One hgPos is made per interval and added at the head, so the result is in the reverse
+ * of intervalList order.  Each hgPos gets its own copies of the chrom name and of term,
+ * so the list does not point into bbi, intervalList or the caller's term string. */
+{
+struct hgPos *posList = NULL;
+char chromName[bbi->chromBpt->keySize+1];
+int lastChromId = -1;
+struct bigBedInterval *interval;
+
+for (interval = intervalList; interval != NULL; interval = interval->next)
+    {
+    struct hgPos *hgPos;
+    AllocVar(hgPos);
+    slAddHead(&posList, hgPos);
+
+    bbiCachedChromLookup(bbi, interval->chromId, lastChromId, chromName, sizeof(chromName));
+    lastChromId = interval->chromId;
+
+    hgPos->chrom = cloneString(chromName);
+    hgPos->chromStart = interval->start;
+    hgPos->chromEnd = interval->end;
+    hgPos->name = cloneString(term);
+    }
+
+return posList;
+}
+
+static struct hgPos *getPosFromBigBed(char *bigDataUrl, char *indexField, char *term)
+/* Given a bigBed file with a search index, check for term.
+ * Returns one hgPos per interval that the name query on indexField hands back,
+ * or NULL when the query yields no intervals. */
+{
+struct bbiFile *bbi = bigBedFileOpen(bigDataUrl);
+int fieldIx;
+struct bptFile *bpt = bigBedOpenExtraIndex(bbi, indexField, &fieldIx);
+struct lm *lm = lmInit(0);
+struct bigBedInterval *intervalList;
+intervalList = bigBedNameQuery(bbi, bpt, fieldIx, term, lm);
+
+struct hgPos *posList = bigBedIntervalListToHgPositions(bbi, term, intervalList);
+
+/* posList holds its own copies of everything, so release the query resources now. */
+lmCleanup(&lm);
+bptFileDetach(&bpt);
+bbiFileClose(&bbi);
+
+return posList;
+}
+
+static void findPosInTdbList(struct trackDb *tdbList, char *term, struct hgPositions *hgp)
+/* Given a trackHub's trackDb entries, check each of them for a searchIndex.
+ * Only entries that have both a searchIndex and a bigDataUrl setting are searched.
+ * Each entry with at least one hit gets an hgPosTable added at the head of hgp->tableList. */
+{
+struct trackDb *tdb;
+
+for(tdb=tdbList; tdb; tdb = tdb->next)
+    {
+    char *indexField = trackDbSetting(tdb, "searchIndex");
+    char *bigDataUrl = trackDbSetting(tdb, "bigDataUrl");
+
+    if (indexField && bigDataUrl)
+	{
+	struct hgPos *posList = getPosFromBigBed(bigDataUrl, indexField, term);
+
+	if (posList != NULL)
+	    {
+	    struct hgPosTable *table;
+
+	    AllocVar(table);
+	    slAddHead(&hgp->tableList, table);
+	    table->description = cloneString(tdb->table);
+	    table->name = cloneString(tdb->table);
+
+	    table->posList = posList;
+	    }
+	}
+    }
+}
+
+void trackHubFindPos(char *db, char *term, struct hgPositions *hgp)
+/* Look for term in track hubs.  Update hgp if found.
+ * For an assembly hub database the tracks of that hub genome are searched;
+ * otherwise the tracks returned by hubCollectTracks() for db are searched. */
+{
+struct trackDb *tdbList = NULL;
+if (trackHubDatabase(db))
+    {
+    struct trackHubGenome *genome = trackHubGetGenome(db);
+    tdbList = trackHubTracksForGenome(genome->trackHub, genome);
+    }
+else
+    tdbList = hubCollectTracks(db, NULL);
+
+findPosInTdbList(tdbList, term, hgp);
+}