130c65d9512af860b1a14c406620f3ac71296ddf
braney
  Sun May 26 12:52:25 2013 -0700
added TRIX search for track hubs.  UDC'ified trix library.  Refs #10426
diff --git src/hg/lib/trackHub.c src/hg/lib/trackHub.c
index 9d13a6d..9236afe 100644
--- src/hg/lib/trackHub.c
+++ src/hg/lib/trackHub.c
@@ -27,30 +27,31 @@
 #include "trackHub.h"
 #include "errCatch.h"
 #include "hgBam.h"
 #include "bigWig.h"
 #include "bigBed.h"
 #include "hdb.h"
 #include "chromInfo.h"
 #include "grp.h"
 #include "twoBit.h"
 #include "dbDb.h"
 #include "net.h"
 #include "bbiFile.h"
 #include "bPlusTree.h"
 #include "hgFind.h"
 #include "hubConnect.h"
+#include "trix.h"
 
 static struct hash *hubCladeHash;  // mapping of clade name to hub pointer
 static struct hash *hubAssemblyHash; // mapping of assembly name to genome struct
 static struct hash *hubOrgHash;   // mapping from organism name to hub pointer
 struct trackHub *globalAssemblyHubList; // list of trackHubs in the user's cart
 
 char *trackHubRelativeUrl(char *hubUrl, char *path)
 /* Return full path (in URL form if it's a remote hub) given
  * path possibly relative to hubUrl. Do a freeMem of result
  * when done. */
 {
 /* If path itself is a URL then just return a copy of it. */
 if (hasProtocol(path))
     return cloneString(path);
 
@@ -563,30 +564,38 @@
     	hub->url, genome->name, tdb->track);
 return val;
 }
 
 static void expandBigDataUrl(struct trackHub *hub, struct trackHubGenome *genome,
 	struct trackDb *tdb)
 /* Expand bigDataUrls so that no longer relative to genome->trackDbFile */
 {
 struct hashEl *hel = hashLookup(tdb->settingsHash, "bigDataUrl");
 if (hel != NULL)
     {
     char *oldVal = hel->val;
     hel->val = trackHubRelativeUrl(genome->trackDbFile, oldVal);
     freeMem(oldVal);
     }
+/* The searchTrix setting is later opened as a file too, so give it the same expansion as bigDataUrl. */
+hel = hashLookup(tdb->settingsHash, "searchTrix");
+if (hel != NULL) /* the setting is optional; nothing to do when it is absent */
+    {
+    char *oldVal = hel->val;
+    hel->val = trackHubRelativeUrl(genome->trackDbFile, oldVal); /* resolve against the trackDb file's location */
+    freeMem(oldVal); /* the hash entry now points at the new string; release the old one */
+    }
 }
 
 struct trackHubGenome *trackHubFindGenome(struct trackHub *hub, char *genomeName)
 /* Return trackHubGenome of given name associated with hub.  Return NULL if no
  * such genome. */
 {
 return hashFindVal(hub->genomeHash, genomeName);
 }
 
 static void validateOneTrack( struct trackHub *hub, 
     struct trackHubGenome *genome, struct trackDb *tdb)
 /* Validate a track's trackDb entry. */
 {
 /* Check for existence of fields required in all tracks */
 requiredSetting(hub, genome, tdb, "shortLabel");
@@ -986,61 +995,104 @@
     }
 
 return posList;
 }
 
 static struct hgPos *getPosFromBigBed(char *bigDataUrl, char *indexField, char *term)
 /* Given a bigBed file with a search index, check for term. */
 {
 struct bbiFile *bbi = bigBedFileOpen(bigDataUrl);
 int fieldIx;
 struct bptFile *bpt = bigBedOpenExtraIndex(bbi, indexField, &fieldIx);
 struct lm *lm = lmInit(0);
 struct bigBedInterval *intervalList;
 intervalList = bigBedNameQuery(bbi, bpt, fieldIx, term, lm);
 
-return bigBedIntervalListToHgPositions(bbi, term, intervalList);
+struct hgPos *posList = bigBedIntervalListToHgPositions(bbi, term, intervalList); /* convert while bbi is still open; the conversion takes bbi as an argument */
+bbiFileClose(&bbi); /* close the handle opened by bigBedFileOpen above */
+return posList; /* NOTE(review): bpt and lm are still not released here -- confirm whether posList borrows from lm before adding cleanup */
+}
+
+static struct hgPos *doTrixSearch(char *trixFile, char *indexField, char *bigDataUrl, char *term) /* Search the trix index at trixFile for the words of term, then look up each hit's itemId in the bigBed at bigDataUrl. */
+{ /* Returns the concatenation of the per-hit hgPos lists, or NULL when term contains no words. */
+struct trix *trix = trixOpen(trixFile);
+int trixWordCount = 0;
+char *tmp = cloneString(term); /* presumably copied so that splitting and lower-casing below leave term itself untouched -- TODO confirm */
+char *val = nextWord(&tmp); /* NOTE(review): tmp is passed by address and no copy of the cloneString pointer is kept, so the clone cannot be freed later */
+char *trixWords[128];
+
+while (val != NULL)
+    {
+    trixWords[trixWordCount] = strLower(val);
+    trixWordCount++;
+    if (trixWordCount == sizeof(trixWords)/sizeof(char*)) /* tested after the increment, so a term of exactly 128 words aborts although every word fit in the array */
+	errAbort("exhausted space for trixWords");
+
+    val = nextWord(&tmp);        /* the unindented brace on the next line closes this while body; it is a reused diff context line */
 }
 
+if (trixWordCount == 0)
+    return NULL; /* NOTE(review): trix opened above is not closed on this path */
+
+struct trixSearchResult *tsList = trixSearch(trix, trixWordCount, trixWords, TRUE);
+struct hgPos *posList = NULL;
+for ( ; tsList != NULL; tsList = tsList->next) /* walks the list with its own head pointer, so the head is no longer reachable afterwards */
+    {
+    struct hgPos *posList2 = getPosFromBigBed(bigDataUrl, indexField, tsList->itemId); /* every hit reopens the bigBed at bigDataUrl inside getPosFromBigBed */
+
+    posList = slCat(posList, posList2);
+    }
+
+return posList; /* NOTE(review): neither trix nor the trixSearch result list is released before returning */
+}
+
+
 static void findPosInTdbList(struct trackDb *tdbList, char *term, struct hgPositions *hgp)
 /* Given a trackHub's trackDb entries, check each of them for a searchIndex */
 {
 struct trackDb *tdb;
 
 for(tdb=tdbList; tdb; tdb = tdb->next)
     {
     char *indexField = trackDbSetting(tdb, "searchIndex");
     char *bigDataUrl = trackDbSetting(tdb, "bigDataUrl");
+    struct hgPos *posList1 = NULL, *posList2 = NULL; /* posList1: hits found through the searchTrix file; posList2: hits for term itself in the bigBed index */
 
     if (indexField && bigDataUrl)
 	{
-	struct hgPos *posList = getPosFromBigBed(bigDataUrl, indexField, term);
+	char *trixFile = trackDbSetting(tdb, "searchTrix"); /* optional setting; the trix search is skipped when it is absent */
+	if (trixFile != NULL)
+	    posList1 = doTrixSearch(trixFile, indexField, bigDataUrl, term);
+	/* The direct lookup of term always runs, with or without a searchTrix file. */
+	posList2 = getPosFromBigBed(bigDataUrl, indexField, term); /* NOTE(review): a term matched by both lookups may end up listed twice -- confirm */
+	} /* closes the if; the context lines below keep their old, deeper indentation, so the last two braces now end the for loop and the function */
+    /* Both lists are still NULL here when the track lacks searchIndex or bigDataUrl. */
+    struct hgPos *posList = slCat(posList1, posList2); /* merge both result lists into the one list stored on the table below */
 
 	if (posList != NULL)
 	    {
 	    struct hgPosTable *table;
 
 	    AllocVar(table);
 	    slAddHead(&hgp->tableList, table);
 	    table->description = cloneString(tdb->table);
 	    table->name = cloneString(tdb->table);
 
 	    table->posList = posList;
 	    }
 	}
     }
-}
 
 void trackHubFindPos(char *db, char *term, struct hgPositions *hgp)
 /* Look for term in track hubs.  Update hgp if found */
 {
 struct trackDb *tdbList = NULL;
 if (trackHubDatabase(db))
     {
     struct trackHubGenome *genome = trackHubGetGenome(db);
     tdbList = trackHubTracksForGenome(genome->trackHub, genome);
     }
 else
     tdbList = hubCollectTracks(db, NULL);
 
 findPosInTdbList(tdbList, term, hgp);
 }