130c65d9512af860b1a14c406620f3ac71296ddf braney Sun May 26 12:52:25 2013 -0700 added TRIX search for track hubs. UDC'ified trix library. Refs #10426 diff --git src/hg/lib/trackHub.c src/hg/lib/trackHub.c index 9d13a6d..9236afe 100644 --- src/hg/lib/trackHub.c +++ src/hg/lib/trackHub.c @@ -27,30 +27,31 @@ #include "trackHub.h" #include "errCatch.h" #include "hgBam.h" #include "bigWig.h" #include "bigBed.h" #include "hdb.h" #include "chromInfo.h" #include "grp.h" #include "twoBit.h" #include "dbDb.h" #include "net.h" #include "bbiFile.h" #include "bPlusTree.h" #include "hgFind.h" #include "hubConnect.h" +#include "trix.h" static struct hash *hubCladeHash; // mapping of clade name to hub pointer static struct hash *hubAssemblyHash; // mapping of assembly name to genome struct static struct hash *hubOrgHash; // mapping from organism name to hub pointer struct trackHub *globalAssemblyHubList; // list of trackHubs in the user's cart char *trackHubRelativeUrl(char *hubUrl, char *path) /* Return full path (in URL form if it's a remote hub) given * path possibly relative to hubUrl. Do a freeMem of result * when done. */ { /* If path itself is a URL then just return a copy of it. */ if (hasProtocol(path)) return cloneString(path); @@ -563,30 +564,38 @@ hub->url, genome->name, tdb->track); return val; } static void expandBigDataUrl(struct trackHub *hub, struct trackHubGenome *genome, struct trackDb *tdb) /* Expand bigDataUrls so that no longer relative to genome->trackDbFile */ { struct hashEl *hel = hashLookup(tdb->settingsHash, "bigDataUrl"); if (hel != NULL) { char *oldVal = hel->val; hel->val = trackHubRelativeUrl(genome->trackDbFile, oldVal); freeMem(oldVal); } + +hel = hashLookup(tdb->settingsHash, "searchTrix"); +if (hel != NULL) + { + char *oldVal = hel->val; + hel->val = trackHubRelativeUrl(genome->trackDbFile, oldVal); + freeMem(oldVal); + } } struct trackHubGenome *trackHubFindGenome(struct trackHub *hub, char *genomeName) /* Return trackHubGenome of given name associated with hub. Return NULL if no * such genome. */ { return hashFindVal(hub->genomeHash, genomeName); } static void validateOneTrack( struct trackHub *hub, struct trackHubGenome *genome, struct trackDb *tdb) /* Validate a track's trackDb entry. */ { /* Check for existence of fields required in all tracks */ requiredSetting(hub, genome, tdb, "shortLabel"); @@ -986,61 +995,104 @@ } return posList; } static struct hgPos *getPosFromBigBed(char *bigDataUrl, char *indexField, char *term) /* Given a bigBed file with a search index, check for term. */ { struct bbiFile *bbi = bigBedFileOpen(bigDataUrl); int fieldIx; struct bptFile *bpt = bigBedOpenExtraIndex(bbi, indexField, &fieldIx); struct lm *lm = lmInit(0); struct bigBedInterval *intervalList; intervalList = bigBedNameQuery(bbi, bpt, fieldIx, term, lm); -return bigBedIntervalListToHgPositions(bbi, term, intervalList); +struct hgPos *posList = bigBedIntervalListToHgPositions(bbi, term, intervalList); +bbiFileClose(&bbi); +return posList; +} + +static struct hgPos *doTrixSearch(char *trixFile, char *indexField, char *bigDataUrl, char *term) +{ +struct trix *trix = trixOpen(trixFile); +int trixWordCount = 0; +char *tmp = cloneString(term); +char *val = nextWord(&tmp); +char *trixWords[128]; + +while (val != NULL) + { + trixWords[trixWordCount] = strLower(val); + trixWordCount++; + if (trixWordCount == sizeof(trixWords)/sizeof(char*)) + errAbort("exhausted space for trixWords"); + + val = nextWord(&tmp); } +if (trixWordCount == 0) + return NULL; + +struct trixSearchResult *tsList = trixSearch(trix, trixWordCount, trixWords, TRUE); +struct hgPos *posList = NULL; +for ( ; tsList != NULL; tsList = tsList->next) + { + struct hgPos *posList2 = getPosFromBigBed(bigDataUrl, indexField, tsList->itemId); + + posList = slCat(posList, posList2); + } + +return posList; +} + + static void findPosInTdbList(struct trackDb *tdbList, char *term, struct hgPositions *hgp) /* Given a trackHub's trackDb entries, check each of them for a searchIndex */ { struct trackDb *tdb; for(tdb=tdbList; tdb; tdb = tdb->next) { char *indexField = trackDbSetting(tdb, "searchIndex"); char *bigDataUrl = trackDbSetting(tdb, "bigDataUrl"); + struct hgPos *posList1 = NULL, *posList2 = NULL; if (indexField && bigDataUrl) { - struct hgPos *posList = getPosFromBigBed(bigDataUrl, indexField, term); + char *trixFile = trackDbSetting(tdb, "searchTrix"); + if (trixFile != NULL) + posList1 = doTrixSearch(trixFile, indexField, bigDataUrl, term); + + posList2 = getPosFromBigBed(bigDataUrl, indexField, term); + } + + struct hgPos *posList = slCat(posList1, posList2); if (posList != NULL) { struct hgPosTable *table; AllocVar(table); slAddHead(&hgp->tableList, table); table->description = cloneString(tdb->table); table->name = cloneString(tdb->table); table->posList = posList; } } } -} void trackHubFindPos(char *db, char *term, struct hgPositions *hgp) /* Look for term in track hubs. Update hgp if found */ { struct trackDb *tdbList = NULL; if (trackHubDatabase(db)) { struct trackHubGenome *genome = trackHubGetGenome(db); tdbList = trackHubTracksForGenome(genome->trackHub, genome); } else tdbList = hubCollectTracks(db, NULL); findPosInTdbList(tdbList, term, hgp); }