3a6295a1ed76f597499293b2dae416a6e6b32c17 braney Wed Jun 18 15:59:01 2014 -0700 enable bigBed item search on native bigBed tracks diff --git src/hg/lib/trackHub.c src/hg/lib/trackHub.c index 0930429..e19c534 100644 --- src/hg/lib/trackHub.c +++ src/hg/lib/trackHub.c @@ -33,31 +33,31 @@ #include "bigWig.h" #include "bigBed.h" #include "hdb.h" #include "chromInfo.h" #include "grp.h" #include "twoBit.h" #include "dbDb.h" #include "net.h" #include "bbiFile.h" #include "bPlusTree.h" #include "hgFind.h" #include "hubConnect.h" #include "trix.h" #include "vcf.h" #include "htmshell.h" -#include "hubConnect.h" +#include "bigBedFind.h" static struct hash *hubCladeHash; // mapping of clade name to hub pointer static struct hash *hubAssemblyHash; // mapping of assembly name to genome struct static struct hash *hubOrgHash; // mapping from organism name to hub pointer static struct trackHub *globalAssemblyHubList; // list of trackHubs in the user's cart static struct hash *trackHubHash; char *trackHubRelativeUrl(char *hubUrl, char *path) /* Return full path (in URL form if it's a remote hub) given * path possibly relative to hubUrl. Do a freeMem of result * when done. */ { /* If path itself is a URL then just return a copy of it. */ if (hasProtocol(path)) return cloneString(path); @@ -1134,167 +1134,44 @@ return 0; } struct trackHubGenome *genome; for (genome = hub->genomeList; genome != NULL; genome = genome->next) { retVal |= hubCheckGenome(hub, genome, errors, checkTracks, NULL); } trackHubClose(&hub); return retVal; } -static struct hgPos *bigBedIntervalListToHgPositions(struct bbiFile *bbi, char *term, struct bigBedInterval *intervalList, char *description) -/* Given an open bigBed file, and an interval list, return a pointer to a list of hgPos structures. */ -{ -struct hgPos *posList = NULL; -char chromName[bbi->chromBpt->keySize+1]; -int lastChromId = -1; -struct bigBedInterval *interval; - -for (interval = intervalList; interval != NULL; interval = interval->next) - { - struct hgPos *hgPos; - AllocVar(hgPos); - slAddHead(&posList, hgPos); - - bbiCachedChromLookup(bbi, interval->chromId, lastChromId, chromName, sizeof(chromName)); - lastChromId = interval->chromId; - - hgPos->chrom = cloneString(chromName); - hgPos->chromStart = interval->start; - hgPos->chromEnd = interval->end; - hgPos->name = cloneString(term); - hgPos->browserName = cloneString(term); - hgPos->description = cloneString(description); - } - -return posList; -} - -static struct hgPos *getPosFromBigBed(char *bigDataUrl, char *indexField, char *term, char *description) -/* Given a bigBed file with a search index, check for term. */ -{ -struct bbiFile *bbi = bigBedFileOpen(bigDataUrl); -int fieldIx; -struct bptFile *bpt = bigBedOpenExtraIndex(bbi, indexField, &fieldIx); -struct lm *lm = lmInit(0); -struct bigBedInterval *intervalList; -intervalList = bigBedNameQuery(bbi, bpt, fieldIx, term, lm); - -struct hgPos *posList = bigBedIntervalListToHgPositions(bbi, term, - intervalList, description); -bbiFileClose(&bbi); -return posList; -} - -static struct hgPos *doTrixSearch(char *trixFile, struct slName *indices, char *bigDataUrl, char *term) -{ -struct trix *trix = trixOpen(trixFile); -int trixWordCount = 0; -char *tmp = cloneString(term); -char *val = nextWord(&tmp); -char *trixWords[128]; - -while (val != NULL) - { - trixWords[trixWordCount] = strLower(val); - trixWordCount++; - if (trixWordCount == sizeof(trixWords)/sizeof(char*)) - errAbort("exhausted space for trixWords"); - - val = nextWord(&tmp); - } - -if (trixWordCount == 0) - return NULL; - -struct trixSearchResult *tsList = trixSearch(trix, trixWordCount, trixWords, TRUE); -struct hgPos *posList = NULL; -char *description = NULL; // we're not filling in this field at the moment -for ( ; tsList != NULL; tsList = tsList->next) - { - struct slName *oneIndex = indices; - for (; oneIndex; oneIndex = oneIndex->next) - { - struct hgPos *posList2 = getPosFromBigBed(bigDataUrl, oneIndex->name, tsList->itemId, description); - - posList = slCat(posList, posList2); - } - } - -return posList; -} - - -static void findPosInTdbList(struct trackDb *tdbList, char *term, struct hgPositions *hgp) -/* Given a trackHub's trackDb entries, check each of them for a searchIndex */ -{ -struct trackDb *tdb; - -for(tdb=tdbList; tdb; tdb = tdb->next) - { - char *indexField = trackDbSetting(tdb, "searchIndex"); - char *bigDataUrl = trackDbSetting(tdb, "bigDataUrl"); - if (!(indexField && bigDataUrl)) - continue; - - struct slName *indexList = slNameListFromString(indexField, ','); - struct hgPos *posList1 = NULL, *posList2 = NULL; - char *trixFile = trackDbSetting(tdb, "searchTrix"); - // if there is a trix file, use it to search for the term - if (trixFile != NULL) - posList1 = doTrixSearch(trixFile, indexList, bigDataUrl, term); - - // now search for the raw id's - struct slName *oneIndex=indexList; - for (; oneIndex; oneIndex = oneIndex->next) - { - posList2 = getPosFromBigBed(bigDataUrl, oneIndex->name, term, NULL); - posList1 = slCat(posList1, posList2); - } - - if (posList1 != NULL) - { - struct hgPosTable *table; - - AllocVar(table); - slAddHead(&hgp->tableList, table); - table->description = cloneString(tdb->table); - table->name = cloneString(tdb->table); - - table->posList = posList1; - } - } -} void trackHubFindPos(char *db, char *term, struct hgPositions *hgp) /* Look for term in track hubs. Update hgp if found */ { struct trackDb *tdbList = NULL; if (trackHubDatabase(db)) { struct trackHubGenome *genome = trackHubGetGenome(db); tdbList = trackHubTracksForGenome(genome->trackHub, genome); } else tdbList = hubCollectTracks(db, NULL); -findPosInTdbList(tdbList, term, hgp); +findBigBedPosInTdbList(db, tdbList, term, hgp); } boolean trackHubGetBlatParams(char *database, boolean isTrans, char **pHost, char **pPort) { char *hostPort; if (isTrans) { hostPort = trackHubAssemblyField(database, "transBlat"); } else { hostPort = trackHubAssemblyField(database, "blat"); }