c397f1cfaad424493151da93caa510748a74d9ad braney Wed Jun 12 13:15:28 2013 -0700 add the ability to list more than one index for bigBed's to support track hub item search diff --git src/hg/lib/trackHub.c src/hg/lib/trackHub.c index 9c794d9..fb7dc0e 100644 --- src/hg/lib/trackHub.c +++ src/hg/lib/trackHub.c @@ -959,138 +959,150 @@ return 1; verbose(2, "hub %s\nshortLabel %s\nlongLabel %s\n", hubUrl, hub->shortLabel, hub->longLabel); verbose(2, "%s has %d elements\n", hub->genomesFile, slCount(hub->genomeList)); struct trackHubGenome *genome; for (genome = hub->genomeList; genome != NULL; genome = genome->next) { retVal |= hubCheckGenome(hub, genome, errors, checkTracks); } trackHubClose(&hub); return retVal; } -static struct hgPos *bigBedIntervalListToHgPositions(struct bbiFile *bbi, char *term, struct bigBedInterval *intervalList) +static struct hgPos *bigBedIntervalListToHgPositions(struct bbiFile *bbi, char *term, struct bigBedInterval *intervalList, char *description) /* Given an open bigBed file, and an interval list, return a pointer to a list of hgPos structures. */ { struct hgPos *posList = NULL; char chromName[bbi->chromBpt->keySize+1]; int lastChromId = -1; struct bigBedInterval *interval; for (interval = intervalList; interval != NULL; interval = interval->next) { struct hgPos *hgPos; AllocVar(hgPos); slAddHead(&posList, hgPos); bbiCachedChromLookup(bbi, interval->chromId, lastChromId, chromName, sizeof(chromName)); lastChromId = interval->chromId; hgPos->chrom = cloneString(chromName); hgPos->chromStart = interval->start; hgPos->chromEnd = interval->end; hgPos->name = cloneString(term); hgPos->browserName = cloneString(term); + hgPos->description = cloneString(description); } return posList; } -static struct hgPos *getPosFromBigBed(char *bigDataUrl, char *indexField, char *term) +static struct hgPos *getPosFromBigBed(char *bigDataUrl, char *indexField, char *term, char *description) /* Given a bigBed file with a search index, check for term. */ { struct bbiFile *bbi = bigBedFileOpen(bigDataUrl); int fieldIx; struct bptFile *bpt = bigBedOpenExtraIndex(bbi, indexField, &fieldIx); struct lm *lm = lmInit(0); struct bigBedInterval *intervalList; intervalList = bigBedNameQuery(bbi, bpt, fieldIx, term, lm); -struct hgPos *posList = bigBedIntervalListToHgPositions(bbi, term, intervalList); +struct hgPos *posList = bigBedIntervalListToHgPositions(bbi, term, + intervalList, description); bbiFileClose(&bbi); return posList; } -static struct hgPos *doTrixSearch(char *trixFile, char *indexField, char *bigDataUrl, char *term) +static struct hgPos *doTrixSearch(char *trixFile, struct slName *indices, char *bigDataUrl, char *term) { struct trix *trix = trixOpen(trixFile); int trixWordCount = 0; char *tmp = cloneString(term); char *val = nextWord(&tmp); char *trixWords[128]; while (val != NULL) { trixWords[trixWordCount] = strLower(val); trixWordCount++; if (trixWordCount == sizeof(trixWords)/sizeof(char*)) errAbort("exhausted space for trixWords"); val = nextWord(&tmp); } if (trixWordCount == 0) return NULL; struct trixSearchResult *tsList = trixSearch(trix, trixWordCount, trixWords, TRUE); struct hgPos *posList = NULL; +char *description = NULL; // we're not filling in this field at the moment for ( ; tsList != NULL; tsList = tsList->next) { - struct hgPos *posList2 = getPosFromBigBed(bigDataUrl, indexField, tsList->itemId); + struct slName *oneIndex = indices; + for (; oneIndex; oneIndex = oneIndex->next) + { + struct hgPos *posList2 = getPosFromBigBed(bigDataUrl, oneIndex->name, tsList->itemId, description); posList = slCat(posList, posList2); } + } return posList; } static void findPosInTdbList(struct trackDb *tdbList, char *term, struct hgPositions *hgp) /* Given a trackHub's trackDb entries, check each of them for a searchIndex */ { struct trackDb *tdb; for(tdb=tdbList; tdb; tdb = tdb->next) { char *indexField = trackDbSetting(tdb, "searchIndex"); char *bigDataUrl = trackDbSetting(tdb, "bigDataUrl"); - struct hgPos *posList1 = NULL, *posList2 = NULL; + if (!(indexField && bigDataUrl)) + continue; - if (indexField && bigDataUrl) - { + struct slName *indexList = slNameListFromString(indexField, ','); + struct hgPos *posList1 = NULL, *posList2 = NULL; char *trixFile = trackDbSetting(tdb, "searchTrix"); + // if there is a trix file, use it to search for the term if (trixFile != NULL) - posList1 = doTrixSearch(trixFile, indexField, bigDataUrl, term); + posList1 = doTrixSearch(trixFile, indexList, bigDataUrl, term); - posList2 = getPosFromBigBed(bigDataUrl, indexField, term); + // now search for the raw id's + struct slName *oneIndex=indexList; + for (; oneIndex; oneIndex = oneIndex->next) + { + posList2 = getPosFromBigBed(bigDataUrl, oneIndex->name, term, NULL); + posList1 = slCat(posList1, posList2); } - struct hgPos *posList = slCat(posList1, posList2); - - if (posList != NULL) + if (posList1 != NULL) { struct hgPosTable *table; AllocVar(table); slAddHead(&hgp->tableList, table); table->description = cloneString(tdb->table); table->name = cloneString(tdb->table); - table->posList = posList; + table->posList = posList1; } } } void trackHubFindPos(char *db, char *term, struct hgPositions *hgp) /* Look for term in track hubs. Update hgp if found */ { struct trackDb *tdbList = NULL; if (trackHubDatabase(db)) { struct trackHubGenome *genome = trackHubGetGenome(db); tdbList = trackHubTracksForGenome(genome->trackHub, genome); } else tdbList = hubCollectTracks(db, NULL);