af3a143571e5aa064eab75c34f9444b35413b562 chmalee Tue Nov 30 15:28:15 2021 -0800 Add snippet support to trix searching. Required changing the wordPos from the first highest matching wordIndex to the wordIndex of the actual span. Have trixContextIndex create a second level index for fast retrieval of line offsets in original text file used by ixIxx. Create a simple UI for navigating hgFind search results. diff --git src/hg/lib/bigBedFind.c src/hg/lib/bigBedFind.c index 61e3ef1..f81e4f0 100644 --- src/hg/lib/bigBedFind.c +++ src/hg/lib/bigBedFind.c @@ -76,31 +76,32 @@ bptFileDetach(&bpt); } errCatchEnd(errCatch); if (errCatch->gotError) { // we fail silently if there is a problem e.g. bad index name return NULL; } return posList; } static struct hgPos *doTrixSearch(struct cart *cart, struct trackDb *tdb, char *trixFile, struct slName *indices, struct bbiFile *bbi, char *term, char *description, struct hgFindSpec *hfs) -/* search a trix file in the "searchTrix" field of a bigBed trackDb */ +/* search a trix file in the "searchTrix" field of a bigBed trackDb + * TODO: Get trixSearchResult snippets in here*/ { struct trix *trix = trixOpen(trixFile); int trixWordCount = 0; char *tmp = cloneString(term); char *val = nextWord(&tmp); char *trixWords[128]; while (val != NULL) { trixWords[trixWordCount] = strLower(val); trixWordCount++; if (trixWordCount == sizeof(trixWords)/sizeof(char*)) errAbort("exhausted space for trixWords"); val = nextWord(&tmp); @@ -133,50 +134,51 @@ const struct hgPos *b = *((struct hgPos **)vb); int diff = strcmp(a->name, b->name); if (diff == 0) { diff = strcmp(a->chrom, b->chrom); if (diff == 0) { diff = a->chromStart - b->chromStart; if (diff == 0) diff = a->chromEnd - b->chromEnd; } } return diff; } -boolean findBigBedPosInTdbList(struct cart *cart, char *db, struct trackDb *tdbList, char *term, struct hgPositions *hgp, struct hgFindSpec *hfs) +boolean findBigBedPosInTdbList(struct cart *cart, char *db, struct trackDb *tdbList, char *term, struct hgPositions *hgp, struct hgFindSpec *hfs, boolean measureTiming) /* Given a list of trackDb entries, check each of them for a searchIndex */ { char *description = NULL; if (hfs) { char buf[2048]; if (isNotEmpty(hfs->searchDescription)) truncatef(buf, sizeof(buf), "%s", hfs->searchDescription); else safef(buf, sizeof(buf), "%s", hfs->searchTable); description = cloneString(buf); } struct trackDb *tdb; boolean found = FALSE; for(tdb=tdbList; tdb; tdb = tdb->next) { + long startTime = clock1000(); if (tdb->subtracks) { - found = findBigBedPosInTdbList(cart, db, tdb->subtracks, term, hgp, hfs) || found; + found = findBigBedPosInTdbList(cart, db, tdb->subtracks, term, hgp, hfs, measureTiming) || found; continue; } if (startsWith("bigWig", tdb->type) || !startsWith("big", tdb->type)) continue; // Which field(s) to search? Look for searchIndex in search spec, then in trackDb for // backwards compat. char *indexField = NULL; if (hfs) indexField = hgFindSpecSetting(hfs, "searchIndex"); if (!indexField) indexField = trackDbSetting(tdb, "searchIndex"); if (!indexField && !hfs) continue; @@ -241,23 +243,132 @@ posList2 = getPosFromBigBed(cart, tdb, bbi, oneIndex->name, term, NULL, hfs); posList1 = slCat(posList1, posList2); } // the trix search and the id search may have found the same item so uniqify: slUniqify(&posList1, posListCompare, hgPosFree); if (posList1 != NULL) { struct hgPosTable *table; found = TRUE; AllocVar(table); slAddHead(&hgp->tableList, table); table->description = cloneString(description ? description : tdb->longLabel); table->name = cloneString(tdb->table); + if (measureTiming) + table->searchTime = clock1000() - startTime; table->posList = posList1; } bigBedFileClose(&bbi); } freeMem(description); return found; } + +boolean isTdbSearchable(struct trackDb *tdb) +/* Check if a single tdb is searchable */ +{ +if (tdb->subtracks) + { + boolean searchable = FALSE; + struct trackDb *sub; + for (sub = tdb->subtracks; sub != NULL; sub = sub->next) + searchable |= isTdbSearchable(sub); + return searchable; + } +if (startsWith("bigWig", tdb->type) || !startsWith("big", tdb->type)) + return FALSE; + +char *indexField = NULL; +indexField = trackDbSetting(tdb, "searchIndex"); +if (!indexField) + return FALSE; + +// If !indexField but we do have an index on the bigBed use that +char *fileName = trackDbSetting(tdb, "bigDataUrl"); +if (!fileName) + return FALSE; + +// we fail silently if bigBed can't be opened. +struct bbiFile *bbi = NULL; +struct errCatch *errCatch = errCatchNew(); +if (errCatchStart(errCatch)) + { + bbi = bigBedFileOpen(fileName); + } +errCatchEnd(errCatch); +if (errCatch->gotError) + return FALSE; + +if (!indexField) + { + struct slName *indexFields = bigBedListExtraIndexes(bbi); + if (slNameInList(indexFields, "name")) + indexField = "name"; + slNameFreeList(&indexFields); + } +if (!indexField) + { + bigBedFileClose(&bbi); + return FALSE; + } +return TRUE; +} + +struct trackDb *getSearchableBigBeds(struct trackDb *tdbList) +/* Given a list of tracks from a hub, return those that are searchable */ +{ +struct trackDb *tdb, *next, *ret = NULL; +for (tdb = tdbList ; tdb; tdb = next) + { + next = tdb->next; + if (tdb->subtracks) + { + struct trackDb *subtrackList = getSearchableBigBeds(tdb->subtracks); + if (subtrackList) + ret = slCat(subtrackList, ret); + continue; + } + if (startsWith("bigWig", tdb->type) || !startsWith("big", tdb->type)) + continue; + + char *indexField = NULL; + indexField = trackDbSetting(tdb, "searchIndex"); + if (!indexField) + continue; + + // If !indexField but we do have an index on the bigBed use that + char *fileName = trackDbSetting(tdb, "bigDataUrl"); + if (!fileName) + continue; + + // we fail silently if bigBed can't be opened. + struct bbiFile *bbi = NULL; + struct errCatch *errCatch = errCatchNew(); + if (errCatchStart(errCatch)) + { + bbi = bigBedFileOpen(fileName); + } + errCatchEnd(errCatch); + if (errCatch->gotError) + continue; + + if (!indexField) + { + struct slName *indexFields = bigBedListExtraIndexes(bbi); + if (slNameInList(indexFields, "name")) + indexField = "name"; + slNameFreeList(&indexFields); + } + if (!indexField) + { + bigBedFileClose(&bbi); + continue; + } + + // finally we have verified the track is searchable, add it to our list we're returning + slAddHead(&ret, tdb); + } +return ret; +}