130c65d9512af860b1a14c406620f3ac71296ddf braney Sun May 26 12:52:25 2013 -0700 added TRIX search for track hubs. UDC'ified trix library. Refs #10426 diff --git src/lib/trix.c src/lib/trix.c index a2a8999..b3107e8 100644 --- src/lib/trix.c +++ src/lib/trix.c @@ -1,23 +1,25 @@ /* trix - text retrieval index. Stuff for fast two level index * of text for fast word searches. */ #include "common.h" #include "hash.h" #include "linefile.h" #include "trix.h" #include "sqlNum.h" +#include "udc.h" +#include "net.h" /* Some local structures for the search. */ struct trixHitPos /* A hit to the index. */ { struct trixHitPos *next; /* Next in list */ char *itemId; /* Associated itemId */ int wordIx; /* Which word this is part of. */ int leftoverLetters; /* Number of letters at end of word not matched */ }; struct trixWordResult /* Results of a search on one word. */ { @@ -180,49 +182,86 @@ void trixAddToIxx(struct trix *trix, off_t pos, char *prefix) /* Add to trix->ixx. */ { struct trixIxx *ixx; if (trix->ixxSize >= trix->ixxAlloc) { trix->ixxAlloc += trix->ixxAlloc; /* Double allocation. */ ExpandArray(trix->ixx, trix->ixxSize, trix->ixxAlloc); } ixx = trix->ixx + trix->ixxSize; ixx->pos = pos; memcpy(ixx->prefix, prefix, sizeof(ixx->prefix)); trix->ixxSize += 1; } +// wrappers around the udc or lineFile routines +static void *ourOpen(struct trix *trix, char *fileName) +{ +if (trix->useUdc) + return (void *)udcFileOpen(fileName, NULL); +return (void *)lineFileOpen(fileName, TRUE); +} + +static boolean ourReadLine(struct trix *trix, void *lf, char **line) +{ +if (trix->useUdc) + { + *line = udcReadLine((struct udcFile *)lf); + return *line != NULL; + } +return lineFileNext((struct lineFile *)lf, line, NULL); +} + +static void ourClose(struct trix *trix, void **lf) +{ +if (trix->useUdc) + udcFileClose((struct udcFile **)lf); +else + lineFileClose((struct lineFile **)lf); +} + +void ourSeek(struct trix *trix, off_t ixPos) +{ +if (trix->useUdc) + udcSeek((struct udcFile *)trix->lf, ixPos); +else + lineFileSeek((struct lineFile *)trix->lf, ixPos, SEEK_SET); +} + struct trix *trixOpen(char *ixFile) /* Open up index. Load second level index in memory. */ { +struct trix *trix = trixNew(); +trix->useUdc = FALSE; +if (hasProtocol(ixFile)) + trix->useUdc = TRUE; + char ixxFile[PATH_LEN]; -struct trix *trix; -struct lineFile *lf; +void *lf; char *line; initUnhexTable(); safef(ixxFile, sizeof(ixxFile), "%sx", ixFile); -lf = lineFileOpen(ixxFile, TRUE); -trix = trixNew(); -while (lineFileNext(lf, &line, NULL)) +lf = ourOpen(trix, ixxFile); +while (ourReadLine(trix, lf, &line) ) { off_t pos = unhex(line+trixPrefixSize); trixAddToIxx(trix, pos, line); } -lineFileClose(&lf); -trix->lf = lineFileOpen(ixFile, TRUE); +ourClose(trix, &lf); +trix->lf = ourOpen(trix, ixFile); return trix; } void trixCopyToPrefix(char *word, char *prefix) /* Copy first part of word to prefix. If need be end pad with spaces. */ { int len = strlen(word); if (len >= trixPrefixSize) memcpy(prefix, word, trixPrefixSize); else { memset(prefix, ' ', trixPrefixSize); memcpy(prefix, word, len); } } @@ -358,32 +397,32 @@ } struct trixWordResult *trixSearchWordResults(struct trix *trix, char *searchWord, boolean expand) /* Get results for single word from index. Returns NULL if no matches. */ { char *line, *word; struct trixWordResult *twr = NULL; struct trixHitPos *hitList = hashFindVal(trix->wordHitHash, searchWord); if (hitList == NULL) { struct trixHitPos *oneHitList; off_t ixPos = trixFindIndexStartLine(trix, searchWord); - lineFileSeek(trix->lf, ixPos, SEEK_SET); - while (lineFileNext(trix->lf, &line, NULL)) + ourSeek(trix, ixPos); + while (ourReadLine(trix, trix->lf, &line)) { word = nextWord(&line); if (startsWith(searchWord, word)) { int leftoverLetters = reasonablePrefix(searchWord, word, expand); /* uglyf("reasonablePrefix(%s,%s)=%d
\n", searchWord, word, leftoverLetters); */ if (leftoverLetters >= 0) { oneHitList = trixParseHitList(searchWord, line, leftoverLetters); hitList = mergeHits(hitList, oneHitList); } } else if (strcmp(searchWord, word) < 0) break;