ae58fc2850911b1d1fbaf842aed88d22c2776640 angie Fri Feb 12 12:59:23 2016 -0800 Adding a third search mode to trixSearch(), for less stringent searches such as autocomplete. The first two modes were previously designated by a boolean expand -- now there is an enum trixSearchMode whose first two values correspond to that (expand==FALSE = tsmExact, expand==TRUE = tsmExpand). The third mode, tsmFirstFive, returns matches of five or more characters even if there are many characters left in a long word. For example, "arabi" will match "arabidopsis" in tsmFirstFive mode, but not in tsmExpand mode because that leaves six unmatched letters after "arabi". diff --git src/lib/trix.c src/lib/trix.c index cb655a3..ee1407c 100644 --- src/lib/trix.c +++ src/lib/trix.c @@ -356,81 +356,84 @@ aNext = a->next; slAddHead(&newList, a); a = aNext; } else { bNext = b->next; slAddHead(&newList, b); b = bNext; } } slReverse(&newList); return newList; } -static int reasonablePrefix(char *prefix, char *word, boolean expand) +static int reasonablePrefix(char *prefix, char *word, enum trixSearchMode mode) /* Return non-negative if prefix is reasonable for word. * Returns number of letters left in word not matched by * prefix. */ { int prefixLen = strlen(prefix); int wordLen = strlen(word); int suffixLen = wordLen - prefixLen; if (suffixLen == 0) return 0; -else if (expand && prefixLen >= 3) +else if ((mode == tsmExpand && prefixLen >= 3) || + (mode == tsmFirstFive && prefixLen >= 5)) { int wordEnd; char *suffix = word + prefixLen; boolean prefixEndsInDigit = isdigit(word[prefixLen-1]); /* Find a word marker - either end of string, '-', '.', or '_' * or a number. */ for (wordEnd=0; wordEnd < suffixLen; ++wordEnd) { char c = suffix[wordEnd]; if (c == '-' || c == '.' || c == '_' || (!prefixEndsInDigit && isdigit(c))) break; } if (wordEnd <= 2) return wordEnd; if (wordEnd == 3 && startsWith("ing", suffix)) return wordEnd; + if (mode == tsmFirstFive && prefixLen >= 5) + return wordEnd; } return -1; } struct trixWordResult *trixSearchWordResults(struct trix *trix, - char *searchWord, boolean expand) + char *searchWord, enum trixSearchMode mode) /* Get results for single word from index. Returns NULL if no matches. */ { char *line, *word; struct trixWordResult *twr = NULL; struct trixHitPos *hitList = hashFindVal(trix->wordHitHash, searchWord); if (hitList == NULL) { struct trixHitPos *oneHitList; off_t ixPos = trixFindIndexStartLine(trix, searchWord); ourSeek(trix, ixPos); while (ourReadLine(trix, trix->lf, &line)) { word = nextWord(&line); if (startsWith(searchWord, word)) { - int leftoverLetters = reasonablePrefix(searchWord, word, expand); + int leftoverLetters = reasonablePrefix(searchWord, word, mode); /* uglyf("reasonablePrefix(%s,%s)=%d
\n", searchWord, word, leftoverLetters); */ if (leftoverLetters >= 0) { oneHitList = trixParseHitList(searchWord, line, leftoverLetters); hitList = mergeHits(hitList, oneHitList); } } else if (strcmp(searchWord, word) < 0) break; } hashAdd(trix->wordHitHash, searchWord, hitList); } if (hitList != NULL) { @@ -683,71 +686,73 @@ if (dif == 0) { dif = a->orderedSpan - b->orderedSpan; if (dif == 0) { dif = a->leftoverLetters - b->leftoverLetters; if (dif == 0) dif = a->wordPos - b->wordPos; } } return dif; } struct trixSearchResult *trixSearch(struct trix *trix, int wordCount, char **words, - boolean expand) + enum trixSearchMode mode) /* Return a list of items that match all words. This will be sorted so that * multiple-word matches where the words are closer to each other and in the - * right order will be first. Do a trixSearchResultFreeList when done. - * If expand is TRUE then this will match not only the input words, but also - * additional words that start with the input words. */ + * right order will be first. Single word matches will be prioritized so that those + * closer to the start of the search text will appear before those later. + * Do a trixSearchResultFreeList when done. If mode is tsmExpand or tsmFirstFive then + * this will match not only the input words, but also additional words that start with + * the input words. */ { struct trixWordResult *twr, *twrList = NULL; struct trixSearchResult *ts, *tsList = NULL; int wordIx; boolean gotMiss = FALSE; if (wordCount == 1) { struct trixHitPos *hit; char *lastId = ""; - twr = twrList = trixSearchWordResults(trix, words[0], expand); + twr = twrList = trixSearchWordResults(trix, words[0], mode); if (twr == NULL) return NULL; for (hit = twr->hitList; hit != NULL; hit = hit->next) { if (!sameString(lastId, hit->itemId)) { lastId = hit->itemId; AllocVar(ts); ts->itemId = hit->itemId; /* Transfer itemId */ hit->itemId = NULL; ts->orderedSpan = 1; ts->unorderedSpan = 1; ts->wordPos = hit->wordIx; ts->leftoverLetters = hit->leftoverLetters; slAddHead(&tsList, ts); } } } else { for (wordIx=0; wordIx