ae58fc2850911b1d1fbaf842aed88d22c2776640
angie
Fri Feb 12 12:59:23 2016 -0800
Adding a third search mode to trixSearch(), for less stringent searches such as autocomplete.
The first two modes were previously selected by a boolean expand; now there is an enum
trixSearchMode whose first two values correspond to it (expand==FALSE becomes tsmExact,
expand==TRUE becomes tsmExpand). The third mode, tsmFirstFive, accepts a search prefix of five
or more characters as a match even if many characters of a long word remain unmatched. For
example, "arabi" will match "arabidopsis" in tsmFirstFive mode, but not in tsmExpand mode
because that leaves six unmatched letters after "arabi".
diff --git src/lib/trix.c src/lib/trix.c
index cb655a3..ee1407c 100644
--- src/lib/trix.c
+++ src/lib/trix.c
@@ -356,81 +356,84 @@
aNext = a->next;
slAddHead(&newList, a);
a = aNext;
}
else
{
bNext = b->next;
slAddHead(&newList, b);
b = bNext;
}
}
slReverse(&newList);
return newList;
}
-static int reasonablePrefix(char *prefix, char *word, boolean expand)
+static int reasonablePrefix(char *prefix, char *word, enum trixSearchMode mode)
/* Return non-negative if prefix is reasonable for word.
* Returns number of letters left in word not matched by
* prefix. */
{
int prefixLen = strlen(prefix);
int wordLen = strlen(word);
int suffixLen = wordLen - prefixLen;
if (suffixLen == 0)
return 0;
-else if (expand && prefixLen >= 3)
+else if ((mode == tsmExpand && prefixLen >= 3) ||
+ (mode == tsmFirstFive && prefixLen >= 5))
{
int wordEnd;
char *suffix = word + prefixLen;
boolean prefixEndsInDigit = isdigit(word[prefixLen-1]);
/* Find a word marker - either end of string, '-', '.', or '_'
* or a number. */
for (wordEnd=0; wordEnd < suffixLen; ++wordEnd)
{
char c = suffix[wordEnd];
if (c == '-' || c == '.' || c == '_' || (!prefixEndsInDigit && isdigit(c)))
break;
}
if (wordEnd <= 2)
return wordEnd;
if (wordEnd == 3 && startsWith("ing", suffix))
return wordEnd;
+ if (mode == tsmFirstFive && prefixLen >= 5)
+ return wordEnd;
}
return -1;
}
struct trixWordResult *trixSearchWordResults(struct trix *trix,
- char *searchWord, boolean expand)
+ char *searchWord, enum trixSearchMode mode)
/* Get results for single word from index. Returns NULL if no matches. */
{
char *line, *word;
struct trixWordResult *twr = NULL;
struct trixHitPos *hitList = hashFindVal(trix->wordHitHash, searchWord);
if (hitList == NULL)
{
struct trixHitPos *oneHitList;
off_t ixPos = trixFindIndexStartLine(trix, searchWord);
ourSeek(trix, ixPos);
while (ourReadLine(trix, trix->lf, &line))
{
word = nextWord(&line);
if (startsWith(searchWord, word))
{
- int leftoverLetters = reasonablePrefix(searchWord, word, expand);
+ int leftoverLetters = reasonablePrefix(searchWord, word, mode);
/* uglyf("reasonablePrefix(%s,%s)=%d
\n", searchWord, word, leftoverLetters); */
if (leftoverLetters >= 0)
{
oneHitList = trixParseHitList(searchWord, line,
leftoverLetters);
hitList = mergeHits(hitList, oneHitList);
}
}
else if (strcmp(searchWord, word) < 0)
break;
}
hashAdd(trix->wordHitHash, searchWord, hitList);
}
if (hitList != NULL)
{
@@ -683,71 +686,73 @@
if (dif == 0)
{
dif = a->orderedSpan - b->orderedSpan;
if (dif == 0)
{
dif = a->leftoverLetters - b->leftoverLetters;
if (dif == 0)
dif = a->wordPos - b->wordPos;
}
}
return dif;
}
struct trixSearchResult *trixSearch(struct trix *trix, int wordCount, char **words,
- boolean expand)
+ enum trixSearchMode mode)
/* Return a list of items that match all words. This will be sorted so that
* multiple-word matches where the words are closer to each other and in the
- * right order will be first. Do a trixSearchResultFreeList when done.
- * If expand is TRUE then this will match not only the input words, but also
- * additional words that start with the input words. */
+ * right order will be first. Single word matches will be prioritized so that those
+ * closer to the start of the search text will appear before those later.
+ * Do a trixSearchResultFreeList when done. If mode is tsmExpand or tsmFirstFive then
+ * this will match not only the input words, but also additional words that start with
+ * the input words. */
{
struct trixWordResult *twr, *twrList = NULL;
struct trixSearchResult *ts, *tsList = NULL;
int wordIx;
boolean gotMiss = FALSE;
if (wordCount == 1)
{
struct trixHitPos *hit;
char *lastId = "";
- twr = twrList = trixSearchWordResults(trix, words[0], expand);
+ twr = twrList = trixSearchWordResults(trix, words[0], mode);
if (twr == NULL)
return NULL;
for (hit = twr->hitList; hit != NULL; hit = hit->next)
{
if (!sameString(lastId, hit->itemId))
{
lastId = hit->itemId;
AllocVar(ts);
ts->itemId = hit->itemId; /* Transfer itemId */
hit->itemId = NULL;
ts->orderedSpan = 1;
ts->unorderedSpan = 1;
ts->wordPos = hit->wordIx;
ts->leftoverLetters = hit->leftoverLetters;
slAddHead(&tsList, ts);
}
}
}
else
{
for (wordIx=0; wordIx