af3a143571e5aa064eab75c34f9444b35413b562 chmalee Tue Nov 30 15:28:15 2021 -0800 Add snippet support to trix searching. Required changing the wordPos from the first highest matching wordIndex to the wordIndex of the actual span. Have trixContextIndex create a second level index for fast retrieval of line offsets in original text file used by ixIxx. Create a simple UI for navigating hgFind search results. diff --git src/index/ixIxx/ixIxx.c src/index/ixIxx/ixIxx.c index e02ea1e..d6cf824 100644 --- src/index/ixIxx/ixIxx.c +++ src/index/ixIxx/ixIxx.c @@ -1,103 +1,57 @@ /* ixIxx - Create indices for simple line-oriented file of format * <symbol> <free text>. */ #include "common.h" #include "linefile.h" #include "hash.h" #include "options.h" #include "trix.h" /* Variables that can be set from command line. */ -int prefixSize = trixPrefixSize; +int prefixSize; int binSize = 64*1024; int maxFailedWordLength = 0; int maxWordLength = 31; void usage() /* Explain usage and exit. */ { errAbort( "ixIxx - Create indices for simple line-oriented file of format \n" "<symbol> <free text>\n" "usage:\n" " ixIxx in.text out.ix out.ixx\n" "Where out.ix is a word index, and out.ixx is an index into the index.\n" "options:\n" " -prefixSize=N Size of prefix to index on in ixx. Default is 5.\n" " -binSize=N Size of bins in ixx. Default is 64k.\n" " -maxWordLength=N Maximum allowed word length. \n" " Words with more characters than this limit are ignored and will not appear in index or be searchable. Default is %d.\n" , maxWordLength ); } static struct optionSpec options[] = { {"prefixSize", OPTION_INT}, {"binSize", OPTION_INT}, {"maxWordLength", OPTION_INT}, {NULL, 0}, }; -bool wordMiddleChars[256]; /* Characters that may be part of a word. */ -bool wordBeginChars[256]; - -void initCharTables() -/* Initialize tables that describe characters. */ -{ -int c; -for (c=0; c<256; ++c) - if (isalnum(c)) - wordBeginChars[c] = wordMiddleChars[c] = TRUE; -wordBeginChars['_'] = wordMiddleChars['_'] = TRUE; -wordMiddleChars['.'] = TRUE; -wordMiddleChars['-'] = TRUE; -} - - -char *skipToWord(char *s) -/* Skip to next word character. Return NULL at end of string. */ -{ -unsigned char c; -while ((c = *s) != 0) - { - if (wordBeginChars[c]) - return s; - s += 1; - } -return NULL; -} - -char *skipOutWord(char *start) -/* Skip to next non-word character. Returns empty string at end. */ -{ -char *s = start; -unsigned char c; -while ((c = *s) != 0) - { - if (!wordMiddleChars[c]) - break; - s += 1; - } -while (s > start && !wordBeginChars[(int)(s[-1])]) - s -= 1; -return s; -} - - struct wordPos /* Word position. */ { struct wordPos *next; /* Next wordPos in list. */ char *itemId; /* ID of associated item. Not allocated here*/ int wordIx; /* Word number within doc. */ }; int wordPosCmp(const void *va, const void *vb) /* Compare two wordPos by itemId. */ { const struct wordPos *a = *((struct wordPos **)va); const struct wordPos *b = *((struct wordPos **)vb); int dif; dif = strcmp(a->itemId, b->itemId); @@ -269,23 +223,23 @@ } void ixIxx(char *inText, char *outIx, char *outIxx) /* ixIxx - Create indices for simple line-oriented file of format * <symbol> <free text>. */ { makeIx(inText, outIx); makeIxx(outIx, outIxx); } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); -prefixSize = optionInt("prefixSize", prefixSize); +prefixSize = optionInt("prefixSize", trixPrefixSize); binSize = optionInt("binSize", binSize); maxWordLength = optionInt("maxWordLength", maxWordLength); if (argc != 4) usage(); ixIxx(argv[1], argv[2], argv[3]); return 0; }