af3a143571e5aa064eab75c34f9444b35413b562
chmalee
  Tue Nov 30 15:28:15 2021 -0800
Add snippet support to trix searching. This required changing
wordPos from the first highest matching wordIndex to the
wordIndex of the actual span. Have trixContextIndex create a
second level index for fast retrieval of line offsets in the
original text file used by ixIxx. Create a simple UI for navigating
hgFind search results.

diff --git src/inc/trix.h src/inc/trix.h
index 817310f79..7d8a060 100644
--- src/inc/trix.h
+++ src/inc/trix.h
@@ -2,67 +2,107 @@
  * of text for fast word searches.  Generally you use the ixIxx program
  * to make the indexes. */
 
 #ifndef TRIX_H
 #define TRIX_H
 
 struct trix
 /* A two level index */
     {
     struct lineFile *lf;	/* Open file on first level index. */
     struct trixIxx *ixx;	/* Second level index in memory. */
     int ixxSize;		/* Size of second level index. */
     int ixxAlloc;	        /* Space allocated for index. */
     struct hash *wordHitHash;	/* Hash of word hitsLists, so search on "the the the" works fast. */
     boolean useUdc;            /* are we using UDC or lineFile */
+    struct snippetIndex *snippetIndex; /* Index of the original text; used to make snippets around matches */
+    };
+
+struct trixIxx
+/* A prefix and offset.  Relies on the includer for off_t; this header has no #includes. */
+    {
+    off_t pos;	   /* Position where prefix first occurs in file. */
+    char *prefix;/* Space padded prefix of what we're indexing; trixPrefixSize letters, no longer always five. */
+    };
+
+
+struct snippetIndex
+/* An index of the original file fed into ixIxx. Used for making snippets. Making snippets
+ * requires 3 files:
+ * 1. The original text file that we will seek into as necessary (original.txt)
+ * 2. A file of ids and offsets of the lines in the original file (original.offsets)
+ * 3. An ixx index of the offsets file (original.offsets.ixx) */
+    {
+    struct lineFile *origFile; /* Original text file (file 1 above) */
+    struct lineFile *textIndex; /* Open file of line offsets into origFile (file 2 above) */
+    struct trixIxx *ixx; /* Second level index of the offsets file (file 3 above) */
+    int ixxSize;		/* Size of second level index. */
+    int ixxAlloc;	        /* Space allocated for index. */
     };
 
 struct trixSearchResult
 /* Result of a trix search. */
     {
     struct trixSearchResult *next;
     char *itemId;               /* ID of matching item */
     int unorderedSpan;          /* Minimum span in single doc with words in any order. */
     int orderedSpan;            /* Minimum span in single doc with words in search order. */
-    int wordPos;		/* Position of word in doc more or less. */
+    int *wordPos;               /* Array of wordPosSize position(s) in doc of the searched word(s). */
     int leftoverLetters;        /* Number of leftover letters in words. */
+    int wordPosSize;            /* Number of positions in wordPos. */
+    char *snippet;              /* Original text surrounding a match; presumably set by addSnippetsToSearchResults -- confirm */
     };
 
 enum trixSearchMode
 /* How stringent is the search? */
     {
     tsmExact,                   /* Require whole-word matches. */
     tsmExpand,                  /* Match words that differ from the search term only in the
                                  * last two letters stopping at a word boundary, or that are
                                  * the search word plus "ing". */
     tsmFirstFive                /* Like tsmExpand, but also match words that have the same
                                  * first 5 letters. */
     };
 
-#define trixPrefixSize 5	/* Size of prefix in second level index. */
+// Size of prefix in second level index; now a run-time variable, not a compile-time constant.
+// Defaults to 5 for ixIxx, but trixContextIndex and snippet searching default to 15.
+extern int trixPrefixSize;
 
 struct trix *trixOpen(char *ixFile);
 /* Open up index.  Load second level index in memory. */
 
 void trixClose(struct trix **pTrix);
 /* Close up index and free up associated resources. */
 
 struct trixSearchResult *trixSearch(struct trix *trix, int wordCount, char **words,
                                     enum trixSearchMode mode);
 /* Return a list of items that match all words.  This will be sorted so that
  * multiple-word matches where the words are closer to each other and in the
  * right order will be first.  Single word matches will be prioritized so that those
  * closer to the start of the search text will appear before those later.
  * Do a trixSearchResultFreeList when done.  If mode is tsmExpand or tsmFirstFive then
  * this will match not only the input words, but also additional words that start with
  * the input words. */
 
 void trixSearchResultFree(struct trixSearchResult **pTsr);
 /* Free up data associated with trixSearchResult. */
 
 void trixSearchResultFreeList(struct trixSearchResult **pList);
 /* Free up a list of trixSearchResults. */
 
 int trixSearchResultCmp(const void *va, const void *vb);
 /* Compare two trixSearchResult in such a way that most relevant searches tend to be first. */
 
+extern bool wordMiddleChars[];  /* Characters that may be part of a word.  Presumably filled in by initCharTables() -- confirm. */
+extern bool wordBeginChars[];   /* Presumably characters that may begin a word -- confirm in trix.c. */
+
+void initCharTables(void);
+/* Initialize tables that describe characters.  Takes no arguments; (void) makes this a real prototype. */
+
+char *skipToWord(char *s);
+/* Skip to next word character in s.  Return NULL if end of string is reached first. */
+
+char *skipOutWord(char *start);
+/* Skip to next non-word character.  At end returns empty string, not NULL as skipToWord does. */
+
+void addSnippetsToSearchResults(struct trixSearchResult *tsrList, struct trix *trix); /* Presumably fills in snippet for each result in tsrList -- confirm in trix.c */
 #endif //ndef TRIX_H