98aa95938f3af948c60e20044318595f10fee780
braney
  Thu May 13 15:22:00 2021 -0700
ongoing work on cart rewrite system

diff --git src/lib/regexHelper.c src/lib/regexHelper.c
index fe2e938..6d1c9bd 100644
--- src/lib/regexHelper.c
+++ src/lib/regexHelper.c
@@ -123,15 +123,174 @@
  * If substr was not matched, return 0; you can check first with regexSubstrMatched() if
  * that's not the desired behavior for unmatched substr. */
 {
 int val = 0;
 if (regexSubstrMatched(substr))
     {
     int len = substr.rm_eo - substr.rm_so;
     char buf[len+1];
     regexSubstringCopy(string, substr, buf, sizeof(buf));
     val = atoi(buf);
     }
 else
     val = 0;
 return val;
 }
+
+static struct regexSnippet *parseSnippets(char *input)
+/* Split a substitution string into a list of snippets, in left-to-right order.
+ * Each snippet holds the literal text (precursor) that comes before a
+ * back-reference of the form \N, together with N in num.  Literal text after
+ * the last back-reference becomes a final snippet whose num is left exactly as
+ * AllocVar initialized it.  Every snippet gets a non-NULL precursor, which is
+ * an empty string when no literal text precedes the back-reference. */
+{
+char *in = input;
+char *prev = input;     /* start of literal text not yet stored in a snippet */
+struct regexSnippet *out = NULL;
+
+for(;;)
+    {
+    /* ctype functions need an unsigned char value; plain char may be negative. */
+    boolean atRef = (*in == '\\') && isdigit((unsigned char)in[1]);
+    if ((*in == 0) || atRef)
+        {
+        struct regexSnippet *snippet;
+        AllocVar(snippet);
+
+        /* Store the literal text even when it is empty so that code walking
+         * the list never has to cope with a NULL precursor. */
+        int size = in - prev;
+        char buffer[size + 1];
+        memcpy(buffer, prev, size);
+        buffer[size] = 0;
+        snippet->precursor = cloneString(buffer);
+        snippet->precursorLen = size;
+
+        if (atRef)
+            {
+            in++;       /* step over the backslash */
+            snippet->num = atoi(in);
+            while (isdigit((unsigned char)*in))
+                in++;
+            }
+
+        /* The next precursor starts after the back-reference, not at its
+         * backslash; otherwise the back-reference text itself would be
+         * copied into the output as if it were literal. */
+        prev = in;
+
+        slAddHead(&out, snippet);
+        if (*in == 0)
+            break;
+        }
+    else
+        in++;
+    }
+
+/* slAddHead built the list back to front. */
+slReverse(&out);
+
+return out;
+}
+
+static void freePartialEdits(struct regexCompiledEdit *edits, unsigned count)
+/* Release the first count entries of edits, then the array itself. */
+{
+unsigned i;
+for (i = 0; i < count; i++)
+    {
+    struct regexSnippet *snippet, *next;
+    for (snippet = edits[i].snippets; snippet != NULL; snippet = next)
+        {
+        next = snippet->next;
+        freeMem(snippet->precursor);
+        freeMem(snippet);
+        }
+    regfree(edits[i].compiledExp);
+    freeMem(edits[i].compiledExp);
+    }
+freeMem(edits);
+}
+
+static struct regexCompiledEdit *compileEdits(struct regexEdit *editArray, unsigned numEdits, boolean quiet)
+/* Compile all the edits.  Returns an array of numEdits compiled edits, one per
+ * entry of editArray.  If a query fails to compile, return NULL when quiet
+ * (after releasing everything allocated so far), otherwise errAbort with the
+ * regerror() text. */
+{
+struct regexCompiledEdit *compiledEdits;
+unsigned i;
+
+AllocArray(compiledEdits, numEdits);
+
+for (i = 0; i < numEdits; i++)
+    {
+    regex_t *compiledExp = NULL;
+    int compileFlags = 0;
+
+    AllocVar(compiledExp);
+    int errNum = regcomp(compiledExp, editArray[i].query, compileFlags);
+
+    if (errNum != 0)
+        {
+        if (quiet)
+            {
+            /* A regex_t that failed to compile holds nothing for regfree()
+             * to release; only its storage and the earlier, successfully
+             * compiled edits need cleaning up. */
+            freeMem(compiledExp);
+            freePartialEdits(compiledEdits, i);
+            return NULL;
+            }
+
+        char errBuf[4096];
+        regerror(errNum, compiledExp, errBuf, sizeof(errBuf));
+        errAbort("regular expression compilation error %d: %s", errNum, errBuf);
+        }
+
+    compiledEdits[i].compiledExp = compiledExp;
+    compiledEdits[i].snippets = parseSnippets(editArray[i].substitution);
+    }
+
+return compiledEdits;
+}
+
+static char *doSubEdits(struct regexSnippet *snippets, regmatch_t *matches, char *source, int *plength)
+/* Do the substitutions on parenthetical expressions: build the replacement
+ * text for one match.  For each snippet in turn the precursor text is copied,
+ * followed by the text of the next entry of matches (offsets are relative to
+ * source).  An entry with rm_so == -1 contributes no text.  The number of
+ * bytes produced is added to *plength, and the result is returned as a newly
+ * allocated string.  errAborts if the result does not fit the work buffer.
+ * NOTE(review): groups are consumed in order, one per snippet; snippet->num is
+ * not consulted here, so out-of-order back-references are not honored -
+ * confirm whether that is intended. */
+{
+char output[40 * 1024];
+size_t used = 0;        /* bytes of output filled so far, always < sizeof(output) */
+
+for(; snippets ; matches++, snippets = snippets->next)
+    {
+    /* A snippet may carry no precursor text at all. */
+    size_t preLen = (snippets->precursor != NULL) ? (size_t)snippets->precursorLen : 0;
+    size_t subLen = 0;
+    if (matches->rm_so != -1)
+        subLen = matches->rm_eo - matches->rm_so;
+
+    /* Leave room for the terminating zero. */
+    if (preLen + subLen >= sizeof(output) - used)
+        errAbort("regexEdit: substitution text is longer than %d bytes",
+            (int)(sizeof(output) - 1));
+
+    // copy the part before the match
+    if (preLen > 0)
+        memcpy(&output[used], snippets->precursor, preLen);
+    used += preLen;
+
+    // copy in the part that matches the regular expression
+    if (subLen > 0)
+        memcpy(&output[used], &source[matches->rm_so], subLen);
+    used += subLen;
+
+    *plength += (int)(preLen + subLen);
+    }
+output[used] = 0;
+return cloneString(output);
+}
+
+static char *doOneEdit( struct regexCompiledEdit *edit, char *input, boolean quiet)
+/* Perform one edit on the input string: replace every match of the edit's
+ * regular expression and return the result as a newly allocated string.
+ * If the result does not fit the internal work buffer, return NULL when quiet
+ * and errAbort otherwise. */
+{
+char buffer[40 * 1024];
+regmatch_t matches[1024];
+char *source = input;       /* part of input not yet scanned */
+size_t used = 0;            /* bytes of buffer filled so far, always < sizeof(buffer) */
+boolean overflow = FALSE;
+
+for(;;)
+    {
+    /* Once scanning resumes past a match, source no longer starts at the
+     * beginning of the line, so '^' must not match there. */
+    int execFlags = (source == input) ? 0 : REG_NOTBOL;
+
+    /* if there's not a match, we're done. */
+    if (regexec(edit->compiledExp, source, ArraySize(matches), matches, execFlags))
+        break;
+
+    size_t prefixLen = matches->rm_so;
+    size_t matchEnd = matches->rm_eo;
+
+    /* An empty match consumes no input.  Carry the following character over
+     * literally so the scan always advances; at the end of the string there
+     * is nothing to carry and the loop stops after this replacement. */
+    boolean emptyMatch = (matchEnd == prefixLen);
+    size_t carry = (emptyMatch && source[matchEnd] != 0) ? 1 : 0;
+
+    // do the substitutions on any matching parenthetical expressions
+    int subSize = 0;
+    char *subEdit = doSubEdits(edit->snippets, matches+1, source, &subSize);
+
+    if (prefixLen + subSize + carry >= sizeof(buffer) - used)
+        {
+        freeMem(subEdit);
+        overflow = TRUE;
+        break;
+        }
+
+    /* text before the match, then the replacement */
+    memcpy(&buffer[used], source, prefixLen);
+    used += prefixLen;
+    memcpy(&buffer[used], subEdit, subSize);
+    used += subSize;
+    freeMem(subEdit);
+
+    if (carry)
+        buffer[used++] = source[matchEnd];
+
+    // keep looking after the last match
+    source += matchEnd + carry;
+    if (emptyMatch && !carry)
+        break;
+    }
+
+if (!overflow)
+    {
+    /* whatever follows the last match is copied unchanged */
+    size_t tailLen = strlen(source);
+    if (tailLen >= sizeof(buffer) - used)
+        overflow = TRUE;
+    else
+        memcpy(&buffer[used], source, tailLen + 1);
+    }
+
+if (overflow)
+    {
+    if (!quiet)
+        errAbort("regexEdit: edited string is longer than %d bytes",
+            (int)(sizeof(buffer) - 1));
+    return NULL;
+    }
+
+return cloneString(buffer);
+}
+
+static void freeCompiledEdits(struct regexCompiledEdit *edits, unsigned count)
+/* Release count compiled edits (regex, snippet list) and the array itself. */
+{
+unsigned i;
+for (i = 0; i < count; i++)
+    {
+    struct regexSnippet *snippet, *next;
+    for (snippet = edits[i].snippets; snippet != NULL; snippet = next)
+        {
+        next = snippet->next;
+        freeMem(snippet->precursor);
+        freeMem(snippet);
+        }
+    regfree(edits[i].compiledExp);
+    freeMem(edits[i].compiledExp);
+    }
+freeMem(edits);
+}
+
+char *regexEdit(struct regexEdit *editArray, unsigned numEdits, char *input, boolean quiet)
+/* Perform a list of edits on a string.  The edits are applied in array order,
+ * each one to the output of the one before.  Returns the final string, or
+ * NULL if the edits could not be compiled or an edit produced no result.
+ * The result is the string returned by the last edit; when no edit is
+ * applied it is input itself. */
+{
+struct regexCompiledEdit *compiledEdits = compileEdits(editArray, numEdits, quiet);
+
+if (compiledEdits == NULL)
+    return NULL;
+
+char *outString = input;
+unsigned i;
+for (i = 0; i < numEdits && outString != NULL; i++)
+    {
+    char *edited = doOneEdit(&compiledEdits[i], outString, quiet);
+
+    /* The previous intermediate result is no longer needed; the caller's
+     * input is never ours to free. */
+    if (outString != input)
+        freeMem(outString);
+    outString = edited;
+    }
+
+freeCompiledEdits(compiledEdits, numEdits);
+return outString;
+}