98aa95938f3af948c60e20044318595f10fee780 braney Thu May 13 15:22:00 2021 -0700 ongoing work on cart rewrite system diff --git src/lib/regexHelper.c src/lib/regexHelper.c index fe2e938..6d1c9bd 100644 --- src/lib/regexHelper.c +++ src/lib/regexHelper.c @@ -123,15 +123,174 @@ * If substr was not matched, return 0; you can check first with regexSubstrMatched() if * that's not the desired behavior for unmatched substr. */ { int val = 0; if (regexSubstrMatched(substr)) { int len = substr.rm_eo - substr.rm_so; char buf[len+1]; regexSubstringCopy(string, substr, buf, sizeof(buf)); val = atoi(buf); } else val = 0; return val; } + +static struct regexSnippet *parseSnippets(char *input) +/* Generate a data structure that describes how the parenthetical + * regular expressions should be substituted in the output. + */ +{ +char *in = input; +struct regexSnippet *out = NULL; + +char *prev = input; +for(;;) + { + if ((*in == 0) || ((*in == '\\') && isdigit(in[1]))) + { + struct regexSnippet *snippet; + AllocVar(snippet); + + int size = in - prev; + if (size) + { + char buffer[size + 1]; + strncpy(buffer, prev, size); + buffer[size] = 0; + snippet->precursor = cloneString(buffer); + snippet->precursorLen = size; + prev = in; + } + + if (*in) + { + in++; + snippet->num = atoi(in); + while (isdigit(*in)) + in++; + } + + slAddHead(&out, snippet); + if (*in == 0) + break; + } + else + in++; + } + +slReverse(&out); + +return out; +} + +static struct regexCompiledEdit *compileEdits(struct regexEdit *editArray, unsigned numEdits, boolean quiet) +/* Compile all the edits. */ +{ +struct regexCompiledEdit *compiledEdits, *compiledEdit; + +AllocArray(compiledEdits, numEdits); +compiledEdit = compiledEdits; + +for(; numEdits; numEdits--, editArray++, compiledEdit++) + { + regex_t *compiledExp = NULL; + int errNum = 0; + int compileFlags = 0; + + AllocVar(compiledExp); + errNum = regcomp(compiledExp, editArray->query, compileFlags); + + if (errNum != 0) + { + if (quiet) + return NULL; + + char errBuf[4096]; + regerror(errNum, compiledExp, errBuf, sizeof(errBuf)); + errAbort("regular expression compilation error %d: %s", errNum, errBuf); + } + + compiledEdit->compiledExp = compiledExp; + compiledEdit->snippets = parseSnippets(editArray->substitution); + } + +return compiledEdits; +} + +static char *doSubEdits(struct regexSnippet *snippets, regmatch_t *matches, char *source, int *plength) +/* Do the substitions on parenthetical expressions. */ +{ +char output[40 * 1024], *out = output; +*out = 0; + +for(; snippets ; matches++, snippets = snippets->next) + { + // copy the part before the match + strncpy(out, snippets->precursor, snippets->precursorLen); + out += snippets->precursorLen; + *plength += snippets->precursorLen; + + if (matches->rm_so == -1) + break; + + // copy in the part that matches the regular expression + int size = matches->rm_eo - matches->rm_so; + strncpy(out, &source[matches->rm_so], size); + *plength += size; + out += size; + } +*out = 0; +return cloneString(output); +} + +static char *doOneEdit( struct regexCompiledEdit *edit, char *input, boolean quiet) +/* Perform one edit on the input string. Errabort if !quiet and there is an error. */ +{ +char buffer[40 * 1024]; +char *source = input; +regmatch_t matches[1024]; +int lastSrc = 0; +int offset = 0; + +for(;;) + { + /* if there's not a match, we're done. */ + if (regexec(edit->compiledExp, source, ArraySize(matches), matches, 0)) + break; + + int size = matches->rm_so; + strncpy(&buffer[lastSrc], source, size); + lastSrc += size; + + int subSize = 0; + // do the substitions on any matching parenthetical expressions + char *subEdit = doSubEdits(edit->snippets, matches+1, source, &subSize); + + strncpy(&buffer[lastSrc], subEdit, subSize); + lastSrc += subSize; + offset += matches->rm_eo; + // + // keep looking after the last match + source = &input[offset]; + } + +strcpy(&buffer[lastSrc], source); + +return cloneString(buffer); +} + +char *regexEdit(struct regexEdit *editArray, unsigned numEdits, char *input, boolean quiet) +/* Perform a list of edits on a string. */ +{ +struct regexCompiledEdit *compiledEdits = compileEdits(editArray, numEdits, quiet); + +if (compiledEdits == NULL) + return FALSE; + +char *outString = input; +for(; numEdits && outString; compiledEdits++, numEdits--) + outString = doOneEdit(compiledEdits, outString, quiet); + +return outString; +}