dd33f82508a1b779c3b827d221cb7651a253cd36 kent Fri Aug 23 22:52:34 2019 -0700 Adding in builtins letter_range, word_range, and chop_range. diff --git src/lib/strex.c src/lib/strex.c index 2e4d952..62bce88 100644 --- src/lib/strex.c +++ src/lib/strex.c @@ -59,30 +59,33 @@ strexBuiltInUntsv, strexBuiltInReplace, strexBuiltInFix, strexBuiltInStrip, strexBuiltInLen, strexBuiltInSymbol, strexBuiltInLower, strexBuiltInUpper, strexBuiltInIn, strexBuiltInStarts, strexBuiltInEnds, strexBuiltInSame, strexBuiltInTidy, strexBuiltInWarn, strexBuiltInError, + strexBuiltInLetterRange, + strexBuiltInWordRange, + strexBuiltInChopRange, }; struct strexBuiltIn /* Information to describe a built in function */ { char *name; /* Name in strex language: trim, split, etc */ enum strexBuiltInFunc func; /* enum version: strexBuiltInTrim strexBuiltInChop etc. */ enum strexType returnType; /* Type of return value */ int paramCount; /* Number of parameters, not flexible in this language! */ enum strexType *paramTypes; /* Array of types, one for each parameter */ }; union strexVal /* Some value of arbirary type that can be of any type corresponding to strexType */ { @@ -155,56 +158,62 @@ /* Input to the strex parser - tokenizer and a hash full of built in functions. */ { struct tokenizer *tkz; /* Get next text input from here */ struct hash *builtInHash; /* Hash of built in functions */ void *symbols; /* NULL or pointer to a symbol table to check */ StrexLookup lookup; /* lookup something in symbol table if we have it */ struct hash *importHash; /* Hash of importex expressions keyed by file name */ }; /* Some predefined lists of parameter types */ static enum strexType oneString[] = {strexTypeString}; static enum strexType twoStrings[] = {strexTypeString, strexTypeString}; static enum strexType threeStrings[] = {strexTypeString, strexTypeString, strexTypeString}; static enum strexType stringInt[] = {strexTypeString, strexTypeInt}; static enum strexType stringStringInt[] = {strexTypeString, strexTypeString, strexTypeInt}; +static enum strexType stringIntInt[] = {strexTypeString, strexTypeInt, strexTypeInt}; +static enum strexType stringStringIntInt[] = {strexTypeString, strexTypeString, + strexTypeInt, strexTypeInt}; /* There's one element here for each built in function. There's also a few switches you'll need to * fill in if you add a new built in function. */ static struct strexBuiltIn builtins[] = { { "trim", strexBuiltInTrim, strexTypeString, 1, oneString, }, { "between", strexBuiltInBetween, strexTypeString, 3, threeStrings }, { "word", strexBuiltInWord, strexTypeString, 2, stringInt }, { "now", strexBuiltInNow, strexTypeString, 0, NULL }, { "md5", strexBuiltInMd5, strexTypeString, 1, oneString }, { "chop", strexBuiltInChop, strexTypeString, 3, stringStringInt }, { "uncsv", strexBuiltInUncsv, strexTypeString, 2, stringInt }, { "untsv", strexBuiltInUntsv, strexTypeString, 2, stringInt }, { "replace", strexBuiltInReplace, strexTypeString, 3, threeStrings }, { "fix", strexBuiltInFix, strexTypeString, 3, threeStrings }, { "strip", strexBuiltInStrip, strexTypeString, 2, twoStrings }, { "len", strexBuiltInLen, strexTypeInt, 1, oneString}, { "symbol", strexBuiltInSymbol, strexTypeString, 2, twoStrings }, { "upper", strexBuiltInUpper, strexTypeString, 1, oneString }, { "lower", strexBuiltInLower, strexTypeString, 1, oneString }, { "in", strexBuiltInIn, strexTypeBoolean, 2, twoStrings }, { "starts_with", strexBuiltInStarts, strexTypeBoolean, 2, twoStrings}, { "ends_with", strexBuiltInEnds, strexTypeBoolean, 2, twoStrings}, { "same", strexBuiltInSame, strexTypeBoolean, 2, twoStrings}, { "tidy", strexBuiltInTidy, strexTypeString, 3, threeStrings }, { "warn", strexBuiltInWarn, strexTypeString, 1, oneString}, { "error", strexBuiltInError, strexTypeString, 1, oneString}, + { "letter_range", strexBuiltInLetterRange, strexTypeString, 3, stringIntInt}, + { "word_range", strexBuiltInWordRange, strexTypeString, 3, stringIntInt}, + { "chop_range", strexBuiltInChopRange, strexTypeString, 4, stringStringIntInt}, }; static struct hash *hashBuiltIns() /* Build a hash of builtins keyed by name */ { struct hash *hash = hashNew(0); int i; for (i=0; ichildren; struct strexParse *index = array->next; struct strexEval arrayVal = strexLocalEval(array, run); struct strexEval indexVal = strexLocalEval(index, run); int ix = indexVal.val.i; if (ix < 0) ix = strlen(arrayVal.val.s) + ix; struct strexEval res; res.val.s = lmCloneStringZ(run->lm, arrayVal.val.s + ix, 1); res.type = strexTypeString; return res; } -static struct strexEval strexEvalArrayRange(struct strexParse *p, struct strexRun *run) -/* Handle parse tree generated by array range expression, which by now - * has just been turned into two integer values. */ +static void pythonInterpretEnds(int *pStart, int *pEnd, int len) +/* Adjusts *pStart and *pEnd from python ideas that may include negative + * numbers back to our ideas */ +{ +int start = *pStart, end = *pEnd; +// Negative numbers count from end of array +if (start < 0) + start = len + start; +if (end < 0) + end = len + end; + +// Do some clipping to keep results in range +if (start < 0) + start = 0; +if (end > len) + end = len; +if (end < start) end = start; + +// Return results +*pStart = start; +*pEnd = end; +} + +static char *joinString(char **parts, char *joiner, int start, int end, struct strexRun *run) +/* Return a string that is just all the parts concatenated together with the joiner char + * between them */ +{ +int joinerSize = strlen(joiner); + +/* Figure out output string size. */ +int i; +int outSize = -joinerSize; // Simplifies calcs on size to init to this +for (i=start; ilm, outSize + 1); +char *out = result; +char *onePart = parts[start]; +int oneSize = strlen(onePart); +strcpy(out, onePart); +out += oneSize; + +/* Fall into loop to copy joiner/part/joiner/part/.../joiner/part */ +for (i=start+1; i= end || wordCount <= 0) + return ""; +else + { + /* We aren't super efficient here, we parse them all out and then glue + * the ones we want back together */ + char **words; + lmAllocArray(run->lm, words, wordCount); + char *clone = lmCloneString(run->lm, string); + chopByWhite(clone, words, wordCount); + return joinString(words, " ", start, end, run); + } +} + +static char *strexEvalChopRange(char *string, char *splits, int start, int end, + struct strexRun *run) +{ +char splitter = splits[0]; +int wordCount = chopByChar(string, splitter, NULL, 0); +pythonInterpretEnds(&start, &end, wordCount); +if (start >= end || wordCount <= 0) + return ""; +else + { + /* We aren't super efficient here, we parse them all out and then glue + * the ones we want back together */ + char **words; + lmAllocArray(run->lm, words, wordCount); + char *clone = lmCloneString(run->lm, string); + chopByChar(clone, splitter, words, wordCount); + char joiner[2] = {splitter, 0}; + return joinString(words, joiner, start, end, run); + } +} + + +static struct strexEval strexEvalArrayStartEnd(struct strexParse *array, + struct strexParse *index1, struct strexParse *index2, struct strexRun *run) +/* Evaluate expression to return result of character array selection between + * index1 and index2 */ { -struct strexParse *array = p->children; -struct strexParse *index1 = array->next; -struct strexParse *index2 = index1->next; struct strexEval arrayVal = strexLocalEval(array, run); struct strexEval rangeStart = strexLocalEval(index1, run); struct strexEval rangeEnd = strexLocalEval(index2, run); char *arraySource = arrayVal.val.s; int start = rangeStart.val.i; int end = rangeEnd.val.i; int len = strlen(arraySource); -if (start < 0) - start = strlen(arraySource) + start; -if (end < 0) - end = strlen(arraySource) + end; -if (start < 0) - start = 0; -if (end > len) - end = len; -if (end < start) end = start; // errors apparently just get truncated in this language. hmm +pythonInterpretEnds(&start, &end, len); struct strexEval res; res.val.s = lmCloneStringZ(run->lm, arrayVal.val.s + start, end-start); res.type = strexTypeString; return res; } +static struct strexEval strexEvalArrayRange(struct strexParse *p, struct strexRun *run) +/* Handle parse tree generated by array range expression, which by now + * has just been turned into two integer values. */ +{ +struct strexParse *array = p->children; +struct strexParse *index1 = array->next; +struct strexParse *index2 = index1->next; +return strexEvalArrayStartEnd(array, index1, index2, run); +} + static char *splitString(char *words, int ix, struct lm *lm) /* Return the space-delimited word of index ix as clone into lm */ { if (ix < 0) // Negative index. We got to count, dang { int wordCount = chopByWhite(words, NULL, 0); ix = wordCount + ix; if (ix < 0) return ""; } char *s = words; int i; for (i=0; ; ++i) { s = skipLeadingSpaces(s); @@ -1732,30 +1838,50 @@ case strexBuiltInError: { /* Figure out the message we want to convey, send it to warning handler * before returning it. */ struct strexEval a = strexLocalEval(p->children, run); char *message = a.val.s; char *output = lmJoinStrings(run->lm, "ERROR: ", message); if (run->warnHandler != NULL) run->warnHandler(run->symbols, output); if (run->abortHandler != NULL) run->abortHandler(run->symbols); errAbort("%s", output); res.val.s = output; // some sort of OCD makes me fill this in in spite of errAbort above break; } + case strexBuiltInLetterRange: + { + res = strexEvalArrayStartEnd(p->children, p->children->next, p->children->next->next, run); + break; + } + case strexBuiltInWordRange: + { + struct strexEval string = strexLocalEval(p->children, run); + struct strexEval start = strexLocalEval(p->children->next, run); + struct strexEval end = strexLocalEval(p->children->next->next, run); + res.val.s = strexEvalWordRange(string.val.s, start.val.i, end.val.i, run); + } + case strexBuiltInChopRange: + { + struct strexEval string = strexLocalEval(p->children, run); + struct strexEval splitter = strexLocalEval(p->children->next, run); + struct strexEval start = strexLocalEval(p->children->next->next, run); + struct strexEval end = strexLocalEval(p->children->next->next->next, run); + res.val.s = strexEvalChopRange(string.val.s, splitter.val.s, start.val.i, end.val.i, run); + } } return res; } static struct strexEval strexEvalPick(struct strexParse *pick, struct strexRun *run) /* Evaluate a pick operator. */ { /* Evaluate the keyValue */ struct strexParse *p = pick->children; struct strexEval keyVal = strexLocalEval(p, run); p = p->next; /* Get pointer to default expression but don't evaluate it yet */ struct strexParse *defaultExp = p; p = p->next;