a055c0563f27e4f5b4cc10aef680a41b89ef3992 kent Wed Aug 14 19:07:18 2019 -0700 Adding upper, lower, and symbol built in functions. Making it so that you can pass a symbol table to parser for error checking. diff --git src/lib/strex.c src/lib/strex.c index 021c03b..c80c8cc 100644 --- src/lib/strex.c +++ src/lib/strex.c @@ -48,30 +48,33 @@ * Each built in function needs a value here, to keep it simple there's * aa correspondence between these names and the built in function name */ { strexBuiltInTrim, strexBuiltInBetween, strexBuiltInSplit, strexBuiltInNow, strexBuiltInMd5, strexBuiltInSeparate, strexBuiltInUncsv, strexBuiltInUntsv, strexBuiltInReplace, strexBuiltInFix, strexBuiltInStrip, strexBuiltInLen, + strexBuiltInSymbol, + strexBuiltInLower, + strexBuiltInUpper, }; struct strexBuiltIn /* Information to describe a built in function */ { char *name; /* Name in strex language: trim, split, etc */ enum strexBuiltInFunc func; /* enum version: strexBuiltInTrim strexBuiltInSplit etc. */ enum strexType returnType; /* Type of return value */ int paramCount; /* Number of parameters, not flexible in this language! */ enum strexType *paramTypes; /* Array of types, one for each parameter */ }; union strexVal /* Some value of arbirary type that can be of any type corresponding to strexType */ { @@ -131,77 +134,85 @@ struct strexParse /* A strex parse-tree node. The tree itself is just the root node. */ { struct strexParse *next; /* Points to younger sibling if any. */ struct strexParse *children; /* Points to oldest child if any. */ enum strexOp op; /* Operation at this node. */ enum strexType type; /* Return type of this operation. */ union strexVal val; /* Return value of this operation. */ }; struct strexIn /* Input to the strex parser - tokenizer and a hash full of built in functions. */ { struct tokenizer *tkz; /* Get next text input from here */ struct hash *builtInHash; /* Hash of built in functions */ + void *symbols; /* NULL or pointer to a symbol table to check */ + StrexLookup lookup; /* lookup something in symbol table if we have it */ }; /* Some predefined lists of parameter types */ static enum strexType oneString[] = {strexTypeString}; static enum strexType twoStrings[] = {strexTypeString, strexTypeString}; static enum strexType threeStrings[] = {strexTypeString, strexTypeString, strexTypeString}; static enum strexType stringInt[] = {strexTypeString, strexTypeInt}; static enum strexType stringStringInt[] = {strexTypeString, strexTypeString, strexTypeInt}; /* There's one element here for each built in function. There's also a few switches you'll need to * fill in if you add a new built in function. */ static struct strexBuiltIn builtins[] = { { "trim", strexBuiltInTrim, strexTypeString, 1, oneString, }, { "between", strexBuiltInBetween, strexTypeString, 3, threeStrings }, { "split", strexBuiltInSplit, strexTypeString, 2, stringInt }, { "now", strexBuiltInNow, strexTypeString, 0, NULL }, { "md5", strexBuiltInMd5, strexTypeString, 1, oneString }, { "separate", strexBuiltInSeparate, strexTypeString, 3, stringStringInt }, { "uncsv", strexBuiltInUncsv, strexTypeString, 2, stringInt }, { "untsv", strexBuiltInUntsv, strexTypeString, 2, stringInt }, { "replace", strexBuiltInReplace, strexTypeString, 3, threeStrings }, { "fix", strexBuiltInFix, strexTypeString, 3, threeStrings }, { "strip", strexBuiltInStrip, strexTypeString, 2, twoStrings }, { "len", strexBuiltInLen, strexTypeInt, 1, oneString}, + { "symbol", strexBuiltInSymbol, strexTypeString, 2, twoStrings }, + { "upper", strexBuiltInUpper, strexTypeString, 1, oneString }, + { "lower", strexBuiltInLower, strexTypeString, 1, oneString }, }; static struct hash *hashBuiltIns() /* Build a hash of builtins keyed by name */ { struct hash *hash = hashNew(0); int i; for (i=0; i<ArraySize(builtins); ++i) hashAdd(hash, builtins[i].name, &builtins[i]); return hash; } -static struct strexIn *strexInNew(char *expression, char *fileName, int fileLineNumber) +static struct strexIn *strexInNew(char *expression, char *fileName, int fileLineNumber, + void *symbols, StrexLookup lookup) /* Return a new strexIn structure wrapped around expression */ { struct lineFile *lf = lineFileOnString(fileName, TRUE, expression); lf->lineIx = fileLineNumber; struct tokenizer *tkz = tokenizerOnLineFile(lf); tkz->leaveQuotes = TRUE; struct strexIn *si; AllocVar(si); si->tkz = tkz; si->builtInHash = hashBuiltIns(); +si->symbols = symbols; +si->lookup = lookup; return si; } static void strexInFree(struct strexIn **pSi) /* Free up memory associated with strexIn structure */ { struct strexIn *si = *pSi; if (si != NULL) { hashFree(&si->builtInHash); tokenizerFree(&si->tkz); freez(pSi); } } @@ -388,30 +399,39 @@ for (;;) // Join together . separated things into single symbol */ { dyStringAppend(dy, tok); if ((tok = tokenizerNext(tkz)) == NULL) break; if (tok[0] != '.') { tokenizerReuse(tkz); break; } dyStringAppend(dy, tok); if ((tok = tokenizerNext(tkz)) == NULL) break; } p->val.s = dyStringCannibalize(&dy); + // We look ahead a little to see if it's a function - hmm + tok = tokenizerNext(tkz); + boolean isFunct = (tok != NULL && tok[0] == '('); + tokenizerReuse(tkz); + if (!isFunct && in->symbols != NULL && in->lookup(in->symbols, p->val.s) == NULL ) + { + errAbort("No field %s exists line %d of %s", p->val.s, + in->tkz->lf->lineIx, in->tkz->lf->fileName); + } } else if (isdigit(c)) { p->op = strexOpLiteral; p->type = strexTypeInt; p->val.i = sqlUnsigned(tok); if ((tok = tokenizerNext(tkz)) != NULL) { if (tok[0] == '.') { char buf[32]; tok = tokenizerMustHaveNext(tkz); safef(buf, sizeof(buf), "%lld.%s", p->val.i, tok); p->type = strexTypeDouble; p->val.x = sqlDouble(buf); @@ -906,43 +926,44 @@ /* Parse out an expression. Leaves input at next expression. */ { return strexParseOr(in); } static void ensureAtEnd(struct strexIn *in) /* Make sure that we are at end of input. */ { struct tokenizer *tkz = in->tkz; char *leftover = tokenizerNext(tkz); if (leftover != NULL) errAbort("Extra input starting with '%s' line %d of %s", leftover, tkz->lf->lineIx, tkz->lf->fileName); } -struct strexParse *strexParseString(char *s, char *fileName, int fileLineNumber) +struct strexParse *strexParseString(char *s, char *fileName, int fileLineNumber, + void *symbols, StrexLookup lookup) /* Parse out string expression in s and return root of tree. */ { -struct strexIn *si = strexInNew(s, fileName, fileLineNumber); +struct strexIn *si = strexInNew(s, fileName, fileLineNumber, symbols, lookup); struct strexParse *parseTree = strexParseExpression(si); ensureAtEnd(si); strexInFree(&si); return parseTree; } -/* The parsing section is done, now for the evaluation section. */ +/************ The parsing section is done, now for the evaluation section. **************/ -static struct strexEval strexLocalEval(struct strexParse *p, void *record, StrexEvalLookup lookup, +static struct strexEval strexLocalEval(struct strexParse *p, void *record, StrexLookup lookup, struct lm *lm); /* Evaluate self on parse tree, allocating memory if needed from lm. */ static struct strexEval strexEvalCoerceToString(struct strexEval r, char *buf, int bufSize) /* Return a version of r with .val.s filled in with something reasonable even * if r input is not a string */ { assert(bufSize >= 32); switch (r.type) { case strexTypeBoolean: r.val.s = (r.val.b ? "true" : "false"); break; case strexTypeString: @@ -953,31 +974,31 @@ break; case strexTypeDouble: safef(buf, bufSize, "%g", r.val.x); r.val.s = buf; break; default: uglyf("Weird, r.type is %s\n", strexTypeToString(r.type)); internalErr(); r.val.s = NULL; break; } r.type = strexTypeString; return r; } -static struct strexEval strexEvalAdd(struct strexParse *p, void *record, StrexEvalLookup lookup, +static struct strexEval strexEvalAdd(struct strexParse *p, void *record, StrexLookup lookup, struct lm *lm) /* Return a + b. */ { struct strexParse *lp = p->children; struct strexParse *rp = lp->next; struct strexEval lv = strexLocalEval(lp, record, lookup, lm); struct strexEval rv = strexLocalEval(rp, record, lookup, lm); struct strexEval res; assert(lv.type == rv.type); // Is our type automatic casting working? switch (lv.type) { case strexTypeInt: res.val.i = (lv.val.i + rv.val.i); break; case strexTypeDouble: @@ -995,31 +1016,31 @@ char *s = lmAlloc(lm, lLen + rLen + 1); memcpy(s, lv.val.s, lLen); memcpy(s+lLen, rv.val.s, rLen); res.val.s = s; break; } default: internalErr(); res.val.b = FALSE; break; } res.type = lv.type; return res; } -static struct strexEval strexEvalOr(struct strexParse *p, void *record, StrexEvalLookup lookup, +static struct strexEval strexEvalOr(struct strexParse *p, void *record, StrexLookup lookup, struct lm *lm) /* Return a or b. */ { struct strexParse *lp = p->children; struct strexParse *rp = lp->next; struct strexEval lv = strexLocalEval(lp, record, lookup, lm); struct strexEval rv = strexLocalEval(rp, record, lookup, lm); struct strexEval res; assert(lv.type == rv.type); // Is our type automatic casting working? switch (lv.type) { case strexTypeBoolean: res.val.b = (lv.val.b || rv.val.b); break; case strexTypeInt: @@ -1028,31 +1049,31 @@ case strexTypeDouble: res.val.x = (lv.val.x != 0.0 ? lv.val.x : rv.val.x); break; case strexTypeString: res.val.s = (lv.val.s[0] ? lv.val.s : rv.val.s); break; default: internalErr(); res.val.b = FALSE; break; } res.type = lv.type; return res; } -static struct strexEval strexEvalAnd(struct strexParse *p, void *record, StrexEvalLookup lookup, +static struct strexEval strexEvalAnd(struct strexParse *p, void *record, StrexLookup lookup, struct lm *lm) /* Return a and b. */ { struct strexParse *lp = p->children; struct strexParse *rp = lp->next; struct strexEval lv = strexLocalEval(lp, record, lookup, lm); struct strexEval rv = strexLocalEval(rp, record, lookup, lm); struct strexEval res; assert(lv.type == rv.type); // Is our type automatic casting working? switch (lv.type) { case strexTypeBoolean: res.val.b = (lv.val.b && rv.val.b); break; case strexTypeInt: @@ -1076,48 +1097,48 @@ static char *csvParseOneOut(char *csvIn, int ix, struct dyString *scratch) /* Return csv value of given index or NULL if at end */ { char *pos = csvIn; int i; for (i=0; i<ix; ++i) { if (csvParseNext(&pos,scratch) == NULL) return NULL; } return csvParseNext(&pos, scratch); } static struct strexEval strexEvalArrayIx(struct strexParse *p, - void *record, StrexEvalLookup lookup, struct lm *lm) + void *record, StrexLookup lookup, struct lm *lm) /* Handle parse tree generated by an indexed array. */ { struct strexParse *array = p->children; struct strexParse *index = array->next; struct strexEval arrayVal = strexLocalEval(array, record, lookup, lm); struct strexEval indexVal = strexLocalEval(index, record, lookup, lm); int ix = indexVal.val.i; if (ix < 0) ix = strlen(arrayVal.val.s) + ix; struct strexEval res; res.val.s = lmCloneStringZ(lm, arrayVal.val.s + ix, 1); res.type = strexTypeString; return res; } static struct strexEval strexEvalArrayRange(struct strexParse *p, - void *record, StrexEvalLookup lookup, struct lm *lm) + void *record, StrexLookup lookup, struct lm *lm) /* Handle parse tree generated by array range expression, which by now * has just been turned into two integer values. */ { struct strexParse *array = p->children; struct strexParse *index1 = array->next; struct strexParse *index2 = index1->next; struct strexEval arrayVal = strexLocalEval(array, record, lookup, lm); struct strexEval rangeStart = strexLocalEval(index1, record, lookup, lm); struct strexEval rangeEnd = strexLocalEval(index2, record, lookup, lm); char *arraySource = arrayVal.val.s; int start = rangeStart.val.i; int end = rangeEnd.val.i; int len = strlen(arraySource); if (start < 0) start = strlen(arraySource) + start; @@ -1218,33 +1239,60 @@ return result; } } } static char *stripAll(char *in, char *toRemove, struct lm *lm) /* Remove every occurrence of any of the chars in toRemove from in. */ { char *result = lmCloneString(lm, in); // Move to local memory char c, *s = toRemove; while ((c = *s++) != 0) stripChar(result, c); return result; } +static char *symbolify(char *prefix, char *original, struct lm *lm) +/* Convert original to something could use as a C language symbol with dots maybe. */ +{ +int prefixSize = strlen(prefix); +char *result = lmAlloc(lm, strlen(original) + prefixSize + 1); // Move to local memory +memcpy(result, prefix, prefixSize); +char *in = skipLeadingSpaces(original); +char *out = result + prefixSize; +char c; +while ((c = *in++) != 0) + { + if (isspace(c)) + *out++ = '_'; + else if (isalnum(c) || c == '.' || c == '_') + *out++ = c; + } +*out++ = 0; +int len = strlen(result) - prefixSize; +if (len > 32) + { + char *md5 = hmacMd5("", original); + strcpy(result + prefixSize, md5); + freeMem(md5); + } + +return result; +} static struct strexEval strexEvalCallBuiltIn(struct strexParse *p, - void *record, StrexEvalLookup lookup, struct lm *lm) + void *record, StrexLookup lookup, struct lm *lm) /* Handle parse tree generated by call to a built in function. */ { struct strexBuiltIn *builtIn = p->val.builtIn; struct strexEval res; res.type = builtIn->returnType; switch (builtIn->func) { case strexBuiltInTrim: { struct strexEval a = strexLocalEval(p->children, record, lookup, lm); res.val.s = trimSpaces(a.val.s); break; } case strexBuiltInBetween: @@ -1319,64 +1367,82 @@ res.val.s = newVal.val.s; } else res.val.s = string.val.s; break; } case strexBuiltInStrip: { struct strexEval a = strexLocalEval(p->children, record, lookup, lm); struct strexEval b = strexLocalEval(p->children->next, record, lookup, lm); res.val.s = stripAll(a.val.s, b.val.s, lm); break; } case strexBuiltInLen: { - uglyf("builtInLen\n"); struct strexEval a = strexLocalEval(p->children, record, lookup, lm); - uglyf(" of %s\n", a.val.s); res.val.i = strlen(a.val.s); - uglyf(" = %lld\n", res.val.i); + break; + } + case strexBuiltInSymbol: // Convert string to something could use as a C language symbol + { + struct strexEval a = strexLocalEval(p->children, record, lookup, lm); + struct strexEval b = strexLocalEval(p->children->next, record, lookup, lm); + res.val.s = symbolify(a.val.s, b.val.s, lm); + break; + } + case strexBuiltInLower: + { + struct strexEval a = strexLocalEval(p->children, record, lookup, lm); + res.val.s = lmCloneString(lm, a.val.s); + tolowers(res.val.s); + break; + } + case strexBuiltInUpper: + { + struct strexEval a = strexLocalEval(p->children, record, lookup, lm); + res.val.s = lmCloneString(lm, a.val.s); + touppers(res.val.s); break; } } return res; } static struct strexEval nullValForType(enum strexType type) /* Return 0, "", 0.0 depending */ { struct strexEval res = {.type=type}; switch (type) { case strexTypeInt: res.val.i = 0; break; case strexTypeDouble: res.val.x = 0.0; break; case strexTypeBoolean: res.val.b = FALSE; break; case strexTypeString: res.val.s = ""; break; } return res; } -static struct strexEval strexEvalPick(struct strexParse *pick, void *record, StrexEvalLookup lookup, +static struct strexEval strexEvalPick(struct strexParse *pick, void *record, StrexLookup lookup, struct lm *lm) /* Evaluate a pick operator. */ { /* Evaluate the keyValue */ struct strexParse *p = pick->children; struct strexEval keyVal = strexLocalEval(p, record, lookup, lm); p = p->next; struct strexEval res; boolean gotMatch = FALSE; while (p != NULL) { struct strexEval key = strexLocalEval(p, record, lookup, lm); p = p->next; // Parser guarantees this non-null struct strexParse *valExp = p; @@ -1395,31 +1461,31 @@ case strexTypeString: gotMatch = sameString(keyVal.val.s, key.val.s); break; } if (gotMatch) { return strexLocalEval(valExp, record, lookup, lm); } } res = nullValForType(pick->type); return res; } -static struct strexEval strexLocalEval(struct strexParse *p, void *record, StrexEvalLookup lookup, +static struct strexEval strexLocalEval(struct strexParse *p, void *record, StrexLookup lookup, struct lm *lm) /* Evaluate self on parse tree, allocating memory if needed from lm. */ { struct strexEval res; switch (p->op) { case strexOpLiteral: res.val = p->val; res.type = p->type; break; case strexOpSymbol: res.type = strexTypeString; char *s = lookup(record, p->val.s); if (s == NULL) res.val.s = ""; @@ -1540,28 +1606,28 @@ res = strexEvalOr(p, record, lookup, lm); break; case strexOpAnd: res = strexEvalAnd(p, record, lookup, lm); break; default: errAbort("Unknown op %s\n", strexOpToString(p->op)); res.type = strexTypeInt; // Keep compiler from complaining. res.val.i = 0; // Keep compiler from complaining. break; } return res; } -char *strexEvalAsString(struct strexParse *p, void *record, StrexEvalLookup lookup) +char *strexEvalAsString(struct strexParse *p, void *record, StrexLookup lookup) /* Evaluating a strex expression on a symbol table with a lookup function for variables and * return result as a string value. */ { struct lm *lm = lmInit(0); struct strexEval res = strexLocalEval(p, record, lookup, lm); char numBuf[32]; struct strexEval strRes = strexEvalCoerceToString(res, numBuf, sizeof(numBuf)); char *ret = cloneString(strRes.val.s); lmCleanup(&lm); return ret; }