a055c0563f27e4f5b4cc10aef680a41b89ef3992
kent
  Wed Aug 14 19:07:18 2019 -0700
Adding upper, lower, and symbol built in functions.  Making it so that you can pass a symbol table to parser for error checking.

diff --git src/lib/strex.c src/lib/strex.c
index 021c03b..c80c8cc 100644
--- src/lib/strex.c
+++ src/lib/strex.c
@@ -48,30 +48,33 @@
  * Each built in function needs a value here, to keep it simple there's
  * aa correspondence between these names and the built in function name */
     {
     strexBuiltInTrim,
     strexBuiltInBetween,
     strexBuiltInSplit,
     strexBuiltInNow,
     strexBuiltInMd5,
     strexBuiltInSeparate,
     strexBuiltInUncsv,
     strexBuiltInUntsv,
     strexBuiltInReplace,
     strexBuiltInFix,
     strexBuiltInStrip,
     strexBuiltInLen,
+    strexBuiltInSymbol,
+    strexBuiltInLower,
+    strexBuiltInUpper,
     };
 
 struct strexBuiltIn
 /* Information to describe a built in function */
     {
     char *name;		/* Name in strex language:  trim, split, etc */
     enum strexBuiltInFunc func;  /* enum version: strexBuiltInTrim strexBuiltInSplit etc. */
     enum strexType returnType;	 /* Type of return value */
     int paramCount;	/* Number of parameters, not flexible in this language! */
     enum strexType *paramTypes;  /* Array of types, one for each parameter */
     };
 
 union strexVal
 /* Some value of arbirary type that can be of any type corresponding to strexType */
     {
@@ -131,77 +134,85 @@
 struct strexParse
 /* A strex parse-tree node.  The tree itself is just the root node. */
     {
     struct strexParse *next;	/* Points to younger sibling if any. */
     struct strexParse *children;	/* Points to oldest child if any. */
     enum strexOp op;		/* Operation at this node. */
     enum strexType type;		/* Return type of this operation. */
     union strexVal val;		/* Return value of this operation. */
     };
 
 struct strexIn
 /* Input to the strex parser - tokenizer and a hash full of built in functions. */
     {
     struct tokenizer *tkz;  /* Get next text input from here */
     struct hash *builtInHash;  /* Hash of built in functions */
+    void *symbols;	    /* NULL or pointer to a symbol table to check */
+    StrexLookup lookup; /* lookup something in symbol table if we have it */
     };
 
 /* Some predefined lists of parameter types */
 static enum strexType oneString[] = {strexTypeString};
 static enum strexType twoStrings[] = {strexTypeString, strexTypeString};
 static enum strexType threeStrings[] = {strexTypeString, strexTypeString, strexTypeString};
 static enum strexType stringInt[] = {strexTypeString, strexTypeInt};
 static enum strexType stringStringInt[] = {strexTypeString, strexTypeString, strexTypeInt};
 
 /* There's one element here for each built in function.  There's also a few switches you'll need to
  * fill in if you add a new built in function. */
 static struct strexBuiltIn builtins[] = {
     { "trim", strexBuiltInTrim, strexTypeString, 1, oneString, },
     { "between", strexBuiltInBetween, strexTypeString, 3, threeStrings },
     { "split", strexBuiltInSplit, strexTypeString, 2, stringInt },
     { "now", strexBuiltInNow, strexTypeString, 0, NULL },
     { "md5", strexBuiltInMd5, strexTypeString, 1, oneString },
     { "separate", strexBuiltInSeparate, strexTypeString, 3, stringStringInt },
     { "uncsv", strexBuiltInUncsv, strexTypeString, 2, stringInt },
     { "untsv", strexBuiltInUntsv, strexTypeString, 2, stringInt },
     { "replace", strexBuiltInReplace, strexTypeString, 3, threeStrings },
     { "fix", strexBuiltInFix, strexTypeString, 3, threeStrings },
     { "strip", strexBuiltInStrip, strexTypeString, 2, twoStrings },
     { "len", strexBuiltInLen, strexTypeInt, 1, oneString},
+    { "symbol", strexBuiltInSymbol, strexTypeString, 2, twoStrings },
+    { "upper", strexBuiltInUpper, strexTypeString, 1, oneString },
+    { "lower", strexBuiltInLower, strexTypeString, 1, oneString },
 };
 
 static struct hash *hashBuiltIns()
 /* Build a hash of builtins keyed by name */
 {
 struct hash *hash = hashNew(0);
 int i;
 for (i=0; i<ArraySize(builtins); ++i)
     hashAdd(hash, builtins[i].name, &builtins[i]);
 return hash;
 }
 
-static struct strexIn *strexInNew(char *expression, char *fileName, int fileLineNumber)
+static struct strexIn *strexInNew(char *expression, char *fileName, int fileLineNumber,
+    void *symbols, StrexLookup lookup)
 /* Return a new strexIn structure wrapped around expression */
 {
 struct lineFile *lf = lineFileOnString(fileName, TRUE, expression);
 lf->lineIx = fileLineNumber;
 struct tokenizer *tkz = tokenizerOnLineFile(lf);
 tkz->leaveQuotes = TRUE;
 struct strexIn *si;
 AllocVar(si);
 si->tkz = tkz;
 si->builtInHash = hashBuiltIns();
+si->symbols = symbols;
+si->lookup = lookup;
 return si;
 }
 
 static void strexInFree(struct strexIn **pSi)
 /* Free up memory associated with strexIn structure */
 {
 struct strexIn *si = *pSi;
 if (si != NULL)
     {
     hashFree(&si->builtInHash);
     tokenizerFree(&si->tkz);
     freez(pSi);
     }
 }
 
@@ -388,30 +399,39 @@
     for (;;)  // Join together . separated things into single symbol */
 	{
 	dyStringAppend(dy, tok);
 	if ((tok = tokenizerNext(tkz)) == NULL)
 	    break;
 	if (tok[0] != '.')
 	    {
 	    tokenizerReuse(tkz);
 	    break;
 	    }
 	dyStringAppend(dy, tok);
 	if ((tok = tokenizerNext(tkz)) == NULL)
 	    break;
 	}
     p->val.s = dyStringCannibalize(&dy);
+    // Peek at the next token: a following '(' means this name is a function call, not a field.
+    tok = tokenizerNext(tkz);
+    boolean isFunct = (tok != NULL && tok[0] == '(');
+    tokenizerReuse(tkz);
+    if (!isFunct && in->symbols != NULL && in->lookup(in->symbols, p->val.s) == NULL )
+	{
+	errAbort("No field %s exists line %d of %s", p->val.s, 
+	    in->tkz->lf->lineIx, in->tkz->lf->fileName);
+	}
     }
 else if (isdigit(c))
     {
     p->op = strexOpLiteral;
     p->type = strexTypeInt;
     p->val.i = sqlUnsigned(tok);
     if ((tok = tokenizerNext(tkz)) != NULL)
 	{
 	if (tok[0] == '.')
 	    {
 	    char buf[32];
 	    tok = tokenizerMustHaveNext(tkz);
 	    safef(buf, sizeof(buf), "%lld.%s", p->val.i, tok);
 	    p->type = strexTypeDouble;
 	    p->val.x = sqlDouble(buf);
@@ -906,43 +926,44 @@
 /* Parse out an expression. Leaves input at next expression. */
 {
 return strexParseOr(in);
 }
 
 static void ensureAtEnd(struct strexIn *in)
 /* Make sure that we are at end of input. */
 {
 struct tokenizer *tkz = in->tkz;
 char *leftover = tokenizerNext(tkz);
 if (leftover != NULL)
     errAbort("Extra input starting with '%s' line %d of %s", leftover, tkz->lf->lineIx,
 	tkz->lf->fileName);
 }
 
-struct strexParse *strexParseString(char *s, char *fileName, int fileLineNumber)
+struct strexParse *strexParseString(char *s, char *fileName, int fileLineNumber,
+    void *symbols, StrexLookup lookup)
 /* Parse out string expression in s and return root of tree. */
 {
-struct strexIn *si = strexInNew(s, fileName, fileLineNumber);
+struct strexIn *si = strexInNew(s, fileName, fileLineNumber, symbols, lookup);
 struct strexParse *parseTree = strexParseExpression(si);
 ensureAtEnd(si);
 strexInFree(&si);
 return parseTree;
 }
 
-/* The parsing section is done, now for the evaluation section. */
+/************ The parsing section is done, now for the evaluation section. **************/
 
-static struct strexEval strexLocalEval(struct strexParse *p, void *record, StrexEvalLookup lookup, 
+static struct strexEval strexLocalEval(struct strexParse *p, void *record, StrexLookup lookup, 
 	struct lm *lm);
 /* Evaluate self on parse tree, allocating memory if needed from lm. */
 
 
 static struct strexEval strexEvalCoerceToString(struct strexEval r, char *buf, int bufSize)
 /* Return a version of r with .val.s filled in with something reasonable even
  * if r input is not a string */
 {
 assert(bufSize >= 32);
 switch (r.type)
     {
     case strexTypeBoolean:
         r.val.s = (r.val.b ? "true" : "false");
 	break;
     case strexTypeString:
@@ -953,31 +974,31 @@
 	break;
     case strexTypeDouble:
 	safef(buf, bufSize, "%g", r.val.x);
 	r.val.s = buf;
 	break;
     default:
 	uglyf("Weird, r.type is %s\n", strexTypeToString(r.type));
 	internalErr();
 	r.val.s = NULL;
 	break;
     }
 r.type = strexTypeString;
 return r;
 }
 
-static struct strexEval strexEvalAdd(struct strexParse *p, void *record, StrexEvalLookup lookup,
+static struct strexEval strexEvalAdd(struct strexParse *p, void *record, StrexLookup lookup,
 	struct lm *lm)
 /* Return a + b. */
 {
 struct strexParse *lp = p->children;
 struct strexParse *rp = lp->next;
 struct strexEval lv = strexLocalEval(lp, record, lookup, lm);
 struct strexEval rv = strexLocalEval(rp, record, lookup, lm);
 struct strexEval res;
 assert(lv.type == rv.type);   // Is our type automatic casting working?
 switch (lv.type)
     {
     case strexTypeInt:
 	res.val.i = (lv.val.i + rv.val.i);
 	break;
     case strexTypeDouble:
@@ -995,31 +1016,31 @@
 	char *s = lmAlloc(lm, lLen + rLen + 1);
 	memcpy(s, lv.val.s, lLen);
 	memcpy(s+lLen, rv.val.s, rLen);
 	res.val.s = s;
 	break;
 	}
     default:
 	internalErr();
 	res.val.b = FALSE;
 	break;
     }
 res.type = lv.type;
 return res;
 }
 
-static struct strexEval strexEvalOr(struct strexParse *p, void *record, StrexEvalLookup lookup,
+static struct strexEval strexEvalOr(struct strexParse *p, void *record, StrexLookup lookup,
 	struct lm *lm)
 /* Return a or b. */
 {
 struct strexParse *lp = p->children;
 struct strexParse *rp = lp->next;
 struct strexEval lv = strexLocalEval(lp, record, lookup, lm);
 struct strexEval rv = strexLocalEval(rp, record, lookup, lm);
 struct strexEval res;
 assert(lv.type == rv.type);   // Is our type automatic casting working?
 switch (lv.type)
     {
     case strexTypeBoolean:
         res.val.b = (lv.val.b || rv.val.b);
 	break;
     case strexTypeInt:
@@ -1028,31 +1049,31 @@
     case strexTypeDouble:
 	res.val.x = (lv.val.x != 0.0 ? lv.val.x : rv.val.x);
 	break;
     case strexTypeString:
 	res.val.s = (lv.val.s[0] ? lv.val.s : rv.val.s);
 	break;
     default:
 	internalErr();
 	res.val.b = FALSE;
 	break;
     }
 res.type = lv.type;
 return res;
 }
 
-static struct strexEval strexEvalAnd(struct strexParse *p, void *record, StrexEvalLookup lookup,
+static struct strexEval strexEvalAnd(struct strexParse *p, void *record, StrexLookup lookup,
 	struct lm *lm)
 /* Return a and b. */
 {
 struct strexParse *lp = p->children;
 struct strexParse *rp = lp->next;
 struct strexEval lv = strexLocalEval(lp, record, lookup, lm);
 struct strexEval rv = strexLocalEval(rp, record, lookup, lm);
 struct strexEval res;
 assert(lv.type == rv.type);   // Is our type automatic casting working?
 switch (lv.type)
     {
     case strexTypeBoolean:
         res.val.b = (lv.val.b && rv.val.b);
 	break;
     case strexTypeInt:
@@ -1076,48 +1097,48 @@
 
 static char *csvParseOneOut(char *csvIn, int ix, struct dyString *scratch)
 /* Return csv value of given index or NULL if at end */
 {
 char *pos = csvIn;
 int i;
 for (i=0; i<ix; ++i)
     {
     if (csvParseNext(&pos,scratch) == NULL)
         return NULL;
     }
 return csvParseNext(&pos, scratch);
 }
 
 static struct strexEval strexEvalArrayIx(struct strexParse *p, 
-	void *record, StrexEvalLookup lookup, struct lm *lm)
+	void *record, StrexLookup lookup, struct lm *lm)
 /* Handle parse tree generated by an indexed array. */
 {
 struct strexParse *array = p->children;
 struct strexParse *index = array->next;
 struct strexEval arrayVal = strexLocalEval(array, record, lookup, lm);
 struct strexEval indexVal = strexLocalEval(index, record, lookup, lm);
 int ix = indexVal.val.i;
 if (ix  < 0)
     ix = strlen(arrayVal.val.s) + ix;
 struct strexEval res;
 res.val.s = lmCloneStringZ(lm, arrayVal.val.s + ix, 1);
 res.type = strexTypeString;
 return res;
 }
 
 static struct strexEval strexEvalArrayRange(struct strexParse *p, 
-    void *record, StrexEvalLookup lookup, struct lm *lm)
+    void *record, StrexLookup lookup, struct lm *lm)
 /* Handle parse tree generated by array range expression, which by now
  * has just been turned into two integer values. */
 {
 struct strexParse *array = p->children;
 struct strexParse *index1 = array->next;
 struct strexParse *index2 = index1->next;
 struct strexEval arrayVal = strexLocalEval(array, record, lookup, lm);
 struct strexEval rangeStart = strexLocalEval(index1, record, lookup, lm);
 struct strexEval rangeEnd = strexLocalEval(index2, record, lookup, lm);
 char *arraySource = arrayVal.val.s;
 int start = rangeStart.val.i;
 int end = rangeEnd.val.i;
 int len = strlen(arraySource); 
 if (start < 0)
     start = strlen(arraySource) + start;
@@ -1218,33 +1239,60 @@
 	return result;
 	}
     }
 }
 
 static char *stripAll(char *in, char *toRemove, struct lm *lm)
 /* Remove every occurrence of any of the chars in toRemove from in. */
 {
 char *result = lmCloneString(lm, in);  // Move to local memory
 char c, *s = toRemove;
 while ((c = *s++) != 0)
     stripChar(result, c);
 return result;
 }
 
+static char *symbolify(char *prefix, char *original, struct lm *lm)
+/* Return prefix plus original made symbol-safe: leading spaces skipped, other whitespace
+ * turned into '_', and any char that is not alphanumeric, '.' or '_' dropped.  If over 32
+ * chars survive, the hmacMd5 of original follows the prefix instead.  Allocated from lm. */
+{
+int prefixSize = strlen(prefix);
+char *result = lmAlloc(lm, strlen(original) + prefixSize + 1);  // prefix + all of original + NUL
+memcpy(result, prefix, prefixSize);
+char *in = skipLeadingSpaces(original);
+char *out = result + prefixSize;
+char c;
+while ((c = *in++) != 0)
+     {
+     if (isspace((unsigned char)c))   // cast: ctype calls are undefined for negative char values
+	 *out++ = '_';
+     else if (isalnum((unsigned char)c) || c == '.' || c == '_')
+         *out++ = c;
+     }
+*out = 0;
+if (out - (result + prefixSize) > 32)   // count of filtered chars written after the prefix
+    {
+    char *md5 = hmacMd5("", original);
+    strcpy(result + prefixSize, md5);  // NOTE(review): assumes digest text is <= 33 chars - confirm
+    freeMem(md5);
+    }
+return result;
+}
 
 static struct strexEval strexEvalCallBuiltIn(struct strexParse *p, 
-    void *record, StrexEvalLookup lookup, struct lm *lm)
+    void *record, StrexLookup lookup, struct lm *lm)
 /* Handle parse tree generated by call to a built in function. */
 {
 struct strexBuiltIn *builtIn = p->val.builtIn;
 struct strexEval res;
 res.type = builtIn->returnType;
 
 switch (builtIn->func)
     {
     case strexBuiltInTrim:
 	{
         struct strexEval a = strexLocalEval(p->children, record, lookup, lm);
 	res.val.s = trimSpaces(a.val.s);
 	break;
 	}
     case strexBuiltInBetween:
@@ -1319,64 +1367,82 @@
 	    res.val.s = newVal.val.s;
 	    }
 	else
 	    res.val.s = string.val.s;
 	break;
 	}
     case strexBuiltInStrip:
         {
         struct strexEval a = strexLocalEval(p->children, record, lookup, lm);
         struct strexEval b = strexLocalEval(p->children->next, record, lookup, lm);
 	res.val.s = stripAll(a.val.s, b.val.s, lm);
 	break;
 	}
     case strexBuiltInLen:
         {
-	uglyf("builtInLen\n");
         struct strexEval a = strexLocalEval(p->children, record, lookup, lm);
-	uglyf(" of %s\n", a.val.s);
 	res.val.i = strlen(a.val.s);
-	uglyf(" = %lld\n", res.val.i);
+	break;
+	}
+    case strexBuiltInSymbol:  // Convert string to something could use as a C language symbol
+        {
+        struct strexEval a = strexLocalEval(p->children, record, lookup, lm);
+        struct strexEval b = strexLocalEval(p->children->next, record, lookup, lm);
+	res.val.s = symbolify(a.val.s, b.val.s, lm);
+	break;
+	}
+    case strexBuiltInLower:
+        {
+        struct strexEval a = strexLocalEval(p->children, record, lookup, lm);
+	res.val.s = lmCloneString(lm, a.val.s);
+	tolowers(res.val.s);
+	break;
+	}
+    case strexBuiltInUpper:
+        {
+        struct strexEval a = strexLocalEval(p->children, record, lookup, lm);
+	res.val.s = lmCloneString(lm, a.val.s);
+	touppers(res.val.s);
 	break;
 	}
     }
 return res;
 }
 
 static struct strexEval nullValForType(enum strexType type)
 /* Return 0, "", 0.0 depending */
 {
 struct strexEval res = {.type=type};
 switch (type)
     {
      case strexTypeInt:
 	  res.val.i = 0;
 	  break;
      case strexTypeDouble:
 	  res.val.x = 0.0;
 	  break;
      case strexTypeBoolean:
 	  res.val.b = FALSE;
 	  break;
      case strexTypeString:
 	  res.val.s = "";
 	  break;
     }
 return res;
 }
 
-static struct strexEval strexEvalPick(struct strexParse *pick, void *record, StrexEvalLookup lookup,
+static struct strexEval strexEvalPick(struct strexParse *pick, void *record, StrexLookup lookup,
     struct lm *lm)
 /* Evaluate a pick operator. */
 {
 /* Evaluate the keyValue */
 struct strexParse *p = pick->children;
 struct strexEval keyVal = strexLocalEval(p, record, lookup, lm);
 p = p->next;
 
 struct strexEval res;
 boolean gotMatch = FALSE;
 while (p != NULL)
     {
     struct strexEval key = strexLocalEval(p, record, lookup, lm);
     p = p->next;  // Parser guarantees this non-null
     struct strexParse *valExp = p;
@@ -1395,31 +1461,31 @@
 	 case strexTypeString:
 	      gotMatch = sameString(keyVal.val.s, key.val.s);
 	      break;
 	 }
     if (gotMatch)
 	 {
          return strexLocalEval(valExp, record, lookup, lm);
 	 }
     }
 res = nullValForType(pick->type);
 return res;
 }
 
 
 
-static struct strexEval strexLocalEval(struct strexParse *p, void *record, StrexEvalLookup lookup, 
+static struct strexEval strexLocalEval(struct strexParse *p, void *record, StrexLookup lookup, 
 	struct lm *lm)
 /* Evaluate self on parse tree, allocating memory if needed from lm. */
 {
 struct strexEval res;
 switch (p->op)
     {
     case strexOpLiteral:
 	res.val = p->val;
 	res.type = p->type;
 	break;
     case strexOpSymbol:
 	res.type = strexTypeString;
 	char *s = lookup(record, p->val.s);
 	if (s == NULL)
 	    res.val.s = "";
@@ -1540,28 +1606,28 @@
        res = strexEvalOr(p, record, lookup, lm);
        break;
     case strexOpAnd:
        res = strexEvalAnd(p, record, lookup, lm);
        break;
 
     default:
         errAbort("Unknown op %s\n", strexOpToString(p->op));
 	res.type = strexTypeInt;	// Keep compiler from complaining.
 	res.val.i = 0;	// Keep compiler from complaining.
 	break;
     }
 return res;
 }
 
-char *strexEvalAsString(struct strexParse *p, void *record, StrexEvalLookup lookup)
+char *strexEvalAsString(struct strexParse *p, void *record, StrexLookup lookup)
 /* Evaluating a strex expression on a symbol table with a lookup function for variables and
  * return result as a string value. */
 {
 struct lm *lm = lmInit(0);
 struct strexEval res = strexLocalEval(p, record, lookup, lm);
 char numBuf[32];
 struct strexEval strRes = strexEvalCoerceToString(res, numBuf, sizeof(numBuf));
 char *ret = cloneString(strRes.val.s);
 lmCleanup(&lm);
 return ret;
 }