8577c06e473e9a888f83df214675ea3b1a4ede30
kent
  Tue Aug 13 18:51:01 2019 -0700
Implementing pick() operation,  more or less the strex equivalent to switch.

diff --git src/lib/strex.c src/lib/strex.c
index aab95e1..021c03b 100644
--- src/lib/strex.c
+++ src/lib/strex.c
@@ -47,66 +47,69 @@
 /* One of these for each builtIn.  We'll just do a switch to implement 
  * Each built in function needs a value here, to keep it simple there's
  * aa correspondence between these names and the built in function name */
     {
     strexBuiltInTrim,
     strexBuiltInBetween,
     strexBuiltInSplit,
     strexBuiltInNow,
     strexBuiltInMd5,
     strexBuiltInSeparate,
     strexBuiltInUncsv,
     strexBuiltInUntsv,
     strexBuiltInReplace,
     strexBuiltInFix,
     strexBuiltInStrip,
+    strexBuiltInLen,
     };
 
 struct strexBuiltIn
 /* Information to describe a built in function */
     {
     char *name;		/* Name in strex language:  trim, split, etc */
     enum strexBuiltInFunc func;  /* enum version: strexBuiltInTrim strexBuiltInSplit etc. */
+    enum strexType returnType;	 /* Type of return value */
     int paramCount;	/* Number of parameters, not flexible in this language! */
     enum strexType *paramTypes;  /* Array of types, one for each parameter */
     };
 
 union strexVal
 /* Some value of arbirary type that can be of any type corresponding to strexType */
     {
     boolean b;
     char *s;
     long long i;
     double x;
     struct strexBuiltIn *builtIn;
     };
 
 struct strexEval
 /* Result of evaluation of parse tree. */
     {
     enum strexType type;
     union strexVal val;
     };
 
 enum strexOp
 /* An operation in the parse tree. */
     {
     strexOpUnknown,	/* Should not occur */
     strexOpLiteral,        /* Literal string or number. */
     strexOpSymbol,	/* A symbol name. */
 
     strexOpBuiltInCall,	/* Call a built in function */
+    strexOpPick,	/* Similar to built in but pick deserves its own op. */
 
     strexOpArrayIx,	/* An array with an index. */
     strexOpArrayRange,	/* An array with a range. */
 
     strexOpStrlen,	/* Length of a string */
 
     /* Unary minus for numbers */
     strexOpUnaryMinusInt,
     strexOpUnaryMinusDouble,
 
     /* Binary operations. */
     strexOpAdd,
     strexOpOr,
     strexOpAnd,
 
@@ -140,41 +143,42 @@
     {
     struct tokenizer *tkz;  /* Get next text input from here */
     struct hash *builtInHash;  /* Hash of built in functions */
     };
 
 /* Some predefined lists of parameter types */
 static enum strexType oneString[] = {strexTypeString};
 static enum strexType twoStrings[] = {strexTypeString, strexTypeString};
 static enum strexType threeStrings[] = {strexTypeString, strexTypeString, strexTypeString};
 static enum strexType stringInt[] = {strexTypeString, strexTypeInt};
 static enum strexType stringStringInt[] = {strexTypeString, strexTypeString, strexTypeInt};
 
 /* There's one element here for each built in function.  There's also a few switches you'll need to
  * fill in if you add a new built in function. */
 static struct strexBuiltIn builtins[] = {
-    { "trim", strexBuiltInTrim, 1, oneString, },
-    { "between", strexBuiltInBetween, 3, threeStrings },
-    { "split", strexBuiltInSplit, 2, stringInt },
-    { "now", strexBuiltInNow, 0, NULL },
-    { "md5", strexBuiltInMd5, 1, oneString },
-    { "separate", strexBuiltInSeparate, 3, stringStringInt },
-    { "uncsv", strexBuiltInUncsv, 2, stringInt },
-    { "untsv", strexBuiltInUntsv, 2, stringInt },
-    { "replace", strexBuiltInReplace, 3, threeStrings },
-    { "fix", strexBuiltInFix, 3, threeStrings },
-    { "strip", strexBuiltInStrip, 2, twoStrings },
+    { "trim", strexBuiltInTrim, strexTypeString, 1, oneString, },
+    { "between", strexBuiltInBetween, strexTypeString, 3, threeStrings },
+    { "split", strexBuiltInSplit, strexTypeString, 2, stringInt },
+    { "now", strexBuiltInNow, strexTypeString, 0, NULL },
+    { "md5", strexBuiltInMd5, strexTypeString, 1, oneString },
+    { "separate", strexBuiltInSeparate, strexTypeString, 3, stringStringInt },
+    { "uncsv", strexBuiltInUncsv, strexTypeString, 2, stringInt },
+    { "untsv", strexBuiltInUntsv, strexTypeString, 2, stringInt },
+    { "replace", strexBuiltInReplace, strexTypeString, 3, threeStrings },
+    { "fix", strexBuiltInFix, strexTypeString, 3, threeStrings },
+    { "strip", strexBuiltInStrip, strexTypeString, 2, twoStrings },
+    { "len", strexBuiltInLen, strexTypeInt, 1, oneString},
 };
 
 static struct hash *hashBuiltIns()
 /* Build a hash of builtins keyed by name */
 {
 struct hash *hash = hashNew(0);
 int i;
 for (i=0; i<ArraySize(builtins); ++i)
     hashAdd(hash, builtins[i].name, &builtins[i]);
 return hash;
 }
 
 static struct strexIn *strexInNew(char *expression, char *fileName, int fileLineNumber)
 /* Return a new strexIn structure wrapped around expression */
 {
@@ -199,34 +203,44 @@
     tokenizerFree(&si->tkz);
     freez(pSi);
     }
 }
 
 struct strexParse *strexParseNew(enum strexOp op, enum strexType type)
 /* Return a fresh strexParse of the given op and type with the val set to 0/NULL */
 {
 struct strexParse *p;
 AllocVar(p);
 p->op = op;
 p->type = type;
 return p;
 }
 
-static void strexValDump(union strexVal val, enum strexType type, FILE *f)
+static void strexParseValDump(struct strexParse *p, FILE *f)
 /* Dump out value to file. */
 {
-switch (type)
+union strexVal val = p->val;
+switch (p->op)
+    {
+    case strexOpBuiltInCall:
+        fprintf(f, "%s", val.builtIn->name);
+	return;
+    default:
+        break;
+    }
+
+switch (p->type)
     {
     case strexTypeBoolean:
         fprintf(f, "%s", (val.b ? "true" : "false") );
 	break;
     case strexTypeString:
         fprintf(f, "%s", (val.s == NULL ? "(null)" : val.s));
 	break;
     case strexTypeInt:
         fprintf(f, "%lld", val.i);
 	break;
     case strexTypeDouble:
         fprintf(f, "%f", val.x);
 	break;
     }
 }
@@ -237,30 +251,31 @@
 switch (type)
     {
     case strexTypeBoolean:
 	return "boolean";
 	break;
     case strexTypeString:
 	return "string";
 	break;
     case strexTypeInt:
 	return "integer";
 	break;
     case strexTypeDouble:
 	return "floating point";
 	break;
     default:
+	uglyf("Weird, type is %d\n", (int)type);
         internalErr();
 	return NULL;
     }
 }
 
 static char *strexOpToString(enum strexOp op)
 /* Return string representation of parse op. */
 {
 switch (op)
     {
     case strexOpLiteral:
 	return "strexOpLiteral";
     case strexOpSymbol:
 	return "strexOpSymbol";
     
@@ -292,49 +307,51 @@
     case strexOpUnaryMinusInt:
         return "strexOpUnaryMinusInt";
     case strexOpUnaryMinusDouble:
         return "strexOpUnaryMinusDouble";
 
 
     case strexOpAdd:
 	return "strexOpAdd";
     case strexOpOr:
 	return "strexOpOr";
     case strexOpAnd:
 	return "strexOpAnd";
 
     case strexOpBuiltInCall:
         return "strexOpBuiltInCall";
+    case strexOpPick:
+        return "strexOpPick";
 
     case strexOpArrayIx:
         return "strexOpArrayIx";
     case strexOpArrayRange:
         return "strexOpArrayRange";
 
     case strexOpStrlen:
         return "strexOpStrlen";
     default:
 	return "strexOpUnknown";
     }
 }
 
 void strexParseDump(struct strexParse *p, int depth, FILE *f)
 /* Dump out strexParse tree and children. */
 {
 spaceOut(f, 3*depth);
 fprintf(f, "%s ", strexOpToString(p->op));
-strexValDump(p->val, p->type,  f);
+strexParseValDump(p,  f);
 fprintf(f, "\n");
 struct strexParse *child;
 for (child = p->children; child != NULL; child= child->next)
     strexParseDump(child, depth+1, f);
 }
 
 static void expectingGot(struct strexIn *in, char *expecting, char *got)
 /* Print out error message about unexpected input. */
 {
 errAbort("Expecting %s, got %s, line %d of %s", expecting, got, in->tkz->lf->lineIx,
 	in->tkz->lf->fileName);
 }
 
 static void skipOverRequired(struct strexIn *in, char *expecting)
 /* Make sure that next token is tok, and skip over it. */
@@ -542,53 +559,101 @@
 	default:
 	    internalErr();
 	    break;
 	}
     return cast;
     }
 }
 
 static struct strexParse *strexParseFunction(struct strexIn *in)
 /* Handle the (a,b,c) in funcCall(a,b,c).  Convert it into tree:
 *         strexOpBuiltInCall
 *            strexParse(a)
 *            strexParse(b)
 *            strexParse(c)
 * or something like that.  With no parameters 
-*            strexParseFunction */
+*        strexOpBuiltInCall */
 {
 struct tokenizer *tkz = in->tkz;
 struct strexParse *function = strexParseAtom(in);
 char *tok = tokenizerNext(tkz);
 if (tok == NULL)
     tokenizerReuse(tkz);
 else if (tok[0] == '(')
     {
     /* Check that the current op, is a pure symbol. */
     if (function->op != strexOpSymbol)
         errAbort("Unexpected '(' line %d of %s", tkz->lf->lineIx, tkz->lf->fileName);
 
     /* Look up function to call and complain if it doesn't exist */
     char *functionName = function->val.s;
+
+    /* Deal with special named ops like pick */
+    if (sameString(functionName, "pick"))
+        {
+	/* Yay, the pick operation.  It looks like
+	 *    pick( keyExp,  key1: val1, key2: val2, ..., keyN: valN)
+	 * the logic is to evaluate keyExp, and then pick one of the valN's to return,
+	 * the one where the keyN is the same as keyExp */
+	struct strexParse *keyExp = strexParseExpression(in);
+	slAddHead(&function->children, keyExp);
+	skipOverRequired(in, ",");
+
+	struct strexParse *firstVal = NULL;
+	for (;;)
+	    {
+	    struct strexParse *key = strexParseCoerce(strexParseExpression(in), keyExp->type);
+	    slAddHead(&function->children, key);
+	    skipOverRequired(in, ":");
+	    struct strexParse *val = strexParseExpression(in);
+	    if (firstVal == NULL)
+	        firstVal = val;
+	    else
+		{
+		if (firstVal->type != val->type)
+		    {
+		    errAbort("Mixed value types %s and %s in pick() expression line %d of %s",
+		        strexTypeToString(firstVal->type), strexTypeToString(val->type),
+			tkz->lf->lineIx, tkz->lf->fileName);
+		    }
+	        val = strexParseCoerce(val, firstVal->type);
+		}
+	    slAddHead(&function->children, val);
+	    tok = tokenizerMustHaveNext(tkz);
+	    if (tok[0] == ')')
+		break;
+	    else if (tok[0] != ',')
+		errAbort("Error in pick parameter list line %d of %s", 
+		    tkz->lf->lineIx, tkz->lf->fileName);
+	    }
+	slReverse(&function->children);
+
+	/* Going to reuse current op, turn it into pick */
+	function->op = strexOpPick;
+	function->type = firstVal->type;
+	}
+    else
+	{
+	/* It's a builtin function as opposed to a special op.  Figure out which one.*/
 	struct strexBuiltIn *builtIn = hashFindVal(in->builtInHash, functionName);
 	if (builtIn == NULL)
 	    errAbort("No built in function %s exists line %d of %s", functionName, tkz->lf->lineIx,
 		tkz->lf->fileName);
 
 	/* We're going to reuse this current op */
 	function->op = strexOpBuiltInCall;
-    function->type = strexTypeString;
+	function->type = builtIn->returnType;
 	function->val.builtIn = builtIn;
 
 	tok = tokenizerMustHaveNext(tkz);
 	if (tok[0] != ')')
 	    {
 	    tokenizerReuse(tkz);
 	    for (;;)
 		{
 		struct strexParse *param = strexParseExpression(in);
 		slAddHead(&function->children, param);
 		tok = tokenizerMustHaveNext(tkz);
 		if (tok[0] == ')')
 		    break;
 		else if (tok[0] != ',')
 		    errAbort("Error in parameter list for %s line %d of %s", function->val.s, 
@@ -604,30 +669,31 @@
 		builtIn->name, childCount, builtIn->paramCount, tkz->lf->lineIx, tkz->lf->fileName);
 		
 	/* Check function parameter types */
 	int i;
 	struct strexParse *p;
 	for (i=0, p=function->children; i<childCount; ++i, p = p->next)
 	    {
 	    if (p->type != builtIn->paramTypes[i])
 		{
 		errAbort("Parameter #%d to %s needs to be type %s not %s line %d of %s",
 		    i, builtIn->name,  strexTypeToString(builtIn->paramTypes[i]), 
 		    strexTypeToString(p->type), tkz->lf->lineIx, tkz->lf->fileName);
 		}
 	    }
 	}
+    }
 else
     tokenizerReuse(tkz);
 return function;
 }
 
 struct strexParse *arrayRangeTree(struct strexParse *array, 
     struct strexParse *firstIndex, struct strexParse *secondIndex)
 /* Creat an array range parse tree */
 {
 struct strexParse *p = strexParseNew(strexOpArrayRange, strexTypeString);
 p->children = array;
 array->next = firstIndex;
 firstIndex->next = secondIndex;
 return p;
 }
@@ -660,31 +726,31 @@
 	    tokenizerReuse(tkz);    
 	    struct strexParse *firstIndex = strexParseNew(strexOpLiteral, strexTypeInt);
 	    struct strexParse *secondIndex = strexParseCoerce(strexParseExpression(in), strexTypeInt);
 	    p = arrayRangeTree(array, firstIndex, secondIndex);
 	    }
         }
     else
 	{
 	tokenizerReuse(tkz);
 	struct strexParse *firstIndex = strexParseCoerce(strexParseExpression(in), strexTypeInt);
 	tok = tokenizerMustHaveNext(tkz);
 	if (tok[0] == ':')
 	    {
 	    struct strexParse *secondIndex;
 	    tok = tokenizerMustHaveNext(tkz);
-	    if (tok[0] == ']')  // Case where second half of rang is empty
+	    if (tok[0] == ']')  // Case where second half of range is empty
 		{
 	        tokenizerReuse(tkz);
 		secondIndex = strexParseNew(strexOpStrlen, strexTypeInt);
 		secondIndex->children = array;
 		}
 	    else
 	        {
 	        tokenizerReuse(tkz);
 		secondIndex = strexParseCoerce(strexParseExpression(in), strexTypeInt);
 		}
 	    p = arrayRangeTree(array, firstIndex, secondIndex);
 	    }
 	else
 	    {
 	    // Simple no range case
@@ -866,41 +932,43 @@
 
 static struct strexEval strexLocalEval(struct strexParse *p, void *record, StrexEvalLookup lookup, 
 	struct lm *lm);
 /* Evaluate self on parse tree, allocating memory if needed from lm. */
 
 
 static struct strexEval strexEvalCoerceToString(struct strexEval r, char *buf, int bufSize)
 /* Return a version of r with .val.s filled in with something reasonable even
  * if r input is not a string */
 {
 assert(bufSize >= 32);
 switch (r.type)
     {
     case strexTypeBoolean:
         r.val.s = (r.val.b ? "true" : "false");
+	break;
     case strexTypeString:
 	break;	/* It's already done. */
     case strexTypeInt:
 	safef(buf, bufSize, "%lld", r.val.i);
 	r.val.s = buf;
 	break;
     case strexTypeDouble:
 	safef(buf, bufSize, "%g", r.val.x);
 	r.val.s = buf;
 	break;
     default:
+	uglyf("Weird, r.type is %s\n", strexTypeToString(r.type));
 	internalErr();
 	r.val.s = NULL;
 	break;
     }
 r.type = strexTypeString;
 return r;
 }
 
 static struct strexEval strexEvalAdd(struct strexParse *p, void *record, StrexEvalLookup lookup,
 	struct lm *lm)
 /* Return a + b. */
 {
 struct strexParse *lp = p->children;
 struct strexParse *rp = lp->next;
 struct strexEval lv = strexLocalEval(lp, record, lookup, lm);
@@ -1153,35 +1221,36 @@
 }
 
 static char *stripAll(char *in, char *toRemove, struct lm *lm)
 /* Remove every occurrence of any of the chars in toRemove from in. */
 {
 char *result = lmCloneString(lm, in);  // Move to local memory
 char c, *s = toRemove;
 while ((c = *s++) != 0)
     stripChar(result, c);
 return result;
 }
 
 
 static struct strexEval strexEvalCallBuiltIn(struct strexParse *p, 
     void *record, StrexEvalLookup lookup, struct lm *lm)
-/* Handle parse tree generated by an indexed array. */
+/* Handle parse tree generated by call to a built in function. */
 {
 struct strexBuiltIn *builtIn = p->val.builtIn;
 struct strexEval res;
-res.type = strexTypeString;
+res.type = builtIn->returnType;
+
 switch (builtIn->func)
     {
     case strexBuiltInTrim:
 	{
         struct strexEval a = strexLocalEval(p->children, record, lookup, lm);
 	res.val.s = trimSpaces(a.val.s);
 	break;
 	}
     case strexBuiltInBetween:
 	{
         struct strexEval a = strexLocalEval(p->children, record, lookup, lm);
         struct strexEval b = strexLocalEval(p->children->next, record, lookup, lm);
         struct strexEval c = strexLocalEval(p->children->next->next, record, lookup, lm);
 	char *between = stringBetween(a.val.s, c.val.s, b.val.s);
 	res.val.s = emptyForNull(lmCloneString(lm, between));
@@ -1248,35 +1317,108 @@
 	if (sameString(string.val.s, oldVal.val.s))
 	    {
 	    res.val.s = newVal.val.s;
 	    }
 	else
 	    res.val.s = string.val.s;
 	break;
 	}
     case strexBuiltInStrip:
         {
         struct strexEval a = strexLocalEval(p->children, record, lookup, lm);
         struct strexEval b = strexLocalEval(p->children->next, record, lookup, lm);
 	res.val.s = stripAll(a.val.s, b.val.s, lm);
 	break;
 	}
+    case strexBuiltInLen:
+        {
+	/* len(string): length of the single string parameter */
+        struct strexEval a = strexLocalEval(p->children, record, lookup, lm);
+	/* Result goes in val.i; res.type was set from builtIn->returnType before the switch */
+	res.val.i = strlen(a.val.s);
+	/* (debugging trace output that used to be printed here has been removed) */
+	break;
+	}
+    }
+return res;
+}
+
+static struct strexEval nullValForType(enum strexType type)
+/* Return the "empty" value of the given type: 0 for int, 0.0 for double, FALSE for boolean, "" for string. */
+{
+struct strexEval res = {.type=type};
+switch (type)
+    {
+     case strexTypeInt:
+	  res.val.i = 0;
+	  break;
+     case strexTypeDouble:
+	  res.val.x = 0.0;
+	  break;
+     case strexTypeBoolean:
+	  res.val.b = FALSE;
+	  break;
+     case strexTypeString:
+	  res.val.s = "";	/* points at a string literal */
+	  break;
+    }
+return res;
 }
+
+static struct strexEval strexEvalPick(struct strexParse *pick, void *record, StrexEvalLookup lookup,
+    struct lm *lm)
+/* Evaluate a pick operator: return the value paired with the first key equal to the key expression. */
+{
+/* First child is the key expression; evaluate it once up front */
+struct strexParse *p = pick->children;
+struct strexEval keyVal = strexLocalEval(p, record, lookup, lm);
+p = p->next;
+/* Remaining children alternate key, value, key, value, ... */
+struct strexEval res;
+boolean gotMatch = FALSE;
+while (p != NULL)
+    {
+    struct strexEval key = strexLocalEval(p, record, lookup, lm);
+    p = p->next;  // Parser guarantees this non-null
+    struct strexParse *valExp = p;
+    p = p->next;
+    switch (key.type)
+         {
+	 case strexTypeInt:
+	      gotMatch = (keyVal.val.i == key.val.i);
+	      break;
+	 case strexTypeDouble:
+	      gotMatch = (keyVal.val.x == key.val.x);
+	      break;
+	 case strexTypeBoolean:
+	      gotMatch = (keyVal.val.b == key.val.b);  // Compare; assigning here would clobber keyVal
+	      break;
+	 case strexTypeString:
+	      gotMatch = sameString(keyVal.val.s, key.val.s);
+	      break;
+	 }
+    if (gotMatch)
+	 {
+         return strexLocalEval(valExp, record, lookup, lm);  // Only the chosen value is evaluated
+	 }
+    }
+res = nullValForType(pick->type);  // No key matched
 return res;
 }
 
 
+
 static struct strexEval strexLocalEval(struct strexParse *p, void *record, StrexEvalLookup lookup, 
 	struct lm *lm)
 /* Evaluate self on parse tree, allocating memory if needed from lm. */
 {
 struct strexEval res;
 switch (p->op)
     {
     case strexOpLiteral:
 	res.val = p->val;
 	res.type = p->type;
 	break;
     case strexOpSymbol:
 	res.type = strexTypeString;
 	char *s = lookup(record, p->val.s);
 	if (s == NULL)
@@ -1367,48 +1509,52 @@
 	res.val.x = -res.val.x;
 	break;
 
     case strexOpArrayIx:
        res = strexEvalArrayIx(p, record, lookup, lm);
        break;
     case strexOpArrayRange:
        res = strexEvalArrayRange(p, record, lookup, lm);
        break;
     case strexOpStrlen:
        res = strexLocalEval(p->children, record, lookup, lm);
        res.type = strexTypeInt;
        res.val.i = strlen(res.val.s);
        break;
 
+    /* More complicated ops. */
     case strexOpBuiltInCall:
        res = strexEvalCallBuiltIn(p, record, lookup, lm);
        break;
+    case strexOpPick:
+       res = strexEvalPick(p, record, lookup, lm);
+       break;
+
 
     /* Mathematical ops, simple binary type */
     case strexOpAdd:
        res = strexEvalAdd(p, record, lookup, lm);
        break;
 
     /* Logical ops, simple binary type */
     case strexOpOr:
        res = strexEvalOr(p, record, lookup, lm);
        break;
     case strexOpAnd:
        res = strexEvalAnd(p, record, lookup, lm);
        break;
 
-
     default:
         errAbort("Unknown op %s\n", strexOpToString(p->op));
 	res.type = strexTypeInt;	// Keep compiler from complaining.
 	res.val.i = 0;	// Keep compiler from complaining.
 	break;
     }
 return res;
 }
 
 char *strexEvalAsString(struct strexParse *p, void *record, StrexEvalLookup lookup)
 /* Evaluating a strex expression on a symbol table with a lookup function for variables and
  * return result as a string value. */
 {
 struct lm *lm = lmInit(0);
 struct strexEval res = strexLocalEval(p, record, lookup, lm);