b34dfdc4c7db63cd27ff9843ff4f4ac617905059
kent
  Fri Aug 9 17:45:48 2019 -0700
Adding uncsv and untsv builtin functions.  Making [] operator just select zero based character within a string.

diff --git src/lib/strex.c src/lib/strex.c
index 79aed4e..c42fd4b 100644
--- src/lib/strex.c
+++ src/lib/strex.c
@@ -42,30 +42,32 @@
     strexTypeInt = 3,
     strexTypeDouble = 4,
     };
 
 enum strexBuiltInFunc
 /* One of these for each builtIn.  We'll just do a switch to implement 
  * Each built in function needs a value here, to keep it simple there's
  * aa correspondence between these names and the built in function name */
     {
     strexBuiltInTrim,
     strexBuiltInBetween,
     strexBuiltInSplit,
     strexBuiltInNow,
     strexBuiltInMd5,
     strexBuiltInSeparate,
+    strexBuiltInUncsv,
+    strexBuiltInUntsv,
     };
 
 struct strexBuiltIn
 /* Information to describe a built in function */
     {
     char *name;		/* Name in strex language:  trim, split, etc */
     enum strexBuiltInFunc func;  /* enum version: strexBuiltInTrim strexBuiltInSplit etc. */
     int paramCount;	/* Number of parameters, not flexible in this language! */
     enum strexType *paramTypes;  /* Array of types, one for each parameter */
     };
 
 union strexVal
 /* Some value of arbirary type that can be of any type corresponding to strexType */
     {
     boolean b;
@@ -135,30 +137,32 @@
 static enum strexType oneString[] = {strexTypeString};
 // static enum strexType twoStrings[] = {strexTypeString, strexTypeString};
 static enum strexType threeStrings[] = {strexTypeString, strexTypeString, strexTypeString};
 static enum strexType stringInt[] = {strexTypeString, strexTypeInt};
 static enum strexType stringStringInt[] = {strexTypeString, strexTypeString, strexTypeInt};
 
 /* There's one element here for each built in function.  There's also a few switches you'll need to
  * fill in if you add a new built in function. */
 static struct strexBuiltIn builtins[] = {
     { "trim", strexBuiltInTrim, 1, oneString, },
     { "between", strexBuiltInBetween, 3, threeStrings, },
     { "split", strexBuiltInSplit, 2, stringInt },
     { "now", strexBuiltInNow, 0, NULL },
     { "md5", strexBuiltInMd5, 1, oneString },
     { "separate", strexBuiltInSeparate, 3, stringStringInt },
+    { "uncsv", strexBuiltInUncsv, 2, stringInt },
+    { "untsv", strexBuiltInUntsv, 2, stringInt },
 };
 
 static struct hash *hashBuiltIns()
 /* Build a hash of builtins keyed by name */
 {
 struct hash *hash = hashNew(0);
 int i;
 for (i=0; i<ArraySize(builtins); ++i)
     hashAdd(hash, builtins[i].name, &builtins[i]);
 return hash;
 }
 
 static struct strexIn *strexInNew(char *expression, char *fileName, int fileLineNumber)
 /* Return a new strexIn structure wrapped around expression */
 {
@@ -584,30 +588,31 @@
 *         strexOpArrayIx
 *            strexParseFunction
 *            strexParseFunction */
 {
 struct tokenizer *tkz = in->tkz;
 struct strexParse *collection = strexParseFunction(in);
 struct strexParse *p = collection;
 char *tok = tokenizerNext(tkz);
 if (tok == NULL)
     tokenizerReuse(tkz);
 else if (tok[0] == '[')
     {
     struct strexParse *index = strexParseExpression(in);
     // struct strexParse *index = strexParseFunction(in);
     index = strexParseCoerce(index, strexTypeInt);
+    collection = strexParseCoerce(collection, strexTypeString);
     skipOverRequired(in, "]");
     AllocVar(p);
     p->op = strexOpArrayIx;
     p->type = strexTypeString;
     p->children = collection;
     p->val.s = cloneString("");
     collection->next = index;
     }
 else
     tokenizerReuse(tkz);
 return p;
 }
 
 
 static struct strexParse *strexParseUnaryMinus(struct strexIn *in)
@@ -789,75 +794,89 @@
     if (csvParseNext(&pos,scratch) == NULL)
         return NULL;
     }
 return csvParseNext(&pos, scratch);
 }
 
 static struct strexEval strexEvalArrayIx(struct strexParse *p, void *record, StrexEvalLookup lookup,
 	struct lm *lm)
 /* Handle parse tree generated by an indexed array. */
 {
 struct strexParse *array = p->children;
 struct strexParse *index = array->next;
 struct strexEval arrayVal = strexLocalEval(array, record, lookup, lm);
 struct strexEval indexVal = strexLocalEval(index, record, lookup, lm);
 struct strexEval res;
-struct dyString *scratch = dyStringNew(0);
-char *val = emptyForNull(csvParseOneOut(arrayVal.val.s, indexVal.val.i, scratch));
-res.val.s = cloneString(val);
+res.val.s = (indexVal.val.i < 0 || (size_t)indexVal.val.i > strlen(arrayVal.val.s)) ? lmCloneString(lm, "") : lmCloneStringZ(lm, arrayVal.val.s + indexVal.val.i, 1);  // "" if index is outside the string
 res.type = strexTypeString;
 return res;
 }
 
 static char *splitString(char *words,  int ix,  struct lm *lm)
 /* Return the space-delimited word of index ix as clone into lm */
 {
 char *s = words;
 int i;
 for (i=0; ; ++i)
     {
     s = skipLeadingSpaces(s);
     if (isEmpty(s))
         errAbort("There aren't %d words in %s", ix+1, words);
     char *end = skipToSpaces(s);
     if (i == ix)
         {
 	if (end == NULL)
 	    return lmCloneString(lm, s);
 	else
 	    return lmCloneMem(lm, s, end - s);
 	}
     s = end;
     }
 }
 
+static char *uncsvString(char *csvIn,  int ix,  struct lm *lm)
+/* Return the comma separated value of index ix, or "" if there is no such value. Memory for result is lm */
+{
+struct dyString *scratch = dyStringNew(0);
+char *one = csvParseOneOut(csvIn, ix, scratch);	// NULL if parsing runs out before reaching ix
+char *res = lmCloneString(lm, emptyForNull(one));	// Save in more permanent memory, never NULL
+dyStringFree(&scratch);
+return res;
+}
+
 static char *separateString(char *string, char *splitter, int ix, struct lm *lm)
 /* Return the ix'th part of string as split apart by splitter */
 {
 int splitterSize = strlen(splitter);
 if (splitterSize != 1)
     errAbort("Separator parameter to split must be a single character, not %s", splitter);
 int count = chopByChar(string, splitter[0], NULL, 0);
 if (ix >= count)
     errAbort("There aren't %d fields separated by %s in %s", ix+1, splitter, string);
 char **row;
 lmAllocArray(lm, row, count);
 char *scratch = lmCloneString(lm, string);
 chopByChar(scratch, splitter[0], row, count);
 return row[ix];
 }
 
+static char *untsvString(char *tsvIn, int ix, struct lm *lm)
+/* Return the ix'th tab separated value of tsvIn, allocated in lm.  Aborts via separateString if ix is past the last field. */
+{
+return separateString(tsvIn, "\t", ix, lm);
+}
+
 static struct strexEval strexEvalCallBuiltIn(struct strexParse *p, 
     void *record, StrexEvalLookup lookup, struct lm *lm)
 /* Handle parse tree generated by an indexed array. */
 {
 struct strexBuiltIn *builtIn = p->val.builtIn;
 struct strexEval res;
 res.type = strexTypeString;
 switch (builtIn->func)
     {
     case strexBuiltInTrim:
 	{
         struct strexEval a = strexLocalEval(p->children, record, lookup, lm);
 	res.val.s = trimSpaces(a.val.s);
 	break;
 	}
@@ -889,30 +908,44 @@
         {
         struct strexEval a = strexLocalEval(p->children, record, lookup, lm);
 	char *md5 = hmacMd5("", a.val.s);
 	res.val.s = lmCloneString(lm, md5);
 	freez(&md5);
 	break;
 	}
     case strexBuiltInSeparate:
         {
         struct strexEval a = strexLocalEval(p->children, record, lookup, lm);
         struct strexEval b = strexLocalEval(p->children->next, record, lookup, lm);
         struct strexEval c = strexLocalEval(p->children->next->next, record, lookup, lm);
 	res.val.s = separateString(a.val.s, b.val.s, c.val.i, lm);
 	break;
 	}
+    case strexBuiltInUncsv:
+        {
+        struct strexEval a = strexLocalEval(p->children, record, lookup, lm);
+        struct strexEval b = strexLocalEval(p->children->next, record, lookup, lm);
+	res.val.s = uncsvString(a.val.s, b.val.i, lm);
+	break;
+	}
+    case strexBuiltInUntsv:
+        {
+        struct strexEval a = strexLocalEval(p->children, record, lookup, lm);
+        struct strexEval b = strexLocalEval(p->children->next, record, lookup, lm);
+	res.val.s = untsvString(a.val.s, b.val.i, lm);
+	break;
+	}
     }
 return res;
 }
 
 
 static struct strexEval strexLocalEval(struct strexParse *p, void *record, StrexEvalLookup lookup, 
 	struct lm *lm)
 /* Evaluate self on parse tree, allocating memory if needed from lm. */
 {
 struct strexEval res;
 switch (p->op)
     {
     case strexOpLiteral:
 	res.val = p->val;
 	res.type = p->type;