dd33f82508a1b779c3b827d221cb7651a253cd36
kent
  Fri Aug 23 22:52:34 2019 -0700
Adding in builtins letter_range, word_range, and chop_range.

diff --git src/lib/strex.c src/lib/strex.c
index 2e4d952..62bce88 100644
--- src/lib/strex.c
+++ src/lib/strex.c
@@ -59,30 +59,33 @@
     strexBuiltInUntsv,
     strexBuiltInReplace,
     strexBuiltInFix,
     strexBuiltInStrip,
     strexBuiltInLen,
     strexBuiltInSymbol,
     strexBuiltInLower,
     strexBuiltInUpper,
     strexBuiltInIn, 
     strexBuiltInStarts,
     strexBuiltInEnds,
     strexBuiltInSame,
     strexBuiltInTidy,
     strexBuiltInWarn,
     strexBuiltInError,
+    strexBuiltInLetterRange,
+    strexBuiltInWordRange,
+    strexBuiltInChopRange,
     };
 
 struct strexBuiltIn
 /* Information to describe a built in function */
     {
     char *name;		/* Name in strex language:  trim, split, etc */
     enum strexBuiltInFunc func;  /* enum version: strexBuiltInTrim strexBuiltInChop etc. */
     enum strexType returnType;	 /* Type of return value */
     int paramCount;	/* Number of parameters, not flexible in this language! */
     enum strexType *paramTypes;  /* Array of types, one for each parameter */
     };
 
 union strexVal
 /* Some value of arbirary type that can be of any type corresponding to strexType */
     {
@@ -155,56 +158,62 @@
 /* Input to the strex parser - tokenizer and a hash full of built in functions. */
     {
     struct tokenizer *tkz;  /* Get next text input from here */
     struct hash *builtInHash;  /* Hash of built in functions */
     void *symbols;	    /* NULL or pointer to a symbol table to check */
     StrexLookup lookup; /* lookup something in symbol table if we have it */
     struct hash *importHash;   /* Hash of importex expressions keyed by file name */
     };
 
 /* Some predefined lists of parameter types */
 static enum strexType oneString[] = {strexTypeString};
 static enum strexType twoStrings[] = {strexTypeString, strexTypeString};
 static enum strexType threeStrings[] = {strexTypeString, strexTypeString, strexTypeString};
 static enum strexType stringInt[] = {strexTypeString, strexTypeInt};
 static enum strexType stringStringInt[] = {strexTypeString, strexTypeString, strexTypeInt};
+static enum strexType stringIntInt[] = {strexTypeString, strexTypeInt, strexTypeInt};
+static enum strexType stringStringIntInt[] = {strexTypeString, strexTypeString,
+    strexTypeInt, strexTypeInt};
 
 /* There's one element here for each built in function.  There's also a few switches you'll need to
  * fill in if you add a new built in function. */
 static struct strexBuiltIn builtins[] = {
     { "trim", strexBuiltInTrim, strexTypeString, 1, oneString, },
     { "between", strexBuiltInBetween, strexTypeString, 3, threeStrings },
     { "word", strexBuiltInWord, strexTypeString, 2, stringInt },
     { "now", strexBuiltInNow, strexTypeString, 0, NULL },
     { "md5", strexBuiltInMd5, strexTypeString, 1, oneString },
     { "chop", strexBuiltInChop, strexTypeString, 3, stringStringInt },
     { "uncsv", strexBuiltInUncsv, strexTypeString, 2, stringInt },
     { "untsv", strexBuiltInUntsv, strexTypeString, 2, stringInt },
     { "replace", strexBuiltInReplace, strexTypeString, 3, threeStrings },
     { "fix", strexBuiltInFix, strexTypeString, 3, threeStrings },
     { "strip", strexBuiltInStrip, strexTypeString, 2, twoStrings },
     { "len", strexBuiltInLen, strexTypeInt, 1, oneString},
     { "symbol", strexBuiltInSymbol, strexTypeString, 2, twoStrings },
     { "upper", strexBuiltInUpper, strexTypeString, 1, oneString },
     { "lower", strexBuiltInLower, strexTypeString, 1, oneString },
     { "in", strexBuiltInIn, strexTypeBoolean, 2, twoStrings },
     { "starts_with", strexBuiltInStarts, strexTypeBoolean, 2, twoStrings}, 
     { "ends_with", strexBuiltInEnds, strexTypeBoolean, 2, twoStrings}, 
     { "same", strexBuiltInSame, strexTypeBoolean, 2, twoStrings}, 
     { "tidy", strexBuiltInTidy, strexTypeString, 3, threeStrings },
     { "warn", strexBuiltInWarn, strexTypeString, 1, oneString},
     { "error", strexBuiltInError, strexTypeString, 1, oneString},
+    { "letter_range", strexBuiltInLetterRange, strexTypeString, 3, stringIntInt},
+    { "word_range", strexBuiltInWordRange, strexTypeString, 3, stringIntInt},
+    { "chop_range", strexBuiltInChopRange, strexTypeString, 4, stringStringIntInt},
 };
 
 static struct hash *hashBuiltIns()
 /* Build a hash of builtins keyed by name */
 {
 struct hash *hash = hashNew(0);
 int i;
 for (i=0; i<ArraySize(builtins); ++i)
     hashAdd(hash, builtins[i].name, &builtins[i]);
 return hash;
 }
 
 static struct strexIn *strexInNew(struct lineFile *lf,
     void *symbols, StrexLookup lookup)
 /* Return a new strexIn structure wrapped around lineFile */
@@ -1348,59 +1357,156 @@
 /* Handle parse tree generated by an indexed array. */
 {
 struct strexParse *array = p->children;
 struct strexParse *index = array->next;
 struct strexEval arrayVal = strexLocalEval(array, run);
 struct strexEval indexVal = strexLocalEval(index, run);
 int ix = indexVal.val.i;
 if (ix  < 0)
     ix = strlen(arrayVal.val.s) + ix;
 struct strexEval res;
 res.val.s = lmCloneStringZ(run->lm, arrayVal.val.s + ix, 1);
 res.type = strexTypeString;
 return res;
 }
 
-static struct strexEval strexEvalArrayRange(struct strexParse *p, struct strexRun *run)
-/* Handle parse tree generated by array range expression, which by now
- * has just been turned into two integer values. */
+static void pythonInterpretEnds(int *pStart, int *pEnd, int len)
+/* Convert Python-style slice ends, where negative values count back from the
+ * end, into offsets with 0 <= *pStart <= *pEnd <= len (len must be >= 0). */
+{
+int start = *pStart, end = *pEnd;
+// Negative numbers count from end of array
+if (start < 0)
+    start += len;
+if (end < 0)
+    end += len;
+// Clip both ends into 0..len.  Start gets the upper clip too, otherwise a
+// start past the end would have callers address memory beyond the array.
+if (start < 0)
+    start = 0;
+if (start > len)
+    start = len;
+if (end > len)
+    end = len;
+if (end < start) end = start;   // A backwards range collapses to empty
+*pStart = start;
+*pEnd = end;
+}
+
+static char *joinString(char **parts, char *joiner, int start, int end, struct strexRun *run)
+/* Return a string allocated from run->lm made of parts[start] up to but not
+ * including parts[end], with the joiner string between neighbors.  An empty
+ * range (start >= end) gives back an empty string. */
+{
+if (start >= end)
+    return "";	// Nothing to join, and the size math below needs at least one part
+int joinerSize = strlen(joiner);
+
+/* Figure out output string size. */
+int i;
+int outSize = -joinerSize;  // There is one less joiner than there are parts
+for (i=start; i<end; ++i)
+    outSize += strlen(parts[i]) + joinerSize;
+
+/* Allocate output string with room for the terminating zero. */
+char *result = lmAlloc(run->lm, outSize + 1);
+char *out = result;
+
+/* Copy part/joiner/part/.../joiner/part, so no joiner ahead of the first part */
+for (i=start; i<end; ++i)
+     {
+     if (i > start)
+         {
+         strcpy(out, joiner);
+         out += joinerSize;
+         }
+     strcpy(out, parts[i]);
+     out += strlen(parts[i]);
+     }
+return result;
+}
+
+static char *strexEvalWordRange(char *string, int start, int end,
+    struct strexRun *run)
+/* Return the words of string, as chopped by chopByWhite, from index start up
+ * to but not including index end, rejoined with single spaces.  Negative
+ * indexes count back from the last word.  An empty selection returns "". */
+{
+int wordCount = chopByWhite(string, NULL, 0);
+pythonInterpretEnds(&start, &end, wordCount);
+if (wordCount <= 0 || start >= end)
+    return "";
+/* Not super efficient: chop a clone of the whole string into words, then glue
+ * the selected ones back together. */
+char **words;
+lmAllocArray(run->lm, words, wordCount);
+char *clone = lmCloneString(run->lm, string);
+chopByWhite(clone, words, wordCount);
+return joinString(words, " ", start, end, run);
+}
+
+static char *strexEvalChopRange(char *string, char *splits, int start, int end,
+    struct strexRun *run)
+/* Return the fields of string from index start up to but not including index
+ * end, chopping on and rejoining with the first char of splits.  Negative
+ * indexes count back from the last field.  An empty selection returns "". */
+{
+char splitter = splits[0];
+int wordCount = chopByChar(string, splitter, NULL, 0);
+pythonInterpretEnds(&start, &end, wordCount);
+if (wordCount <= 0 || start >= end)
+    return "";
+/* Not super efficient: chop a clone of the whole string into fields, then glue
+ * the selected ones back together. */
+char **words;
+lmAllocArray(run->lm, words, wordCount);
+char *clone = lmCloneString(run->lm, string);
+chopByChar(clone, splitter, words, wordCount);
+char joiner[2] = {splitter, 0};
+return joinString(words, joiner, start, end, run);
+}
+
+
+static struct strexEval strexEvalArrayStartEnd(struct strexParse *array, 
+    struct strexParse *index1, struct strexParse *index2, struct strexRun *run)
+/* Evaluate array, index1 and index2, then return the part of the array string
+ * from index1 up to index2 as adjusted by pythonInterpretEnds. */
 {
-struct strexParse *array = p->children;
-struct strexParse *index1 = array->next;
-struct strexParse *index2 = index1->next;
 struct strexEval arrayVal = strexLocalEval(array, run);
 struct strexEval rangeStart = strexLocalEval(index1, run);
 struct strexEval rangeEnd = strexLocalEval(index2, run);
 char *arraySource = arrayVal.val.s;
 int start = rangeStart.val.i;
 int end = rangeEnd.val.i;
 int len = strlen(arraySource); 
-if (start < 0)
-    start = strlen(arraySource) + start;
-if (end < 0)
-    end = strlen(arraySource) + end;
-if (start < 0)
-   start = 0;
-if (end > len)
-    end = len;
-if (end < start) end = start;   // errors apparently just get truncated in this language.  hmm
+pythonInterpretEnds(&start, &end, len);
 struct strexEval res;
 res.val.s = lmCloneStringZ(run->lm, arrayVal.val.s + start, end-start);
 res.type = strexTypeString;
 return res;
 }
 
+static struct strexEval strexEvalArrayRange(struct strexParse *p, struct strexRun *run)
+/* Evaluate an array range parse node.  Its children are, in order, the array
+ * expression, the range start and the range end. */
+{
+struct strexParse *arrayExp = p->children;
+struct strexParse *startExp = arrayExp->next;
+struct strexParse *endExp = startExp->next;
+return strexEvalArrayStartEnd(arrayExp, startExp, endExp, run);
+}
+
 static char *splitString(char *words,  int ix,  struct lm *lm)
 /* Return the space-delimited word of index ix as clone into lm */
 {
 if (ix < 0)	// Negative index.  We got to count, dang
     {
     int wordCount = chopByWhite(words, NULL, 0);   
     ix = wordCount + ix;
     if (ix < 0)
         return "";
     }
 char *s = words;
 int i;
 for (i=0; ; ++i)
     {
     s = skipLeadingSpaces(s);
@@ -1732,30 +1838,50 @@
     case strexBuiltInError:
         {
 	/* Figure out the message we want to convey, send it to warning handler
 	 * before returning it. */
         struct strexEval a = strexLocalEval(p->children, run);
 	char *message = a.val.s;
 	char *output = lmJoinStrings(run->lm, "ERROR: ", message);
 	if (run->warnHandler != NULL) 
 	    run->warnHandler(run->symbols, output);
 	if (run->abortHandler != NULL) 
 	    run->abortHandler(run->symbols);
 	errAbort("%s", output);
 	res.val.s = output;   // some sort of OCD makes me fill this in in spite of errAbort above
 	break;
 	}
+    case strexBuiltInLetterRange:
+	res = strexEvalArrayStartEnd(p->children, p->children->next, p->children->next->next, run);
+	break;
+    case strexBuiltInWordRange:
+        {
+        struct strexEval string = strexLocalEval(p->children, run);
+        struct strexEval start = strexLocalEval(p->children->next, run);
+        struct strexEval end = strexLocalEval(p->children->next->next, run);
+	res.val.s = strexEvalWordRange(string.val.s, start.val.i, end.val.i, run);
+	break;	// Without this we fall into chop_range, which expects four parameters
+	}
+    case strexBuiltInChopRange:
+        {
+        struct strexEval string = strexLocalEval(p->children, run);
+        struct strexEval splitter = strexLocalEval(p->children->next, run);
+        struct strexEval start = strexLocalEval(p->children->next->next, run);
+        struct strexEval end = strexLocalEval(p->children->next->next->next, run);
+	res.val.s = strexEvalChopRange(string.val.s, splitter.val.s, start.val.i, end.val.i, run);
+	break;
+	}
     }
 return res;
 }
 
 static struct strexEval strexEvalPick(struct strexParse *pick, struct strexRun *run)
 /* Evaluate a pick operator. */
 {
 /* Evaluate the keyValue */
 struct strexParse *p = pick->children;
 struct strexEval keyVal = strexLocalEval(p, run);
 p = p->next;
 
 /* Get pointer to default expression but don't evaluate it yet */
 struct strexParse *defaultExp = p;
 p = p->next;