a220449e97fcf9dd90acd89f7af51e694829d662
kent
  Fri Aug 9 23:10:47 2019 -0700
Making array ranges and negative indexes work as they do in python.

diff --git src/lib/strex.c src/lib/strex.c
index c42fd4b..3d20516 100644
--- src/lib/strex.c
+++ src/lib/strex.c
@@ -80,31 +80,35 @@
 struct strexEval
 /* Result of evaluation of parse tree. */
     {
     enum strexType type;
     union strexVal val;
     };
 
 enum strexOp
 /* An operation in the parse tree. */
     {
     strexOpUnknown,	/* Should not occur */
     strexOpLiteral,        /* Literal string or number. */
     strexOpSymbol,	/* A symbol name. */
 
     strexOpBuiltInCall,	/* Call a built in function */
+
     strexOpArrayIx,	/* An array with an index. */
+    strexOpArrayRange,	/* An array with a range. */
+
+    strexOpStrlen,	/* Length of a string */
 
     /* Unary minus for numbers */
     strexOpUnaryMinusInt,
     strexOpUnaryMinusDouble,
 
     /* Binary operations. */
     strexOpAdd,
 
     /* Type conversions - possibly a few more than we actually need at the moment. */
     strexOpStringToBoolean,
     strexOpIntToBoolean,
     strexOpDoubleToBoolean,
     strexOpStringToInt,
     strexOpDoubleToInt,
     strexOpBooleanToInt,
@@ -177,30 +181,40 @@
 return si;
 }
 
 static void strexInFree(struct strexIn **pSi)
 /* Free up memory associated with strexIn structure */
 {
 struct strexIn *si = *pSi;
 if (si != NULL)
     {
     hashFree(&si->builtInHash);
     tokenizerFree(&si->tkz);
     freez(pSi);
     }
 }
 
+struct strexParse *strexParseNew(enum strexOp op, enum strexType type)
+/* Return a freshly allocated strexParse of the given op and type.  No other field is assigned here; val being 0/NULL relies on AllocVar zeroing the struct (presumed). */
+{
+struct strexParse *p;
+AllocVar(p);
+p->op = op;
+p->type = type;
+return p;
+}
+
 static void strexValDump(union strexVal val, enum strexType type, FILE *f)
 /* Dump out value to file. */
 {
 switch (type)
     {
     case strexTypeBoolean:
         fprintf(f, "%s", (val.b ? "true" : "false") );
 	break;
     case strexTypeString:
         fprintf(f, "%s", (val.s == NULL ? "(null)" : val.s));
 	break;
     case strexTypeInt:
         fprintf(f, "%lld", val.i);
 	break;
     case strexTypeDouble:
@@ -269,31 +283,35 @@
 
     case strexOpUnaryMinusInt:
         return "strexOpUnaryMinusInt";
     case strexOpUnaryMinusDouble:
         return "strexOpUnaryMinusDouble";
 
 
     case strexOpAdd:
 	return "strexOpAdd";
 
     case strexOpBuiltInCall:
         return "strexOpBuiltInCall";
 
     case strexOpArrayIx:
         return "strexOpArrayIx";
+    case strexOpArrayRange:
+        return "strexOpArrayRange";
 
+    case strexOpStrlen:
+        return "strexOpStrlen";
     default:
 	return "strexOpUnknown";
     }
 }
 
 void strexParseDump(struct strexParse *p, int depth, FILE *f)
 /* Dump out strexParse tree and children. */
 {
 spaceOut(f, 3*depth);
 fprintf(f, "%s ", strexOpToString(p->op));
 strexValDump(p->val, p->type,  f);
 fprintf(f, "\n");
 struct strexParse *child;
 for (child = p->children; child != NULL; child= child->next)
     strexParseDump(child, depth+1, f);
@@ -571,55 +589,107 @@
     for (i=0, p=function->children; i<childCount; ++i, p = p->next)
         {
 	if (p->type != builtIn->paramTypes[i])
 	    {
 	    errAbort("Parameter #%d to %s needs to be type %s not %s line %d of %s",
 		i, builtIn->name,  strexTypeToString(builtIn->paramTypes[i]), 
 		strexTypeToString(p->type), tkz->lf->lineIx, tkz->lf->fileName);
 	    }
 	}
     }
 else
     tokenizerReuse(tkz);
 return function;
 }
 
+struct strexParse *arrayRangeTree(struct strexParse *array, 
+    struct strexParse *firstIndex, struct strexParse *secondIndex)
+/* Create an array range parse tree: a strexOpArrayRange node of string type whose children are array, firstIndex, secondIndex in that order. */
+{
+struct strexParse *p = strexParseNew(strexOpArrayRange, strexTypeString);
+p->children = array;
+array->next = firstIndex;
+firstIndex->next = secondIndex;
+return p;
+}
+
 static struct strexParse *strexParseIndex(struct strexIn *in)
 /* Handle the [] in this[6].  Convert it into tree:
 *         strexOpArrayIx
 *            strexParseFunction
 *            strexParseFunction */
 {
 struct tokenizer *tkz = in->tkz;
-struct strexParse *collection = strexParseFunction(in);
-struct strexParse *p = collection;
+struct strexParse *array = strexParseFunction(in);
+struct strexParse *p = array;
 char *tok = tokenizerNext(tkz);
 if (tok == NULL)
     tokenizerReuse(tkz);
 else if (tok[0] == '[')
     {
-    struct strexParse *index = strexParseExpression(in);
-    // struct strexParse *index = strexParseFunction(in);
-    index = strexParseCoerce(index, strexTypeInt);
-    collection = strexParseCoerce(collection, strexTypeString);
-    skipOverRequired(in, "]");
+    array = strexParseCoerce(array, strexTypeString);
+    tok = tokenizerMustHaveNext(tkz);
+    if (tok[0] == ':')  // Case where there is a range with an empty beginning, as in [:n] or [:]
+        {
+	tok = tokenizerMustHaveNext(tkz);
+	if (tok[0] == ']')
+	    {
+	    tokenizerReuse(tkz);    // Range is just whole array, do nothing really
+	    }
+	else
+	    {
+	    tokenizerReuse(tkz);    // Put the token back so it gets parsed as the end-of-range expression
+	    struct strexParse *firstIndex = strexParseNew(strexOpLiteral, strexTypeInt);  // val not set here, so start is the 0 strexParseNew leaves
+	    struct strexParse *secondIndex = strexParseCoerce(strexParseExpression(in), strexTypeInt);
+	    p = arrayRangeTree(array, firstIndex, secondIndex);
+	    }
+        }
+    else
+	{
+	tokenizerReuse(tkz);
+	struct strexParse *firstIndex = strexParseCoerce(strexParseExpression(in), strexTypeInt);
+	tok = tokenizerMustHaveNext(tkz);
+	if (tok[0] == ':')
+	    {
+	    struct strexParse *secondIndex;
+	    tok = tokenizerMustHaveNext(tkz);
+	    if (tok[0] == ']')  // Case where second half of range is empty
+		{
+	        tokenizerReuse(tkz);
+		secondIndex = strexParseNew(strexOpLiteral, strexTypeInt);
+		secondIndex->val.i = -1;	// Stands for "through end"; strexEvalArrayRange turns a negative end into len+end+1
+		}
+	    else
+	        {
+	        tokenizerReuse(tkz);
+		secondIndex = strexParseCoerce(strexParseExpression(in), strexTypeInt);
+		}
+	    p = arrayRangeTree(array, firstIndex, secondIndex);
+	    }
+	else
+	    {
+	    // Simple no range case: a single index, built as a strexOpArrayIx node
+	    tokenizerReuse(tkz);
 	    AllocVar(p);
 	    p->op = strexOpArrayIx;
 	    p->type = strexTypeString;
-    p->children = collection;
+	    p->children = array;
 	    p->val.s = cloneString("");
-    collection->next = index;
+	    array->next = firstIndex;
+	    }
+	}
+    skipOverRequired(in, "]");
     }
 else
     tokenizerReuse(tkz);
 return p;
 }
 
 
 static struct strexParse *strexParseUnaryMinus(struct strexIn *in)
 /* Return unary minus sort of parse tree if there's a leading '-' */
 {
 struct tokenizer *tkz = in->tkz;
 char *tok = tokenizerMustHaveNext(tkz);
 if (tok[0] == '-')
     {
     struct strexParse *c = strexParseIndex(in);
@@ -785,40 +855,73 @@
 
 
 static char *csvParseOneOut(char *csvIn, int ix, struct dyString *scratch)
 /* Return csv value of given index or NULL if at end */
 {
 char *pos = csvIn;
 int i;
 for (i=0; i<ix; ++i)
     {
     if (csvParseNext(&pos,scratch) == NULL)
         return NULL;
     }
 return csvParseNext(&pos, scratch);
 }
 
-static struct strexEval strexEvalArrayIx(struct strexParse *p, void *record, StrexEvalLookup lookup,
-	struct lm *lm)
+static struct strexEval strexEvalArrayIx(struct strexParse *p, 
+	void *record, StrexEvalLookup lookup, struct lm *lm)
 /* Handle parse tree generated by an indexed array. */
 {
 struct strexParse *array = p->children;
 struct strexParse *index = array->next;
 struct strexEval arrayVal = strexLocalEval(array, record, lookup, lm);
 struct strexEval indexVal = strexLocalEval(index, record, lookup, lm);
+int ix = indexVal.val.i;
+if (ix  < 0)   // Negative index counts back from the end of the string
+    ix = strlen(arrayVal.val.s) + ix;
 struct strexEval res;
-res.val.s = lmCloneStringZ(lm, arrayVal.val.s + indexVal.val.i, 1);
+res.val.s = lmCloneStringZ(lm, arrayVal.val.s + ix, 1);  // NOTE(review): ix is not bounds checked; an index outside the string makes this pointer out of range
+res.type = strexTypeString;
+return res;
+}
+
+static struct strexEval strexEvalArrayRange(struct strexParse *p, 
+    void *record, StrexEvalLookup lookup, struct lm *lm)
+/* Handle parse tree generated by array range expression, which by now
+ * has just been turned into two integer values. */
+{
+struct strexParse *array = p->children;
+struct strexParse *index1 = array->next;
+struct strexParse *index2 = index1->next;
+struct strexEval arrayVal = strexLocalEval(array, record, lookup, lm);
+struct strexEval rangeStart = strexLocalEval(index1, record, lookup, lm);
+struct strexEval rangeEnd = strexLocalEval(index2, record, lookup, lm);
+char *arraySource = arrayVal.val.s;
+int start = rangeStart.val.i;
+int end = rangeEnd.val.i;
+int len = strlen(arraySource); 
+if (start < 0)
+    start = strlen(arraySource) + start;   // Negative start counts back from the end of the string
+if (end < 0)
+    end = strlen(arraySource) + end + 1;   // NOTE(review): -1 maps to len, so [a:-1] keeps the last char, unlike Python
+if (start < 0)
+   start = 0;   // Still negative after the adjustment: clamp to the beginning
+if (end > len)
+    end = len;   // NOTE(review): start is never clamped to len, so a start past the end stays past the end
+if (end < start) end = start;   // An inverted range gives a zero-length result rather than an error
+struct strexEval res;
+res.val.s = lmCloneStringZ(lm, arrayVal.val.s + start, end-start);
 res.type = strexTypeString;
 return res;
 }
 
 static char *splitString(char *words,  int ix,  struct lm *lm)
 /* Return the space-delimited word of index ix as clone into lm */
 {
 char *s = words;
 int i;
 for (i=0; ; ++i)
     {
     s = skipLeadingSpaces(s);
     if (isEmpty(s))
         errAbort("There aren't %d words in %s", ix+1, words);
     char *end = skipToSpaces(s);
@@ -1032,30 +1135,33 @@
 	break;
 
     /* Arithmetical negation. */
     case strexOpUnaryMinusInt:
         res = strexLocalEval(p->children, record, lookup, lm);
 	res.val.i = -res.val.i;
 	break;
     case strexOpUnaryMinusDouble:
         res = strexLocalEval(p->children, record, lookup, lm);
 	res.val.x = -res.val.x;
 	break;
 
     case strexOpArrayIx:
        res = strexEvalArrayIx(p, record, lookup, lm);
        break;
+    case strexOpArrayRange:
+       res = strexEvalArrayRange(p, record, lookup, lm);
+       break;
 
     case strexOpBuiltInCall:
        res = strexEvalCallBuiltIn(p, record, lookup, lm);
        break;
 
     /* Mathematical ops, simple binary type */
     case strexOpAdd:
        res = strexEvalAdd(p, record, lookup, lm);
        break;
 
     default:
         errAbort("Unknown op %s\n", strexOpToString(p->op));
 	res.type = strexTypeInt;	// Keep compiler from complaining.
 	res.val.i = 0;	// Keep compiler from complaining.
 	break;