a220449e97fcf9dd90acd89f7af51e694829d662 kent Fri Aug 9 23:10:47 2019 -0700 Making array ranges and negative indexes work as they do in python. diff --git src/lib/strex.c src/lib/strex.c index c42fd4b..3d20516 100644 --- src/lib/strex.c +++ src/lib/strex.c @@ -80,31 +80,35 @@ struct strexEval /* Result of evaluation of parse tree. */ { enum strexType type; union strexVal val; }; enum strexOp /* An operation in the parse tree. */ { strexOpUnknown, /* Should not occur */ strexOpLiteral, /* Literal string or number. */ strexOpSymbol, /* A symbol name. */ strexOpBuiltInCall, /* Call a built in function */ + strexOpArrayIx, /* An array with an index. */ + strexOpArrayRange, /* An array with a range. */ + + strexOpStrlen, /* Length of a string */ /* Unary minus for numbers */ strexOpUnaryMinusInt, strexOpUnaryMinusDouble, /* Binary operations. */ strexOpAdd, /* Type conversions - possibly a few more than we actually need at the moment. */ strexOpStringToBoolean, strexOpIntToBoolean, strexOpDoubleToBoolean, strexOpStringToInt, strexOpDoubleToInt, strexOpBooleanToInt, @@ -177,30 +181,40 @@ return si; } static void strexInFree(struct strexIn **pSi) /* Free up memory associated with strexIn structure */ { struct strexIn *si = *pSi; if (si != NULL) { hashFree(&si->builtInHash); tokenizerFree(&si->tkz); freez(pSi); } } +struct strexParse *strexParseNew(enum strexOp op, enum strexType type) +/* Return a fresh strexParse of the given op and type with the val set to 0/NULL */ +{ +struct strexParse *p; +AllocVar(p); +p->op = op; +p->type = type; +return p; +} + static void strexValDump(union strexVal val, enum strexType type, FILE *f) /* Dump out value to file. */ { switch (type) { case strexTypeBoolean: fprintf(f, "%s", (val.b ? "true" : "false") ); break; case strexTypeString: fprintf(f, "%s", (val.s == NULL ? 
"(null)" : val.s)); break; case strexTypeInt: fprintf(f, "%lld", val.i); break; case strexTypeDouble: @@ -269,31 +283,35 @@ case strexOpUnaryMinusInt: return "strexOpUnaryMinusInt"; case strexOpUnaryMinusDouble: return "strexOpUnaryMinusDouble"; case strexOpAdd: return "strexOpAdd"; case strexOpBuiltInCall: return "strexOpBuiltInCall"; case strexOpArrayIx: return "strexOpArrayIx"; + case strexOpArrayRange: + return "strexOpArrayRange"; + case strexOpStrlen: + return "strexOpStrlen"; default: return "strexOpUnknown"; } } void strexParseDump(struct strexParse *p, int depth, FILE *f) /* Dump out strexParse tree and children. */ { spaceOut(f, 3*depth); fprintf(f, "%s ", strexOpToString(p->op)); strexValDump(p->val, p->type, f); fprintf(f, "\n"); struct strexParse *child; for (child = p->children; child != NULL; child= child->next) strexParseDump(child, depth+1, f); @@ -571,55 +589,107 @@ for (i=0, p=function->children; i<childCount; ++i, p = p->next) { if (p->type != builtIn->paramTypes[i]) { errAbort("Parameter #%d to %s needs to be type %s not %s line %d of %s", i, builtIn->name, strexTypeToString(builtIn->paramTypes[i]), strexTypeToString(p->type), tkz->lf->lineIx, tkz->lf->fileName); } } } else tokenizerReuse(tkz); return function; } +struct strexParse *arrayRangeTree(struct strexParse *array, + struct strexParse *firstIndex, struct strexParse *secondIndex) +/* Creat an array range parse tree */ +{ +struct strexParse *p = strexParseNew(strexOpArrayRange, strexTypeString); +p->children = array; +array->next = firstIndex; +firstIndex->next = secondIndex; +return p; +} + static struct strexParse *strexParseIndex(struct strexIn *in) /* Handle the [] in this[6]. 
Convert it into tree: * strexOpArrayIx * strexParseFunction * strexParseFunction */ { struct tokenizer *tkz = in->tkz; -struct strexParse *collection = strexParseFunction(in); -struct strexParse *p = collection; +struct strexParse *array = strexParseFunction(in); +struct strexParse *p = array; char *tok = tokenizerNext(tkz); if (tok == NULL) tokenizerReuse(tkz); else if (tok[0] == '[') { - struct strexParse *index = strexParseExpression(in); - // struct strexParse *index = strexParseFunction(in); - index = strexParseCoerce(index, strexTypeInt); - collection = strexParseCoerce(collection, strexTypeString); - skipOverRequired(in, "]"); + array = strexParseCoerce(array, strexTypeString); + tok = tokenizerMustHaveNext(tkz); + if (tok[0] == ':') // Case where the range has an empty beginning, so the start index is a literal 0. + { + tok = tokenizerMustHaveNext(tkz); + if (tok[0] == ']') + { + tokenizerReuse(tkz); // Range is just whole array, do nothing really + } + else + { + tokenizerReuse(tkz); // Not the whole array: put the token back and parse the end index + struct strexParse *firstIndex = strexParseNew(strexOpLiteral, strexTypeInt); + struct strexParse *secondIndex = strexParseCoerce(strexParseExpression(in), strexTypeInt); + p = arrayRangeTree(array, firstIndex, secondIndex); + } + } + else + { + tokenizerReuse(tkz); + struct strexParse *firstIndex = strexParseCoerce(strexParseExpression(in), strexTypeInt); + tok = tokenizerMustHaveNext(tkz); + if (tok[0] == ':') + { + struct strexParse *secondIndex; + tok = tokenizerMustHaveNext(tkz); + if (tok[0] == ']') // Case where second half of range is empty + { + tokenizerReuse(tkz); + secondIndex = strexParseNew(strexOpLiteral, strexTypeInt); + secondIndex->val.i = -1; + } + else + { + tokenizerReuse(tkz); + secondIndex = strexParseCoerce(strexParseExpression(in), strexTypeInt); + } + p = arrayRangeTree(array, firstIndex, secondIndex); + } + else + { + // Simple no range case + tokenizerReuse(tkz); AllocVar(p); p->op = strexOpArrayIx; p->type = 
strexTypeString; - p->children = collection; + p->children = array; p->val.s = cloneString(""); - collection->next = index; + array->next = firstIndex; + } + } + skipOverRequired(in, "]"); } else tokenizerReuse(tkz); return p; } static struct strexParse *strexParseUnaryMinus(struct strexIn *in) /* Return unary minus sort of parse tree if there's a leading '-' */ { struct tokenizer *tkz = in->tkz; char *tok = tokenizerMustHaveNext(tkz); if (tok[0] == '-') { struct strexParse *c = strexParseIndex(in); @@ -785,40 +855,73 @@ static char *csvParseOneOut(char *csvIn, int ix, struct dyString *scratch) /* Return csv value of given index or NULL if at end */ { char *pos = csvIn; int i; for (i=0; i<ix; ++i) { if (csvParseNext(&pos,scratch) == NULL) return NULL; } return csvParseNext(&pos, scratch); } -static struct strexEval strexEvalArrayIx(struct strexParse *p, void *record, StrexEvalLookup lookup, - struct lm *lm) +static struct strexEval strexEvalArrayIx(struct strexParse *p, + void *record, StrexEvalLookup lookup, struct lm *lm) /* Handle parse tree generated by an indexed array. */ { struct strexParse *array = p->children; struct strexParse *index = array->next; struct strexEval arrayVal = strexLocalEval(array, record, lookup, lm); struct strexEval indexVal = strexLocalEval(index, record, lookup, lm); +int ix = indexVal.val.i; +if (ix < 0) + ix = strlen(arrayVal.val.s) + ix; struct strexEval res; -res.val.s = lmCloneStringZ(lm, arrayVal.val.s + indexVal.val.i, 1); +res.val.s = lmCloneStringZ(lm, arrayVal.val.s + ix, 1); +res.type = strexTypeString; +return res; +} + +static struct strexEval strexEvalArrayRange(struct strexParse *p, + void *record, StrexEvalLookup lookup, struct lm *lm) +/* Handle parse tree generated by array range expression, which by now + * has just been turned into two integer values. 
*/ +{ +struct strexParse *array = p->children; +struct strexParse *index1 = array->next; +struct strexParse *index2 = index1->next; +struct strexEval arrayVal = strexLocalEval(array, record, lookup, lm); +struct strexEval rangeStart = strexLocalEval(index1, record, lookup, lm); +struct strexEval rangeEnd = strexLocalEval(index2, record, lookup, lm); +char *arraySource = arrayVal.val.s; +int start = rangeStart.val.i; +int end = rangeEnd.val.i; +int len = strlen(arraySource); +if (start < 0) + start = strlen(arraySource) + start; +if (end < 0) + end = strlen(arraySource) + end + 1; +if (start < 0) + start = 0; +if (end > len) + end = len; +if (end < start) end = start; // An inverted range yields an empty string instead of an error +struct strexEval res; +res.val.s = lmCloneStringZ(lm, arrayVal.val.s + start, end-start); res.type = strexTypeString; return res; } static char *splitString(char *words, int ix, struct lm *lm) /* Return the space-delimited word of index ix as clone into lm */ { char *s = words; int i; for (i=0; ; ++i) { s = skipLeadingSpaces(s); if (isEmpty(s)) errAbort("There aren't %d words in %s", ix+1, words); char *end = skipToSpaces(s); @@ -1032,30 +1135,33 @@ break; /* Arithmetical negation. */ case strexOpUnaryMinusInt: res = strexLocalEval(p->children, record, lookup, lm); res.val.i = -res.val.i; break; case strexOpUnaryMinusDouble: res = strexLocalEval(p->children, record, lookup, lm); res.val.x = -res.val.x; break; case strexOpArrayIx: res = strexEvalArrayIx(p, record, lookup, lm); break; + case strexOpArrayRange: + res = strexEvalArrayRange(p, record, lookup, lm); + break; case strexOpBuiltInCall: res = strexEvalCallBuiltIn(p, record, lookup, lm); break; /* Mathematical ops, simple binary type */ case strexOpAdd: res = strexEvalAdd(p, record, lookup, lm); break; default: errAbort("Unknown op %s\n", strexOpToString(p->op)); res.type = strexTypeInt; // Keep compiler from complaining. res.val.i = 0; // Keep compiler from complaining. break;