src/utils/raSqlQuery/rqlParse.c 1.1
1.1 2009/11/20 07:41:56 kent
Adding in merge option. Supporting 'not like' operation. Splitting into modules.
Index: src/utils/raSqlQuery/rqlParse.c
===================================================================
RCS file: src/utils/raSqlQuery/rqlParse.c
diff -N src/utils/raSqlQuery/rqlParse.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ src/utils/raSqlQuery/rqlParse.c 20 Nov 2009 07:41:56 -0000 1.1
@@ -0,0 +1,572 @@
+/* rqlParse - parser for a restricted SQL-like query language. */
+#include "common.h"
+#include "linefile.h"
+#include "hash.h"
+#include "dystring.h"
+#include "tokenizer.h"
+#include "sqlNum.h"
+#include "raRecord.h"
+#include "rql.h"
+
+char *rqlOpToString(enum rqlOp op)
+/* Return name of parse op as a string literal.  Ops without a case below give "rqlOpUnknown". */
+{
+switch (op)
+ {
+ case rqlOpLiteral:
+ return "rqlOpLiteral";
+ case rqlOpSymbol:
+ return "rqlOpSymbol";
+ case rqlOpEq:
+ return "rqlOpEq";
+ case rqlOpNe:
+ return "rqlOpNe";
+ case rqlOpAnd:
+ return "rqlOpAnd";
+ case rqlOpOr:
+ return "rqlOpOr";
+ /* Type conversion ops. */
+ case rqlOpStringToBoolean:
+ return "rqlOpStringToBoolean";
+ case rqlOpIntToBoolean:
+ return "rqlOpIntToBoolean";
+ case rqlOpDoubleToBoolean:
+ return "rqlOpDoubleToBoolean";
+ case rqlOpStringToInt:
+ return "rqlOpStringToInt";
+ case rqlOpStringToDouble:
+ return "rqlOpStringToDouble";
+ case rqlOpBooleanToInt:
+ return "rqlOpBooleanToInt";
+ case rqlOpBooleanToDouble:
+ return "rqlOpBooleanToDouble";
+ case rqlOpIntToDouble:
+ return "rqlOpIntToDouble";
+ /* Unary minus on a double. */
+ case rqlOpUnaryMinusDouble:
+ return "rqlOpUnaryMinusDouble";
+ /* Comparison ops. */
+ case rqlOpGt:
+ return "rqlOpGt";
+ case rqlOpLt:
+ return "rqlOpLt";
+ case rqlOpGe:
+ return "rqlOpGe";
+ case rqlOpLe:
+ return "rqlOpLe";
+ case rqlOpLike:
+ return "rqlOpLike";
+ /* Boolean negation; the parser wraps it around a like node for "not like". */
+ case rqlOpNot:
+ return "rqlOpNot";
+ /* Anything else, including rqlOpUnknown itself. */
+ default:
+ return "rqlOpUnknown";
+ }
+}
+
+void rqlValDump(union rqlVal val, enum rqlType type, FILE *f)
+/* Write val to f, formatted according to type.  Types not listed here print nothing. */
+{
+switch (type)
+    {
+    case rqlTypeString:
+        fprintf(f, "%s", val.s);
+        break;
+    case rqlTypeInt:
+        fprintf(f, "%d", val.i);
+        break;
+    case rqlTypeDouble:
+        fprintf(f, "%f", val.x);
+        break;
+    case rqlTypeBoolean:
+        fputs(val.b ? "true" : "false", f);
+        break;
+    }
+}
+
+void rqlParseDump(struct rqlParse *p, int depth, FILE *f)
+/* Dump node p (op name, then value) on one line after spaceOut(f, 3*depth), then its children at depth+1. */
+{
+struct rqlParse *kid;
+spaceOut(f, 3*depth);
+fprintf(f, "%s ", rqlOpToString(p->op));
+rqlValDump(p->val, p->type, f);
+fprintf(f, "\n");
+for (kid = p->children; kid != NULL; kid = kid->next)
+    rqlParseDump(kid, depth+1, f);
+}
+
+static void expectingGot(struct tokenizer *tkz, char *expecting, char *got)
+/* Report unexpected input through errAbort(), naming what was expected, what was seen, and the line and file. */
+{
+struct lineFile *lf = tkz->lf;
+errAbort("Expecting %s, got %s, line %d of %s", expecting, got, lf->lineIx, lf->fileName);
+}
+
+static void skipOverRequired(struct tokenizer *tkz, char *expecting)
+/* Consume the next token, which must exist and must equal the string expecting. */
+{
+char *tok = tokenizerMustHaveNext(tkz);
+if (!sameString(tok, expecting))
+    expectingGot(tkz, expecting, tok);
+}
+
+
+struct rqlParse *rqlParseExpression(struct tokenizer *tkz);
+/* Parse out a clause, usually a where clause.  Declared here because rqlParseAtom() calls it for parentheses. */
+
+static struct rqlParse *rqlParseAtom(struct tokenizer *tkz)
+/* Return low level node: quoted string, symbol, number, or parenthesized expression. */
+{
+char *tok = tokenizerMustHaveNext(tkz);
+struct rqlParse *p;
+char c = tok[0];
+if (c == '(')
+    {
+    /* Handled before AllocVar so that no unused node is leaked. */
+    p = rqlParseExpression(tkz);
+    skipOverRequired(tkz, ")");
+    return p;
+    }
+AllocVar(p);
+if (c == '\'' || c == '"')
+    {
+    /* Skip the opening quote and drop the last character (presumably the closing quote). */
+    p->op = rqlOpLiteral;
+    p->type = rqlTypeString;
+    p->val.s = cloneStringZ(tok+1, (int)strlen(tok+1) - 1);
+    }
+else if (isalpha((unsigned char)c) || c == '_')
+    {
+    p->op = rqlOpSymbol;
+    p->type = rqlTypeString;    /* String until promoted at least. */
+    p->val.s = cloneString(tok);
+    }
+else if (isdigit((unsigned char)c))
+    {
+    p->op = rqlOpLiteral;
+    p->type = rqlTypeInt;
+    p->val.i = sqlUnsigned(tok);
+    if ((tok = tokenizerNext(tkz)) != NULL)
+        {
+        if (tok[0] == '.')    /* Fraction follows: rebuild the text and reparse as double. */
+            {
+            char buf[32];
+            tok = tokenizerMustHaveNext(tkz);
+            safef(buf, sizeof(buf), "%d.%s", p->val.i, tok);
+            p->type = rqlTypeDouble;
+            p->val.x = sqlDouble(buf);
+            }
+        else
+            tokenizerReuse(tkz);
+        }
+    }
+else
+    errAbort("Unexpected %s line %d of %s", tok, tkz->lf->lineIx, tkz->lf->fileName);
+return p;
+}
+
+static enum rqlType commonTypeForBop(enum rqlType left, enum rqlType right)
+/* Pick the type both operands of a binary operation get converted to.
+ * Equal types are used as is.  Otherwise the first of double, int, boolean,
+ * string that either operand has is chosen. */
+{
+if (left == right)
+    return left;
+if (left == rqlTypeDouble || right == rqlTypeDouble)
+    return rqlTypeDouble;
+if (left == rqlTypeInt || right == rqlTypeInt)
+    return rqlTypeInt;
+if (left == rqlTypeBoolean || right == rqlTypeBoolean)
+    return rqlTypeBoolean;
+if (left == rqlTypeString || right == rqlTypeString)
+    return rqlTypeString;
+/* Neither operand has one of the types checked above. */
+errAbort("Can't find commonTypeForBop");
+return rqlTypeInt;
+}
+
+static enum rqlOp booleanCastOp(enum rqlType oldType)
+/* Return op that converts a value of oldType to boolean. */
+{
+if (oldType == rqlTypeString)
+    return rqlOpStringToBoolean;
+else if (oldType == rqlTypeInt)
+    return rqlOpIntToBoolean;
+else if (oldType == rqlTypeDouble)
+    return rqlOpDoubleToBoolean;
+else
+    {
+    /* Only string, int and double sources are supported. */
+    internalErr();
+    return rqlOpUnknown;
+    }
+}
+
+static enum rqlOp intCastOp(enum rqlType oldType)
+/* Return op that converts a value of oldType to int. */
+{
+if (oldType == rqlTypeString)
+    return rqlOpStringToInt;
+else if (oldType == rqlTypeBoolean)
+    return rqlOpBooleanToInt;
+else
+    {
+    /* Only string and boolean sources are supported. */
+    internalErr();
+    return rqlOpUnknown;
+    }
+}
+
+static enum rqlOp doubleCastOp(enum rqlType oldType)
+/* Return op that converts a value of oldType to double. */
+{
+if (oldType == rqlTypeString)
+    return rqlOpStringToDouble;
+else if (oldType == rqlTypeBoolean)
+    return rqlOpBooleanToDouble;
+else if (oldType == rqlTypeInt)
+    return rqlOpIntToDouble;
+else
+    {
+    /* Only string, boolean and int sources are supported. */
+    internalErr();
+    return rqlOpUnknown;
+    }
+}
+
+
+static struct rqlParse *rqlParseCoerce(struct rqlParse *p, enum rqlType type)
+/* Make sure node p produces a value of the given type.  If it already does,
+ * return p itself.  Otherwise return a new conversion node of that type
+ * whose single child is p. */
+{
+struct rqlParse *wrap;
+
+/* Nothing to do when the type already matches. */
+if (p->type == type)
+    return p;
+
+AllocVar(wrap);
+wrap->children = p;
+wrap->type = type;
+if (type == rqlTypeBoolean)
+    wrap->op = booleanCastOp(p->type);
+else if (type == rqlTypeInt)
+    wrap->op = intCastOp(p->type);
+else if (type == rqlTypeDouble)
+    wrap->op = doubleCastOp(p->type);
+else
+    {
+    /* There is no op that converts to this type. */
+    internalErr();
+    }
+
+/* The caller uses the wrapper in place of p. */
+return wrap;
+}
+
+static struct rqlParse *rqlParseUnaryMinus(struct tokenizer *tkz)
+/* Parse an atom with an optional leading '-'.  With the minus, the atom is
+ * converted to double and wrapped in a unary minus node. */
+{
+char *tok = tokenizerMustHaveNext(tkz);
+if (tok[0] != '-')
+    {
+    /* No minus sign: give the token back and parse a plain atom. */
+    tokenizerReuse(tkz);
+    return rqlParseAtom(tkz);
+    }
+
+/* The only unary minus op works on doubles, so coerce the operand first. */
+struct rqlParse *operand = rqlParseCoerce(rqlParseAtom(tkz), rqlTypeDouble);
+struct rqlParse *neg;
+AllocVar(neg);
+neg->op = rqlOpUnaryMinusDouble;
+neg->type = rqlTypeDouble;
+neg->children = operand;
+return neg;
+}
+
+static boolean eatMatchingTok(struct tokenizer *tkz, char *s)
+/* Look at the next token.  If it equals s, consume it and return TRUE.
+ * Otherwise hand it back to the tokenizer and return FALSE. */
+{
+char *tok = tokenizerNext(tkz);
+boolean isMatch = (tok != NULL && sameString(tok, s));
+
+/* A NULL token counts as no match. */
+if (!isMatch)
+    tokenizerReuse(tkz);
+return isMatch;
+}
+
+static struct rqlParse *rqlParseCmp(struct tokenizer *tkz)
+/* Parse out comparison.  This is an operand optionally followed by one of
+ * =, !=, >, >=, <, <=, like, or not like and a second operand.  With no
+ * comparison operator the first operand is returned as is. */
+{
+struct rqlParse *l = rqlParseUnaryMinus(tkz);
+struct rqlParse *p = l;
+char *tok = tokenizerNext(tkz);
+boolean forceString = FALSE;
+boolean needNot = FALSE;
+if (tok != NULL)
+    {
+    enum rqlOp op = rqlOpUnknown;
+    if (sameString(tok, "="))
+        op = rqlOpEq;
+    else if (sameString(tok, "!"))
+        {
+        op = rqlOpNe;
+        skipOverRequired(tkz, "=");
+        }
+    else if (sameString(tok, ">"))
+        {
+        if (eatMatchingTok(tkz, "="))
+            op = rqlOpGe;
+        else
+            op = rqlOpGt;
+        }
+    else if (sameString(tok, "<"))
+        {
+        if (eatMatchingTok(tkz, "="))
+            op = rqlOpLe;
+        else
+            op = rqlOpLt;
+        }
+    else if (sameString(tok, "not"))
+        {
+        forceString = TRUE;
+        op = rqlOpLike;
+        needNot = TRUE;
+        skipOverRequired(tkz, "like");
+        }
+    else if (sameString(tok, "like"))
+        {
+        forceString = TRUE;
+        op = rqlOpLike;
+        }
+    else
+        {
+        tokenizerReuse(tkz);
+        return p;
+        }
+    struct rqlParse *r = rqlParseUnaryMinus(tkz);
+    AllocVar(p);
+    p->op = op;
+    p->type = rqlTypeBoolean;
+
+    /* Now force children to be the same type, inserting casts if need be. */
+    if (forceString)
+        {
+        if (l->type != rqlTypeString || r->type != rqlTypeString)
+            {
+            errAbort("Expecting string type around comparison line %d of %s",
+                tkz->lf->lineIx, tkz->lf->fileName);
+            }
+        }
+    else
+        {
+        enum rqlType childType = commonTypeForBop(l->type, r->type);
+        l = rqlParseCoerce(l, childType);
+        r = rqlParseCoerce(r, childType);
+        }
+
+    /* Now hang children onto node. */
+    p->children = l;
+    l->next = r;
+
+    /* Put in a not around self if need be. */
+    if (needNot)
+        {
+        struct rqlParse *n;
+        AllocVar(n);
+        n->op = rqlOpNot;
+        n->type = rqlTypeBoolean;
+        n->children = p;
+        p = n;
+        }
+    }
+return p;
+}
+
+static struct rqlParse *rqlParseAnd(struct tokenizer *tkz)
+/* Parse one or more comparisons joined by "and".  A lone comparison is
+ * returned coerced to boolean.  Otherwise a single rqlOpAnd node is
+ * returned with every operand, coerced to boolean, as a child. */
+{
+struct rqlParse *p = rqlParseCoerce(rqlParseCmp(tkz), rqlTypeBoolean);
+struct rqlParse *parent = NULL;
+char *tok;
+
+while ((tok = tokenizerNext(tkz)) != NULL && sameString(tok, "and"))
+    {
+    if (parent == NULL)
+        {
+        /* First "and" seen: make the node and adopt the left operand. */
+        AllocVar(parent);
+        parent->op = rqlOpAnd;
+        parent->type = rqlTypeBoolean;
+        parent->children = p;
+        p = parent;
+        }
+
+    struct rqlParse *r = rqlParseCoerce(rqlParseCmp(tkz), rqlTypeBoolean);
+    slAddTail(&parent->children, r);
+    }
+
+/* The token that ended the loop is not part of this level; give it back. */
+tokenizerReuse(tkz);
+return p;
+}
+
+static struct rqlParse *rqlParseOr(struct tokenizer *tkz)
+/* Parse one or more "and" level expressions joined by "or".  A lone
+ * expression is returned coerced to boolean.  Otherwise a single rqlOpOr
+ * node is returned with every operand, coerced to boolean, as a child. */
+{
+struct rqlParse *p = rqlParseCoerce(rqlParseAnd(tkz), rqlTypeBoolean);
+struct rqlParse *parent = NULL;
+char *tok;
+
+while ((tok = tokenizerNext(tkz)) != NULL && sameString(tok, "or"))
+    {
+    if (parent == NULL)
+        {
+        /* First "or" seen: make the node and adopt the left operand. */
+        AllocVar(parent);
+        parent->op = rqlOpOr;
+        parent->type = rqlTypeBoolean;
+        parent->children = p;
+        p = parent;
+        }
+
+    struct rqlParse *r = rqlParseCoerce(rqlParseAnd(tkz), rqlTypeBoolean);
+    slAddTail(&parent->children, r);
+    }
+
+/* The token that ended the loop is not part of this level; give it back. */
+tokenizerReuse(tkz);
+return p;
+}
+
+struct rqlParse *rqlParseExpression(struct tokenizer *tkz)
+/* Parse out a clause, usually a where clause.  Parsing starts at the "or" level, via rqlParseOr(). */
+{
+return rqlParseOr(tkz);
+}
+
+static char *rqlParseFieldSpec(struct tokenizer *tkz, struct dyString *buf)
+/* Return a field spec, which may contain * and ?.  Tokens are appended to
+ * buf while they start with a letter, '_', '*' or '?' and, after the first
+ * one, have no leading spaces.  Returns buf->string; aborts if buf is empty. */
+{
+boolean firstTime = TRUE;
+dyStringClear(buf);
+for (;;)
+    {
+    char *tok = tokenizerNext(tkz);
+    if (tok == NULL)
+        break;
+
+    /* Use unsigned char: isalpha() on a negative char value is undefined. */
+    unsigned char c = *tok;
+    boolean isSpecChar = (c == '?' || c == '*' || isalpha(c) || c == '_');
+    if (!isSpecChar)
+        {
+        /* Token can't be part of a field spec; leave it for the caller. */
+        tokenizerReuse(tkz);
+        break;
+        }
+
+    /* After the first token, only tokens without leading spaces belong. */
+    if (!firstTime && tkz->leadingSpaces != 0)
+        {
+        tokenizerReuse(tkz);
+        break;
+        }
+
+    dyStringAppend(buf, tok);
+    firstTime = FALSE;
+    }
+
+if (buf->stringSize == 0)
+    errAbort("Expecting field name line %d of %s", tkz->lf->lineIx, tkz->lf->fileName);
+return buf->string;
+}
+
+struct rqlStatement *rqlStatementParse(struct lineFile *lf)
+/* Parse an RQL statement out of text.  The statement is either "select"
+ * with a comma-separated field list or "count", optionally followed by a
+ * where clause.  Aborts on anything else.  The tokenizer is not freed here. */
+{
+struct tokenizer *tkz = tokenizerOnLineFile(lf);
+tkz->uncommentShell = TRUE;
+tkz->uncommentC = TRUE;
+tkz->leaveQuotes = TRUE;
+struct rqlStatement *rql;
+AllocVar(rql);
+rql->command = cloneString(tokenizerMustHaveNext(tkz));
+if (sameString(rql->command, "select"))
+    {
+    struct dyString *buf = dyStringNew(0);
+    struct slName *list = slNameNew(rqlParseFieldSpec(tkz, buf));
+    for (;;)
+        {
+        /* Parse out comma-separated field list. */
+        char *comma = tokenizerNext(tkz);
+        if (comma == NULL || comma[0] != ',')
+            {
+            tokenizerReuse(tkz);
+            break;
+            }
+        slNameAddHead(&list, rqlParseFieldSpec(tkz, buf));
+        }
+    slReverse(&list);
+    rql->fieldList = list;
+    dyStringFree(&buf);
+    }
+else if (sameString(rql->command, "count"))
+    {
+    /* No parameters to count. */
+    }
+else
+    errAbort("Unknown RQL command '%s' line %d of %s\n", rql->command, lf->lineIx, lf->fileName);
+
+char *where = tokenizerNext(tkz);
+if (where != NULL)
+    {
+    if (!sameString(where, "where"))
+        errAbort("Unknown clause '%s' line %d of %s", where, lf->lineIx, lf->fileName);
+    rql->whereClause = rqlParseExpression(tkz);
+    }
+
+char *extra = tokenizerNext(tkz);
+if (extra != NULL)
+    errAbort("Extra stuff starting with '%s' past end of statement line %d of %s",
+        extra, lf->lineIx, lf->fileName);
+return rql;
+}
+
+void rqlStatementDump(struct rqlStatement *rql, FILE *f)
+/* Write statement to f: command, comma-separated field list if any, then the where clause parse tree if any. */
+{
+struct slName *field = rql->fieldList;
+fprintf(f, "%s", rql->command);
+if (field != NULL)
+    {
+    fprintf(f, " ");
+    fprintf(f, "%s", field->name);
+    while ((field = field->next) != NULL)
+        fprintf(f, ",%s", field->name);
+    }
+if (rql->whereClause)
+    {
+    fprintf(f, " where:\n");
+    rqlParseDump(rql->whereClause, 0, f);
+    }
+fprintf(f, "\n");
+}
+