src/utils/raSqlQuery/rqlParse.c 1.1

1.1 2009/11/20 07:41:56 kent
Adding in merge option. Supporting 'not like' operation. Splitting into modules.
Index: src/utils/raSqlQuery/rqlParse.c
===================================================================
RCS file: src/utils/raSqlQuery/rqlParse.c
diff -N src/utils/raSqlQuery/rqlParse.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/utils/raSqlQuery/rqlParse.c	20 Nov 2009 07:41:56 -0000	1.1
@@ -0,0 +1,572 @@
+/* rqlParse - parse a restricted sql-like query language */
+#include "common.h"
+#include "linefile.h"
+#include "hash.h"
+#include "dystring.h"
+#include "tokenizer.h"
+#include "sqlNum.h"
+#include "raRecord.h"
+#include "rql.h"
+
+char *rqlOpToString(enum rqlOp op)
+/* Map a parse op code to its printable name.  The result is a string
+ * literal, so it must not be freed or modified.  Codes without a case of
+ * their own come back as "rqlOpUnknown". */
+{
+char *name = "rqlOpUnknown";
+
+switch (op)
+    {
+    case rqlOpLiteral:
+        name = "rqlOpLiteral"; break;
+    case rqlOpSymbol:
+        name = "rqlOpSymbol"; break;
+    case rqlOpEq:
+        name = "rqlOpEq"; break;
+    case rqlOpNe:
+        name = "rqlOpNe"; break;
+    case rqlOpAnd:
+        name = "rqlOpAnd"; break;
+    case rqlOpOr:
+        name = "rqlOpOr"; break;
+    case rqlOpStringToBoolean:
+        name = "rqlOpStringToBoolean"; break;
+    case rqlOpIntToBoolean:
+        name = "rqlOpIntToBoolean"; break;
+    case rqlOpDoubleToBoolean:
+        name = "rqlOpDoubleToBoolean"; break;
+    case rqlOpStringToInt:
+        name = "rqlOpStringToInt"; break;
+    case rqlOpStringToDouble:
+        name = "rqlOpStringToDouble"; break;
+    case rqlOpBooleanToInt:
+        name = "rqlOpBooleanToInt"; break;
+    case rqlOpBooleanToDouble:
+        name = "rqlOpBooleanToDouble"; break;
+    case rqlOpIntToDouble:
+        name = "rqlOpIntToDouble"; break;
+    case rqlOpUnaryMinusDouble:
+        name = "rqlOpUnaryMinusDouble"; break;
+    case rqlOpGt:
+        name = "rqlOpGt"; break;
+    case rqlOpLt:
+        name = "rqlOpLt"; break;
+    case rqlOpGe:
+        name = "rqlOpGe"; break;
+    case rqlOpLe:
+        name = "rqlOpLe"; break;
+    case rqlOpLike:
+        name = "rqlOpLike"; break;
+    case rqlOpNot:
+        name = "rqlOpNot"; break;
+    default:
+        break;
+    }
+return name;
+}
+
+void rqlValDump(union rqlVal val, enum rqlType type, FILE *f)
+/* Write val to f, formatted according to type: booleans as true/false,
+ * strings verbatim, ints with %d and doubles with %f.  Nothing is written
+ * for a type outside these four. */
+{
+if (type == rqlTypeBoolean)
+    {
+    char *word = "false";
+    if (val.b)
+        word = "true";
+    fprintf(f, "%s", word);
+    }
+else if (type == rqlTypeString)
+    fprintf(f, "%s", val.s);
+else if (type == rqlTypeInt)
+    fprintf(f, "%d", val.i);
+else if (type == rqlTypeDouble)
+    fprintf(f, "%f", val.x);
+}
+
+void rqlParseDump(struct rqlParse *p, int depth, FILE *f)
+/* Print p (op name, then value) on one indented line of f, then its children. */
+{
+struct rqlParse *kid = p->children;
+spaceOut(f, 3*depth);
+fprintf(f, "%s ", rqlOpToString(p->op));
+rqlValDump(p->val, p->type, f);
+fprintf(f, "\n");
+for ( ; kid != NULL; kid = kid->next)
+    rqlParseDump(kid, depth+1, f);
+}
+
+static void expectingGot(struct tokenizer *tkz, char *expecting, char *got)
+/* Abort, reporting what was wanted, what was found, and where in the input. */
+{
+struct lineFile *lf = tkz->lf;
+errAbort("Expecting %s, got %s, line %d of %s", expecting, got, lf->lineIx, lf->fileName);
+}
+
+static void skipOverRequired(struct tokenizer *tkz, char *expecting)
+/* Consume the next token, aborting unless it is exactly expecting. */
+{
+tokenizerMustHaveNext(tkz);
+if (sameString(tkz->string, expecting))
+    return;
+expectingGot(tkz, expecting, tkz->string);
+}
+
+struct rqlParse *rqlParseExpression(struct tokenizer *tkz);
+/* Forward declaration: rqlParseAtom calls this for parenthesized expressions. */
+
+static struct rqlParse *rqlParseAtom(struct tokenizer *tkz)
+/* Return lowest level node: a string literal, symbol, number, or parenthesized
+ * expression.  Aborts on any other token. */
+{
+char *tok = tokenizerMustHaveNext(tkz);
+struct rqlParse *p;
+unsigned char c = tok[0];   /* Unsigned so isalpha/isdigit never see a negative. */
+if (c == '(')
+    {
+    p = rqlParseExpression(tkz);    /* Sub-expression supplies the node itself. */
+    skipOverRequired(tkz, ")");
+    return p;
+    }
+AllocVar(p);
+if (c == '\'' || c == '"')
+    {
+    p->op = rqlOpLiteral;
+    p->type = rqlTypeString;
+    int len = strlen(tok+1);
+    p->val.s = cloneStringZ(tok+1, len-1);      /* Leave off the closing quote. */
+    }
+else if (isalpha(c) || c == '_')
+    {
+    p->op = rqlOpSymbol;
+    p->type = rqlTypeString;    /* String until promoted at least. */
+    p->val.s = cloneString(tok);
+    }
+else if (isdigit(c))
+    {
+    p->op = rqlOpLiteral;
+    p->type = rqlTypeInt;
+    p->val.i = sqlUnsigned(tok);
+    if ((tok = tokenizerNext(tkz)) != NULL)
+        {
+        if (tok[0] == '.')
+            {
+            char buf[32];       /* Integer part, '.', then the following token. */
+            tok = tokenizerMustHaveNext(tkz);
+            safef(buf, sizeof(buf), "%d.%s", p->val.i, tok);
+            p->type = rqlTypeDouble;
+            p->val.x = sqlDouble(buf);
+            }
+        else
+            tokenizerReuse(tkz);
+        }
+    }
+else
+    errAbort("Unexpected %s line %d of %s", tok, tkz->lf->lineIx, tkz->lf->fileName);
+return p;
+}
+
+static enum rqlType commonTypeForBop(enum rqlType left, enum rqlType right)
+/* Pick the type both operands of a binary operation get coerced to.  Equal
+ * types stay as they are; otherwise double wins over int, int over boolean,
+ * and boolean over string. */
+{
+static const enum rqlType byRank[] =
+    {rqlTypeDouble, rqlTypeInt, rqlTypeBoolean, rqlTypeString};
+int rankCount = sizeof(byRank) / sizeof(byRank[0]);
+int i;
+if (left == right)
+    return left;
+for (i = 0; i < rankCount; ++i)
+    {
+    if (left == byRank[i] || right == byRank[i])
+        return byRank[i];
+    }
+errAbort("Can't find commonTypeForBop");
+return rqlTypeInt;
+}
+
+static enum rqlOp booleanCastOp(enum rqlType oldType)
+/* Look up the op that casts a value of oldType to boolean.  String, int and
+ * double sources are supported; anything else triggers internalErr() and
+ * falls back to rqlOpUnknown. */
+{
+enum rqlOp castOp = rqlOpUnknown;
+if (oldType == rqlTypeString)
+    castOp = rqlOpStringToBoolean;
+else if (oldType == rqlTypeInt)
+    castOp = rqlOpIntToBoolean;
+else if (oldType == rqlTypeDouble)
+    castOp = rqlOpDoubleToBoolean;
+else
+    internalErr();
+return castOp;
+}
+
+static enum rqlOp intCastOp(enum rqlType oldType)
+/* Look up the op that casts a value of oldType to int.  String and boolean
+ * sources are supported; anything else triggers internalErr() and falls
+ * back to rqlOpUnknown. */
+{
+enum rqlOp castOp = rqlOpUnknown;
+if (oldType == rqlTypeString)
+    castOp = rqlOpStringToInt;
+else if (oldType == rqlTypeBoolean)
+    castOp = rqlOpBooleanToInt;
+else
+    internalErr();
+return castOp;
+}
+
+static enum rqlOp doubleCastOp(enum rqlType oldType)
+/* Look up the op that casts a value of oldType to double.  String, boolean
+ * and int sources are supported; anything else triggers internalErr() and
+ * falls back to rqlOpUnknown. */
+{
+enum rqlOp castOp = rqlOpUnknown;
+if (oldType == rqlTypeString)
+    castOp = rqlOpStringToDouble;
+else if (oldType == rqlTypeBoolean)
+    castOp = rqlOpBooleanToDouble;
+else if (oldType == rqlTypeInt)
+    castOp = rqlOpIntToDouble;
+else
+    internalErr();
+return castOp;
+}
+
+
+static struct rqlParse *rqlParseCoerce(struct rqlParse *p, enum rqlType type)
+/* Return p itself when it already has the wanted type.  Otherwise return a
+ * newly allocated conversion node of that type with p as its only child.
+ * Only boolean, int and double targets are supported; any other target is
+ * reported through internalErr(). */
+{
+struct rqlParse *wrapper;
+
+/* Nothing to do when the node is already of the requested type. */
+if (p->type == type)
+    return p;
+
+/* Build the conversion node, then pick its op from the source type. */
+AllocVar(wrapper);
+wrapper->type = type;
+wrapper->children = p;
+if (type == rqlTypeBoolean)
+    wrapper->op = booleanCastOp(p->type);
+else if (type == rqlTypeInt)
+    wrapper->op = intCastOp(p->type);
+else if (type == rqlTypeDouble)
+    wrapper->op = doubleCastOp(p->type);
+else
+    {
+    /* No cast op exists for this target type. */
+    internalErr();
+    }
+return wrapper;
+}
+
+static struct rqlParse *rqlParseUnaryMinus(struct tokenizer *tkz)
+/* Parse an atom with an optional leading '-'.  Without the minus the atom is
+ * returned as is; with it, the atom is coerced to double and wrapped in an
+ * rqlOpUnaryMinusDouble node. */
+{
+char *first = tokenizerMustHaveNext(tkz);
+struct rqlParse *operand, *negate;
+
+if (first[0] != '-')
+    {
+    tokenizerReuse(tkz);
+    return rqlParseAtom(tkz);
+    }
+
+operand = rqlParseCoerce(rqlParseAtom(tkz), rqlTypeDouble);
+AllocVar(negate);
+negate->children = operand;
+negate->type = rqlTypeDouble;
+negate->op = rqlOpUnaryMinusDouble;
+return negate;
+}
+
+static boolean eatMatchingTok(struct tokenizer *tkz, char *s)
+/* Read the next token.  Return TRUE, leaving it consumed, when it equals s;
+ * otherwise call tokenizerReuse() and return FALSE. */
+{
+char *next = tokenizerNext(tkz);
+boolean matched = FALSE;
+if (next != NULL && sameString(next, s))
+    matched = TRUE;
+else
+    tokenizerReuse(tkz);
+return matched;
+}
+
+static struct rqlParse *rqlParseCmp(struct tokenizer *tkz)
+/* Parse out comparison:  operand [op operand], where op is one of
+ * =  !=  >  >=  <  <=  like  'not like'.  A lone operand is returned as is.
+ * Otherwise the result is a boolean node with the two operands as children,
+ * wrapped in an rqlOpNot node in the 'not like' case. */
+{
+struct rqlParse *l = rqlParseUnaryMinus(tkz);
+struct rqlParse *p = l;
+char *tok = tokenizerNext(tkz);
+boolean forceString = FALSE;
+boolean needNot = FALSE;
+if (tok != NULL)
+    {
+    enum rqlOp op = rqlOpUnknown;
+    if (sameString(tok, "="))
+        op = rqlOpEq;
+    else if (sameString(tok, "!"))
+        {
+        op = rqlOpNe;
+        skipOverRequired(tkz, "=");
+        }
+    else if (sameString(tok, ">"))
+        {
+        if (eatMatchingTok(tkz, "="))
+            op = rqlOpGe;
+        else
+            op = rqlOpGt;
+        }
+    else if (sameString(tok, "<"))
+        {
+        /* Mirror image of '>' above: "<=" is Le and a bare "<" is Lt. */
+        if (eatMatchingTok(tkz, "="))
+            op = rqlOpLe;
+        else
+            op = rqlOpLt;
+        }
+    else if (sameString(tok, "not"))
+        {
+        forceString = TRUE;
+        op = rqlOpLike;
+        needNot = TRUE;
+        skipOverRequired(tkz, "like");
+        }
+    else if (sameString(tok, "like"))
+        {
+        forceString = TRUE;
+        op = rqlOpLike;
+        }
+    else
+        {
+        tokenizerReuse(tkz);    /* Not a comparison operator after all. */
+        return p;
+        }
+    struct rqlParse *r = rqlParseUnaryMinus(tkz);
+    AllocVar(p);
+    p->op = op;
+    p->type = rqlTypeBoolean;
+
+    /* 'like' needs both sides to be strings; others get cast to a common type. */
+    if (forceString)
+        {
+        if (l->type != rqlTypeString || r->type != rqlTypeString)
+            errAbort("Expecting string type around comparison line %d of %s",
+                tkz->lf->lineIx, tkz->lf->fileName);
+        }
+    else
+        {
+        enum rqlType childType = commonTypeForBop(l->type, r->type);
+        l = rqlParseCoerce(l, childType);
+        r = rqlParseCoerce(r, childType);
+        }
+
+    /* Now hang children onto node. */
+    p->children = l;
+    l->next = r;
+
+    /* Put in a not around self if need be. */
+    if (needNot)
+        {
+        struct rqlParse *n;
+        AllocVar(n);
+        n->op = rqlOpNot;
+        n->type = rqlTypeBoolean;
+        n->children = p;
+        p = n;
+        }
+    }
+return p;
+}
+
+static struct rqlParse *rqlParseAnd(struct tokenizer *tkz)
+/* Parse one or more comparisons joined by 'and'.  A single comparison is
+ * returned coerced to boolean; two or more become the children of one
+ * rqlOpAnd node. */
+{
+struct rqlParse *result = rqlParseCoerce(rqlParseCmp(tkz), rqlTypeBoolean);
+struct rqlParse *andNode = NULL;
+char *word;
+
+while ((word = tokenizerNext(tkz)) != NULL && sameString(word, "and"))
+    {
+    struct rqlParse *term;
+    if (andNode == NULL)
+        {
+        /* First 'and' seen: what we have so far becomes the first child. */
+        AllocVar(andNode);
+        andNode->op = rqlOpAnd;
+        andNode->type = rqlTypeBoolean;
+        andNode->children = result;
+        result = andNode;
+        }
+    term = rqlParseCoerce(rqlParseCmp(tkz), rqlTypeBoolean);
+    slAddTail(&andNode->children, term);
+    }
+
+/* Reached when the chain ends on another token or on end of input. */
+tokenizerReuse(tkz);
+return result;
+}
+
+static struct rqlParse *rqlParseOr(struct tokenizer *tkz)
+/* Parse one or more 'and' groups joined by 'or'.  A single group is
+ * returned coerced to boolean; two or more become the children of one
+ * rqlOpOr node. */
+{
+struct rqlParse *result = rqlParseCoerce(rqlParseAnd(tkz), rqlTypeBoolean);
+struct rqlParse *orNode = NULL;
+char *word;
+
+while ((word = tokenizerNext(tkz)) != NULL && sameString(word, "or"))
+    {
+    struct rqlParse *term;
+    if (orNode == NULL)
+        {
+        /* First 'or' seen: what we have so far becomes the first child. */
+        AllocVar(orNode);
+        orNode->op = rqlOpOr;
+        orNode->type = rqlTypeBoolean;
+        orNode->children = result;
+        result = orNode;
+        }
+    term = rqlParseCoerce(rqlParseAnd(tkz), rqlTypeBoolean);
+    slAddTail(&orNode->children, term);
+    }
+
+/* Reached when the chain ends on another token or on end of input. */
+tokenizerReuse(tkz);
+return result;
+}
+
+struct rqlParse *rqlParseExpression(struct tokenizer *tkz)
+/* Parse out an expression, usually a where clause, starting at the 'or' level. */
+{
+return rqlParseOr(tkz);
+}
+
+static char *rqlParseFieldSpec(struct tokenizer *tkz, struct dyString *buf)
+/* Return a field spec, which may contain * and ?. Put results in buf, and
+ * return buf->string.  A spec is built from consecutive tokens that start
+ * with a letter, '_', '*' or '?' and, after the first, have a leadingSpaces
+ * count of zero.  Aborts if that leaves buf empty. */
+{
+boolean firstTime = TRUE;
+dyStringClear(buf);
+for (;;)
+    {
+    char *tok = tokenizerNext(tkz);
+    if (tok == NULL)
+        break;
+
+    /* isalpha() needs an unsigned char value; a plain char may be negative. */
+    unsigned char c = *tok;
+    boolean isFieldChar = (c == '?' || c == '*' || isalpha(c) || c == '_');
+    if (!isFieldChar)
+        {
+        /* Not part of a field spec. */
+        tokenizerReuse(tkz);
+        break;
+        }
+    if (!firstTime && tkz->leadingSpaces != 0)
+        {
+        /* White space came before this token, so the spec has ended. */
+        tokenizerReuse(tkz);
+        break;
+        }
+
+    dyStringAppend(buf, tok);
+    firstTime = FALSE;
+    }
+
+if (buf->stringSize == 0)
+    errAbort("Expecting field name line %d of %s", tkz->lf->lineIx, tkz->lf->fileName);
+return buf->string;
+}
+
+struct rqlStatement *rqlStatementParse(struct lineFile *lf)
+/* Parse an RQL statement out of text:  'select' field[,field...] or 'count',
+ * optionally followed by 'where' expression.  Aborts on anything else.
+ * NOTE(review): tkz is never freed here; confirm who owns lf before adding it. */
+{
+struct tokenizer *tkz = tokenizerOnLineFile(lf);
+tkz->uncommentShell = TRUE;
+tkz->uncommentC = TRUE;
+tkz->leaveQuotes = TRUE;
+struct rqlStatement *rql;
+AllocVar(rql);
+rql->command = cloneString(tokenizerMustHaveNext(tkz));
+if (sameString(rql->command, "select"))
+    {
+    struct dyString *buf = dyStringNew(0);
+    struct slName *list = slNameNew(rqlParseFieldSpec(tkz, buf));
+    for (;;)
+        {
+        /* Parse out comma-separated field list. */
+        char *comma = tokenizerNext(tkz);
+        if (comma == NULL || comma[0] != ',')
+            {
+            tokenizerReuse(tkz);
+            break;
+            }
+        slNameAddHead(&list, rqlParseFieldSpec(tkz, buf));
+        }
+    slReverse(&list);       /* Fields were pushed on the head; restore input order. */
+    rql->fieldList = list;
+    dyStringFree(&buf);
+    }
+else if (sameString(rql->command, "count"))
+    {
+    /* No parameters to count. */
+    }
+else
+    errAbort("Unknown RQL command '%s' line %d of %s", rql->command, lf->lineIx, lf->fileName);
+
+char *where = tokenizerNext(tkz);
+if (where != NULL)
+    {
+    if (!sameString(where, "where"))
+        errAbort("Unknown clause '%s' line %d of %s", where, lf->lineIx, lf->fileName);
+    rql->whereClause = rqlParseExpression(tkz);
+    }
+
+char *extra = tokenizerNext(tkz);
+if (extra != NULL)
+    errAbort("Extra stuff starting with '%s' past end of statement line %d of %s",
+        extra, lf->lineIx, lf->fileName);
+return rql;
+}
+
+void rqlStatementDump(struct rqlStatement *rql, FILE *f)
+/* Write the statement to f: command, comma separated field list if any,
+ * then the where clause (if any) as a parse tree dump, then a newline. */
+{
+struct slName *field;
+char *separator = " ";
+fprintf(f, "%s", rql->command);
+for (field = rql->fieldList; field != NULL; field = field->next)
+    {
+    fprintf(f, "%s%s", separator, field->name);
+    separator = ",";
+    }
+if (rql->whereClause != NULL)
+    {
+    fprintf(f, " where:\n");
+    rqlParseDump(rql->whereClause, 0, f);
+    }
+fprintf(f, "\n");
+}
+