src/utils/raSqlQuery/rqlParse.c 1.9

1.9 2009/12/02 19:11:55 kent
Librarifying RQL parser and interpreter.
Index: src/utils/raSqlQuery/rqlParse.c
===================================================================
RCS file: src/utils/raSqlQuery/rqlParse.c
diff -N src/utils/raSqlQuery/rqlParse.c
--- src/utils/raSqlQuery/rqlParse.c	22 Nov 2009 05:28:52 -0000	1.8
+++ /dev/null	1 Jan 1970 00:00:00 -0000
@@ -1,643 +0,0 @@
-/* rqlParse - parse a restricted sql-like query language */
-#include "common.h"
-#include "linefile.h"
-#include "hash.h"
-#include "dystring.h"
-#include "tokenizer.h"
-#include "sqlNum.h"
-#include "raRecord.h"
-#include "rql.h"
-
-static char const rcsid[] = "$Id$";
-
-char *rqlOpToString(enum rqlOp op)
-/* Return string representation of parse op. */
-{
-switch (op)
-    {
-    case rqlOpLiteral:
-	return "rqlOpLiteral";
-    case rqlOpSymbol:
-	return "rqlOpSymbol";
-    case rqlOpEq:
-	return "rqlOpEq";
-    case rqlOpNe:
-	return "rqlOpNe";
-    case rqlOpAnd:
-	return "rqlOpAnd";
-    case rqlOpOr:
-	return "rqlOpOr";
-    
-    case rqlOpStringToBoolean:
-        return "rqlOpStringToBoolean";
-    case rqlOpIntToBoolean:
-        return "rqlOpIntToBoolean";
-    case rqlOpDoubleToBoolean:
-        return "rqlOpDoubleToBoolean";
-    case rqlOpStringToInt:
-        return "rqlOpStringToInt";
-    case rqlOpStringToDouble:
-        return "rqlOpStringToDouble";
-    case rqlOpBooleanToInt:
-        return "rqlOpBooleanToInt";
-    case rqlOpBooleanToDouble:
-        return "rqlOpBooleanToDouble";
-    case rqlOpIntToDouble:
-        return "rqlOpIntToDouble";
-
-    case rqlOpUnaryMinusDouble:
-        return "rqlOpUnaryMinusDouble";
-
-    case rqlOpGt:
-        return "rqlOpGt";
-    case rqlOpLt:
-        return "rqlOpLt";
-    case rqlOpGe:
-        return "rqlOpGe";
-    case rqlOpLe:
-        return "rqlOpLe";
-    case rqlOpLike:
-	return "rqlOpLike";
-
-    case rqlOpNot:
-        return "rqlOpNot";
-
-    default:
-	return "rqlOpUnknown";
-    }
-}
-
-void rqlValDump(union rqlVal val, enum rqlType type, FILE *f)
-/* Write val to f, choosing the union member and format from type:
- * booleans as "true"/"false", strings with %s, ints with %d and doubles
- * with %f.  Nothing is written for any other type value. */
-{
-if (type == rqlTypeBoolean)
-    {
-    char *word = "false";
-    if (val.b)
-        word = "true";
-    fprintf(f, "%s", word);
-    }
-else if (type == rqlTypeString)
-    fprintf(f, "%s", val.s);
-else if (type == rqlTypeInt)
-    fprintf(f, "%d", val.i);
-else if (type == rqlTypeDouble)
-    fprintf(f, "%f", val.x);
-}
-
-void rqlParseDump(struct rqlParse *p, int depth, FILE *f)
-/* Print the parse tree rooted at p to f, one node per line.  For each node
- * spaceOut is called with 3*depth (presumably to indent), then the op name and
- * the node's value are written.  Children follow recursively at depth+1. */
-{
-struct rqlParse *kid = p->children;
-spaceOut(f, 3*depth);
-fprintf(f, "%s ", rqlOpToString(p->op));
-rqlValDump(p->val, p->type, f);
-fprintf(f, "\n");
-while (kid != NULL)
-    {
-    rqlParseDump(kid, depth+1, f);
-    kid = kid->next;
-    }
-}
-
-static void expectingGot(struct tokenizer *tkz, char *expecting, char *got)
-/* Call errAbort with a message naming what was expected, what was found
- * instead, and the line number and file name of the tokenizer's input. */
-{
-struct lineFile *lf = tkz->lf;
-errAbort("Expecting %s, got %s, line %d of %s",
-    expecting, got, lf->lineIx, lf->fileName);
-}
-
-static void skipOverRequired(struct tokenizer *tkz, char *expecting)
-/* Advance to the next token, which must exist, and check that it equals
- * 'expecting'.  A mismatch is reported through expectingGot. */
-{
-tokenizerMustHaveNext(tkz);
-if (sameString(tkz->string, expecting))
-    return;
-expectingGot(tkz, expecting, tkz->string);
-}
-
-
-struct rqlParse *rqlParseExpression(struct tokenizer *tkz);
-/* Parse out a clause, usually a where clause.  Declared ahead of its definition because rqlParseAtom calls it for parenthesized sub-expressions. */
-
-static struct rqlParse *rqlParseAtom(struct tokenizer *tkz)
-/* Parse the lowest-level element of an expression and return its node:
- *   - a quoted string literal (stored without its quotes),
- *   - a symbol starting with a letter or underscore (typed as string until
- *     promoted),
- *   - an unsigned integer literal, which becomes a double when followed by
- *     a '.' token and a fraction token,
- *   - a parenthesized sub-expression, returned as parsed by rqlParseExpression.
- * Any other leading character is reported through errAbort.  A node is only
- * allocated in the branches that fill one in, so the parenthesized case does
- * not leak an unused node. */
-{
-char *tok = tokenizerMustHaveNext(tkz);
-struct rqlParse *p = NULL;
-char c = tok[0];
-if (c == '\'' || c == '"')
-    {
-    /* The token starts with its quote; copy the text after it, dropping the
-     * last character (assumed to be the closing quote). */
-    AllocVar(p);
-    p->op = rqlOpLiteral;
-    p->type = rqlTypeString;
-    int len = strlen(tok+1);
-    p->val.s = cloneStringZ(tok+1, len-1);
-    }
-else if (isalpha((unsigned char)c) || c == '_')
-    {
-    AllocVar(p);
-    p->op = rqlOpSymbol;
-    p->type = rqlTypeString;	/* String until promoted at least. */
-    p->val.s = cloneString(tok);
-    }
-else if (isdigit((unsigned char)c))
-    {
-    AllocVar(p);
-    p->op = rqlOpLiteral;
-    p->type = rqlTypeInt;
-    p->val.i = sqlUnsigned(tok);
-    if ((tok = tokenizerNext(tkz)) != NULL)
-        {
-        if (tok[0] == '.')
-            {
-            /* Glue "<int>.<fraction>" back together and reparse it as a double. */
-            char buf[32];
-            tok = tokenizerMustHaveNext(tkz);
-            safef(buf, sizeof(buf), "%d.%s", p->val.i, tok);
-            p->type = rqlTypeDouble;
-            p->val.x = sqlDouble(buf);
-            }
-        else
-            tokenizerReuse(tkz);
-        }
-    }
-else if (c == '(')
-    {
-    p = rqlParseExpression(tkz);
-    skipOverRequired(tkz, ")");
-    }
-else
-    {
-    errAbort("Unexpected %s line %d of %s", tok, tkz->lf->lineIx, tkz->lf->fileName);
-    }
-return p;
-}
-
-static enum rqlType commonTypeForBop(enum rqlType left, enum rqlType right)
-/* Pick the type both operands of a binary operation should be converted to.
- * Equal types are returned as is.  Otherwise the first type in the order
- * double, int, boolean, string that either operand has is returned. */
-{
-static const enum rqlType priority[] =
-    {rqlTypeDouble, rqlTypeInt, rqlTypeBoolean, rqlTypeString};
-int i;
-if (left == right)
-    return left;
-for (i = 0; i < (int)(sizeof(priority)/sizeof(priority[0])); ++i)
-    {
-    if (left == priority[i] || right == priority[i])
-        return priority[i];
-    }
-errAbort("Can't find commonTypeForBop");
-return rqlTypeInt;
-}
-
-static enum rqlOp booleanCastOp(enum rqlType oldType)
-/* Look up the op that converts a value of oldType to boolean.  Only string,
- * int and double sources are handled; anything else calls internalErr. */
-{
-if (oldType == rqlTypeString)
-    return rqlOpStringToBoolean;
-if (oldType == rqlTypeInt)
-    return rqlOpIntToBoolean;
-if (oldType == rqlTypeDouble)
-    return rqlOpDoubleToBoolean;
-internalErr();
-return rqlOpUnknown;
-}
-
-static enum rqlOp intCastOp(enum rqlType oldType)
-/* Look up the op that converts a value of oldType to int.  Only string and
- * boolean sources are handled; anything else calls internalErr. */
-{
-if (oldType == rqlTypeString)
-    return rqlOpStringToInt;
-if (oldType == rqlTypeBoolean)
-    return rqlOpBooleanToInt;
-internalErr();
-return rqlOpUnknown;
-}
-
-static enum rqlOp doubleCastOp(enum rqlType oldType)
-/* Look up the op that converts a value of oldType to double.  String,
- * boolean and int sources are handled; anything else calls internalErr. */
-{
-enum rqlOp castOp = rqlOpUnknown;
-if (oldType == rqlTypeString)
-    castOp = rqlOpStringToDouble;
-else if (oldType == rqlTypeBoolean)
-    castOp = rqlOpBooleanToDouble;
-else if (oldType == rqlTypeInt)
-    castOp = rqlOpIntToDouble;
-else
-    internalErr();
-return castOp;
-}
-
-
-static struct rqlParse *rqlParseCoerce(struct rqlParse *p, enum rqlType type)
-/* Return a node of the requested type.  If p already has that type it is
- * returned untouched; otherwise a new conversion node is returned with p as
- * its only child.  Target types other than boolean, int and double call
- * internalErr. */
-{
-struct rqlParse *cast;
-if (p->type == type)
-    return p;
-AllocVar(cast);
-cast->children = p;
-cast->type = type;
-if (type == rqlTypeBoolean)
-    cast->op = booleanCastOp(p->type);
-else if (type == rqlTypeInt)
-    cast->op = intCastOp(p->type);
-else if (type == rqlTypeDouble)
-    cast->op = doubleCastOp(p->type);
-else
-    internalErr();
-return cast;
-}
-
-static struct rqlParse *rqlParseUnaryMinus(struct tokenizer *tkz)
-/* Parse an atom with an optional leading '-'.  Without the minus the token
- * is pushed back and the plain atom is returned.  With it, the atom is
- * coerced to double and returned as the child of an rqlOpUnaryMinusDouble
- * node. */
-{
-char *tok = tokenizerMustHaveNext(tkz);
-struct rqlParse *operand, *negate;
-if (tok[0] != '-')
-    {
-    tokenizerReuse(tkz);
-    return rqlParseAtom(tkz);
-    }
-operand = rqlParseCoerce(rqlParseAtom(tkz), rqlTypeDouble);
-AllocVar(negate);
-negate->op = rqlOpUnaryMinusDouble;
-negate->type = rqlTypeDouble;
-negate->children = operand;
-return negate;
-}
-
-static boolean eatMatchingTok(struct tokenizer *tkz, char *s)
-/* Read the next token and return TRUE if it equals s, leaving it consumed.
- * Otherwise, including at end of input, tokenizerReuse is called and FALSE
- * is returned. */
-{
-char *tok = tokenizerNext(tkz);
-boolean matched = (tok != NULL && sameString(tok, s));
-if (!matched)
-    tokenizerReuse(tkz);
-return matched;
-}
-
-static struct rqlParse *rqlParseCmp(struct tokenizer *tkz)
-/* Parse an operand optionally followed by a comparison:
- *     operand [ (= | != | > | >= | < | <= | like | not like) operand ]
- * Without a comparison operator the left operand is returned unchanged, and
- * a non-operator token is pushed back.  Otherwise a boolean node is returned
- * with the two operands as children.  For "not like" that node is wrapped in
- * an rqlOpNot node.  Operands of = != > >= < <= are coerced to a common type.
- * like and not like require both operands to be strings already and abort
- * otherwise. */
-{
-struct rqlParse *l = rqlParseUnaryMinus(tkz);
-struct rqlParse *p = l;
-char *tok = tokenizerNext(tkz);
-boolean forceString = FALSE;
-boolean needNot = FALSE;
-if (tok != NULL)
-    {
-    enum rqlOp op = rqlOpUnknown;
-    if (sameString(tok, "="))
-        {
-        op = rqlOpEq;
-        }
-    else if (sameString(tok, "!"))
-        {
-        /* "!=" arrives as two tokens, so the "=" is required next. */
-        op = rqlOpNe;
-        skipOverRequired(tkz, "=");
-        }
-    else if (sameString(tok, ">"))
-        {
-        if (eatMatchingTok(tkz, "="))
-            op = rqlOpGe;
-        else
-            op = rqlOpGt;
-        }
-    else if (sameString(tok, "<"))
-        {
-        /* "<=" is less-or-equal and a bare "<" is strictly-less. */
-        if (eatMatchingTok(tkz, "="))
-            op = rqlOpLe;
-        else
-            op = rqlOpLt;
-        }
-    else if (sameString(tok, "not"))
-        {
-        /* Only "not like" is valid here; parsed as like and negated below. */
-        forceString = TRUE;
-        op = rqlOpLike;
-        needNot = TRUE;
-        skipOverRequired(tkz, "like");
-        }
-    else if (sameString(tok, "like"))
-        {
-        forceString = TRUE;
-        op = rqlOpLike;
-        }
-    else
-        {
-        /* Not a comparison operator, so leave the token for the caller. */
-        tokenizerReuse(tkz);
-        return p;
-        }
-    struct rqlParse *r = rqlParseUnaryMinus(tkz);
-    AllocVar(p);
-    p->op = op;
-    p->type = rqlTypeBoolean;
-
-    /* Now force children to be the same type, inserting casts if need be. */
-    if (forceString)
-        {
-        if (l->type != rqlTypeString || r->type != rqlTypeString)
-            {
-            errAbort("Expecting string type around comparison line %d of %s",
-                tkz->lf->lineIx, tkz->lf->fileName);
-            }
-        }
-    else
-        {
-        enum rqlType childType = commonTypeForBop(l->type, r->type);
-        l = rqlParseCoerce(l, childType);
-        r = rqlParseCoerce(r, childType);
-        }
-
-    /* Now hang children onto node. */
-    p->children = l;
-    l->next = r;
-
-    /* Put in a not around self if need be. */
-    if (needNot)
-        {
-        struct rqlParse *n;
-        AllocVar(n);
-        n->op = rqlOpNot;
-        n->type = rqlTypeBoolean;
-        n->children = p;
-        p = n;
-        }
-    }
-return p;
-}
-
-static struct rqlParse *rqlParseNot(struct tokenizer *tkz)
-/* Parse a comparison with an optional leading "not".  With the "not", the
- * comparison is coerced to boolean and returned as the child of an rqlOpNot
- * node.  Without it the token is pushed back and the comparison is returned
- * as parsed.  At end of input no token is compared; control passes to
- * rqlParseCmp, whose first read uses tokenizerMustHaveNext. */
-{
-char *tok = tokenizerNext(tkz);
-/* tok is NULL when the input is exhausted, so check before comparing. */
-if (tok != NULL && sameString(tok, "not"))
-    {
-    struct rqlParse *p = rqlParseCoerce(rqlParseCmp(tkz), rqlTypeBoolean);
-    struct rqlParse *n;
-    AllocVar(n);
-    n->op = rqlOpNot;
-    n->type = rqlTypeBoolean;
-    n->children = p;
-    return n;
-    }
-else
-    {
-    tokenizerReuse(tkz);
-    return rqlParseCmp(tkz);
-    }
-}
-
-static struct rqlParse *rqlParseAnd(struct tokenizer *tkz)
-/* Parse one or more "not"-level terms joined by "and".  A single term is
- * returned coerced to boolean.  When at least one "and" is present, a single
- * rqlOpAnd node is returned with every term as a child, in source order. */
-{
-struct rqlParse *result = rqlParseCoerce(rqlParseNot(tkz), rqlTypeBoolean);
-struct rqlParse *andNode = NULL;
-char *tok;
-while ((tok = tokenizerNext(tkz)) != NULL && sameString(tok, "and"))
-    {
-    if (andNode == NULL)
-        {
-        /* First "and": wrap the term parsed so far in a new and-node. */
-        AllocVar(andNode);
-        andNode->op = rqlOpAnd;
-        andNode->type = rqlTypeBoolean;
-        andNode->children = result;
-        result = andNode;
-        }
-    struct rqlParse *term = rqlParseCoerce(rqlParseNot(tkz), rqlTypeBoolean);
-    slAddTail(&andNode->children, term);
-    }
-tokenizerReuse(tkz);
-return result;
-}
-
-static struct rqlParse *rqlParseOr(struct tokenizer *tkz)
-/* Parse one or more "and"-level terms joined by "or".  A single term is
- * returned coerced to boolean.  When at least one "or" is present, a single
- * rqlOpOr node is returned with every term as a child, in source order. */
-{
-struct rqlParse *result = rqlParseCoerce(rqlParseAnd(tkz), rqlTypeBoolean);
-struct rqlParse *orNode = NULL;
-char *tok;
-while ((tok = tokenizerNext(tkz)) != NULL && sameString(tok, "or"))
-    {
-    if (orNode == NULL)
-        {
-        /* First "or": wrap the term parsed so far in a new or-node. */
-        AllocVar(orNode);
-        orNode->op = rqlOpOr;
-        orNode->type = rqlTypeBoolean;
-        orNode->children = result;
-        result = orNode;
-        }
-    struct rqlParse *term = rqlParseCoerce(rqlParseAnd(tkz), rqlTypeBoolean);
-    slAddTail(&orNode->children, term);
-    }
-tokenizerReuse(tkz);
-return result;
-}
-
-struct rqlParse *rqlParseExpression(struct tokenizer *tkz)
-/* Parse a full expression, such as the body of a where clause, and return
- * its parse tree.  Parsing starts at the "or" level, which in turn descends
- * through "and", "not", comparison, unary minus and atom. */
-{
-struct rqlParse *tree = rqlParseOr(tkz);
-return tree;
-}
-
-static char *rqlParseFieldSpec(struct tokenizer *tkz, struct dyString *buf)
-/* Read a field spec, which may contain the wildcards * and ?, into buf and
- * return buf->string.  Consecutive tokens are concatenated while each one
- * starts with ?, *, a letter, _, / or . and, after the first, has no leading
- * spaces.  The first token that does not qualify is pushed back.  Aborts if
- * nothing was collected. */
-{
-boolean gotFirst = FALSE;
-char *tok;
-dyStringClear(buf);
-while ((tok = tokenizerNext(tkz)) != NULL)
-    {
-    char c = *tok;
-    boolean fieldStart = (c == '?' || c == '*' || isalpha(c) || c == '_' || c == '/' || c == '.');
-    /* Stop at a token that can't be part of a spec, or at one separated
-     * from the previous piece by whitespace. */
-    if (!fieldStart || (gotFirst && tkz->leadingSpaces != 0))
-        {
-        tokenizerReuse(tkz);
-        break;
-        }
-    dyStringAppend(buf, tok);
-    gotFirst = TRUE;
-    }
-if (buf->stringSize == 0)
-    errAbort("Expecting field name line %d of %s", tkz->lf->lineIx, tkz->lf->fileName);
-return buf->string;
-}
-
-struct rqlStatement *rqlStatementParse(struct lineFile *lf)
-/* Parse one RQL statement from lf and return it as a newly allocated
- * rqlStatement.  Forms handled:
- *     select field[,field...] [from table[,table...]] [where expression]
- *     select count(...) [from ...] [where ...]
- *     count [from ...] [where ...]
- * "select count(...)" is stored with command "count" and no field list.
- * The tokenizer is set up with uncommentShell, uncommentC and leaveQuotes
- * turned on.  Aborts on an unknown command, an unknown clause, or extra
- * tokens after the statement.  The tokenizer is not freed here. */
-{
-struct tokenizer *tkz = tokenizerOnLineFile(lf);
-tkz->uncommentShell = TRUE;
-tkz->uncommentC = TRUE;
-tkz->leaveQuotes = TRUE;
-struct rqlStatement *rql;
-AllocVar(rql);
-rql->command = cloneString(tokenizerMustHaveNext(tkz));
-if (sameString(rql->command, "select"))
-    {
-    struct dyString *buf = dyStringNew(0);
-    struct slName *list = NULL;
-    char *tok = rqlParseFieldSpec(tkz, buf);
-    /* Look for count(*) as special case. */
-    boolean countOnly = FALSE;
-    if (sameString(tok, "count"))
-        {
-        /* paren is NULL if the input ends right after "count"; in that
-         * case "count" is treated as an ordinary field name. */
-        char *paren = tokenizerNext(tkz);
-        if (paren != NULL && paren[0] == '(')
-            {
-            /* Discard whatever is inside the parentheses. */
-            while ((paren = tokenizerMustHaveNext(tkz)) != NULL)
-                {
-                if (paren[0] == ')')
-                    break;
-                }
-            countOnly = TRUE;
-            freez(&rql->command);
-            rql->command = cloneString("count");
-            }
-        else
-            {
-            tokenizerReuse(tkz);
-            }
-        }
-    if (!countOnly)
-        {
-        list = slNameNew(tok);
-        for (;;)
-            {
-            /* Parse out comma-separated field list. */
-            char *comma = tokenizerNext(tkz);
-            if (comma == NULL || comma[0] != ',')
-                {
-                tokenizerReuse(tkz);
-                break;
-                }
-            slNameAddHead(&list, rqlParseFieldSpec(tkz, buf));
-            }
-        slReverse(&list);
-        rql->fieldList = list;
-        }
-    dyStringFree(&buf);
-    }
-else if (sameString(rql->command, "count"))
-    {
-    /* No parameters to count. */
-    }
-else
-    errAbort("Unknown RQL command '%s' line %d of %s\n", rql->command, lf->lineIx, lf->fileName);
-
-char *from = tokenizerNext(tkz);
-if (from != NULL)
-    {
-    if (sameString(from, "from"))
-        {
-        /* One scratch buffer serves every table name and is released once
-         * after the loop, whichever way the loop ends. */
-        struct dyString *buf = dyStringNew(0);
-        for (;;)
-            {
-            char *table = rqlParseFieldSpec(tkz, buf);
-            slNameAddTail(&rql->tableList, table);
-            char *comma = tokenizerNext(tkz);
-            if (comma == NULL)
-                break;
-            if (comma[0] != ',')
-                {
-                tokenizerReuse(tkz);
-                break;
-                }
-            }
-        dyStringFree(&buf);
-        }
-    else
-        tokenizerReuse(tkz);
-    }
-char *where = tokenizerNext(tkz);
-if (where != NULL)
-    {
-    if (!sameString(where, "where"))
-        errAbort("Unknown clause '%s' line %d of %s", where, lf->lineIx, lf->fileName);
-    rql->whereClause = rqlParseExpression(tkz);
-    }
-
-char *extra = tokenizerNext(tkz);
-if (extra != NULL)
-    errAbort("Extra stuff starting with '%s' past end of statement line %d of %s",
-        extra, lf->lineIx, lf->fileName);
-return rql;
-}
-
-void rqlStatementDump(struct rqlStatement *rql, FILE *f)
-/* Write a description of rql to f: the command, then the comma-separated
- * field list if there is one, then " where:" followed by a dump of the where
- * clause's parse tree if there is one, and finally a newline.  The table
- * list is not printed. */
-{
-struct slName *field = rql->fieldList;
-fprintf(f, "%s", rql->command);
-if (field != NULL)
-    {
-    fprintf(f, " ");
-    fprintf(f, "%s", field->name);
-    while ((field = field->next) != NULL)
-        fprintf(f, ",%s", field->name);
-    }
-if (rql->whereClause != NULL)
-    {
-    fprintf(f, " where:\n");
-    rqlParseDump(rql->whereClause, 0, f);
-    }
-fprintf(f, "\n");
-}
-