93943b9bd3f5d6d9ef5a51e636636449b0c607b1
kent
  Mon Dec 9 11:10:26 2013 -0800
Separating out generic portable JSON parser from Genome Browser specific stuff in jsHelper module, creating jsonParse module.
diff --git src/lib/jsonParse.c src/lib/jsonParse.c
new file mode 100644
index 0000000..ef205df
--- /dev/null
+++ src/lib/jsonParse.c
@@ -0,0 +1,692 @@
+/* jsonParse - routines to parse JSON strings and traverse and pick things out of the
+ * resulting object tree. */
+
+#include "common.h"
+#include "hash.h"
+#include "dystring.h"
+#include "sqlNum.h"
+#include "jsonParse.h"
+
+// Global json object that jsonObjectAdd() fills in when it is handed a NULL object;
+// it stays NULL until the first such add.  (Meant to move to jsHelper, one step at a time.)
+struct jsonElement *jsonGlobalsHash = NULL; 
+
+static struct jsonElement *newJsonElement(jsonElementType type)
+/* Generic jsonElement constructor: allocates the element and stamps its type.
+ * The caller is responsible for filling in the matching val member. */
+{
+struct jsonElement *newEle;
+AllocVar(newEle);
+newEle->type = type;
+return newEle;
+}
+
+struct jsonElement *newJsonString(char *str)
+/* Return a new jsonString element whose value is a cloneString() copy of str. */
+{
+struct jsonElement *strEle = newJsonElement(jsonString);
+strEle->val.jeString = cloneString(str);
+return strEle;
+}
+
+struct jsonElement *newJsonBoolean(boolean val)
+/* Return a new jsonBoolean element holding val. */
+{
+struct jsonElement *boolEle = newJsonElement(jsonBoolean);
+boolEle->val.jeBoolean = val;
+return boolEle;
+}
+
+struct jsonElement *newJsonNumber(long val)
+/* Return a new jsonNumber (integer valued) element holding val. */
+{
+struct jsonElement *numEle = newJsonElement(jsonNumber);
+numEle->val.jeNumber = val;
+return numEle;
+}
+
+struct jsonElement *newJsonDouble(double val)
+/* Return a new jsonDouble (floating point valued) element holding val. */
+{
+struct jsonElement *dblEle = newJsonElement(jsonDouble);
+dblEle->val.jeDouble = val;
+return dblEle;
+}
+
+struct jsonElement *newJsonObject(struct hash *h)
+/* Return a new jsonObject element that uses h itself (not a copy) as its hash
+ * of member name -> jsonElement. */
+{
+struct jsonElement *objEle = newJsonElement(jsonObject);
+objEle->val.jeHash = h;
+return objEle;
+}
+
+struct jsonElement *newJsonList(struct slRef *list)
+/* Return a new jsonList element that uses list itself (not a copy) as its
+ * slRef list; each slRef's val points at one member jsonElement. */
+{
+struct jsonElement *listEle = newJsonElement(jsonList);
+listEle->val.jeList = list;
+return listEle;
+}
+
+void jsonObjectAdd(struct jsonElement *h, char *name, struct jsonElement *ele)
+/* Store ele under name in jsonObject h; existing values are replaced.
+ * NOTE: passing h == NULL adds to the global jsonGlobalsHash object instead,
+ * creating it on first use.  Aborts if the target is not a jsonObject. */
+{
+struct jsonElement *target = h;
+if (target == NULL)
+    {
+    // No object supplied: fall back to the lazily created global one.
+    if (jsonGlobalsHash == NULL)
+        jsonGlobalsHash = newJsonObject(newHash(5));
+    target = jsonGlobalsHash;
+    }
+if (target->type != jsonObject)
+    errAbort("jsonObjectAdd called on element with incorrect type (%d)", target->type);
+hashReplace(target->val.jeHash, name, ele);
+}
+
+void jsonListAdd(struct jsonElement *list, struct jsonElement *ele)
+/* Add ele to the head of jsonList list.  Aborts if list is not a jsonList. */
+{
+if(list->type != jsonList)
+    errAbort("jsonListAdd called on element with incorrect type (%d)", list->type);
+// jeList is a list of slRef nodes whose val points at the member element (that is
+// how jsonParseList builds it and how jsonElementRecurse reads it), so ele has to
+// be wrapped in an slRef rather than being linked into the list itself.
+struct slRef *ref;
+AllocVar(ref);
+ref->val = ele;
+slAddHead(&list->val.jeList, ref);
+}
+
+static void skipLeadingSpacesWithPos(char *s, int *posPtr)
+/* Advance *posPtr past any white space starting at s[*posPtr].  Stops at the
+ * first non-space character, which includes the terminating zero. */
+{
+// Go through unsigned char: handing isspace() a negative char value (any byte
+// above 0x7F where char is signed) is undefined behavior.
+while (isspace((unsigned char)s[*posPtr]))
+    (*posPtr)++;
+}
+
+static void getSpecificChar(char c, char *str, int *posPtr)
+/* Consume the character c at str[*posPtr], advancing *posPtr past it;
+ * errAbort, reporting the position, if some other character is found there. */
+{
+char found = str[*posPtr];
+if (found != c)
+    errAbort("Unexpected character '%c' (expected '%c') - string position %d\n", found, c, *posPtr);
+*posPtr += 1;
+}
+
+static char *getString(char *str, int *posPtr)
+/* Read a double-quote delimited string starting at str[*posPtr], decoding
+ * backslash escapes, and leave *posPtr just past the closing quote.
+ * Returns the decoded text as an allocated string.  Aborts on a missing
+ * closing quote and on \u escapes. */
+{
+struct dyString *decoded = dyStringNew(1024);
+boolean afterBackslash = FALSE;
+int offset = 0;
+getSpecificChar('"', str, posPtr);
+for (;;)
+    {
+    char c = str[*posPtr + offset];
+    if (c == 0)
+        errAbort("Premature end of string (missing trailing double-quote); string position '%d'", *posPtr);
+    else if (afterBackslash)
+        {
+        // We support escape sequences listed in http://www.json.org,
+        // except for Unicode, which is rejected
+        switch (c)
+            {
+            case 'b': c = '\b'; break;
+            case 'f': c = '\f'; break;
+            case 'n': c = '\n'; break;
+            case 'r': c = '\r'; break;
+            case 't': c = '\t'; break;
+            case 'u':
+                errAbort("Unicode in JSON is unsupported");
+                break;
+            default:
+                // backslash, slash and double-quote stand for themselves
+                break;
+            }
+        dyStringAppendC(decoded, c);
+        afterBackslash = FALSE;
+        }
+    else if (c == '"')
+        break;      // closing quote; it is consumed after the loop
+    else if (c == '\\')
+        afterBackslash = TRUE;
+    else
+        dyStringAppendC(decoded, c);
+    offset++;
+    }
+*posPtr += offset;
+getSpecificChar('"', str, posPtr);
+return dyStringCannibalize(&decoded);
+}
+
+// Forward declaration: the object and list parsers below recurse into
+// jsonParseExpression, which is defined after them.
+static struct jsonElement *jsonParseExpression(char *str, int *posPtr);
+
+static struct jsonElement *jsonParseObject(char *str, int *posPtr)
+/* Parse a '{'-delimited object of "name" : value pairs starting at str[*posPtr]
+ * and return it as a jsonObject element; *posPtr is left just past the '}'.
+ * Malformed input aborts inside getSpecificChar/getString. */
+{
+struct hash *h = newHash(5);
+getSpecificChar('{', str, posPtr);
+// Skip blanks before testing for '}' so an empty object written as "{ }" is accepted.
+skipLeadingSpacesWithPos(str, posPtr);
+while(str[*posPtr] != '}')
+    {
+    // parse out a name : val pair
+    char *name = getString(str, posPtr);
+    skipLeadingSpacesWithPos(str, posPtr);
+    getSpecificChar(':', str, posPtr);
+    skipLeadingSpacesWithPos(str, posPtr);
+    hashAdd(h, name, jsonParseExpression(str, posPtr));
+    // getString hands back an allocated string; the kent hash stores its own copy
+    // of the key, so release ours rather than leaking it.
+    freez(&name);
+    skipLeadingSpacesWithPos(str, posPtr);
+    if(str[*posPtr] != ',')
+        break;
+    (*posPtr)++;
+    skipLeadingSpacesWithPos(str, posPtr);
+    }
+// White space has already been skipped on every path that gets here.
+getSpecificChar('}', str, posPtr);
+return newJsonObject(h);
+}
+
+static struct jsonElement *jsonParseList(char *str, int *posPtr)
+/* Parse a '['-delimited, comma separated list of values starting at str[*posPtr]
+ * and return it as a jsonList element with members in input order; *posPtr is
+ * left just past the ']'. */
+{
+struct slRef *list = NULL;
+getSpecificChar('[', str, posPtr);
+// Skip blanks before testing for ']' so an empty list written as "[ ]" is accepted.
+skipLeadingSpacesWithPos(str, posPtr);
+while(str[*posPtr] != ']')
+    {
+    struct slRef *e;
+    AllocVar(e);
+    e->val = jsonParseExpression(str, posPtr);  // skips its own leading white space
+    slAddHead(&list, e);
+    skipLeadingSpacesWithPos(str, posPtr);
+    if(str[*posPtr] != ',')
+        break;
+    (*posPtr)++;
+    skipLeadingSpacesWithPos(str, posPtr);
+    }
+// White space has already been skipped on every path that gets here.
+getSpecificChar(']', str, posPtr);
+slReverse(&list);   // members were added at the head, so restore input order
+return newJsonList(list);
+}
+
+static struct jsonElement *jsonParseString(char *str, int *posPtr)
+/* Parse the double-quoted string at str[*posPtr] into a jsonString element and
+ * advance *posPtr past it. */
+{
+// getString returns an allocated string and newJsonString stores a clone of
+// what it is given, so the intermediate copy must be freed here.
+char *text = getString(str, posPtr);
+struct jsonElement *ele = newJsonString(text);
+freez(&text);
+return ele;
+}
+
+static struct jsonElement *jsonParseBoolean(char *str, int *posPtr)
+/* Parse the literal true or false at str[*posPtr] into a jsonBoolean element and
+ * advance *posPtr past it.  Any other run of letters (or none at all) aborts. */
+{
+struct jsonElement *ele = NULL;
+char *start = str + *posPtr;
+int i = 0;
+// isalpha() is only defined for unsigned char values, so convert before calling it.
+// It is false for the terminating zero, which ends the scan at end of input.
+while (isalpha((unsigned char)start[i]))
+    i++;
+char *val = cloneStringZ(start, i);
+if(sameString(val, "true"))
+    ele = newJsonBoolean(TRUE);
+else if(sameString(val, "false"))
+    ele =  newJsonBoolean(FALSE);
+else
+    errAbort("Invalid boolean value '%s'; pos: %d", val, *posPtr);
+*posPtr += i;
+freez(&val);
+return ele;
+}
+
+static struct jsonElement *jsonParseNumber(char *str, int *posPtr)
+/* Parse the number starting at str[*posPtr] and advance *posPtr past it.
+ * Text containing '.', 'e' or 'E' becomes a jsonDouble element; anything else
+ * is handed to sqlLongLong and becomes a jsonNumber element. */
+{
+int i;
+boolean integral = TRUE;
+struct jsonElement *retVal = NULL;
+char *start = str + *posPtr;
+
+for(i = 0;; i++)
+    {
+    char c = start[i];
+    if(c == 'e' || c == 'E' || c == '.')
+        integral = FALSE;
+    else if(c == '+' && i > 0 && (start[i-1] == 'e' || start[i-1] == 'E'))
+        continue;   // explicit plus sign on an exponent, as in 1e+5
+    else if(!c || (!isdigit((unsigned char)c) && c != '-'))
+        break;
+    }
+char *val = cloneStringZ(start, i);
+*posPtr += i;
+if(integral)
+    retVal = newJsonNumber(sqlLongLong(val));
+else
+    {
+    double d;
+    // Require exactly one converted item: sscanf can also return EOF, which is
+    // non-zero and would leave d unset.
+    if(sscanf(val, "%lf", &d) == 1)
+        retVal = newJsonDouble(d);
+    else
+        errAbort("Invalid JSON Double: %s", val);
+    }
+freez(&val);
+return retVal;
+}
+
+static struct jsonElement *jsonParseExpression(char *str, int *posPtr)
+/* Parse whatever json value starts at str[*posPtr], after skipping leading white
+ * space, choosing the parser from its first character.  Anything that does not
+ * start like an object, list, string or number is handed to jsonParseBoolean. */
+{
+skipLeadingSpacesWithPos(str, posPtr);
+char c = str[*posPtr];
+if(c == '{')
+    return jsonParseObject(str, posPtr);
+else if (c == '[')
+    return jsonParseList(str, posPtr);
+else if (c == '"')
+    return jsonParseString(str, posPtr);
+else if (isdigit((unsigned char)c) || c == '-')   // isdigit() needs an unsigned char value
+    return jsonParseNumber(str, posPtr);
+else
+    return jsonParseBoolean(str, posPtr);
+// XXXX support null?
+}
+
+struct jsonElement *jsonParse(char *str)
+/* Parse str into an in-memory json representation and return its root element.
+ * Only white space may follow the parsed value; anything else aborts. */
+{
+int cursor = 0;
+struct jsonElement *root = jsonParseExpression(str, &cursor);
+skipLeadingSpacesWithPos(str, &cursor);
+if (str[cursor] != 0)
+    errAbort("Invalid JSON: unprocessed trailing string at position: %d: %s", cursor, str + cursor);
+return root;
+}
+
+char *jsonStringEscape(char *inString)
+/* Backslash escape a string for use in a double quoted json string.
+ * More conservative than javaScriptLiteralEncode because
+ * some json parsers complain if you escape & or '.
+ * Double-quote, backslash and slash get a backslash in front of them; backspace,
+ * form feed, newline, carriage return and tab become \b \f \n \r \t.
+ * Returns a newly allocated string (an empty one if inString is NULL). */
+{
+char c;
+int outSize = 0;
+char *outString, *out, *in;
+
+if (inString == NULL)
+    return(cloneString(""));
+
+/* Count up how long it will be: each escaped character takes two bytes. */
+for (in = inString; (c = *in) != 0; in++)
+    {
+    switch(c)
+        {
+        case '\"':
+        case '\\':
+        case '/':
+        case '\b':
+        case '\f':
+        case '\n':
+        case '\r':
+        case '\t':
+            outSize += 2;
+            break;
+        default:
+            outSize += 1;
+        }
+    }
+outString = needMem(outSize+1);
+
+/* Encode string */
+out = outString;
+for (in = inString; (c = *in) != 0; in++)
+    {
+    char escLetter = 0;   // character to write after the backslash; 0 = no escape needed
+    switch(c)
+        {
+        case '\"':
+        case '\\':
+        case '/':
+            escLetter = c;
+            break;
+        // A raw control character is not allowed inside a json string, even after
+        // a backslash, so these must be written with their letter codes.
+        case '\b':
+            escLetter = 'b';
+            break;
+        case '\f':
+            escLetter = 'f';
+            break;
+        case '\n':
+            escLetter = 'n';
+            break;
+        case '\r':
+            escLetter = 'r';
+            break;
+        case '\t':
+            escLetter = 't';
+            break;
+        default:
+            break;
+        }
+    if (escLetter != 0)
+        {
+        *out++ = '\\';
+        *out++ = escLetter;
+        }
+    else
+        *out++ = c;
+    }
+*out = 0;
+return outString;
+}
+
+void jsonFindNameRecurse(struct jsonElement *ele, char *jName, struct slName **pList)
+/* Search the JSON tree under ele recursively for object members named jName and
+ * add the json-escaped value of each string-valued match to the head of *pList.
+ * A matching member that is not a string adds no entry, but is still searched.
+ * Aborts on an element of unknown type. */
+{
+switch (ele->type)
+    {
+    case jsonObject:
+        {
+        if(hashNumEntries(ele->val.jeHash))
+            {
+            struct hashEl *el, *list = hashElListHash(ele->val.jeHash);
+            slSort(&list, hashElCmp);
+            for (el = list; el != NULL; el = el->next)
+                {
+                struct jsonElement *val = el->val;
+                // Only a jsonString holds a valid jeString; reading it from any other
+                // element type would hand a garbage pointer to jsonStringEscape.
+                if (val->type == jsonString && sameString(el->name, jName))
+                    {
+                    char *escaped = jsonStringEscape(val->val.jeString);
+                    slNameAddHead(pList, escaped);
+                    // the kent slName node keeps its own copy of the text
+                    freez(&escaped);
+                    }
+                jsonFindNameRecurse(val, jName, pList);
+                }
+            hashElFreeList(&list);
+            }
+        break;
+        }
+    case jsonList:
+        {
+        struct slRef *el;
+        if(ele->val.jeList)
+            {
+            for (el = ele->val.jeList; el != NULL; el = el->next)
+                {
+                struct jsonElement *val = el->val;
+                jsonFindNameRecurse(val, jName, pList);
+                }
+            }
+        break;
+        }
+    case jsonString:
+    case jsonBoolean:
+    case jsonNumber:
+    case jsonDouble:
+        {
+        // Scalars have no members to search.
+        break;
+        }
+    default:
+        {
+        errAbort("jsonFindNameRecurse; invalid type: %d", ele->type);
+        break;
+        }
+    }
+}
+
+struct slName *jsonFindName(struct jsonElement *json, char *jName)
+/* Collect what jsonFindNameRecurse finds for jName anywhere in the tree under
+ * json.  jsonFindNameRecurse adds at the head of the list, so the list is
+ * reversed before it is returned. */
+{
+struct slName *found = NULL;
+jsonFindNameRecurse(json, jName, &found);
+slReverse(&found);
+return found;
+}
+
+struct slName *jsonFindNameUniq(struct jsonElement *json, char *jName)
+/* Like jsonFindName, but the collected list is passed through slUniqify
+ * (comparing with slNameCmp, discarding with slNameFree) to drop duplicate
+ * values before it is reversed and returned. */
+{
+struct slName *found = NULL;
+jsonFindNameRecurse(json, jName, &found);
+slUniqify(&found, slNameCmp, slNameFree);
+slReverse(&found);
+return found;
+}
+
+void jsonElementRecurse(struct jsonElement *ele, char *name, boolean isLast,
+    void (*startCallback)(struct jsonElement *ele, char *name, boolean isLast, void *context),
+    // Called at element start
+    void (*endCallback)(struct jsonElement *ele, char *name, boolean isLast, void *context),
+    // Called at element end
+    void *context)
+/* Walk the JSON tree rooted at ele.  For every element startCallback is called,
+ * then the element's children are visited, then endCallback is called; each gets
+ * the element, its name, isLast and context.  Object members are visited in
+ * hashElCmp-sorted order under their member name; list members are visited in list
+ * order with a NULL name.  isLast is TRUE for the final child of its parent.
+ * Either startCallback or endCallback may be NULL.  Aborts on an unknown type. */
+{
+if (startCallback != NULL)
+    startCallback(ele, name, isLast, context);
+if (ele->type == jsonObject)
+    {
+    if (hashNumEntries(ele->val.jeHash) != 0)
+        {
+        struct hashEl *member, *sorted = hashElListHash(ele->val.jeHash);
+        slSort(&sorted, hashElCmp);
+        for (member = sorted; member != NULL; member = member->next)
+            jsonElementRecurse(member->val, member->name, member->next == NULL,
+                startCallback, endCallback, context);
+        hashElFreeList(&sorted);
+        }
+    }
+else if (ele->type == jsonList)
+    {
+    struct slRef *ref;
+    for (ref = ele->val.jeList; ref != NULL; ref = ref->next)
+        jsonElementRecurse(ref->val, NULL, ref->next == NULL,
+            startCallback, endCallback, context);
+    }
+else if (ele->type != jsonString && ele->type != jsonBoolean
+         && ele->type != jsonNumber && ele->type != jsonDouble)
+    {
+    // Scalars have no children; anything else is not a type we know.
+    errAbort("jsonElementRecurse; invalid type: %d", ele->type);
+    }
+if (endCallback != NULL)
+    endCallback(ele, name, isLast, context);
+}
+
+void jsonPrintOneStart(struct jsonElement *ele, char *name, boolean isLast, int indent, FILE *f)
+/* Print the start of one json element to f after spacing out by indent: the quoted
+ * name and a colon if name is not NULL, then an opening brace or bracket for an
+ * object or list, or the complete value for a string, boolean, number or double.
+ * isLast is not used here.  Recursion is handled elsewhere. */
+{
+spaceOut(f, indent);
+if (name != NULL)
+    {
+    fprintf(f, "\"%s\": ", name);
+    }
+switch (ele->type)
+    {
+    case jsonObject:
+        {
+        fprintf(f, "{\n");
+        break;
+        }
+    case jsonList:
+        {
+        fprintf(f, "[\n");
+        break;
+        }
+    case jsonString:
+        {
+        char *escaped = jsonStringEscape(ele->val.jeString);
+        fprintf(f, "\"%s\"",  escaped);
+        freez(&escaped);
+        break;
+        }
+    case jsonBoolean:
+        {
+        // json spells its boolean literals exactly "true" and "false"
+        char *val = (ele->val.jeBoolean ? "true" : "false");
+        fprintf(f, "%s", val);
+        break;
+        }
+    case jsonNumber:
+        {
+        fprintf(f, "%ld", ele->val.jeNumber);
+        break;
+        }
+    case jsonDouble:
+        {
+        fprintf(f, "%g", ele->val.jeDouble);
+        break;
+        }
+    default:
+        {
+        errAbort("jsonPrintOneStart; invalid type: %d", ele->type);
+        break;
+        }
+    }
+}
+
+void jsonPrintOneEnd(struct jsonElement *ele, char *name, boolean isLast, boolean indent, FILE *f)
+/* Print the end of one json element to f: the spaced-out closing brace or bracket
+ * for an object or list (nothing for scalar types), then a comma unless isLast,
+ * then a newline.  name is not used.  Aborts on an unknown element type.
+ * NOTE(review): indent is declared boolean here but int in jsonPrintOneStart, and
+ * it is used as a spacing count - confirm boolean is wide enough, or change it
+ * together with the prototype. */
+{
+jsonElementType kind = ele->type;
+if (kind == jsonObject || kind == jsonList)
+    {
+    spaceOut(f, indent);
+    fputc((kind == jsonObject) ? '}' : ']', f);
+    }
+else if (kind != jsonString && kind != jsonBoolean && kind != jsonNumber && kind != jsonDouble)
+    {
+    errAbort("jsonPrintOneEnd; invalid type: %d", ele->type);
+    }
+if (!isLast)
+    fputc(',', f);
+fputc('\n', f);
+}
+
+struct jsonPrintContext
+/* Context for printing a JSON object nicely; handed as the void *context to the
+ * printIndentedName start/end callbacks. */
+    {
+    FILE *f;	// where to print it
+    int indent;	// How much to indent currently; raised by the start callback, lowered by the end callback
+    int indentPer;  // How much to indent each level
+    };
+
+
+static void printIndentedNameStartCallback(struct jsonElement *ele, char *name,
+    boolean isLast, void *context)
+/* jsonElementRecurse start callback: print the opening of ele at the current
+ * indent, then indent one level deeper for whatever is nested inside it. */
+{
+struct jsonPrintContext *ctx = context;
+jsonPrintOneStart(ele, name, isLast, ctx->indent, ctx->f);
+ctx->indent = ctx->indent + ctx->indentPer;
+}
+
+static void printIndentedNameEndCallback(struct jsonElement *ele, char *name, 
+    boolean isLast, void *context)
+/* jsonElementRecurse end callback: drop back one indent level, then print the
+ * closing of ele to the same file the start callback wrote to. */
+{
+struct jsonPrintContext *jps = context;
+jps->indent -= jps->indentPer;
+// Must go to jps->f, not stdout, or the closing punctuation is separated from
+// the rest of the output whenever the caller prints to some other file.
+jsonPrintOneEnd(ele, name, isLast, jps->indent, jps->f);
+}
+
+void jsonPrintToFile(struct jsonElement *root, char *name, FILE *f, int indentPer)
+/* Print out JSON object and all children nicely indented to f as JSON objects. 
+ * Name may be NULL, in which case the root is printed without a leading
+ * "name": label.  indentPer is how much deeper each nesting level is indented.
+ * Implemented via jsonPrintOneStart/jsonPrintOneEnd. */
+{
+struct jsonPrintContext jps = {f, 0, indentPer};
+// Pass the caller's name through; it used to be dropped in favor of NULL.
+jsonElementRecurse(root, name, TRUE, 
+	printIndentedNameStartCallback, printIndentedNameEndCallback, &jps);
+}
+
+/** Routines that check json type and return corresponding value. **/
+
+struct slRef *jsonListVal(struct jsonElement *ele, char *name)
+/* Return the slRef list held by ele.  Aborts with a message mentioning name
+ * if ele is not a jsonList. */
+{
+boolean isList = (ele->type == jsonList);
+if (!isList)
+    errAbort("json element %s is not a list", name);
+return ele->val.jeList;
+}
+
+struct hash *jsonObjectVal(struct jsonElement *ele, char *name)
+/* Return the hash of members held by ele.  Aborts with a message mentioning
+ * name if ele is not a jsonObject. */
+{
+boolean isObject = (ele->type == jsonObject);
+if (!isObject)
+    errAbort("json element %s is not an object", name);
+return ele->val.jeHash;
+}
+
+long jsonNumberVal(struct jsonElement *ele, char *name)
+/* Return the integer value held by ele.  Aborts with a message mentioning name
+ * if ele is not a jsonNumber. */
+{
+boolean isNumber = (ele->type == jsonNumber);
+if (!isNumber)
+    errAbort("json element %s is not a number", name);
+return ele->val.jeNumber;
+}
+
+long jsonDoubleVal(struct jsonElement *ele, char *name)
+/* Return the value held by ele.  Aborts with a message mentioning name if ele
+ * is not a jsonDouble.
+ * NOTE(review): the return type is long, so the stored double is converted to
+ * an integer on the way out - this looks unintended; changing it needs the
+ * prototype in the header to change as well. */
+{
+boolean isDouble = (ele->type == jsonDouble);
+if (!isDouble)
+    errAbort("json element %s is not a number", name);
+return ele->val.jeDouble;
+}
+
+long jsonBooleanVal(struct jsonElement *ele, char *name)
+/* Return the boolean value held by ele (as a long).  Aborts with a message
+ * mentioning name if ele is not a jsonBoolean. */
+{
+boolean isBool = (ele->type == jsonBoolean);
+if (!isBool)
+    errAbort("json element %s is not a boolean", name);
+return ele->val.jeBoolean;
+}
+
+char *jsonStringVal(struct jsonElement *ele, char *eleName)
+/* Return the string held by ele (the element's own pointer, not a copy).
+ * Aborts with a message mentioning eleName if ele is not a jsonString. */
+{
+boolean isString = (ele->type == jsonString);
+if (!isString)
+    errAbort("json element %s is not a string", eleName);
+return ele->val.jeString;
+}
+
+/** Routines that help work with json objects (brace-enclosed key/val pairs). **/
+
+struct jsonElement *jsonFindNamedField(struct jsonElement *object,
+    char *objectName, char *field)
+/* Look up field among the members of object and return its element, or NULL
+ * if there is no such member.  Aborts (mentioning objectName) if object is
+ * not actually a jsonObject. */
+{
+return hashFindVal(jsonObjectVal(object, objectName), field);
+}
+
+struct jsonElement *jsonMustFindNamedField(struct jsonElement *object,
+    char *objectName, char *field)
+/* Like jsonFindNamedField, but abort instead of returning NULL when object has
+ * no member called field. */
+{
+struct jsonElement *found = jsonFindNamedField(object, objectName, field);
+if (found == NULL)
+   errAbort("Couldn't find field %s in json object %s", field, objectName);
+return found;
+}
+
+char *jsonOptionalStringField(struct jsonElement *object, char *field, char *defaultVal)
+/* Return the string value of member field of object, or defaultVal if object
+ * has no such member.  Aborts if object is not a jsonObject or if the member
+ * exists but is not a string. */
+{
+struct jsonElement *fieldEle = jsonFindNamedField(object, "", field);
+return (fieldEle != NULL) ? jsonStringVal(fieldEle, field) : defaultVal;
+}
+
+char *jsonStringField(struct jsonElement *object, char *field)
+/* Return the string value of member field of object; abort if the lookup
+ * through jsonOptionalStringField comes back NULL, i.e. the field is missing. */
+{
+char *text = jsonOptionalStringField(object, field, NULL);
+if (text == NULL)
+    errAbort("Field %s doesn't exist in json object", field);
+return text;
+}
+