b3d2a3a1d7e096b0f97a2a5295d4fa610336de04 angie Fri Oct 12 11:02:37 2018 -0700 Adding jsonQuery, which extracts values from a jsonElement tree using a simple path notation e.g. "journals[publisher=Nature].articles[*].author[0].lastName". aside: slFreeList doesn't check for NULL. I think it should, but it's used in a zillion places so for now I'll just check before calling. diff --git src/lib/jsonQuery.c src/lib/jsonQuery.c new file mode 100644 index 0000000..fdb0cf3 --- /dev/null +++ src/lib/jsonQuery.c @@ -0,0 +1,407 @@ +/* jsonQuery - simple path syntax for retrieving specific descendants of a jsonElement. */ + +/* Copyright (C) 2018 The Regents of the University of California + * See README in this or parent directory for licensing information. */ + +#include "common.h" +#include "dystring.h" +#include "hash.h" +#include "jsonQuery.h" + +static const char *findEndBracket(const char *path) +/* Return a pointer to the right bracket matching the first left bracket that we encounter. */ +{ +if (path == NULL) + return NULL; +const char *end = NULL; +int leftCount = 0, rightCount = 0; +int i; +for (i = 0; path[i] != '\0'; i++) + { + if (path[i] == '[') + leftCount++; + else if (path[i] == ']') + { + rightCount++; + if (rightCount == leftCount) + { + end = path+i; + break; + } + else if (rightCount > leftCount) + errAbort("findEndBracket: encountered right bracket before left bracket in '%s'", + path); + } + } +return end; +} + +static char *jsonPathPopHead(const char *pathIn, char **retPath, struct lm *lm) +/* Return the first component of pathIn and set retPath to the start of the next component in pathIn. + * If pathIn is empty/NULL, set retPath to NULL and return the empty string. */ +{ +if (isEmpty(pathIn)) + { + *retPath = NULL; + return lm ? "" : cloneString(""); + } +else + { + char *pDot = strchr(pathIn, '.'); + char *pBracket = strchr(pathIn, '['); + if (pDot && pBracket) + { + // Both found -- ignore the second one, handle only the first one. + if (pDot < pBracket) + pBracket = NULL; + else + pDot = NULL; + } + if (pDot) + { + if (pDot == pathIn) + errAbort("jsonPathPopHead: path '%s' should not start with '.'", pathIn); + *retPath = pDot + 1; + return lm ? lmCloneStringZ(lm, pathIn, pDot - pathIn) + : cloneStringZ(pathIn, pDot - pathIn); + } + else if (pBracket) + { + if (pBracket == pathIn) + { + // This path component is a list index; return contents of [] + const char *pEnd = findEndBracket(pathIn); + if (!pEnd) + errAbort("jsonPathPopHead: no matching ']' for '[' in '%s'", pathIn); + if (pBracket[1] != '*' && !isdigit(pBracket[1]) && !strchr(pBracket, '=')) + errAbort("jsonPathPopHead: contents of '[]' must be '*', a number, " + "or a path=val condition (not %s)", pathIn); + *retPath = (char *)(pEnd + 1); + if (*retPath[0] == '.') + *retPath = *retPath + 1; + return lm ? lmCloneStringZ(lm, pathIn + 1, pEnd - pBracket - 1) + : cloneStringZ(pathIn + 1, pEnd - pBracket - 1); + } + else + { + // The next path component is a list index + *retPath = pBracket; + return lm ? lmCloneStringZ(lm, pathIn, pBracket - pathIn) + : cloneStringZ(pathIn, pBracket - pathIn); + } + } + else + { + // Last component in path + *retPath = (char *)pathIn + strlen(pathIn); + return lm ? lmCloneString(lm, pathIn) : cloneString(pathIn); + } + } +errAbort("jsonPathPopHead: should have returned a result before this point"); +return NULL; +} + +// Forward declaration for mutual recursion: +static void rQueryElement(struct jsonElement *elIn, char *name, char *path, + struct slRef **pResultList, struct lm *lm); +/* Recursively search for descendants of jsonElements in inList matching path; add jsonElements + * that match to resultList. */ + +static void rQueryObject(struct jsonElement *el, char *name, char *id, char *path, + struct slRef **pResultList, struct lm *lm) +/* Given a JSON object and a child id, recursively search child for path. */ +{ +struct hash *hash = jsonObjectVal(el, name); +if (hash) + { + struct jsonElement *child = hashFindVal(hash, id); + if (child) + rQueryElement(child, name, path, pResultList, lm); + } +} + +static void rQueryList(struct jsonElement *el, char *name, char *id, char *path, + struct slRef **pResultList, struct lm *lm) +/* Given a JSON list and a child index (* for all children), recursively search child(ren) if found + * for path. */ +{ +struct slRef *list = jsonListVal(el, name); +char *equals = strchr(id, '='); +if (equals) + { + // Conditional query; filter list items by condPath=val. + char *value = equals+1; + long intVal = atol(value); + double doubleVal = atof(value); + boolean booleanVal = FALSE, valIsBoolean = FALSE; + if (sameString(value, "true") || sameString(value, "TRUE") || sameString(value, "1")) + { + booleanVal = TRUE; + valIsBoolean = TRUE; + } + else if (sameString(value, "false") || sameString(value, "FALSE") || sameString(value, "0")) + { + booleanVal = FALSE; + valIsBoolean = TRUE; + } + char condPath[strlen(id)+1]; + safencpy(condPath, sizeof condPath, id, (equals - id)); + struct slRef *ref; + for (ref = list; ref != NULL; ref = ref->next) + { + struct slRef *condPathResults = NULL; + rQueryElement(ref->val, name, condPath, &condPathResults, lm); + struct slRef *resRef; + for (resRef = condPathResults; resRef != NULL; resRef = resRef->next) + { + struct jsonElement *resEl = resRef->val; + boolean matches = FALSE; + switch (resEl->type) + { + case jsonString: + matches = sameString(jsonStringVal(resEl, condPath), value); + break; + case jsonNumber: + matches = (jsonNumberVal(resEl, condPath) == intVal); + break; + case jsonDouble: + matches = (jsonDoubleVal(resEl, condPath) == doubleVal); + break; + case jsonBoolean: + if (!valIsBoolean) + errAbort("jsonQueryElement: bad conditional value '%s' for boolean", value); + matches = (jsonBooleanVal(resEl, condPath) == booleanVal); + break; + case jsonNull: + matches = (sameString(value, "NULL") || sameString(value, "null")); + break; + default: + errAbort("jsonQueryElement: bad jsonElementType %d for conditional query", + resEl->type); + } + if (matches) + { + rQueryElement(ref->val, name, path, pResultList, lm); + break; + } + } + } + } +else + { + int idIx = -1; + if (isdigit(id[0])) + idIx = atoi(id); + else if (differentString(id, "*")) + errAbort("jsonQueryElement: invalid index '%s' for list", id); + struct slRef *ref; + int ix; + for (ref = list, ix = 0; ref != NULL; ref = ref->next, ix++) + { + if (idIx < 0 || ix == idIx) + { + rQueryElement(ref->val, name, path, pResultList, lm); + if (ix == idIx) + break; + } + } + } +} + +static void rQueryElement(struct jsonElement *elIn, char *name, char *path, + struct slRef **pResultList, struct lm *lm) +/* Recursively search for descendants of jsonElements in inList matching path; add jsonElements + * that match to resultList. */ +{ +char *pathNext = NULL; +char *id = jsonPathPopHead(path, &pathNext, lm); +struct dyString *dy = dyStringCreate("%s", name); +if (isNotEmpty(id)) + { + switch (elIn->type) + { + case jsonObject: + { + dyStringPrintf(dy, ".%s", id); + rQueryObject(elIn, dy->string, id, pathNext, pResultList, lm); + break; + } + case jsonList: + { + dyStringPrintf(dy, "[%s]", id); + rQueryList(elIn, dy->string, id, pathNext, pResultList, lm); + break; + } + case jsonString: + case jsonBoolean: + case jsonNumber: + case jsonDouble: + case jsonNull: + { + errAbort("jsonQueryElement: got element with scalar type (%d), but children specified " + "(%s)", elIn->type, id); + break; + } + default: + { + errAbort("jsonQueryElement: invalid type: %d", elIn->type); + break; + } + } + } +else + { + struct slRef *ref; + if (lm) + lmAllocVar(lm, ref) + else + AllocVar(ref); + ref->val = elIn; + slAddHead(pResultList, ref); + } +if (lm == NULL) + freez(&id); +dyStringFree(&dy); +} + +struct slRef *jsonQueryElementList(struct slRef *inList, char *name, char *path, struct lm *lm) +/* Return a ref list of jsonElement descendants matching path of all jsonElements in inList. + * name is for error reporting. */ +{ +struct slRef *resultList = NULL; +struct dyString *dy = dyStringNew(0); +boolean isMult = (inList->next != NULL); +struct slRef *ref; +int ix; +for (ref = inList, ix = 0; ref != NULL; ref = ref->next, ix++) + { + struct jsonElement *elIn = ref->val; + if (elIn) + { + dyStringClear(dy); + dyStringPrintf(dy, "%s", name); + if (isMult) + dyStringPrintf(dy, "[%d]", ix); + rQueryElement(elIn, dy->string, path, &resultList, lm); + } + } +slReverse(&resultList); +dyStringFree(&dy); +return resultList; +} + +struct slRef *jsonQueryElement(struct jsonElement *el, char *name, char *path, struct lm *lm) +/* Return a ref list of jsonElement descendants of el that match path. + * name is for error reporting. */ +{ +// Make an slRef wrapper for el and call jsonQueryElementList. +struct slRef elRef = { NULL, el }; +return jsonQueryElementList(&elRef, name, path, lm); +} + +static struct jsonElement *querySingle(struct jsonElement *el, char *name, char *path, struct lm *lm) +/* Return one jsonElement resulting from searching el for path (or NULL); errAbort if multiple. */ +{ +struct jsonElement *resEl = NULL; +struct slRef ref = { NULL, el }; +struct slRef *resultRef = jsonQueryElementList(&ref, name, path, lm); +if (resultRef) + { + if (resultRef->next) + errAbort("jsonQuerySingle: expected single result but got %d results", slCount(resultRef)); + resEl = resultRef->val; + } +if (lm == NULL) + freeMem(resultRef); +return resEl; +} + +char *jsonQueryString(struct jsonElement *el, char *name, char *path, struct lm *lm) +/* Alloc & return the string value at the end of path in el. May be NULL. */ +{ +struct jsonElement *resEl = querySingle(el, name, path, lm); +if (resEl == NULL) + return NULL; +else if (lm) + return lmCloneString(lm, jsonStringVal(resEl, path)); +else + return cloneString(jsonStringVal(resEl, path)); +} + +long jsonQueryInt(struct jsonElement *el, char *name, char *path, long defaultVal, struct lm *lm) +/* Return the int value at path in el, or defaultVal if not found. */ +{ +struct jsonElement *resEl = querySingle(el, name, path, lm); +long result = resEl ? jsonNumberVal(resEl, path) : defaultVal; +if (lm == NULL) + freeMem(resEl); +return result; +} + +boolean jsonQueryBoolean(struct jsonElement *el, char *name, char *path, boolean defaultVal, + struct lm *lm) +/* Return the boolean value at path in el, or defaultVal if not found. */ +{ +struct jsonElement *resEl = querySingle(el, name, path, lm); +boolean result = resEl ? jsonBooleanVal(resEl, path) : defaultVal; +if (lm == NULL) + freeMem(resEl); +return result; +} + +struct slName *jsonQueryStringList(struct slRef *inList, char *name, char *path, struct lm *lm) +/* Alloc & return a list of string values matching path in all elements of inList. May be NULL. */ +{ +struct slName *results = NULL; +struct slRef *resultRefs = jsonQueryElementList(inList, name, path, lm); +struct slRef *ref; +for (ref = resultRefs; ref != NULL; ref = ref->next) + { + struct jsonElement *resEl = ref->val; + char *string = jsonStringVal(resEl, path); + struct slName *sln = lm ? lmSlName(lm, string) : slNameNew(string); + slAddHead(&results, sln); + } +slReverse(&results); +if (lm == NULL && resultRefs) + slFreeList(resultRefs); +return results; +} + +struct slInt *jsonQueryIntList(struct slRef *inList, char *name, char *path, struct lm *lm) +/* Alloc & return a list of int values matching path in all elements of inList. May be NULL. */ +{ +struct slInt *results = NULL; +struct slRef *resultRefs = jsonQueryElementList(inList, name, path, lm); +struct slRef *ref; +for (ref = resultRefs; ref != NULL; ref = ref->next) + { + struct jsonElement *resEl = ref->val; + int val = jsonNumberVal(resEl, path); + struct slInt *sli; + if (lm) + lmAllocVar(lm, sli) + else + AllocVar(sli); + sli->val = val; + slAddHead(&results, sli); + } +slReverse(&results); +if (lm == NULL && resultRefs) + slFreeList(resultRefs); +return results; +} + +struct slName *jsonQueryStrings(struct jsonElement *el, char *name, char *path, struct lm *lm) +/* Alloc & return a list of string values matching path in el. May be NULL. */ +{ +struct slRef elRef = { NULL, el }; +return jsonQueryStringList(&elRef, name, path, lm); +} + +struct slInt *jsonQueryInts(struct jsonElement *el, char *name, char *path, struct lm *lm) +/* Alloc & return a list of int values matching path in el. May be NULL. */ +{ +struct slRef elRef = { NULL, el }; +return jsonQueryIntList(&elRef, name, path, lm); +}