b3d2a3a1d7e096b0f97a2a5295d4fa610336de04 angie Fri Oct 12 11:02:37 2018 -0700 Adding jsonQuery, which extracts values from a jsonElement tree using a simple path notation e.g. "journals[publisher=Nature].articles[*].author[0].lastName". aside: slFreeList doesn't check for NULL. I think it should, but it's used in a zillion places so for now I'll just check before calling. diff --git src/hg/utils/jsonQuery/jsonQuery.c src/hg/utils/jsonQuery/jsonQuery.c new file mode 100644 index 0000000..8c2f81c --- /dev/null +++ src/hg/utils/jsonQuery/jsonQuery.c @@ -0,0 +1,103 @@ +/* jsonQuery - Use a path syntax to retrieve elements/values from each line of JSON input. */ +#include "common.h" +#include "jsHelper.h" +#include "jsonQuery.h" +#include "linefile.h" +#include "obscure.h" +#include "options.h" + +void usage() +/* Explain usage and exit. */ +{ +errAbort( + "jsonQuery - Use a path syntax to retrieve elements/values from each line of JSON input\n" + "usage:\n" + " jsonQuery input.json path output.js\n" + "options:\n" + " -uniq Print out unique values as they are found (instead of all values)\n" + " -countUniq Print out unique values and the number of times each occurs at end of input\n" + "\n" + "Except for -uniq and -countUniq modes, objects and lists in output are pretty-printed\n" + "with newlines, so they are JavaScript values not JSON.\n" + ); +} + +/* Command line validation table. */ +static struct optionSpec options[] = { + { "uniq", OPTION_BOOLEAN }, + { "countUniq", OPTION_BOOLEAN }, + { NULL, 0 }, +}; + +void jsonQuery(char *inFile, char *path, char *outFile) +/* jsonQuery - Use a path syntax to retrieve elements/values from each line of JSON input. */ +{ +struct lineFile *lf = lineFileOpen(inFile, TRUE); +struct hash *uniqHash = NULL; +boolean countUniq = optionExists("countUniq"); +boolean uniq = optionExists("uniq") || countUniq; +if (uniq) + uniqHash = hashNew(0); +struct dyString *dy = dyStringNew(0); +FILE *outF = mustOpen(outFile, "w"); +char *line; +while (lineFileNextReal(lf, &line)) + { + struct lm *lm = lmInit(1<<16); + struct jsonElement *topEl = jsonParseLm(line, lm); + struct slRef topRef; + topRef.next = NULL; + topRef.val = topEl; + char desc[1024]; + safef(desc, sizeof desc, "line %d of %s", lf->lineIx, inFile); + struct slRef *results = jsonQueryElementList(&topRef, desc, path, lm); + struct slRef *result; + for (result = results; result != NULL; result = result->next) + { + struct jsonElement *el = result->val; + if (uniq) + { + dyStringClear(dy); + jsonDyStringPrint(dy, el, NULL, -1); + char *elStr = dy->string; + int count = hashIntValDefault(uniqHash, elStr, 0); + if (count < 1) + { + hashAddInt(uniqHash, elStr, 1); + verbose(2, "line %d: %s\n", lf->lineIx, elStr); + if (!countUniq) + { + fprintf(outF, "%s\n", elStr); + fflush(outF); + } + } + else + hashIncInt(uniqHash, elStr); + } + else + jsonPrintToFile(el, NULL, outF, 2); + } + lmCleanup(&lm); + } +lineFileClose(&lf); +if (countUniq) + { + struct hashEl *hel; + struct hashCookie cookie = hashFirst(uniqHash); + while ((hel = hashNext(&cookie)) != NULL) + { + fprintf(outF, "%10d %s\n", ptToInt(hel->val), hel->name); + } + } +carefulClose(&outF); +} + +int main(int argc, char *argv[]) +/* Process command line. */ +{ +optionInit(&argc, argv, options); +if (argc != 4) + usage(); +jsonQuery(argv[1], argv[2], argv[3]); +return 0; +}