2a8b200a7ce182a27d6f090fe0729e089d3cf9b0 angie Mon Nov 4 11:31:48 2024 -0800 Store Auspice JSON files gzip-compressed (.json --> .json.gz). Nextstrain display requires an Apache config change to serve .json with 'Content-Encoding: gzip' header instead of .json.gz. diff --git src/hg/hgPhyloPlace/treeToAuspiceJson.c src/hg/hgPhyloPlace/treeToAuspiceJson.c index 97c74ff..63db574 100644 --- src/hg/hgPhyloPlace/treeToAuspiceJson.c +++ src/hg/hgPhyloPlace/treeToAuspiceJson.c @@ -2,30 +2,31 @@ * mutations, protein changes and metadata. */ /* Copyright (C) 2020-2024 The Regents of the University of California */ #include "common.h" #include "dnaseq.h" #include "errCatch.h" #include "hash.h" #include "hui.h" #include "jsonWrite.h" #include "linefile.h" #include "obscure.h" #include "parsimonyProto.h" #include "phyloPlace.h" #include "phyloTree.h" +#include "pipeline.h" #include "variantProjector.h" static void auspiceMetaColoringCategoricalStart(struct jsonWrite *jw, char *key, char *title) /* Write key, title and type of a "categorical" coloring spec, but leave it open in case a * scale list needs to be added. */ { jsonWriteObjectStart(jw, NULL); jsonWriteString(jw, "key", key); jsonWriteString(jw, "title", title); jsonWriteString(jw, "type", "categorical"); } static void auspiceMetaColoringCategoricalEnd(struct jsonWrite *jw) /* Close out a coloring spec that was opened with auspiceMetaColoringCategoricalStart. */ @@ -910,57 +911,74 @@ { struct hash *anchorSamples = NULL; char *anchorFile = phyloPlaceRefSettingPath(org, db, "anchorSamples"); if (anchorFile && fileExists(anchorFile)) { anchorSamples = hashNew(0); struct lineFile *lf = lineFileOpen(anchorFile, TRUE); char *line; while (lineFileNextReal(lf, &line)) hashAddInt(anchorSamples, line, 1); lineFileClose(&lf); } return anchorSamples; } +static void dumpTextMaybeGzip(char *fileName, char *text) +/* Write the NUL-terminated string text to fileName. If fileName ends with ".gz" then the text is written with fputs to a pipeline that runs "gzip -c" and was opened for writing on fileName, so the file gets gzip-compressed output; otherwise the file is opened with mustOpen in "w" mode and the text is written plain. 
*/ +{ +if (endsWith(fileName, ".gz")) + { + static char *gzipCmd[] = {"gzip", "-c", NULL}; /* NULL-terminated argument vector for the compressor */ + struct pipeline *gzipPl = pipelineOpen1(gzipCmd, pipelineWrite, fileName, NULL, 0); /* NOTE(review): fileName is passed as the pipeline's other end; meaning of the trailing NULL and 0 arguments is not visible here - confirm in pipeline.h */ + FILE *outF = pipelineFile(gzipPl); + fputs(text, outF); + pipelineClose(&gzipPl); /* NOTE(review): presumably flushes outF and waits for gzip to exit - confirm in pipeline.h */ + } +else + { + FILE *outF = mustOpen(fileName, "w"); + fputs(text, outF); + carefulClose(&outF); + } +} + void treeToAuspiceJson(struct subtreeInfo *sti, char *org, char *db, struct geneInfo *geneInfoList, struct seqWindow *gSeqWin, struct sampleMetadataStore *sampleMetadata, struct hash *sampleUrls, struct hash *samplePlacements, char *jsonFile, char *source) /* Write JSON for tree in Nextstrain's Augur/Auspice V2 JSON format * (https://github.com/nextstrain/augur/blob/master/augur/data/schema-export-v2.json). */ { struct phyloTree *tree = sti->subtree; -FILE *outF = mustOpen(jsonFile, "w"); struct jsonWrite *jw = jsonWriteNew(); jsonWriteObjectStart(jw, NULL); jsonWriteString(jw, "version", "v2"); boolean isRsv = (stringIn("GCF_000855545", db) || stringIn("GCF_002815475", db) || startsWith("RGCC", db)); boolean isFlu = (stringIn("GCF_000865085", db) || stringIn("GCF_001343785", db)); writeAuspiceMeta(jw, sti->subtreeUserSampleIds, source, org, db, geneInfoList, gSeqWin->end, isRsv, isFlu); jsonWriteObjectStart(jw, "tree"); int nodeNum = 10000; // Auspice.us starting node number for newick -> json int depth = 0; // Hash names in setting anchorSamples if found struct hash *anchorSamples = getAnchorSamples(org, db); // Add an extra root node because otherwise Auspice won't draw branch from big tree root to subtree struct phyloTree *root = phyloTreeNewNode("wrapper"); phyloAddEdge(root, tree); tree = root; struct auspiceJsonInfo aji = { jw, sti->subtreeUserSampleIds, geneInfoList, gSeqWin, sampleMetadata, sampleUrls, samplePlacements, anchorSamples, nodeNum, source }; char **branchAttrCols = NULL; int branchAttrCount = getBranchAttrCols(org, db, &branchAttrCols); rTreeToAuspiceJson(tree, depth, &aji, NULL, 
isRsv, branchAttrCount, branchAttrCols, NULL); jsonWriteObjectEnd(jw); // tree jsonWriteObjectEnd(jw); // top-level object -fputs(jw->dy->string, outF); +dumpTextMaybeGzip(jsonFile, jw->dy->string); jsonWriteFree(&jw); -carefulClose(&outF); }