2a8b200a7ce182a27d6f090fe0729e089d3cf9b0
angie
  Mon Nov 4 11:31:48 2024 -0800
Store Auspice JSON files gzip-compressed (.json --> .json.gz).  Nextstrain display requires an Apache config change so that requests for .json are served from the stored .json.gz file with a 'Content-Encoding: gzip' header.

diff --git src/hg/hgPhyloPlace/treeToAuspiceJson.c src/hg/hgPhyloPlace/treeToAuspiceJson.c
index 97c74ff..63db574 100644
--- src/hg/hgPhyloPlace/treeToAuspiceJson.c
+++ src/hg/hgPhyloPlace/treeToAuspiceJson.c
@@ -2,30 +2,31 @@
  * mutations, protein changes and metadata. */
 
 /* Copyright (C) 2020-2024 The Regents of the University of California */
 
 #include "common.h"
 #include "dnaseq.h"
 #include "errCatch.h"
 #include "hash.h"
 #include "hui.h"
 #include "jsonWrite.h"
 #include "linefile.h"
 #include "obscure.h"
 #include "parsimonyProto.h"
 #include "phyloPlace.h"
 #include "phyloTree.h"
+#include "pipeline.h"
 #include "variantProjector.h"
 
 
 static void auspiceMetaColoringCategoricalStart(struct jsonWrite *jw, char *key, char *title)
 /* Write key, title and type of a "categorical" coloring spec, but leave it open in case a
  * scale list needs to be added. */
 {
 jsonWriteObjectStart(jw, NULL);
 jsonWriteString(jw, "key", key);
 jsonWriteString(jw, "title", title);
 jsonWriteString(jw, "type", "categorical");
 }
 
 static void auspiceMetaColoringCategoricalEnd(struct jsonWrite *jw)
 /* Close out a coloring spec that was opened with auspiceMetaColoringCategoricalStart. */
@@ -910,57 +911,74 @@
 {
 struct hash *anchorSamples = NULL;
 char *anchorFile = phyloPlaceRefSettingPath(org, db, "anchorSamples");
 if (anchorFile && fileExists(anchorFile))
     {
     anchorSamples = hashNew(0);
     struct lineFile *lf = lineFileOpen(anchorFile, TRUE);
     char *line;
     while (lineFileNextReal(lf, &line))
         hashAddInt(anchorSamples, line, 1);
     lineFileClose(&lf);
     }
 return anchorSamples;
 }
 
+static void dumpTextMaybeGzip(char *fileName, char *text)
+/* If fileName ends with ".gz" then write gzip-compressed output to file, otherwise plain. */
+{
+if (endsWith(fileName, ".gz"))
+    {
+    static char *gzipCmd[] = {"gzip", "-c", NULL};
+    struct pipeline *gzipPl = pipelineOpen1(gzipCmd, pipelineWrite, fileName, NULL, 0);
+    FILE *outF = pipelineFile(gzipPl);
+    fputs(text, outF);
+    pipelineClose(&gzipPl);
+    }
+else
+    {
+    FILE *outF = mustOpen(fileName, "w");
+    fputs(text, outF);
+    carefulClose(&outF);
+    }
+}
+
 void treeToAuspiceJson(struct subtreeInfo *sti, char *org, char *db, struct geneInfo *geneInfoList,
                        struct seqWindow *gSeqWin, struct sampleMetadataStore *sampleMetadata,
                        struct hash *sampleUrls, struct hash *samplePlacements,
                        char *jsonFile, char *source)
 /* Write JSON for tree in Nextstrain's Augur/Auspice V2 JSON format
  * (https://github.com/nextstrain/augur/blob/master/augur/data/schema-export-v2.json). */
 {
 struct phyloTree *tree = sti->subtree;
-FILE *outF = mustOpen(jsonFile, "w");
 struct jsonWrite *jw = jsonWriteNew();
 jsonWriteObjectStart(jw, NULL);
 jsonWriteString(jw, "version", "v2");
 boolean isRsv = (stringIn("GCF_000855545", db) || stringIn("GCF_002815475", db) ||
                  startsWith("RGCC", db));
 boolean isFlu = (stringIn("GCF_000865085", db) || stringIn("GCF_001343785", db));
 writeAuspiceMeta(jw, sti->subtreeUserSampleIds, source, org, db, geneInfoList,
                  gSeqWin->end, isRsv, isFlu);
 jsonWriteObjectStart(jw, "tree");
 int nodeNum = 10000; // Auspice.us starting node number for newick -> json
 int depth = 0;
 
 // Hash names in setting anchorSamples if found
 struct hash *anchorSamples = getAnchorSamples(org, db);
 
 // Add an extra root node because otherwise Auspice won't draw branch from big tree root to subtree
 struct phyloTree *root = phyloTreeNewNode("wrapper");
 phyloAddEdge(root, tree);
 tree = root;
 struct auspiceJsonInfo aji = { jw, sti->subtreeUserSampleIds, geneInfoList, gSeqWin,
                                sampleMetadata, sampleUrls, samplePlacements, anchorSamples,
                                nodeNum, source };
 
 char **branchAttrCols = NULL;
 int branchAttrCount = getBranchAttrCols(org, db, &branchAttrCols);
 rTreeToAuspiceJson(tree, depth, &aji, NULL, isRsv, branchAttrCount, branchAttrCols, NULL);
 jsonWriteObjectEnd(jw); // tree
 jsonWriteObjectEnd(jw); // top-level object
-fputs(jw->dy->string, outF);
+dumpTextMaybeGzip(jsonFile, jw->dy->string);
 jsonWriteFree(&jw);
-carefulClose(&outF);
 }