e267c9ec78e654975dbcd0bb8d5b4bf393187269
angie
  Thu Mar 4 15:39:32 2021 -0800
Add ZIP file of subtree JSON & Newick files for download.  Add subtree numbers to the filenames so it's not all trashDir soup.

diff --git src/hg/hgPhyloPlace/phyloPlace.c src/hg/hgPhyloPlace/phyloPlace.c
index cf2966e..ea6177b 100644
--- src/hg/hgPhyloPlace/phyloPlace.c
+++ src/hg/hgPhyloPlace/phyloPlace.c
@@ -8,30 +8,31 @@
 #include "errCatch.h"
 #include "fa.h"
 #include "genePred.h"
 #include "hCommon.h"
 #include "hash.h"
 #include "hgConfig.h"
 #include "htmshell.h"
 #include "hui.h"
 #include "iupac.h"
 #include "jsHelper.h"
 #include "linefile.h"
 #include "obscure.h"
 #include "parsimonyProto.h"
 #include "phyloPlace.h"
 #include "phyloTree.h"
+#include "pipeline.h"
 #include "psl.h"
 #include "ra.h"
 #include "regexHelper.h"
 #include "trashDir.h"
 #include "vcf.h"
 
 // Globals:
 static boolean measureTiming = FALSE;
 
 // wuhCor1-specific:
 char *chrom = "NC_045512v2";
 int chromSize = 29903;
 
 // Parameter constants:
 int maxGenotypes = 100;        // Upper limit on number of samples user can upload at once.
@@ -1977,62 +1978,94 @@
     struct aaMutInfo *ami = sChanges[ix];
     int sampleCount = slCount(ami->sampleIds);
     fprintf(f, "S:%s\t%d\t%f",
             ami->name, sampleCount, (double)sampleCount / (double)totalSampleCount);
     slReverse(&ami->sampleIds);
     fprintf(f, "\t%s", ami->sampleIds->name);
     struct slName *sample;
     for (sample = ami->sampleIds->next;  sample != NULL;  sample = sample->next)
         fprintf(f, ",%s", sample->name);
     fputc('\n', f);
     }
 carefulClose(&f);
 return tsvTn;
 }
 
+static struct tempName *makeSubtreeZipFile(struct usherResults *results, struct tempName *jsonTns[],
+                                           struct tempName *singleSubtreeJsonTn, int *pStartTime)
+/* Run zip -j to bundle the single-subtree and every per-subtree JSON and Newick file into one
+ * archive so user doesn't have to click on each one.  Returns the archive's new tempName. */
+{
+struct tempName *zipTn;
+AllocVar(zipTn);
+trashDirFile(zipTn, "ct", "usher_subtrees", ".zip");
+int subtreeCount = slCount(results->subtreeInfoList);
+char *cmd[10 + 2*(subtreeCount+1)];  // 6 + 2*subtreeCount slots get used below; rest is slack
+char **cmds[] = { cmd, NULL };  // one command only, NULL-terminated list for pipelineOpen
+int cIx = 0, sIx = 0;
+cmd[cIx++] = "zip";
+cmd[cIx++] = "-j";  // Info-ZIP: junk directory paths, store only the file names
+cmd[cIx++] = zipTn->forCgi;  // archive to create
+cmd[cIx++] = singleSubtreeJsonTn->forCgi;
+cmd[cIx++] = results->singleSubtreeInfo->subtreeTn->forCgi;
+struct subtreeInfo *ti;
+for (ti = results->subtreeInfoList;  ti != NULL;  ti = ti->next, sIx++)  // jsonTns[sIx] <-> ti
+    {
+    cmd[cIx++] = jsonTns[sIx]->forCgi;
+    cmd[cIx++] = ti->subtreeTn->forCgi;
+    }
+cmd[cIx++] = NULL;  // argv terminator
+struct pipeline *pl = pipelineOpen(cmds, pipelineRead, NULL, NULL);  // NOTE(review): never read
+pipelineClose(&pl);  // presumably waits for zip to exit -- TODO confirm in pipeline.h
+reportTiming(pStartTime, "make subtree zipfile");
+return zipTn;
+}
+
 static struct slName **getProblematicSites(char *db)
 /* If config.ra specfies maskFile them return array of lists (usually NULL) of reasons that
  * masking is recommended, one per position in genome; otherwise return NULL. */
 {
 struct slName **pSites = NULL;
 char *pSitesFile = phyloPlaceDbSettingPath(db, "maskFile");
 if (isNotEmpty(pSitesFile) && fileExists(pSitesFile))
     {
     AllocArray(pSites, chromSize);
     struct bbiFile *bbi = bigBedFileOpen(pSitesFile);
     struct lm *lm = lmInit(0);
     struct bigBedInterval *bb, *bbList = bigBedIntervalQuery(bbi, chrom, 0, chromSize, 0, lm);
     for (bb = bbList;  bb != NULL;  bb = bb->next)
         {
         char *extra = bb->rest;
         char *reason = nextWord(&extra);
         int i;
         for (i = bb->start;  i < bb->end;  i++)
             slNameAddHead(&pSites[i], reason);
         }
     bigBedFileClose(&bbi);
     }
 return pSites;
 }
 
-static void downloadsRow(char *treeFile, char *sampleSummaryFile, char *spikeSummaryFile)
+static void downloadsRow(char *treeFile, char *sampleSummaryFile, char *spikeSummaryFile,
+                         char *subtreeZipFile)  // each arg is printed unescaped into href='...'
 /* Make a row of quick download file links, to appear between the button row & big summary table. */
 {
 printf("<p><b>Downloads:</b> | ");
 printf("<a href='%s' download>Global phylogenetic tree with your sequences</a> | ", treeFile);
 printf("<a href='%s' download>TSV summary of sequences and placements</a> | ", sampleSummaryFile);
 printf("<a href='%s' download>TSV summary of Spike mutations</a> | ", spikeSummaryFile);
+printf("<a href='%s' download>ZIP file of subtree JSON and Newick files</a> | ", subtreeZipFile);
 puts("</p>");
 }
 
 static int subTreeInfoUserSampleCmp(const void *pa, const void *pb)
 /* Compare subtreeInfo by number of user sample IDs (highest number first). */
 {
 struct subtreeInfo *tiA = *(struct subtreeInfo **)pa;
 struct subtreeInfo *tiB = *(struct subtreeInfo **)pb;
 return slCount(tiB->subtreeUserSampleIds) - slCount(tiA->subtreeUserSampleIds);
 }
 
 char *phyloPlaceSamples(struct lineFile *lf, char *db, char *defaultProtobuf,
                         boolean doMeasureTiming, int subtreeSize, int fontHeight)
 /* Given a lineFile that contains either FASTA or VCF, prepare VCF for usher;
  * if that goes well then run usher, report results, make custom track files
@@ -2147,55 +2180,60 @@
         char *bigGenePredFile = phyloPlaceDbSettingPath(db, "bigGenePredFile");
         struct geneInfo *geneInfoList = getGeneInfoList(bigGenePredFile, refGenome);
         struct seqWindow *gSeqWin = chromSeqWindowNew(db, chrom, 0, chromSize);
         struct hash *sampleMetadata = getSampleMetadata(metadataFile);
         struct tempName *singleSubtreeJsonTn;
         AllocVar(singleSubtreeJsonTn);
         trashDirFile(singleSubtreeJsonTn, "ct", "singleSubtreeAuspice", ".json");
         treeToAuspiceJson(results->singleSubtreeInfo, db, geneInfoList, gSeqWin, sampleMetadata,
                           singleSubtreeJsonTn->forCgi, source);
         struct tempName *jsonTns[subtreeCount];
         struct subtreeInfo *ti;
         int ix;
         for (ix = 0, ti = results->subtreeInfoList;  ti != NULL;  ti = ti->next, ix++)
             {
             AllocVar(jsonTns[ix]);
-            trashDirFile(jsonTns[ix], "ct", "subtreeAuspice", ".json");
+            char subtreeName[512];
+            safef(subtreeName, sizeof(subtreeName), "subtreeAuspice%d", ix+1);
+            trashDirFile(jsonTns[ix], "ct", subtreeName, ".json");
             treeToAuspiceJson(ti, db, geneInfoList, gSeqWin, sampleMetadata, jsonTns[ix]->forCgi,
                               source);
             }
         puts("<p></p>");
         int subtreeButtonCount = subtreeCount;
         if (seqCount > MAX_SEQ_DETAILS || subtreeCount > MAX_SUBTREE_BUTTONS)
             subtreeButtonCount = 0;
         makeButtonRow(singleSubtreeJsonTn, jsonTns, subtreeButtonCount, isFasta);
         printf("<p>If you have metadata you wish to display, click a 'view subtree in "
                "Nextstrain' button, and then you can drag on a CSV file to "
                "<a href='"NEXTSTRAIN_DRAG_DROP_DOC"' target=_blank>add it to the tree view</a>."
                "</p>\n");
 
         // Make custom tracks for uploaded samples and subtree(s).
         struct phyloTree *sampleTree = NULL;
         struct tempName *ctTn = writeCustomTracks(vcfTn, results, sampleIds, bigTree->tree,
                                                   source, fontHeight, &sampleTree, &startTime);
 
         // Make a sample summary TSV file and accumulate S gene changes
         struct hash *spikeChanges = hashNew(0);
         struct tempName *tsvTn = writeTsvSummary(results, sampleTree, sampleIds, seqInfoList,
                                                  geneInfoList, gSeqWin, spikeChanges, &startTime);
         struct tempName *sTsvTn = writeSpikeChangeSummary(spikeChanges, slCount(sampleIds));
-        downloadsRow(results->bigTreePlusTn->forHtml, tsvTn->forHtml, sTsvTn->forHtml);
+        struct tempName *zipTn = makeSubtreeZipFile(results, jsonTns, singleSubtreeJsonTn,
+                                                    &startTime);
+        downloadsRow(results->bigTreePlusTn->forHtml, tsvTn->forHtml, sTsvTn->forHtml,
+                     zipTn->forHtml);
 
         if (seqCount <= MAX_SEQ_DETAILS)
             {
             summarizeSequences(seqInfoList, isFasta, results, jsonTns, sampleMetadata, bigTree,
                                refGenome);
             reportTiming(&startTime, "write summary table (including reading in lineages)");
             for (ix = 0, ti = results->subtreeInfoList;  ti != NULL;  ti = ti->next, ix++)
                 {
                 int subtreeUserSampleCount = slCount(ti->subtreeUserSampleIds);
                 printf("<h3>Subtree %d: ", ix+1);
                 if (subtreeUserSampleCount > 1)
                     printf("%d related samples", subtreeUserSampleCount);
                 else if (subtreeCount > 1)
                     printf("Unrelated sample");
                 printf("</h3>\n");
@@ -2212,30 +2250,34 @@
         else
             printf("<p>(Skipping details and subtrees; "
                    "you uploaded %d sequences, and details/subtrees are shown only when "
                    "you upload at most %d sequences.)</p>\n",
                    seqCount, MAX_SEQ_DETAILS);
 
         // Offer big tree w/new samples for download
         puts("<h3>Downloads</h3>");
         puts("<ul>");
         printf("<li><a href='%s' download>SARS-CoV-2 phylogenetic tree "
                "with your samples (Newick file)</a>\n", results->bigTreePlusTn->forHtml);
         printf("<li><a href='%s' download>TSV summary of sequences and placements</a>\n",
                tsvTn->forHtml);
         printf("<li><a href='%s' download>TSV summary of S (Spike) gene changes</a>\n",
                sTsvTn->forHtml);
+        printf("<li><a href='%s' download>ZIP archive of subtree Newick and JSON files</a>\n",
+               zipTn->forHtml);
+        // For now, leave in the individual links so I don't break anybody's pipeline that's
+        // scraping this page...
         for (ix = 0, ti = results->subtreeInfoList;  ti != NULL;  ti = ti->next, ix++)
             {
             int subtreeUserSampleCount = slCount(ti->subtreeUserSampleIds);
             printf("<li><a href='%s' download>Subtree with %s", ti->subtreeTn->forHtml,
                    ti->subtreeUserSampleIds->name);
             if (subtreeUserSampleCount > 10)
                 printf(" and %d other samples", subtreeUserSampleCount - 1);
             else
                 {
                 struct slName *sln;
                 for (sln = ti->subtreeUserSampleIds->next;  sln != NULL;  sln = sln->next)
                     printf(", %s", sln->name);
                 }
             puts(" (Newick file)</a>");
             printf("<li><a href='%s' download>Auspice JSON for subtree with %s",