src/hg/hgPhyloPlace/phyloPlace.c 8441e4b8e776dea862c62a466f765c8cd94b1e3d

8441e4b8e776dea862c62a466f765c8cd94b1e3d
angie
  Mon Mar 1 00:55:04 2021 -0800
Add a TSV download file summarizing S (Spike) protein mutations, emphasizing antibody escape and Receptor Binding Domain mutations.  Don't show too many buttons at the top.  Try a little harder to find metadata for collapsed nodes in subtrees.

diff --git src/hg/hgPhyloPlace/phyloPlace.c src/hg/hgPhyloPlace/phyloPlace.c
index 7fc3fe5..184faff 100644
--- src/hg/hgPhyloPlace/phyloPlace.c
+++ src/hg/hgPhyloPlace/phyloPlace.c
@@ -144,31 +144,30 @@
                  filename, maxChoices);
             break;
             }
         struct dyString *dy = dyStringNew(0);
         addPathIfNecessary(dy, db, words[0]);
         treeChoices->protobufFiles[treeChoices->count] = cloneString(dy->string);
         addPathIfNecessary(dy, db, words[1]);
         treeChoices->metadataFiles[treeChoices->count] = cloneString(dy->string);
         treeChoices->sources[treeChoices->count] = cloneString(words[2]);
         // Description can be either a file or just some text.
         addPathIfNecessary(dy, db, words[3]);
         if (fileExists(dy->string))
             {
             char *desc = NULL;
             readInGulp(dy->string, &desc, NULL);
-            fprintf(stderr, "reading '%s' --> '%s'\n", dy->string, desc);
             treeChoices->descriptions[treeChoices->count] = desc;
             }
         else
             treeChoices->descriptions[treeChoices->count] = cloneString(words[3]);
         treeChoices->count++;
         dyStringFree(&dy);
         }
     lineFileClose(&lf);
     }
 return treeChoices;
 }
 
 static char *urlFromTn(struct tempName *tn)
 /* Make a full URL to a trash file that our net.c code will be able to follow, for when we can't
  * just leave it up to the user's web browser to do the right thing with "../". */
@@ -762,30 +761,37 @@
         met = hashFindVal(sampleMetadata, gbId);
     }
 if (!met)
     met = hashFindVal(sampleMetadata, sampleId);
 if (!met && strchr(sampleId, '|'))
     {
     char copy[strlen(sampleId)+1];
     safecpy(copy, sizeof copy, sampleId);
     char *words[4];
     int wordCount = chopString(copy, "|", words, ArraySize(words));
     if (isNotEmpty(words[0]))
         met = hashFindVal(sampleMetadata, words[0]);
     if (met == NULL && wordCount > 1 && isNotEmpty(words[1]))
         met = hashFindVal(sampleMetadata, words[1]);
     }
+// If it's one of our collapsed node names, dig out the example name and try that.
+if (!met && isdigit(sampleId[0]) && strstr(sampleId, "_from_"))
+    {
+    char *eg = strstr(sampleId, "_eg_");
+    if (eg)
+        met = hashFindVal(sampleMetadata, eg+strlen("_eg_"));
+    }
 return met;
 }
 
 static char *lineageForSample(struct hash *sampleMetadata, char *sampleId)
 /* Look up sampleId's lineage in epiToLineage file. Return NULL if we don't find a match. */
 {
 char *lineage = NULL;
 struct sampleMetadata *met = metadataForSample(sampleMetadata, sampleId);
 if (met)
     lineage = met->lineage;
 return lineage;
 }
 
 static void displayNearestNeighbors(struct mutationAnnotatedTree *bigTree, char *source,
                                     struct placementInfo *info, struct hash *sampleMetadata)
@@ -1613,63 +1619,231 @@
     }
 struct baseVal *bv;
 for (bv = imputedBases;  bv != NULL;  bv = bv->next)
     {
     struct singleNucChange *snc;
     AllocVar(snc);
     snc->chromStart = bv->chromStart;
     snc->refBase = '?';
     snc->newBase = bv->val[0];
     slAddHead(&sncList, snc);
     }
 slReverse(&sncList);
 return sncList;
 }
 
+struct aaMutInfo
+// A protein change, who has it, and how important we think it is.
+{
+    char *name;                 // The name that people are used to seeing, e.g. "E484K', "N501Y"
+    struct slName *sampleIds;   // The uploaded samples that have it
+    int priority;               // For sorting; lower number means scarier.
+    int pos;                    // 1-based position
+    char oldAa;                 // Reference AA
+    char newAa;                 // Alt AA
+};
+
+int aaMutInfoCmp(const void *a, const void *b)
+/* Compare aaMutInfo priority for sorting. */
+{
+const struct aaMutInfo *amiA = *(struct aaMutInfo * const *)a;
+const struct aaMutInfo *amiB = *(struct aaMutInfo * const *)b;
+return amiA->priority - amiB->priority;
+}
+
+// For now, hardcode SARS-CoV-2 Spike RBD coords and antibody escape positions (1-based).
+static int rbdStart = 319;
+static int rbdEnd = 541;
+static boolean *escapeMutPos = NULL;
+
+static void initEscapeMutPos()
+/* Allocate excapeMutPos and set positions implicated by at least a couple experiments from Bloom Lab
+ * or that appear in Whelan, Rappuoli or McCoy tracks. */
+{
+AllocArray(escapeMutPos, rbdEnd);
+escapeMutPos[332] = TRUE;
+escapeMutPos[334] = TRUE;
+escapeMutPos[335] = TRUE;
+escapeMutPos[337] = TRUE;
+escapeMutPos[339] = TRUE;
+escapeMutPos[340] = TRUE;
+escapeMutPos[345] = TRUE;
+escapeMutPos[346] = TRUE;
+escapeMutPos[348] = TRUE;
+escapeMutPos[352] = TRUE;
+escapeMutPos[357] = TRUE;
+escapeMutPos[359] = TRUE;
+escapeMutPos[361] = TRUE;
+escapeMutPos[362] = TRUE;
+escapeMutPos[363] = TRUE;
+escapeMutPos[365] = TRUE;
+escapeMutPos[366] = TRUE;
+escapeMutPos[367] = TRUE;
+escapeMutPos[369] = TRUE;
+escapeMutPos[370] = TRUE;
+escapeMutPos[371] = TRUE;
+escapeMutPos[372] = TRUE;
+escapeMutPos[373] = TRUE;
+escapeMutPos[374] = TRUE;
+escapeMutPos[376] = TRUE;
+escapeMutPos[378] = TRUE;
+escapeMutPos[383] = TRUE;
+escapeMutPos[384] = TRUE;
+escapeMutPos[385] = TRUE;
+escapeMutPos[386] = TRUE;
+escapeMutPos[408] = TRUE;
+escapeMutPos[417] = TRUE;
+escapeMutPos[441] = TRUE;
+escapeMutPos[444] = TRUE;
+escapeMutPos[445] = TRUE;
+escapeMutPos[445] = TRUE;
+escapeMutPos[447] = TRUE;
+escapeMutPos[449] = TRUE;
+escapeMutPos[450] = TRUE;
+escapeMutPos[452] = TRUE;
+escapeMutPos[455] = TRUE;
+escapeMutPos[456] = TRUE;
+escapeMutPos[458] = TRUE;
+escapeMutPos[472] = TRUE;
+escapeMutPos[473] = TRUE;
+escapeMutPos[474] = TRUE;
+escapeMutPos[476] = TRUE;
+escapeMutPos[477] = TRUE;
+escapeMutPos[478] = TRUE;
+escapeMutPos[479] = TRUE;
+escapeMutPos[483] = TRUE;
+escapeMutPos[484] = TRUE;
+escapeMutPos[485] = TRUE;
+escapeMutPos[486] = TRUE;
+escapeMutPos[487] = TRUE;
+escapeMutPos[489] = TRUE;
+escapeMutPos[490] = TRUE;
+escapeMutPos[494] = TRUE;
+escapeMutPos[499] = TRUE;
+escapeMutPos[504] = TRUE;
+escapeMutPos[514] = TRUE;
+escapeMutPos[517] = TRUE;
+escapeMutPos[522] = TRUE;
+escapeMutPos[525] = TRUE;
+escapeMutPos[526] = TRUE;
+escapeMutPos[527] = TRUE;
+escapeMutPos[528] = TRUE;
+escapeMutPos[529] = TRUE;
+escapeMutPos[530] = TRUE;
+escapeMutPos[531] = TRUE;
+}
+
+static int spikePriority(int pos, char newAa)
+/* Lower number for scarier spike mutation, per spike mutations track / RBD. */
+{
+if (escapeMutPos == NULL)
+    initEscapeMutPos();
+int priority = 0;
+if (pos >= rbdStart && pos <= rbdEnd)
+    {
+    // Receptor binding domain
+    priority = 100;
+    // Antibody escape mutation in Variant of Concern/Interest
+    if (pos == 484)
+        priority = 10;
+    else if (pos == 501)
+        priority = 15;
+    else if (pos == 452)
+        priority = 20;
+    // Other antibody escape mutations
+    else if (pos == 439 || pos == 477)
+        priority = 25;
+    else if (escapeMutPos[pos])
+        priority = 50;
+    }
+else if (pos >= 675 && pos <= 681)
+    // Furin cleavage site; circumstantial evidence for Q677{H,P} spread in US.
+    // Interesting threads on SPHERES 2021-02-26 about P681H tradeoff between infectivity vs
+    // range of cell types that can be infected and other observations about that region.
+    priority = 110;
+else if (pos == 614 && newAa == 'G')
+    // Old hat
+    priority = 1000;
+else
+    // Somewhere else in Spike
+    priority = 500;
+return priority;
+}
+
+static void addSpikeChange(struct hash *spikeChanges, char *aaMutStr, char *sampleId)
+/* Tally up an observance of a S gene change in a sample. */
+{
+// Parse oldAa, pos, newAA out of aaMutStr.  Need a more elegant way of doing this;
+// this is a rush job to get something usable out there asap.
+char oldAa = aaMutStr[0];
+int pos = atoi(aaMutStr+1);
+char newAa = aaMutStr[strlen(aaMutStr)-1];
+struct hashEl *hel = hashLookup(spikeChanges, aaMutStr);
+if (hel == NULL)
+    {
+    struct aaMutInfo *ami;
+    AllocVar(ami);
+    ami->name = cloneString(aaMutStr);
+    slNameAddHead(&ami->sampleIds, sampleId);
+    ami->priority = spikePriority(pos, newAa);
+    ami->pos = pos;
+    ami->oldAa = oldAa;
+    ami->newAa = newAa;
+    hashAdd(spikeChanges, aaMutStr, ami);
+    }
+else
+    {
+    struct aaMutInfo *ami = hel->val;
+    slNameAddHead(&ami->sampleIds, sampleId);
+    }
+}
+
 static void writeOneTsvRow(FILE *f, char *sampleId, struct usherResults *results,
                            struct hash *seqInfoHash, struct geneInfo *geneInfoList,
-                           struct seqWindow *gSeqWin)
-/* Write one row of tab-separate summary for sampleId. */
+                           struct seqWindow *gSeqWin, struct hash *spikeChanges)
+/* Write one row of tab-separate summary for sampleId.  Accumulate S gene AA change info. */
 {
     struct placementInfo *info = hashFindVal(results->samplePlacements, sampleId);
     // sample name / ID
     fprintf(f, "%s\t", sampleId);
     // nucleotide mutations
     struct slName *mut;
     for (mut = info->sampleMuts;  mut != NULL;  mut = mut->next)
         {
         if (mut != info->sampleMuts)
             fputc(',', f);
         fputs(mut->name, f);
         }
     fputc('\t', f);
     // AA mutations
     struct singleNucChange *sncList = sncListFromSampleMutsAndImputed(info->sampleMuts,
                                                                       info->imputedBases);
     struct slPair *geneAaMutations = getAaMutations(sncList, geneInfoList, gSeqWin);
     struct slPair *geneAaMut;
     boolean first = TRUE;
     for (geneAaMut = geneAaMutations;  geneAaMut != NULL;  geneAaMut = geneAaMut->next)
         {
         struct slName *aaMut;
         for (aaMut = geneAaMut->val;  aaMut != NULL;  aaMut = aaMut->next)
             {
             if (first)
                 first = FALSE;
             else
                 fputc(',', f);
             fprintf(f, "%s:%s", geneAaMut->name, aaMut->name);
+            if (sameString(geneAaMut->name, "S"))
+                addSpikeChange(spikeChanges, aaMut->name, sampleId);
             }
         }
     fputc('\t', f);
     // imputed bases (if any)
     struct baseVal *bv;
     for (bv = info->imputedBases;  bv != NULL;  bv = bv->next)
         {
         if (bv != info->imputedBases)
             fputc(',', f);
         fprintf(f, "%d%s", bv->chromStart+1, bv->val);
         }
     fputc('\t', f);
     // path through tree to sample
     printVariantPathNoNodeNames(f, info->variantPath);
     // number of equally parsimonious placements
@@ -1679,110 +1853,154 @@
     struct seqInfo *si = hashFindVal(seqInfoHash, sampleId);
     if (si)
         {
         if (si->psl)
             {
             // length
             fprintf(f, "\t%d", si->seq->size);
             struct psl *psl = si->psl;
             // aligned bases, indel counts & ranges
             int aliCount = psl->match + psl->misMatch + psl->repMatch;
             fprintf(f, "\t%d\t%d\t%s\t%d\t%s",
                     aliCount, si->insBases, emptyForNull(si->insRanges),
                     si->delBases, emptyForNull(si->delRanges));
             }
         else
-            fprintf(f, "\t\t\t\t\t\t");
+            fprintf(f, "\tn/a\tn/a\tn/a\tn/a\tn/a\tn/a");
         // SNVs that were masked (Problematic Sites track), not used in placement
         fputc('\t', f);
         struct singleNucChange *snc;
         for (snc = si->maskedSncList;  snc != NULL;  snc = snc->next)
             {
             if (snc != si->maskedSncList)
                 fputc(',', f);
             fprintf(f, "%c%d%c", snc->refBase, snc->chromStart+1, snc->newBase);
             }
         }
     else
         {
         warn("writeOneTsvRow: no sequenceInfo for sample '%s'", sampleId);
-        fprintf(f, "\t\t\t\t\t\t\t");
+        fprintf(f, "\tn/a\tn/a\tn/a\tn/a\tn/a\tn/a\tn/a");
         }
     fputc('\n', f);
 }
 
 static void rWriteTsvSummaryTreeOrder(struct phyloTree *node, FILE *f, struct usherResults *results,
                                       struct hash *seqInfoHash, struct geneInfo *geneInfoList,
-                                      struct seqWindow *gSeqWin)
+                                      struct seqWindow *gSeqWin, struct hash *spikeChanges)
 /* As we encounter leaves (user-uploaded samples) in depth-first search order, write out a line
  * of TSV summary for each one. */
 {
 if (node->numEdges)
     {
     int i;
     for (i = 0;  i < node->numEdges;  i++)
-        rWriteTsvSummaryTreeOrder(node->edges[i], f, results, seqInfoHash, geneInfoList, gSeqWin);
+        rWriteTsvSummaryTreeOrder(node->edges[i], f, results, seqInfoHash, geneInfoList, gSeqWin,
+                                  spikeChanges);
     }
 else
     {
-    writeOneTsvRow(f, node->ident->name, results, seqInfoHash, geneInfoList, gSeqWin);
+    writeOneTsvRow(f, node->ident->name, results, seqInfoHash, geneInfoList, gSeqWin, spikeChanges);
     }
 }
 
 
 static struct hash *hashFromSeqInfoList(struct seqInfo *seqInfoList)
 /* Hash sequence name to seqInfo for quick lookup. */
 {
 struct hash *hash = hashNew(0);
 struct seqInfo *si;
 for (si = seqInfoList;  si != NULL;  si = si->next)
     hashAdd(hash, si->seq->name, si);
 return hash;
 }
 
 static struct tempName *writeTsvSummary(struct usherResults *results, struct phyloTree *sampleTree,
                                         struct slName *sampleIds, struct seqInfo *seqInfoList,
                                         struct geneInfo *geneInfoList, struct seqWindow *gSeqWin,
-                                        int *pStartTime)
+                                        struct hash *spikeChanges, int *pStartTime)
 /* Write a tab-separated summary file for download.  If the user uploaded enough samples to make
- * a tree, then write out samples in tree order; otherwise use sampleIds list. */
+ * a tree, then write out samples in tree order; otherwise use sampleIds list.
+ * Accumulate S gene changes. */
 {
 struct tempName *tsvTn = NULL;
 AllocVar(tsvTn);
-trashDirFile(tsvTn, "ct", "usher", ".tsv");
+trashDirFile(tsvTn, "ct", "usher_samples", ".tsv");
 FILE *f = mustOpen(tsvTn->forCgi, "w");
 fprintf(f, "name\tnuc_mutations\taa_mutations\timputed_bases\tmutation_path"
         "\tplacement_count\tparsimony_score_increase\tlength\taligned_bases"
         "\tins_bases\tins_ranges\tdel_bases\tdel_ranges\tmasked_mutations"
         "\n");
 struct hash *seqInfoHash = hashFromSeqInfoList(seqInfoList);
 if (sampleTree)
     {
-    rWriteTsvSummaryTreeOrder(sampleTree, f, results, seqInfoHash, geneInfoList, gSeqWin);
+    rWriteTsvSummaryTreeOrder(sampleTree, f, results, seqInfoHash, geneInfoList, gSeqWin,
+                              spikeChanges);
     }
 else
     {
     struct slName *sample;
     for (sample = sampleIds;  sample != NULL;  sample = sample->next)
-        writeOneTsvRow(f, sample->name, results, seqInfoHash, geneInfoList, gSeqWin);
+        writeOneTsvRow(f, sample->name, results, seqInfoHash, geneInfoList, gSeqWin, spikeChanges);
     }
 carefulClose(&f);
 hashFree(&seqInfoHash);
 reportTiming(pStartTime, "write tsv summary");
 return tsvTn;
 }
 
+static struct tempName *writeSpikeChangeSummary(struct hash *spikeChanges, int totalSampleCount)
+/* Write a tab-separated summary of S (Spike) gene changes for download. */
+{
+struct tempName *tsvTn = NULL;
+AllocVar(tsvTn);
+trashDirFile(tsvTn, "ct", "usher_S_muts", ".tsv");
+FILE *f = mustOpen(tsvTn->forCgi, "w");
+fprintf(f, "aa_mutation\tsample_count\tsample_frequency\tsample_ids"
+        "\n");
+struct aaMutInfo *sChanges[spikeChanges->elCount];
+struct hashCookie cookie = hashFirst(spikeChanges);
+int ix = 0;
+struct hashEl *hel;
+while ((hel = hashNext(&cookie)) != NULL)
+    {
+    if (ix >= spikeChanges->elCount)
+        errAbort("writeSpikeChangeSummary: hash elCount is %d but got more elements",
+                 spikeChanges->elCount);
+    sChanges[ix++] = hel->val;
+    }
+if (ix != spikeChanges->elCount)
+    errAbort("writeSpikeChangeSummary: hash elCount is %d but got fewer elements (%d)",
+             spikeChanges->elCount, ix);
+qsort(sChanges, ix, sizeof(sChanges[0]), aaMutInfoCmp);
+for (ix = 0;  ix < spikeChanges->elCount;  ix++)
+    {
+    struct aaMutInfo *ami = sChanges[ix];
+    int sampleCount = slCount(ami->sampleIds);
+    fprintf(f, "S:%s\t%d\t%f",
+            ami->name, sampleCount, (double)sampleCount / (double)totalSampleCount);
+    slReverse(&ami->sampleIds);
+    fprintf(f, "\t%s", ami->sampleIds->name);
+    struct slName *sample;
+    for (sample = ami->sampleIds->next;  sample != NULL;  sample = sample->next)
+        fprintf(f, ",%s", sample->name);
+    fputc('\n', f);
+    }
+carefulClose(&f);
+return tsvTn;
+}
+
 static struct slName **getProblematicSites(char *db)
 /* If config.ra specfies maskFile them return array of lists (usually NULL) of reasons that
  * masking is recommended, one per position in genome; otherwise return NULL. */
 {
 struct slName **pSites = NULL;
 char *pSitesFile = phyloPlaceDbSettingPath(db, "maskFile");
 if (isNotEmpty(pSitesFile) && fileExists(pSitesFile))
     {
     AllocArray(pSites, chromSize);
     struct bbiFile *bbi = bigBedFileOpen(pSitesFile);
     struct lm *lm = lmInit(0);
     struct bigBedInterval *bb, *bbList = bigBedIntervalQuery(bbi, chrom, 0, chromSize, 0, lm);
     for (bb = bbList;  bb != NULL;  bb = bb->next)
         {
         char *extra = bb->rest;
@@ -1924,31 +2142,33 @@
         AllocVar(singleSubtreeJsonTn);
         trashDirFile(singleSubtreeJsonTn, "ct", "singleSubtreeAuspice", ".json");
         treeToAuspiceJson(results->singleSubtreeInfo, db, geneInfoList, gSeqWin, sampleMetadata,
                           singleSubtreeJsonTn->forCgi, source);
         struct tempName *jsonTns[subtreeCount];
         struct subtreeInfo *ti;
         int ix;
         for (ix = 0, ti = results->subtreeInfoList;  ti != NULL;  ti = ti->next, ix++)
             {
             AllocVar(jsonTns[ix]);
             trashDirFile(jsonTns[ix], "ct", "subtreeAuspice", ".json");
             treeToAuspiceJson(ti, db, geneInfoList, gSeqWin, sampleMetadata, jsonTns[ix]->forCgi,
                               source);
             }
         puts("<p></p>");
-        int subtreeButtonCount = (seqCount <= MAX_SEQ_DETAILS) ? subtreeCount : 0;
+        int subtreeButtonCount = subtreeCount;
+        if (seqCount > MAX_SEQ_DETAILS || subtreeCount > MAX_SUBTREE_BUTTONS)
+            subtreeButtonCount = 0;
         makeButtonRow(singleSubtreeJsonTn, jsonTns, subtreeButtonCount, isFasta);
         printf("<p>If you have metadata you wish to display, click a 'view subtree in "
                "Nextstrain' button, and then you can drag on a CSV file to "
                "<a href='"NEXTSTRAIN_DRAG_DROP_DOC"' target=_blank>add it to the tree view</a>."
                "</p>\n");
         if (seqCount <= MAX_SEQ_DETAILS)
             {
             summarizeSequences(seqInfoList, isFasta, results, jsonTns, sampleMetadata, bigTree,
                                refGenome);
             reportTiming(&startTime, "write summary table (including reading in lineages)");
             for (ix = 0, ti = results->subtreeInfoList;  ti != NULL;  ti = ti->next, ix++)
                 {
                 int subtreeUserSampleCount = slCount(ti->subtreeUserSampleIds);
                 printf("<h3>Subtree %d: ", ix+1);
                 if (subtreeUserSampleCount > 1)
@@ -1965,41 +2185,45 @@
                                          subtree, sampleMetadata, bigTree, source);
                 }
             reportTiming(&startTime, "describe placements");
             }
         else
             printf("<p>(Skipping details and subtrees; "
                    "you uploaded %d sequences, and details/subtrees are shown only when "
                    "you upload at most %d sequences.)</p>\n",
                    seqCount, MAX_SEQ_DETAILS);
 
         // Make custom tracks for uploaded samples and subtree(s).
         struct phyloTree *sampleTree = NULL;
         struct tempName *ctTn = writeCustomTracks(vcfTn, results, sampleIds, bigTree->tree,
                                                   source, fontHeight, &sampleTree, &startTime);
 
-        // Make a TSV summary file
+        // Make a sample summary TSV file and accumulate S gene changes
+        struct hash *spikeChanges = hashNew(0);
         struct tempName *tsvTn = writeTsvSummary(results, sampleTree, sampleIds, seqInfoList,
-                                                 geneInfoList, gSeqWin, &startTime);
+                                                 geneInfoList, gSeqWin, spikeChanges, &startTime);
+        struct tempName *sTsvTn = writeSpikeChangeSummary(spikeChanges, slCount(sampleIds));
 
         // Offer big tree w/new samples for download
         puts("<h3>Downloads</h3>");
         puts("<ul>");
         printf("<li><a href='%s' download>SARS-CoV-2 phylogenetic tree "
                "with your samples (Newick file)</a>\n", results->bigTreePlusTn->forHtml);
         printf("<li><a href='%s' download>TSV summary of sequences and placements</a>\n",
                tsvTn->forHtml);
+        printf("<li><a href='%s' download>TSV summary of S (Spike) gene changes</a>\n",
+               sTsvTn->forHtml);
         for (ix = 0, ti = results->subtreeInfoList;  ti != NULL;  ti = ti->next, ix++)
            {
             int subtreeUserSampleCount = slCount(ti->subtreeUserSampleIds);
             printf("<li><a href='%s' download>Subtree with %s", ti->subtreeTn->forHtml,
                    ti->subtreeUserSampleIds->name);
             if (subtreeUserSampleCount > 10)
                 printf(" and %d other samples", subtreeUserSampleCount - 1);
             else
                 {
                 struct slName *sln;
                 for (sln = ti->subtreeUserSampleIds->next;  sln != NULL;  sln = sln->next)
                     printf(", %s", sln->name);
                 }
             puts(" (Newick file)</a>");
             printf("<li><a href='%s' download>Auspice JSON for subtree with %s",