src/hg/hgPhyloPlace/phyloPlace.c cbf33d2dcdd87c41ee8e1afe0ff738c62422bf77

cbf33d2dcdd87c41ee8e1afe0ff738c62422bf77
angie
  Tue Mar 30 16:01:54 2021 -0700
Only search for nearest neighbor and neighboring lineage once; then use it in summary table, details output, and now also TSV output.  Also set parBase in sncList for TSV output (oops).

diff --git src/hg/hgPhyloPlace/phyloPlace.c src/hg/hgPhyloPlace/phyloPlace.c
index 0583fc3..3b61a80 100644
--- src/hg/hgPhyloPlace/phyloPlace.c
+++ src/hg/hgPhyloPlace/phyloPlace.c
@@ -520,90 +520,102 @@
 while (variantPath->next && isInternalNodeName(variantPath->next->nodeName, minNewNode))
     variantPath = variantPath->next;
 if (variantPath && isInternalNodeName(variantPath->nodeName, minNewNode))
     return variantPath;
 return NULL;
 }
 
 static int mutCountCmp(const void *a, const void *b)
 /* Compare number of mutations of phyloTree nodes a and b. */
 {
 const struct phyloTree *nodeA = *(struct phyloTree * const *)a;
 const struct phyloTree *nodeB = *(struct phyloTree * const *)b;
 return slCount(nodeA->priv) - slCount(nodeB->priv);
 }
 
-static struct slName *findNearestNeighbor(struct mutationAnnotatedTree *bigTree, char *sampleId,
-                                          struct variantPathNode *variantPath)
-/* Use the sequence of mutations in variantPath to find sampleId's parent node in bigTree,
- * then look for most similar leaf sibling(s). */
-{
-struct slName *neighbors = NULL;
-int bigTreeINodeCount = phyloCountInternalNodes(bigTree->tree);
-int minNewNode = bigTreeINodeCount + 1; // 1-based
-struct variantPathNode *lastOldNode = findLastInternalNode(variantPath, minNewNode);
-struct phyloTree *node = lastOldNode ? hashFindVal(bigTree->nodeHash, lastOldNode->nodeName) :
-                                       bigTree->tree;
-if (lastOldNode && !node)
-    errAbort("Can't find last internal node for sample %s", sampleId);
-// Look for a leaf kid with no mutations relative to the parent, should be closest.
-if (node->numEdges == 0)
-    {
-    struct slName *nodeList = hashFindVal(bigTree->condensedNodes, node->ident->name);
-    if (nodeList)
-        slNameAddHead(&neighbors, nodeList->name);
-    else
-        slNameAddHead(&neighbors, node->ident->name);
-    }
-else
+static char *leafWithLeastMuts(struct phyloTree *node, struct mutationAnnotatedTree *bigTree)
+/* If node has a leaf child with no mutations of its own, return the name of that leaf.
+ * Otherwise, if node has leaf children, return the name of the leaf with the fewest mutations.
+ * Otherwise return NULL. */
 {
+char *leafName = NULL;
 int leafCount = 0;
 int i;
 for (i = 0;  i < node->numEdges;  i++)
     {
     struct phyloTree *kid = node->edges[i];
     if (kid->numEdges == 0)
         {
         leafCount++;
         struct singleNucChange *kidMuts = kid->priv;
         if (!kidMuts)
             {
             struct slName *nodeList = hashFindVal(bigTree->condensedNodes, kid->ident->name);
             if (nodeList)
-                    slNameAddHead(&neighbors, nodeList->name);
+                leafName = cloneString(nodeList->name);
             else
-                    slNameAddHead(&neighbors, kid->ident->name);
+                leafName = cloneString(kid->ident->name);
             break;
             }
         }
     }
-    if (neighbors == NULL && leafCount)
+if (leafName == NULL && leafCount)
     {
     // Pick the leaf with the fewest mutations.
     struct phyloTree *leafKids[leafCount];
     int leafIx = 0;
     for (i = 0;  i < node->numEdges;  i++)
         {
         struct phyloTree *kid = node->edges[i];
         if (kid->numEdges == 0)
                 leafKids[leafIx++] = kid;
         }
     qsort(leafKids, leafCount, sizeof(leafKids[0]), mutCountCmp);
-        neighbors = slNameNew(leafKids[0]->ident->name);
+    leafName = cloneString(leafKids[0]->ident->name);
+    }
+return leafName;
+}
+
+static char *findNearestNeighbor(struct mutationAnnotatedTree *bigTree, char *sampleId,
+                                 struct variantPathNode *variantPath)
+/* Use the sequence of mutations in variantPath to find sampleId's parent node in bigTree,
+ * then look for most similar leaf sibling(s). */
+{
+char *nearestNeighbor = NULL;
+int bigTreeINodeCount = phyloCountInternalNodes(bigTree->tree);
+int minNewNode = bigTreeINodeCount + 1; // 1-based
+struct variantPathNode *lastOldNode = findLastInternalNode(variantPath, minNewNode);
+struct phyloTree *node = lastOldNode ? hashFindVal(bigTree->nodeHash, lastOldNode->nodeName) :
+                                       bigTree->tree;
+if (lastOldNode && !node)
+    errAbort("Can't find last internal node for sample %s", sampleId);
+// Look for a leaf kid with no mutations relative to the parent, should be closest.
+if (node->numEdges == 0)
+    {
+    struct slName *nodeList = hashFindVal(bigTree->condensedNodes, node->ident->name);
+    if (nodeList)
+        nearestNeighbor = cloneString(nodeList->name);
+    else
+        nearestNeighbor = cloneString(node->ident->name);
     }
+else
+    {
+    nearestNeighbor = leafWithLeastMuts(node, bigTree);
+    if (nearestNeighbor == NULL && node->parent != NULL)
+        nearestNeighbor = leafWithLeastMuts(node->parent, bigTree);
     }
-return neighbors;
+return nearestNeighbor;
 }
 
 static void printVariantPathNoNodeNames(FILE *f, struct variantPathNode *variantPath)
 /* Print out variant path with no node names (even if non-numeric) to f. */
 {
 struct variantPathNode *vpn;
 for (vpn = variantPath;  vpn != NULL;  vpn = vpn->next)
     {
     if (vpn != variantPath)
         fprintf(f, " > ");
     struct singleNucChange *snc;
     for (snc = vpn->sncList;  snc != NULL;  snc = snc->next)
         {
         if (snc != vpn->sncList)
             fprintf(f, ", ");
@@ -784,48 +796,57 @@
         met = hashFindVal(sampleMetadata, eg+strlen("_eg_"));
     }
 return met;
 }
 
 static char *lineageForSample(struct hash *sampleMetadata, char *sampleId)
 /* Look up sampleId's lineage in epiToLineage file. Return NULL if we don't find a match. */
 {
 char *lineage = NULL;
 struct sampleMetadata *met = metadataForSample(sampleMetadata, sampleId);
 if (met)
     lineage = met->lineage;
 return lineage;
 }
 
-static void displayNearestNeighbors(struct mutationAnnotatedTree *bigTree, char *source,
-                                    struct placementInfo *info, struct hash *sampleMetadata)
-/* Use info->variantPaths to find sample's nearest neighbor(s) in tree. */
+static void findNearestNeighbors(struct hash *samplePlacements, struct hash *sampleMetadata,
+                                 struct mutationAnnotatedTree *bigTree)
+/* For each placed sample, find the nearest neighbor in the bigTree and its assigned lineage,
+ * and fill in those two fields of placementInfo. */
 {
-if (bigTree)
-    {
-    printf("<p>Nearest neighboring %s sequence already in phylogenetic tree: ", source);
-    struct slName *neighbors = findNearestNeighbor(bigTree, info->sampleId, info->variantPath);
-    struct slName *neighbor;
-    for (neighbor = neighbors;  neighbor != NULL;  neighbor = neighbor->next)
+if (!bigTree)
+    return;
+struct hashCookie cookie = hashFirst(samplePlacements);
+struct hashEl *hel;
+while ((hel = hashNext(&cookie)) != NULL)
     {
-        if (neighbor != neighbors)
-            printf(", ");
-        printf("%s", neighbor->name);
-        char *lineage = lineageForSample(sampleMetadata, neighbor->name);
-        if (isNotEmpty(lineage))
-            printf(": lineage %s", lineage);
+    struct placementInfo *info = hel->val;
+    info->nearestNeighbor = findNearestNeighbor(bigTree, info->sampleId, info->variantPath);
+    if (isNotEmpty(info->nearestNeighbor))
+        info->neighborLineage = lineageForSample(sampleMetadata, info->nearestNeighbor);
+    }
 }
+
+static void displayNearestNeighbors(struct placementInfo *info, char *source)
+/* Use info->variantPaths to find sample's nearest neighbor(s) in tree. */
+{
+if (isNotEmpty(info->nearestNeighbor))
+    {
+    printf("<p>Nearest neighboring %s sequence already in phylogenetic tree: %s",
+           source, info->nearestNeighbor);
+    if (isNotEmpty(info->neighborLineage))
+        printf(": lineage %s", info->neighborLineage);
     puts("</p>");
     }
 }
 
 static void displayBestNodes(struct placementInfo *info, struct hash *sampleMetadata)
 /* Show the node(s) most closely related to sample. */
 {
 if (info->bestNodeCount == 1)
     printf("<p>The placement in the tree is unambiguous; "
            "there are no other parsimony-optimal placements in the phylogenetic tree.</p>\n");
 else if (info->bestNodeCount > 1)
     printf("<p>This placement is not the only parsimony-optimal placement in the tree; "
            "%d other placements exist.</p>\n", info->bestNodeCount - 1);
 if (showBestNodePaths && info->bestNodes)
     {
@@ -927,31 +948,31 @@
     else
         safecpy(indentForKids+indentLen - 4, 4 + 1, "|   ");
     }
 if (node->numEdges > 0)
     {
     char kidIndent[strlen(indent)+5];
     safef(kidIndent, sizeof kidIndent, "%s%s", indentForKids, "+---");
     int i;
     for (i = 0;  i < node->numEdges;  i++)
         asciiTree(node->edges[i], kidIndent, (i == node->numEdges - 1));
     }
 }
 
 static void describeSamplePlacements(struct slName *sampleIds, struct hash *samplePlacements,
                                      struct phyloTree *subtree, struct hash *sampleMetadata,
-                                     struct mutationAnnotatedTree *bigTree, char *source)
+                                     char *source)
 /* Report how each sample fits into the big tree. */
 {
 // Sort sample placements by sampleMuts so we can group identical samples.
 struct slRef *placementRefs = getPlacementRefList(sampleIds, samplePlacements);
 slSort(&placementRefs, placementInfoRefCmpSampleMuts);
 int relatedCount = slCount(placementRefs);
 int clumpSize = countIdentical(placementRefs);
 if (clumpSize < relatedCount && relatedCount > 2)
     {
     // Not all of the related sequences are identical, so they will be broken down into
     // separate "clumps".  List all related samples first to avoid confusion.
     puts("<pre>");
     asciiTree(subtree, "", TRUE);
     puts("</pre>");
     }
@@ -984,31 +1005,31 @@
         }
     refsToGo = ref;
     displaySampleMuts(info);
     if (info->imputedBases)
         {
         puts("<p>Base values imputed by parsimony:\n<ul>");
         struct baseVal *bv;
         for (bv = info->imputedBases;  bv != NULL;  bv = bv->next)
             printf("<li>%d: %s\n", bv->chromStart+1, bv->val);
         puts("</ul>");
         puts("</p>");
         }
     displayVariantPath(info->variantPath, clumpSize == 1 ? info->sampleId : "samples");
     displayBestNodes(info, sampleMetadata);
     if (!showBestNodePaths)
-        displayNearestNeighbors(bigTree, source, info, sampleMetadata);
+        displayNearestNeighbors(info, source);
     if (showParsimonyScore && info->parsimonyScore > 0)
         printf("<p>Parsimony score added by your sample: %d</p>\n", info->parsimonyScore);
         //#*** TODO: explain parsimony score
     }
 }
 
 struct phyloTree *phyloPruneToIds(struct phyloTree *node, struct slName *sampleIds)
 /* Prune all descendants of node that have no leaf descendants in sampleIds. */
 {
 if (node->numEdges)
     {
     struct phyloTree *prunedKids = NULL;
     int i;
     for (i = 0;  i < node->numEdges;  i++)
         {
@@ -1393,32 +1414,31 @@
 
 static void appendExcludingNs(struct dyString *dy, struct seqInfo *si)
 /* Append a note to dy about how many N bases and start and/or end are excluded from statistic. */
 {
 dyStringAppend(dy, "excluding ");
 if (si->nCountStart)
     dyStringPrintf(dy, "%d N bases at start", si->nCountStart);
 if (si->nCountStart && si->nCountEnd)
     dyStringAppend(dy, " and ");
 if (si->nCountEnd)
     dyStringPrintf(dy, "%d N bases at end", si->nCountEnd);
 }
 
 static void summarizeSequences(struct seqInfo *seqInfoList, boolean isFasta,
                                struct usherResults *ur, struct tempName *jsonTns[],
-                               struct hash *sampleMetadata, struct mutationAnnotatedTree *bigTree,
-                               struct dnaSeq *refGenome)
+                               struct hash *sampleMetadata, struct dnaSeq *refGenome)
 /* Show a table with composition & alignment stats for each sequence that passed basic QC. */
 {
 if (seqInfoList)
     {
     puts("<table class='seqSummary'>");
     boolean gotClades = FALSE, gotLineages = FALSE;
     lookForCladesAndLineages(seqInfoList, ur->samplePlacements, &gotClades, &gotLineages);
     printSummaryHeader(isFasta, gotClades, gotLineages);
     puts("<tbody>");
     struct dyString *dy = dyStringNew(0);
     struct seqInfo *si;
     for (si = seqInfoList;  si != NULL;  si = si->next)
         {
         puts("<tr>");
         printf("<th>%s</td>", replaceChars(si->seq->name, "|", " | "));
@@ -1535,35 +1555,33 @@
                     replaceChar(reason->name, '_', ' ');
                     dyStringPrintf(dy, ", %s", reason->name);
                     }
                 dyStringAppendC(dy, ')');
                 }
             printTooltip(dy->string);
             }
         printf("</td>");
         struct placementInfo *pi = hashFindVal(ur->samplePlacements, si->seq->name);
         if (pi)
             {
             if (gotClades)
                 printf("<td>%s</td>", pi->nextClade ? pi->nextClade : "n/a");
             if (gotLineages)
                 printf("<td>%s</td>", pi->pangoLineage ? pi->pangoLineage : "n/a");
-            struct slName *neighbor = findNearestNeighbor(bigTree, pi->sampleId, pi->variantPath);
-            char *lineage = neighbor ?  lineageForSample(sampleMetadata, neighbor->name) : "?";
             printf("<td>%s</td><td>%s</td>",
-                   neighbor ? replaceChars(neighbor->name, "|", " | ") : "?",
-                   lineage ? lineage : "?");
+                   pi->nearestNeighbor ? replaceChars(pi->nearestNeighbor, "|", " | ") : "?",
+                   pi->neighborLineage ? pi->neighborLineage : "?");
             int imputedCount = slCount(pi->imputedBases);
             printf("<td class='%s'>%d",
                    qcClassForImputedBases(imputedCount), imputedCount);
             if (imputedCount > 0)
                 {
                 dyStringClear(dy);
                 struct baseVal *bv;
                 for (bv = pi->imputedBases;  bv != NULL;  bv = bv->next)
                     {
                     dyStringAppendSep(dy, ", ");
                     dyStringPrintf(dy, "%d: %s", bv->chromStart+1, bv->val);
                     }
                 printTooltip(dy->string);
                 }
             printf("</td><td class='%s'>%d",
@@ -1616,31 +1634,31 @@
         errAbort("sncListFromSampleMuts: expected ref base value, got '%c' in '%s'",
                  ref, mut->name);
     int pos = atoi(&(mut->name[1]));
     if (pos < 1 || pos > chromSize)
         errAbort("sncListFromSampleMuts: expected pos between 1 and %d, got %d in '%s'",
                  chromSize, pos, mut->name);
     char alt = mut->name[strlen(mut->name)-1];
     if (alt < 'A' || alt > 'Z')
         errAbort("sncListFromSampleMuts: expected alt base value, got '%c' in '%s'",
                  alt, mut->name);
     if (isIupacAmbiguous(alt))
         continue;
     struct singleNucChange *snc;
     AllocVar(snc);
     snc->chromStart = pos-1;
-    snc->refBase = ref;
+    snc->refBase = snc->parBase = ref;
     snc->newBase = alt;
     slAddHead(&sncList, snc);
     }
 struct baseVal *bv;
 for (bv = imputedBases;  bv != NULL;  bv = bv->next)
     {
     struct singleNucChange *snc;
     AllocVar(snc);
     snc->chromStart = bv->chromStart;
     snc->refBase = '?';
     snc->newBase = bv->val[0];
     slAddHead(&sncList, snc);
     }
 slReverse(&sncList);
 return sncList;
@@ -1886,30 +1904,32 @@
         // SNVs that were masked (Problematic Sites track), not used in placement
         fputc('\t', f);
         struct singleNucChange *snc;
         for (snc = si->maskedSncList;  snc != NULL;  snc = snc->next)
             {
             if (snc != si->maskedSncList)
                 fputc(',', f);
             fprintf(f, "%c%d%c", snc->refBase, snc->chromStart+1, snc->newBase);
             }
         }
     else
         {
         warn("writeOneTsvRow: no sequenceInfo for sample '%s'", sampleId);
         fprintf(f, "\tn/a\tn/a\tn/a\tn/a\tn/a\tn/a\tn/a");
         }
+    fprintf(f, "\t%s", isNotEmpty(info->nearestNeighbor) ? info->nearestNeighbor : "n/a");
+    fprintf(f, "\t%s", isNotEmpty(info->neighborLineage) ? info->neighborLineage : "n/a");
     fputc('\n', f);
     }
 }
 
 static void rWriteTsvSummaryTreeOrder(struct phyloTree *node, FILE *f, struct usherResults *results,
                                       struct hash *seqInfoHash, struct geneInfo *geneInfoList,
                                       struct seqWindow *gSeqWin, struct hash *spikeChanges)
 /* As we encounter leaves (user-uploaded samples) in depth-first search order, write out a line
  * of TSV summary for each one. */
 {
 if (node->numEdges)
     {
     int i;
     for (i = 0;  i < node->numEdges;  i++)
         rWriteTsvSummaryTreeOrder(node->edges[i], f, results, seqInfoHash, geneInfoList, gSeqWin,
@@ -1935,30 +1955,31 @@
 static struct tempName *writeTsvSummary(struct usherResults *results, struct phyloTree *sampleTree,
                                         struct slName *sampleIds, struct seqInfo *seqInfoList,
                                         struct geneInfo *geneInfoList, struct seqWindow *gSeqWin,
                                         struct hash *spikeChanges, int *pStartTime)
 /* Write a tab-separated summary file for download.  If the user uploaded enough samples to make
  * a tree, then write out samples in tree order; otherwise use sampleIds list.
  * Accumulate S gene changes. */
 {
 struct tempName *tsvTn = NULL;
 AllocVar(tsvTn);
 trashDirFile(tsvTn, "ct", "usher_samples", ".tsv");
 FILE *f = mustOpen(tsvTn->forCgi, "w");
 fprintf(f, "name\tnuc_mutations\taa_mutations\timputed_bases\tmutation_path"
         "\tplacement_count\tparsimony_score_increase\tlength\taligned_bases"
         "\tins_bases\tins_ranges\tdel_bases\tdel_ranges\tmasked_mutations"
+        "\tnearest_neighbor\tneighbor_lineage"
         "\n");
 struct hash *seqInfoHash = hashFromSeqInfoList(seqInfoList);
 if (sampleTree)
     {
     rWriteTsvSummaryTreeOrder(sampleTree, f, results, seqInfoHash, geneInfoList, gSeqWin,
                               spikeChanges);
     }
 else
     {
     struct slName *sample;
     for (sample = sampleIds;  sample != NULL;  sample = sample->next)
         writeOneTsvRow(f, sample->name, results, seqInfoHash, geneInfoList, gSeqWin, spikeChanges);
     }
 carefulClose(&f);
 hashFree(&seqInfoHash);
@@ -2169,33 +2190,30 @@
     vcfTn = checkAndSaveVcf(lf, refGenome, maskSites, &seqInfoList, &sampleIds);
     reportTiming(&startTime, "check uploaded VCF");
     }
 else
     {
     if (isNotEmpty(lf->fileName))
         warn("Sorry, can't recognize your file %s as FASTA or VCF.\n", lf->fileName);
     else
         warn("Sorry, can't recognize your uploaded data as FASTA or VCF.\n");
     }
 lineFileClose(&lf);
 if (vcfTn)
     {
     fflush(stdout);
     int seqCount = slCount(seqInfoList);
-    // Don't make smaller subtrees when a large number of sequences are uploaded.
-    if (seqCount > MAX_SEQ_DETAILS)
-        subtreeSize = 0;
     struct usherResults *results = runUsher(usherPath, usherAssignmentsPath, vcfTn->forCgi,
                                             subtreeSize, sampleIds, bigTree->condensedNodes,
                                             &startTime);
     if (results->singleSubtreeInfo)
         {
         puts("<p></p>");
         readQcThresholds(db);
         int subtreeCount = slCount(results->subtreeInfoList);
         // Sort subtrees by number of user samples (largest first).
         slSort(&results->subtreeInfoList, subTreeInfoUserSampleCmp);
         // Make Nextstrain/auspice JSON file for each subtree.
         char *bigGenePredFile = phyloPlaceDbSettingPath(db, "bigGenePredFile");
         struct geneInfo *geneInfoList = getGeneInfoList(bigGenePredFile, refGenome);
         struct seqWindow *gSeqWin = chromSeqWindowNew(db, chrom, 0, chromSize);
         struct hash *sampleMetadata = getSampleMetadata(metadataFile);
@@ -2212,81 +2230,82 @@
             treeToAuspiceJson(ti, db, geneInfoList, gSeqWin, sampleMetadata, NULL,
                               jsonTns[ix]->forCgi, source);
             // Add a link for every sample to this subtree, so the single-subtree JSON can
             // link to subtree JSONs
             char *subtreeUrl = nextstrainUrlFromTn(jsonTns[ix]);
             struct slName *sample;
             for (sample = ti->subtreeUserSampleIds;  sample != NULL;  sample = sample->next)
                 hashAdd(sampleUrls, sample->name, subtreeUrl);
             }
         struct tempName *singleSubtreeJsonTn;
         AllocVar(singleSubtreeJsonTn);
         trashDirFile(singleSubtreeJsonTn, "ct", "singleSubtreeAuspice", ".json");
         treeToAuspiceJson(results->singleSubtreeInfo, db, geneInfoList, gSeqWin, sampleMetadata,
                           sampleUrls, singleSubtreeJsonTn->forCgi, source);
         struct subtreeInfo *subtreeInfoForButtons = results->subtreeInfoList;
-        if (seqCount > MAX_SEQ_DETAILS || subtreeCount > MAX_SUBTREE_BUTTONS)
+        if (subtreeCount > MAX_SUBTREE_BUTTONS)
             subtreeInfoForButtons = NULL;
         makeButtonRow(singleSubtreeJsonTn, jsonTns, subtreeInfoForButtons, subtreeSize, isFasta);
         printf("<p>If you have metadata you wish to display, click a 'view subtree in "
                "Nextstrain' button, and then you can drag on a CSV file to "
                "<a href='"NEXTSTRAIN_DRAG_DROP_DOC"' target=_blank>add it to the tree view</a>."
                "</p>\n");
 
+        findNearestNeighbors(results->samplePlacements, sampleMetadata, bigTree);
+
         // Make custom tracks for uploaded samples and subtree(s).
         struct phyloTree *sampleTree = NULL;
         struct tempName *ctTn = writeCustomTracks(vcfTn, results, sampleIds, bigTree->tree,
                                                   source, fontHeight, &sampleTree, &startTime);
 
         // Make a sample summary TSV file and accumulate S gene changes
         struct hash *spikeChanges = hashNew(0);
         struct tempName *tsvTn = writeTsvSummary(results, sampleTree, sampleIds, seqInfoList,
                                                  geneInfoList, gSeqWin, spikeChanges, &startTime);
         struct tempName *sTsvTn = writeSpikeChangeSummary(spikeChanges, slCount(sampleIds));
         struct tempName *zipTn = makeSubtreeZipFile(results, jsonTns, singleSubtreeJsonTn,
                                                     &startTime);
         downloadsRow(results->bigTreePlusTn->forHtml, tsvTn->forHtml, sTsvTn->forHtml,
                      zipTn->forHtml);
 
         if (seqCount <= MAX_SEQ_DETAILS)
             {
-            summarizeSequences(seqInfoList, isFasta, results, jsonTns, sampleMetadata, bigTree,
-                               refGenome);
+            summarizeSequences(seqInfoList, isFasta, results, jsonTns, sampleMetadata, refGenome);
             reportTiming(&startTime, "write summary table (including reading in lineages)");
             for (ix = 0, ti = results->subtreeInfoList;  ti != NULL;  ti = ti->next, ix++)
                 {
                 int subtreeUserSampleCount = slCount(ti->subtreeUserSampleIds);
                 printf("<h3>Subtree %d: ", ix+1);
                 if (subtreeUserSampleCount > 1)
                     printf("%d related samples", subtreeUserSampleCount);
                 else if (subtreeCount > 1)
                     printf("Unrelated sample");
                 printf("</h3>\n");
                 makeNextstrainButtonN("viewNextstrainSub", ix, subtreeUserSampleCount, subtreeSize,
                                       jsonTns);
                 puts("<br>");
                 // Make a sub-subtree with only user samples for display:
                 struct phyloTree *subtree = phyloOpenTree(ti->subtreeTn->forCgi);
                 subtree = phyloPruneToIds(subtree, ti->subtreeUserSampleIds);
                 describeSamplePlacements(ti->subtreeUserSampleIds, results->samplePlacements,
-                                         subtree, sampleMetadata, bigTree, source);
+                                         subtree, sampleMetadata, source);
                 }
             reportTiming(&startTime, "describe placements");
             }
         else
-            printf("<p>(Skipping details and subtrees; "
-                   "you uploaded %d sequences, and details/subtrees are shown only when "
+            printf("<p>(Skipping details; "
+                   "you uploaded %d sequences, and details are shown only when "
                    "you upload at most %d sequences.)</p>\n",
                    seqCount, MAX_SEQ_DETAILS);
 
         // Offer big tree w/new samples for download
         puts("<h3>Downloads</h3>");
         puts("<ul>");
         printf("<li><a href='%s' download>SARS-CoV-2 phylogenetic tree "
                "with your samples (Newick file)</a>\n", results->bigTreePlusTn->forHtml);
         printf("<li><a href='%s' download>TSV summary of sequences and placements</a>\n",
                tsvTn->forHtml);
         printf("<li><a href='%s' download>TSV summary of S (Spike) gene changes</a>\n",
                sTsvTn->forHtml);
         printf("<li><a href='%s' download>ZIP archive of subtree Newick and JSON files</a>\n",
                zipTn->forHtml);
         // For now, leave in the individual links so I don't break anybody's pipeline that's