src/hg/hgPhyloPlace/phyloPlace.c 08ec019f344fc52bd83ea3a98bedccd0048d732c

08ec019f344fc52bd83ea3a98bedccd0048d732c
angie
  Mon May 24 00:04:08 2021 -0700
Let the user upload a file containing names or IDs of sequences already in the selected tree; run matUtils extract to get subtrees that include those sequences.  protobufs.tab gets a new optional column to specify a file that maps alias to tree name/ID (e.g. for mapping EPI_ISL to public names/IDs).

diff --git src/hg/hgPhyloPlace/phyloPlace.c src/hg/hgPhyloPlace/phyloPlace.c
index e589069..17fd160 100644
--- src/hg/hgPhyloPlace/phyloPlace.c
+++ src/hg/hgPhyloPlace/phyloPlace.c
@@ -75,30 +75,41 @@
     }
 return fileName;
 }
 
 char *getUsherPath(boolean abortIfNotFound)
 /* Return hgPhyloPlaceData/usher if it exists, else NULL.  Do not free the returned value. */
 {
 char *usherPath = PHYLOPLACE_DATA_DIR "/usher";
 if (fileExists(usherPath))
     return usherPath;
 else if (abortIfNotFound)
     errAbort("Missing required file %s", usherPath);
 return NULL;
 }
 
+char *getMatUtilsPath(boolean abortIfNotFound)
+/* Return hgPhyloPlaceData/matUtils if it exists, else NULL.  Do not free the returned value. */
+{
+char *matUtilsPath = PHYLOPLACE_DATA_DIR "/matUtils";
+if (fileExists(matUtilsPath))
+    return matUtilsPath;
+else if (abortIfNotFound)
+    errAbort("Missing required file %s", matUtilsPath);
+return NULL;
+}
+
 char *getUsherAssignmentsPath(char *db, boolean abortIfNotFound)
 /* If <db>/config.ra specifies the file for use by usher --load-assignments and the file exists,
  * return the path, else NULL.  Do not free the returned value. */
 {
 char *usherAssignmentsPath = phyloPlaceDbSettingPath(db, "usherAssignmentsFile");
 if (isNotEmpty(usherAssignmentsPath) && fileExists(usherAssignmentsPath))
     return usherAssignmentsPath;
 else if (abortIfNotFound)
     errAbort("Missing required file %s", usherAssignmentsPath);
 return NULL;
 }
 
 //#*** This needs to go in a lib so CGIs know whether to include it in the menu. needs better name.
 boolean hgPhyloPlaceEnabled()
 /* Return TRUE if hgPhyloPlace is enabled in hg.conf and db wuhCor1 exists. */
@@ -120,59 +131,65 @@
 }
 
 struct treeChoices *loadTreeChoices(char *db)
 /* If <db>/config.ra specifies a treeChoices file, load it up, else return NULL. */
 {
 struct treeChoices *treeChoices = NULL;
 char *filename = phyloPlaceDbSettingPath(db, "treeChoices");
 if (isNotEmpty(filename) && fileExists(filename))
     {
     AllocVar(treeChoices);
     int maxChoices = 128;
     AllocArray(treeChoices->protobufFiles, maxChoices);
     AllocArray(treeChoices->metadataFiles, maxChoices);
     AllocArray(treeChoices->sources, maxChoices);
     AllocArray(treeChoices->descriptions, maxChoices);
+    AllocArray(treeChoices->aliasFiles, maxChoices);
     struct lineFile *lf = lineFileOpen(filename, TRUE);
     char *line;
     while (lineFileNextReal(lf, &line))
         {
-        char *words[5];
+        char *words[6];
         int wordCount = chopTabs(line, words);
-        lineFileExpectWords(lf, 4, wordCount);
+        lineFileExpectAtLeast(lf, 4, wordCount);
         if (treeChoices->count >= maxChoices)
             {
             warn("File %s has too many lines, only showing first %d phylogenetic tree choices",
                  filename, maxChoices);
             break;
             }
         struct dyString *dy = dyStringNew(0);
         addPathIfNecessary(dy, db, words[0]);
         treeChoices->protobufFiles[treeChoices->count] = cloneString(dy->string);
         addPathIfNecessary(dy, db, words[1]);
         treeChoices->metadataFiles[treeChoices->count] = cloneString(dy->string);
         treeChoices->sources[treeChoices->count] = cloneString(words[2]);
         // Description can be either a file or just some text.
         addPathIfNecessary(dy, db, words[3]);
         if (fileExists(dy->string))
             {
             char *desc = NULL;
             readInGulp(dy->string, &desc, NULL);
             treeChoices->descriptions[treeChoices->count] = desc;
             }
         else
             treeChoices->descriptions[treeChoices->count] = cloneString(words[3]);
+        if (wordCount > 4)
+            {
+            addPathIfNecessary(dy, db, words[4]);
+            treeChoices->aliasFiles[treeChoices->count] = cloneString(dy->string);
+            }
         treeChoices->count++;
         dyStringFree(&dy);
         }
     lineFileClose(&lf);
     }
 return treeChoices;
 }
 
 static char *urlFromTn(struct tempName *tn)
 /* Make a full URL to a trash file that our net.c code will be able to follow, for when we can't
  * just leave it up to the user's web browser to do the right thing with "../". */
 {
 struct dyString *dy = dyStringCreate("%s%s", hLocalHostCgiBinUrl(), tn->forHtml);
 return dyStringCannibalize(&dy);
 }
@@ -1180,34 +1197,36 @@
                                        ix+1, userSampleCount, subtreeSize - userSampleCount);
 makeNextstrainButton(buttonId, jsonTns[ix], buttonLabel, dyMo->string);
 dyStringFree(&dyMo);
 }
 
 static void makeNsSingleTreeButton(struct tempName *tn)
 /* Make a button to view single subtree (with all uploaded samples) in Nextstrain. */
 {
 makeNextstrainButton("viewNextstrainSingleSubtree", tn, "view single subtree in Nextstrain",
                      "view one subtree that includes all of your uploaded sequences plus "
                      SINGLE_SUBTREE_SIZE" randomly selected sequences from the phylogenetic "
                      "tree for context");
 }
 
 static void makeButtonRow(struct tempName *singleSubtreeJsonTn, struct tempName *jsonTns[],
-                          struct subtreeInfo *subtreeInfoList, int subtreeSize, boolean isFasta)
+                          struct subtreeInfo *subtreeInfoList, int subtreeSize, boolean isFasta,
+                          boolean offerCustomTrack)
 /* Russ's suggestion: row of buttons at the top to view results in GB, Nextstrain, Nextclade. */
 {
 puts("<p>");
+if (offerCustomTrack)
     cgiMakeButtonWithMsg("submit", "view in Genome Browser",
                          "view your uploaded sequences, their phylogenetic relationship and their "
                          "mutations along with many other datasets available in the Genome Browser");
 if (nextstrainHost())
     {
     printf("&nbsp;");
     makeNsSingleTreeButton(singleSubtreeJsonTn);
     struct subtreeInfo *ti;
     int ix;
     for (ix = 0, ti = subtreeInfoList;  ti != NULL;  ti = ti->next, ix++)
         {
         int userSampleCount = slCount(ti->subtreeUserSampleIds);
         printf("&nbsp;");
         makeNextstrainButtonN("viewNextstrainTopRow", ix, userSampleCount, subtreeSize, jsonTns);
         }
@@ -2206,83 +2225,301 @@
 printf("<a href='%s' download>Global phylogenetic tree with your sequences</a> | ", treeFile);
 printf("<a href='%s' download>TSV summary of sequences and placements</a> | ", sampleSummaryFile);
 printf("<a href='%s' download>TSV summary of Spike mutations</a> | ", spikeSummaryFile);
 printf("<a href='%s' download>ZIP file of subtree JSON and Newick files</a> | ", subtreeZipFile);
 puts("</p>");
 }
 
 static int subTreeInfoUserSampleCmp(const void *pa, const void *pb)
 /* Compare subtreeInfo by number of user sample IDs (highest number first). */
 {
 struct subtreeInfo *tiA = *(struct subtreeInfo **)pa;
 struct subtreeInfo *tiB = *(struct subtreeInfo **)pb;
 return slCount(tiB->subtreeUserSampleIds) - slCount(tiA->subtreeUserSampleIds);
 }
 
-char *phyloPlaceSamples(struct lineFile *lf, char *db, char *defaultProtobuf,
-                        boolean doMeasureTiming, int subtreeSize, int fontHeight)
-/* Given a lineFile that contains either FASTA or VCF, prepare VCF for usher;
- * if that goes well then run usher, report results, make custom track files
- * and return the top-level custom track file; otherwise return NULL. */
+static void getProtobufMetadataSource(char *db, char *protobufFile, char **retProtobufPath,
+                                      char **retMetadataFile, char **retSource, char **retAliasFile)
+/* If the config file specifies a list of tree choices, set ret* to the files associated with
+ * protobufFile if it is one of the choices, otherwise to those of the first choice.  If there
+ * are no tree choices, fall back on older conf settings (and set *retAliasFile to NULL). */
 {
-char *ctFile = NULL;
-measureTiming = doMeasureTiming;
-int startTime = clock1000();
-struct tempName *vcfTn = NULL;
-struct slName *sampleIds = NULL;
-char *usherPath = getUsherPath(TRUE);
-char *usherAssignmentsPath = NULL;
-char *source = NULL;
-char *metadataFile = NULL;
 struct treeChoices *treeChoices = loadTreeChoices(db);
 if (treeChoices)
     {
-    usherAssignmentsPath = defaultProtobuf;
-    if (isEmpty(usherAssignmentsPath))
-        usherAssignmentsPath = treeChoices->protobufFiles[0];
+    *retProtobufPath = protobufFile;
+    if (isEmpty(*retProtobufPath))
+        *retProtobufPath = treeChoices->protobufFiles[0];
     int i;
     for (i = 0;  i < treeChoices->count;  i++)
-        if (sameString(treeChoices->protobufFiles[i], usherAssignmentsPath))
+        if (sameString(treeChoices->protobufFiles[i], *retProtobufPath))
             {
-            metadataFile = treeChoices->metadataFiles[i];
-            source = treeChoices->sources[i];
+            *retMetadataFile = treeChoices->metadataFiles[i];
+            *retSource = treeChoices->sources[i];
+            *retAliasFile = treeChoices->aliasFiles[i];
             break;
             }
     if (i == treeChoices->count)
         {
-        usherAssignmentsPath = treeChoices->protobufFiles[0];
-        metadataFile = treeChoices->metadataFiles[0];
-        source = treeChoices->sources[0];
+        *retProtobufPath = treeChoices->protobufFiles[0];
+        *retMetadataFile = treeChoices->metadataFiles[0];
+        *retSource = treeChoices->sources[0];
+        *retAliasFile = treeChoices->aliasFiles[0];
         }
     }
 else
     {
     // Fall back on old settings
-    usherAssignmentsPath = getUsherAssignmentsPath(db, TRUE);
-    metadataFile = phyloPlaceDbSettingPath(db, "metadataFile");
-    source = "GISAID";
+    *retProtobufPath = getUsherAssignmentsPath(db, TRUE);
+    *retMetadataFile = phyloPlaceDbSettingPath(db, "metadataFile");
+    *retSource = "GISAID";
+    *retAliasFile = NULL;
+    }
+}
+
+static void addNameAndComponents(struct hash *nameHash, char *fullName)
+/* Add entries to nameHash mapping fullName to itself, and components of fullName to fullName. */
+{
+char *fullNameHashStored = hashStoreName(nameHash, fullName);
+// Now that we have hash storage for fullName, make it point to itself.
+struct hashEl *hel = hashLookup(nameHash, fullName);
+if (hel == NULL)
+    errAbort("Can't look up '%s' right after adding it", fullName);
+hel->val = fullNameHashStored;
+char *words[4];
+char copy[strlen(fullName)+1];
+safecpy(copy, sizeof copy, fullName);
+int wordCount = chopString(copy, "|", words, ArraySize(words));
+if (wordCount == 3)
+    {
+    // name|ID|date
+    hashAdd(nameHash, words[0], fullNameHashStored);
+    hashAdd(nameHash, words[1], fullNameHashStored);
+    }
+else if (wordCount == 2)
+    {
+    // ID|date
+    hashAdd(nameHash, words[0], fullNameHashStored);
+    }
+}
+
+static void rAddLeafNames(struct phyloTree *node, struct hash *condensedNodes, struct hash *nameHash)
+/* Recursively descend tree, adding leaf node names to nameHash (including all names of condensed
+ * leaf nodes).  Also map components of full names (country/isolate/year|ID|date) to full names. */
+{
+if (node->numEdges == 0)
+    {
+    char *leafName = node->ident->name;
+    struct slName *nodeList = hashFindVal(condensedNodes, leafName);
+    if (nodeList)
+        {
+        struct slName *sample;
+        for (sample = nodeList;  sample != NULL;  sample = sample->next)
+            addNameAndComponents(nameHash, sample->name);
+        }
+    else
+        addNameAndComponents(nameHash, leafName);
+    }
+else
+    {
+    int i;
+    for (i = 0;  i < node->numEdges;  i++)
+        rAddLeafNames(node->edges[i], condensedNodes, nameHash);
+    }
+}
+
+static void addAliases(struct hash *nameHash, char *aliasFile)
+/* If there is an aliasFile, then add its mappings of ID/alias to full tree name to nameHash. */
+{
+if (isNotEmpty(aliasFile) && fileExists(aliasFile))
+    {
+    struct lineFile *lf = lineFileOpen(aliasFile, TRUE);
+    int missCount = 0;
+    char *missExample = NULL;
+    char *line;
+    while (lineFileNextReal(lf, &line))
+        {
+        char *words[3];
+        int wordCount = chopTabs(line, words);
+        lineFileExpectWords(lf, 2, wordCount);
+        char *fullName = hashFindVal(nameHash, words[1]);
+        if (fullName)
+            hashAdd(nameHash, words[0], fullName);
+        else
+            {
+            missCount++;
+            if (missExample == NULL)
+                missExample = cloneString(words[1]);
+            }
+        }
+    lineFileClose(&lf);
+    if (missCount > 0)
+        fprintf(stderr, "aliasFile %s: %d values in second column were not found in tree, "
+                "e.g. '%s'", aliasFile, missCount, missExample);
+    }
+}
+
+static struct hash *getTreeNames(struct mutationAnnotatedTree *bigTree, char *aliasFile)
+/* Make a hash of full names of leaves of bigTree; also map components of those names to the
+ * full names in case the user gives us partial names. */
+{
+int nodeCount = bigTree->nodeHash->elCount;
+struct hash *nameHash = hashNew(digitsBaseTwo(nodeCount) + 3);
+rAddLeafNames(bigTree->tree, bigTree->condensedNodes, nameHash);
+addAliases(nameHash, aliasFile);
+return nameHash;
+}
+
+static char *matchName(struct hash *nameHash, char *name)
+/* Look for a possibly partial name or ID provided by the user in nameHash.  Return the result,
+ * possibly NULL.  If the full name doesn't match, try components of the name. */
+{
+name = trimSpaces(name);
+// GISAID fasta headers all have hCoV-19/country/isolate/year|EPI_ISL_#|date; strip the hCoV-19
+// because Nextstrain strips it in nextmeta/nextfasta download files, and so do I when building
+// UCSC's tree.
+if (startsWithNoCase("hCoV-19/", name))
+    name += strlen("hCoV-19/");
+char *match = hashFindVal(nameHash, name);
+int minWordSize=5;
+if (match == NULL && strchr(name, '|'))
+    {
+    // GISAID fasta headers have name|ID|date, and so do our tree IDs; try ID and name separately.
+    char *words[4];
+    char copy[strlen(name)+1];
+    safecpy(copy, sizeof copy, name);
+    int wordCount = chopString(copy, "|", words, ArraySize(words));
+    if (wordCount == 3)
+        {
+        // name|ID|date; try ID first.
+        if (strlen(words[1]) > minWordSize)
+            match = hashFindVal(nameHash, words[1]);
+        if (match == NULL && strlen(words[0]) > minWordSize)
+            {
+            match = hashFindVal(nameHash, words[0]);
+            // Sometimes country/isolate names have spaces... strip out if present.
+            if (match == NULL && strchr(words[0], ' '))
+                {
+                stripChar(words[0], ' ');
+                match = hashFindVal(nameHash, words[0]);
+                }
+            }
+        }
+    else if (wordCount == 2)
+        {
+        // ID|date
+        if (strlen(words[0]) > minWordSize)
+             match = hashFindVal(nameHash, words[0]);
+        }
+    }
+else if (match == NULL && strchr(name, ' '))
+    {
+    // GISAID sequence names may include spaces, in both country names ("South Korea") and
+    // isolate names.  That messes up FASTA headers, so Nextstrain strips out spaces when
+    // making the nextmeta and nextfasta download files for GISAID.  Try stripping out spaces:
+    char copy[strlen(name)+1];
+    safecpy(copy, sizeof copy, name);
+    stripChar(copy, ' ');
+    match = hashFindVal(nameHash, copy);
+    }
+return match;
+}
+
+static struct slName *readSampleIds(struct lineFile *lf, struct mutationAnnotatedTree *bigTree,
+                                    char *aliasFile)
+/* Read a file of sample names/IDs from the user; typically these will not be exactly the same
+ * as the protobuf's (UCSC protobuf names are typically country/isolate/year|ID|date), so attempt
+ * to find component matches if an exact match isn't found. */
+{
+struct slName *sampleIds = NULL;
+struct slName *unmatched = NULL;
+struct hash *nameHash = getTreeNames(bigTree, aliasFile);
+char *line;
+while (lineFileNext(lf, &line, NULL))
+    {
+    // If tab-sep or comma-sep, just try first word in line
+    char *tab = strchr(line, '\t');
+    if (tab)
+        *tab = '\0';
+    else
+        {
+        char *comma = strchr(line, ',');
+        if (comma)
+            *comma = '\0';
+        }
+    char *match = matchName(nameHash, line);
+    if (match)
+        slNameAddHead(&sampleIds, match);
+    else
+        slNameAddHead(&unmatched, line);
     }
-struct mutationAnnotatedTree *bigTree = parseParsimonyProtobuf(usherAssignmentsPath);
+if (unmatched)
+    {
+    struct dyString *firstFew = dyStringNew(0);
+    int maxExamples = 5;
+    struct slName *example;
+    int i;
+    for (i = 0, example = unmatched;  example != NULL && i < maxExamples;
+         i++, example = example->next)
+        {
+        dyStringAppendSep(firstFew, ", ");
+        dyStringPrintf(firstFew, "'%s'", example->name);
+        }
+    warn("Unable to find %d of your sequences in the tree, e.g. %s",
+         slCount(unmatched), firstFew->string);
+    dyStringFree(&firstFew);
+    }
+else if (sampleIds == NULL)
+    warn("Could not find any names in input; empty file uploaded?");
+slNameFreeList(&unmatched);
+return sampleIds;
+}
+
+char *phyloPlaceSamples(struct lineFile *lf, char *db, char *defaultProtobuf,
+                        boolean doMeasureTiming, int subtreeSize, int fontHeight,
+                        boolean *retSuccess)
+/* Given a lineFile that contains either FASTA, VCF, or a list of sequence names/ids:
+ * If FASTA/VCF, then prepare VCF for usher; if that goes well then run usher, report results,
+ * make custom track files and return the top-level custom track file.
+ * If list of seq names/ids, then attempt to find their full names in the protobuf, run matUtils
+ * to make subtrees, show subtree results, and return NULL.  Set retSuccess to TRUE if we were
+ * able to get at least some results for the user's input. */
+{
+char *ctFile = NULL;
+if (retSuccess)
+    *retSuccess = FALSE;
+measureTiming = doMeasureTiming;
+int startTime = clock1000();
+struct tempName *vcfTn = NULL;
+struct slName *sampleIds = NULL;
+char *usherPath = getUsherPath(TRUE);
+char *protobufPath = NULL;
+char *source = NULL;
+char *metadataFile = NULL;
+char *aliasFile = NULL;
+getProtobufMetadataSource(db, defaultProtobuf, &protobufPath, &metadataFile, &source, &aliasFile);
+struct mutationAnnotatedTree *bigTree = parseParsimonyProtobuf(protobufPath);
 reportTiming(&startTime, "parse protobuf file");
 if (! bigTree)
     {
-    warn("Problem parsing %s; can't make subtree subtracks.", usherAssignmentsPath);
+    warn("Problem parsing %s; can't make subtree subtracks.", protobufPath);
     }
 lineFileCarefulNewlines(lf);
 struct slName **maskSites = getProblematicSites(db);
 struct dnaSeq *refGenome = hChromSeq(db, chrom, 0, chromSize);
 boolean isFasta = FALSE;
+boolean subtreesOnly = FALSE;
 struct seqInfo *seqInfoList = NULL;
 if (lfLooksLikeFasta(lf))
     {
     boolean *informativeBases = informativeBasesFromTree(bigTree->tree, maskSites);
     struct slPair *failedSeqs;
     struct slPair *failedPsls;
     vcfTn = vcfFromFasta(lf, db, refGenome, informativeBases, maskSites,
                          &sampleIds, &seqInfoList, &failedSeqs, &failedPsls, &startTime);
     if (failedSeqs)
         {
         puts("<p>");
         struct slPair *fail;
         for (fail = failedSeqs;  fail != NULL;  fail = fail->next)
             printf("%s<br>\n", fail->name);
         puts("</p>");
@@ -2295,45 +2532,57 @@
             printf("%s<br>\n", fail->name);
         puts("</p>");
         }
     if (seqInfoList == NULL)
         printf("<p>Sorry, could not align any sequences to reference well enough to place in "
                "the phylogenetic tree.</p>\n");
     isFasta = TRUE;
     }
 else if (lfLooksLikeVcf(lf))
     {
     vcfTn = checkAndSaveVcf(lf, refGenome, maskSites, &seqInfoList, &sampleIds);
     reportTiming(&startTime, "check uploaded VCF");
     }
 else
     {
-    if (isNotEmpty(lf->fileName))
-        warn("Sorry, can't recognize your file %s as FASTA or VCF.\n", lf->fileName);
-    else
-        warn("Sorry, can't recognize your uploaded data as FASTA or VCF.\n");
+    subtreesOnly = TRUE;
+    sampleIds = readSampleIds(lf, bigTree, aliasFile);
     }
 lineFileClose(&lf);
+if (sampleIds == NULL)
+    {
+    return ctFile;
+    }
+struct usherResults *results = NULL;
 if (vcfTn)
     {
     fflush(stdout);
-    int seqCount = slCount(seqInfoList);
-    struct usherResults *results = runUsher(usherPath, usherAssignmentsPath, vcfTn->forCgi,
+    results = runUsher(usherPath, protobufPath, vcfTn->forCgi,
                        subtreeSize, sampleIds, bigTree->condensedNodes,
                        &startTime);
-    if (results->singleSubtreeInfo)
+    }
+else if (subtreesOnly)
+    {
+    char *matUtilsPath = getMatUtilsPath(TRUE);
+    results = runMatUtilsExtractSubtrees(matUtilsPath, protobufPath, subtreeSize,
+                                         sampleIds, bigTree->condensedNodes,
+                                         &startTime);
+    }
+if (results && results->singleSubtreeInfo)
     {
+    if (retSuccess)
+        *retSuccess = TRUE;
     puts("<p></p>");
     readQcThresholds(db);
     int subtreeCount = slCount(results->subtreeInfoList);
     // Sort subtrees by number of user samples (largest first).
     slSort(&results->subtreeInfoList, subTreeInfoUserSampleCmp);
     // Make Nextstrain/auspice JSON file for each subtree.
     char *bigGenePredFile = phyloPlaceDbSettingPath(db, "bigGenePredFile");
     struct geneInfo *geneInfoList = getGeneInfoList(bigGenePredFile, refGenome);
     struct seqWindow *gSeqWin = chromSeqWindowNew(db, chrom, 0, chromSize);
     struct hash *sampleMetadata = getSampleMetadata(metadataFile);
     struct hash *sampleUrls = hashNew(0);
     struct tempName *jsonTns[subtreeCount];
     struct subtreeInfo *ti;
     int ix;
     for (ix = 0, ti = results->subtreeInfoList;  ti != NULL;  ti = ti->next, ix++)
@@ -2347,92 +2596,102 @@
         // Add a link for every sample to this subtree, so the single-subtree JSON can
         // link to subtree JSONs
         char *subtreeUrl = nextstrainUrlFromTn(jsonTns[ix]);
         struct slName *sample;
         for (sample = ti->subtreeUserSampleIds;  sample != NULL;  sample = sample->next)
             hashAdd(sampleUrls, sample->name, subtreeUrl);
         }
     struct tempName *singleSubtreeJsonTn;
     AllocVar(singleSubtreeJsonTn);
     trashDirFile(singleSubtreeJsonTn, "ct", "singleSubtreeAuspice", ".json");
     treeToAuspiceJson(results->singleSubtreeInfo, db, geneInfoList, gSeqWin, sampleMetadata,
                       sampleUrls, singleSubtreeJsonTn->forCgi, source);
     struct subtreeInfo *subtreeInfoForButtons = results->subtreeInfoList;
     if (subtreeCount > MAX_SUBTREE_BUTTONS)
         subtreeInfoForButtons = NULL;
-        makeButtonRow(singleSubtreeJsonTn, jsonTns, subtreeInfoForButtons, subtreeSize, isFasta);
+    makeButtonRow(singleSubtreeJsonTn, jsonTns, subtreeInfoForButtons, subtreeSize, isFasta,
+                  !subtreesOnly);
     printf("<p>If you have metadata you wish to display, click a 'view subtree in "
            "Nextstrain' button, and then you can drag on a CSV file to "
            "<a href='"NEXTSTRAIN_DRAG_DROP_DOC"' target=_blank>add it to the tree view</a>."
            "</p>\n");
 
+    struct tempName *tsvTn = NULL, *sTsvTn = NULL;
+    struct tempName *zipTn = makeSubtreeZipFile(results, jsonTns, singleSubtreeJsonTn,
+                                                &startTime);
+    struct tempName *ctTn = NULL;
+    if (! subtreesOnly)
+        {
         findNearestNeighbors(results->samplePlacements, sampleMetadata, bigTree);
 
         // Make custom tracks for uploaded samples and subtree(s).
         struct phyloTree *sampleTree = NULL;
-        struct tempName *ctTn = writeCustomTracks(vcfTn, results, sampleIds, bigTree->tree,
+        ctTn = writeCustomTracks(vcfTn, results, sampleIds, bigTree->tree,
                                  source, fontHeight, &sampleTree, &startTime);
 
         // Make a sample summary TSV file and accumulate S gene changes
         struct hash *spikeChanges = hashNew(0);
-        struct tempName *tsvTn = writeTsvSummary(results, sampleTree, sampleIds, seqInfoList,
+        tsvTn = writeTsvSummary(results, sampleTree, sampleIds, seqInfoList,
                                                  geneInfoList, gSeqWin, spikeChanges, &startTime);
-        struct tempName *sTsvTn = writeSpikeChangeSummary(spikeChanges, slCount(sampleIds));
-        struct tempName *zipTn = makeSubtreeZipFile(results, jsonTns, singleSubtreeJsonTn,
-                                                    &startTime);
+        sTsvTn = writeSpikeChangeSummary(spikeChanges, slCount(sampleIds));
         downloadsRow(results->bigTreePlusTn->forHtml, tsvTn->forHtml, sTsvTn->forHtml,
                      zipTn->forHtml);
 
+        int seqCount = slCount(seqInfoList);
         if (seqCount <= MAX_SEQ_DETAILS)
             {
             summarizeSequences(seqInfoList, isFasta, results, jsonTns, sampleMetadata, refGenome);
             reportTiming(&startTime, "write summary table (including reading in lineages)");
             for (ix = 0, ti = results->subtreeInfoList;  ti != NULL;  ti = ti->next, ix++)
                 {
                 int subtreeUserSampleCount = slCount(ti->subtreeUserSampleIds);
                 printf("<h3>Subtree %d: ", ix+1);
                 if (subtreeUserSampleCount > 1)
                     printf("%d related samples", subtreeUserSampleCount);
                 else if (subtreeCount > 1)
                     printf("Unrelated sample");
                 printf("</h3>\n");
                 makeNextstrainButtonN("viewNextstrainSub", ix, subtreeUserSampleCount, subtreeSize,
                                       jsonTns);
                 puts("<br>");
                 // Make a sub-subtree with only user samples for display:
                 struct phyloTree *subtree = phyloOpenTree(ti->subtreeTn->forCgi);
                 subtree = phyloPruneToIds(subtree, ti->subtreeUserSampleIds);
                 describeSamplePlacements(ti->subtreeUserSampleIds, results->samplePlacements,
                                          subtree, sampleMetadata, source);
                 }
             reportTiming(&startTime, "describe placements");
             }
         else
             printf("<p>(Skipping details; "
                    "you uploaded %d sequences, and details are shown only when "
                    "you upload at most %d sequences.)</p>\n",
                    seqCount, MAX_SEQ_DETAILS);
+        }
 
-        // Offer big tree w/new samples for download
     puts("<h3>Downloads</h3>");
+    if (! subtreesOnly)
+        {
         puts("<ul>");
+        // Offer big tree w/new samples for download
         printf("<li><a href='%s' download>SARS-CoV-2 phylogenetic tree "
                "with your samples (Newick file)</a>\n", results->bigTreePlusTn->forHtml);
         printf("<li><a href='%s' download>TSV summary of sequences and placements</a>\n",
                tsvTn->forHtml);
         printf("<li><a href='%s' download>TSV summary of S (Spike) gene changes</a>\n",
                sTsvTn->forHtml);
+        }
     printf("<li><a href='%s' download>ZIP archive of subtree Newick and JSON files</a>\n",
            zipTn->forHtml);
     // For now, leave in the individual links so I don't break anybody's pipeline that's
     // scraping this page...
     for (ix = 0, ti = results->subtreeInfoList;  ti != NULL;  ti = ti->next, ix++)
         {
         int subtreeUserSampleCount = slCount(ti->subtreeUserSampleIds);
         printf("<li><a href='%s' download>Subtree with %s", ti->subtreeTn->forHtml,
                ti->subtreeUserSampleIds->name);
         if (subtreeUserSampleCount > 10)
             printf(" and %d other samples", subtreeUserSampleCount - 1);
         else
             {
             struct slName *sln;
             for (sln = ti->subtreeUserSampleIds->next;  sln != NULL;  sln = sln->next)
@@ -2441,30 +2700,32 @@
         puts(" (Newick file)</a>");
         printf("<li><a href='%s' download>Auspice JSON for subtree with %s",
                jsonTns[ix]->forHtml, ti->subtreeUserSampleIds->name);
         if (subtreeUserSampleCount > 10)
             printf(" and %d other samples", subtreeUserSampleCount - 1);
         else
             {
             struct slName *sln;
             for (sln = ti->subtreeUserSampleIds->next;  sln != NULL;  sln = sln->next)
                 printf(", %s", sln->name);
             }
         puts(" (JSON file)</a>");
         }
     puts("</ul>");
 
+    if (!subtreesOnly)
+        {
         // Notify in opposite order of custom track creation.
         puts("<h3>Custom tracks for viewing in the Genome Browser</h3>");
         printf("<p>Added custom track of uploaded samples.</p>\n");
         if (subtreeCount > 0 && subtreeCount <= MAX_SUBTREE_CTS)
             printf("<p>Added %d subtree custom track%s.</p>\n",
                    subtreeCount, (subtreeCount > 1 ? "s" : ""));
         ctFile = urlFromTn(ctTn);
         }
+    }
 else
     {
     warn("No subtree output from usher.\n");
     }
-    }
 return ctFile;
 }