a2a68e8ad4ff5927b604b27898c183a519c9e5f4
angie
  Tue Feb 23 20:08:34 2021 -0800
Instead of replacing the small subtrees (usher -k) with the new single-subtree option (-K), generate both, unless a large number of sequences are uploaded, in which case generate only the single subtree (-K).

diff --git src/hg/hgPhyloPlace/phyloPlace.c src/hg/hgPhyloPlace/phyloPlace.c
index 00013b9..6f49fe1 100644
--- src/hg/hgPhyloPlace/phyloPlace.c
+++ src/hg/hgPhyloPlace/phyloPlace.c
@@ -1022,77 +1022,89 @@
                                                 int *retIx)
 /* Find the subtree that contains sample name and set *retIx to its index in the list.
  * If we can't find it, return NULL and set *retIx to -1. */
 {
 struct subtreeInfo *ti;
 int ix;
 for (ti = subtreeInfoList, ix = 0;  ti != NULL;  ti = ti->next, ix++)
     if (slNameInList(ti->subtreeUserSampleIds, name))
         break;
 if (ti == NULL)
     ix = -1;
 *retIx = ix;
 return ti;
 }
 
-//#*** TODO: Replace temporary host with nextstrain.org when feature request is released
-//#*** https://github.com/nextstrain/nextstrain.org/pull/216
 static char *nextstrainHost()
-/* Until the new /fetch/ function is live on nextstrain.org, get their temporary staging host
- * from an hg.conf param, or NULL if missing. */
+/* Return the nextstrain hostname from an hg.conf param, or NULL if missing. */
 {
 return cfgOption("nextstrainHost");
 }
 
 static char *nextstrainUrlFromTn(struct tempName *jsonTn)
 /* Return a link to Nextstrain to view an annotated subtree. */
 {
 char *jsonUrlForNextstrain = urlFromTn(jsonTn);
 char *protocol = strstr(jsonUrlForNextstrain, "://");
 if (protocol)
     jsonUrlForNextstrain = protocol + strlen("://");
 struct dyString *dy = dyStringCreate("%s/fetch/%s", nextstrainHost(), jsonUrlForNextstrain);
 return dyStringCannibalize(&dy);
 }
 
-static void makeNextstrainButton(char *idBase, int ix, struct tempName *jsonTns[])
-/* Make a button to view results in Nextstrain.  idBase is a short string and
+static void makeNextstrainButton(char *id, struct tempName *tn, char *label)
+/* Make a button to view an auspice JSON file in Nextstrain. */
+{
+char *nextstrainUrl = nextstrainUrlFromTn(tn);
+struct dyString *js = dyStringCreate("window.open('%s');", nextstrainUrl);
+cgiMakeOnClickButton(id, js->string, label);
+dyStringFree(&js);
+freeMem(nextstrainUrl);
+}
+
+static void makeNextstrainButtonN(char *idBase, int ix, struct tempName *jsonTns[])
+/* Make a button to view one subtree in Nextstrain.  idBase is a short string and
  * ix is 0-based subtree number. */
 {
 char buttonId[256];
 safef(buttonId, sizeof buttonId, "%s%d", idBase, ix+1);
 char buttonLabel[256];
 safef(buttonLabel, sizeof buttonLabel, "view subtree %d in Nextstrain", ix+1);
-char *nextstrainUrl = nextstrainUrlFromTn(jsonTns[ix]);
-struct dyString *js = dyStringCreate("window.open('%s');", nextstrainUrl);
-cgiMakeOnClickButton(buttonId, js->string, buttonLabel);
-dyStringFree(&js);
-freeMem(nextstrainUrl);
+makeNextstrainButton(buttonId, jsonTns[ix], buttonLabel);
+}
+
+static void makeNsSingleTreeButton(struct tempName *tn)
+/* Make a button to view single subtree (with all uploaded samples) in Nextstrain. */
+{
+makeNextstrainButton("viewNextstrainSingleSubtree", tn, "view comprehensive subtree in Nextstrain");
 }
 
-static void makeButtonRow(struct tempName *jsonTns[], int subtreeCount, boolean isFasta)
+static void makeButtonRow(struct tempName *singleSubtreeJsonTn, struct tempName *jsonTns[],
+                          int subtreeCount, boolean isFasta)
 /* Russ's suggestion: row of buttons at the top to view results in GB, Nextstrain, Nextclade. */
 {
 puts("<p>");
 cgiMakeButton("submit", "view in Genome Browser");
 if (nextstrainHost())
     {
+    printf("&nbsp;");
+    makeNsSingleTreeButton(singleSubtreeJsonTn);
     int ix;
     for (ix = 0;  ix < subtreeCount;  ix++)
         {
         printf("&nbsp;");
-        makeNextstrainButton("viewNextstrainTopRow", ix, jsonTns);
+        makeNextstrainButtonN("viewNextstrainTopRow", ix, jsonTns);
         }
     }
 if (0 && isFasta)
     {
     printf("&nbsp;");
     struct dyString *js = dyStringCreate("window.open('https://master.clades.nextstrain.org/"
                                          "?input-fasta=%s');",
                                          "needATn");  //#*** TODO: save FASTA to file
     cgiMakeOnClickButton("viewNextclade", js->string, "view sequences in Nextclade");
     }
 puts("</p>");
 }
 
 #define TOOLTIP(text) " <div class='tooltip'>(?)<span class='tooltiptext'>" text "</span></div>"
 
@@ -1830,78 +1842,84 @@
     vcfTn = checkAndSaveVcf(lf, refGenome, maskSites, &seqInfoList, &sampleIds);
     reportTiming(&startTime, "check uploaded VCF");
     }
 else
     {
     if (isNotEmpty(lf->fileName))
         warn("Sorry, can't recognize your file %s as FASTA or VCF.\n", lf->fileName);
     else
         warn("Sorry, can't recognize your uploaded data as FASTA or VCF.\n");
     }
 lineFileClose(&lf);
 if (vcfTn)
     {
     fflush(stdout);
     int seqCount = slCount(seqInfoList);
+    // Don't make smaller subtrees when a large number of sequences are uploaded.
+    if (seqCount > MAX_SEQ_DETAILS)
+        subtreeSize = 0;
     struct usherResults *results = runUsher(usherPath, usherAssignmentsPath, vcfTn->forCgi,
                                             subtreeSize, sampleIds, bigTree->condensedNodes,
                                             &startTime);
-    if (results->subtreeInfoList || seqCount > MAX_SEQ_DETAILS)
+    if (results->singleSubtreeInfo)
         {
         readQcThresholds(db);
         int subtreeCount = slCount(results->subtreeInfoList);
         // Sort subtrees by number of user samples (largest first).
         slSort(&results->subtreeInfoList, subTreeInfoUserSampleCmp);
         // Make Nextstrain/auspice JSON file for each subtree.
         char *bigGenePredFile = phyloPlaceDbSettingPath(db, "bigGenePredFile");
         struct geneInfo *geneInfoList = getGeneInfoList(bigGenePredFile, refGenome);
         struct seqWindow *gSeqWin = chromSeqWindowNew(db, chrom, 0, chromSize);
         struct hash *sampleMetadata = getSampleMetadata(metadataFile);
+        struct tempName *singleSubtreeJsonTn;
+        AllocVar(singleSubtreeJsonTn);
+        trashDirFile(singleSubtreeJsonTn, "ct", "singleSubtreeAuspice", ".json");
+        treeToAuspiceJson(results->singleSubtreeInfo, db, geneInfoList, gSeqWin, sampleMetadata,
+                          singleSubtreeJsonTn->forCgi, source);
         struct tempName *jsonTns[subtreeCount];
         struct subtreeInfo *ti;
         int ix;
         for (ix = 0, ti = results->subtreeInfoList;  ti != NULL;  ti = ti->next, ix++)
             {
             AllocVar(jsonTns[ix]);
             trashDirFile(jsonTns[ix], "ct", "subtreeAuspice", ".json");
             treeToAuspiceJson(ti, db, geneInfoList, gSeqWin, sampleMetadata, jsonTns[ix]->forCgi,
                               source);
             }
         puts("<p></p>");
-        if (subtreeCount > 0 && subtreeCount <= MAX_SUBTREE_BUTTONS)
-            {
-            makeButtonRow(jsonTns, subtreeCount, isFasta);
+        int subtreeButtonCount = (seqCount <= MAX_SEQ_DETAILS) ? subtreeCount : 0;
+        makeButtonRow(singleSubtreeJsonTn, jsonTns, subtreeButtonCount, isFasta);
         printf("<p>If you have metadata you wish to display, click a 'view subtree in "
                "Nextstrain' button, and then you can drag on a CSV file to "
                "<a href='"NEXTSTRAIN_DRAG_DROP_DOC"' target=_blank>add it to the tree view</a>."
                "</p>\n");
-            }
         if (seqCount <= MAX_SEQ_DETAILS)
             {
             summarizeSequences(seqInfoList, isFasta, results, jsonTns, sampleMetadata, bigTree,
                                refGenome);
             reportTiming(&startTime, "write summary table (including reading in lineages)");
             for (ix = 0, ti = results->subtreeInfoList;  ti != NULL;  ti = ti->next, ix++)
                 {
                 int subtreeUserSampleCount = slCount(ti->subtreeUserSampleIds);
                 printf("<h3>Subtree %d: ", ix+1);
                 if (subtreeUserSampleCount > 1)
                     printf("%d related samples", subtreeUserSampleCount);
                 else if (subtreeCount > 1)
                     printf("Unrelated sample");
                 printf("</h3>\n");
-                makeNextstrainButton("viewNextstrainSub", ix, jsonTns);
+                makeNextstrainButtonN("viewNextstrainSub", ix, jsonTns);
                 puts("<br>");
                 // Make a sub-subtree with only user samples for display:
                 struct phyloTree *subtree = phyloOpenTree(ti->subtreeTn->forCgi);
                 subtree = phyloPruneToIds(subtree, ti->subtreeUserSampleIds);
                 describeSamplePlacements(ti->subtreeUserSampleIds, results->samplePlacements,
                                          subtree, sampleMetadata, bigTree, source);
                 }
             reportTiming(&startTime, "describe placements");
             }
         else
             printf("<p>(Skipping details and subtrees; "
                    "you uploaded %d sequences, and details/subtrees are shown only when "
                    "you upload at most %d sequences.)</p>\n",
                    seqCount, MAX_SEQ_DETAILS);