d354f7c4ff9867b3a64ecaadba87ba2381371e27
angie
  Mon Nov 7 16:46:38 2022 -0800
In Nextstrain view (JSON), show the Nextstrain clade and Pango lineage assigned by usher for uploaded samples.  Make Pango lineage the default coloring, and add a filter to highlight uploaded samples so the user immediately sees the uploaded samples' lineage(s) and placement in the subtree.  Also, for uploaded names/IDs, don't truncate at first comma until we check whether the line is a list of IDs / ID ranges.

diff --git src/hg/hgPhyloPlace/phyloPlace.c src/hg/hgPhyloPlace/phyloPlace.c
index fe1e4a4..04db44b 100644
--- src/hg/hgPhyloPlace/phyloPlace.c
+++ src/hg/hgPhyloPlace/phyloPlace.c
@@ -1278,31 +1278,32 @@
 }
 
 static char *nextstrainHost()
 /* Return the nextstrain hostname from an hg.conf param, or NULL if missing. */
 {
 return cfgOption("nextstrainHost");
 }
 
 static char *nextstrainUrlFromTn(struct tempName *jsonTn)
 /* Return a link to Nextstrain to view an annotated subtree. */
 {
 char *jsonUrlForNextstrain = urlFromTn(jsonTn);
 char *protocol = strstr(jsonUrlForNextstrain, "://");
 if (protocol)
     jsonUrlForNextstrain = protocol + strlen("://");
-struct dyString *dy = dyStringCreate("%s/fetch/%s", nextstrainHost(), jsonUrlForNextstrain);
+struct dyString *dy = dyStringCreate("%s/fetch/%s?f_userOrOld=uploaded%%20sample",
+                                     nextstrainHost(), jsonUrlForNextstrain);
 return dyStringCannibalize(&dy);
 }
 
 static void makeNextstrainButton(char *id, struct tempName *tn, char *label, char *mouseover)
 /* Make a button to view an auspice JSON file in Nextstrain. */
 {
 char *nextstrainUrl = nextstrainUrlFromTn(tn);
 struct dyString *js = dyStringCreate("window.open('%s');", nextstrainUrl);
 cgiMakeOnClickButtonWithMsg(id, js->string, label, mouseover);
 dyStringFree(&js);
 freeMem(nextstrainUrl);
 }
 
 static void makeNextstrainButtonN(char *idBase, int ix, int userSampleCount, int subtreeSize,
                                   struct tempName *jsonTns[])
@@ -2723,31 +2724,31 @@
 return foundIt;
 }
 
 static boolean matchIdRange(struct hash *nameHash, char *line,
                             struct slName **retMatches, struct slName **retUnmatched)
 /* If line looks like it might contain a range of IDs, for example EPI_ISL_123-129 from an EPI_SET,
  * then expand the range(s) into individual IDs, look up the IDs, set retMatches and retUnmatched
  * to per-ID results, and return TRUE. */
 {
 boolean foundAny = FALSE;
 *retMatches = *retUnmatched = NULL;
 regmatch_t substrArr[7];
 // Line may contain a list of distinct IDs and/or ID ranges
 #define oneIdExp "([A-Z_]+)([0-9]+)"
 #define rangeEndExp "- *([A-Z_]*)([0-9]+)"
-#define rangeListExp "^("oneIdExp" *("rangeEndExp")?),? *"
+#define rangeListExp "^("oneIdExp",? *("rangeEndExp")?),? *"
 while (regexMatchSubstr(line, rangeListExp, substrArr, ArraySize(substrArr)))
     {
     char *prefixA = regexSubstringClone(line, substrArr[2]);
     char *numberA = regexSubstringClone(line, substrArr[3]);
     if (regexSubstrMatched(substrArr[4]))
         {
         // Looks like a well-formed ID range
         char *prefixB = regexSubstringClone(line, substrArr[5]);
         char *numberB = regexSubstringClone(line, substrArr[6]);
         int start = atol(numberA);
         int end = atol(numberB);
         if ((isEmpty(prefixB) || sameString(prefixA, prefixB)) && end >= start)
             {
             char oneId[strlen(line)+1];
             int num;
@@ -2779,55 +2780,63 @@
     line += (substrArr[0].rm_eo - substrArr[0].rm_so);
     }
 return foundAny;
 }
 
 static struct slName *readSampleIds(struct lineFile *lf, struct hash *nameHash)
 /* Read a file of sample names/IDs from the user; typically these will not be exactly the same
  * as the protobuf's (UCSC protobuf names are typically country/isolate/year|ID|date), so attempt
  * to find component matches if an exact match isn't found. */
 {
 struct slName *sampleIds = NULL;
 struct slName *unmatched = NULL;
 char *line;
 while (lineFileNext(lf, &line, NULL))
     {
-    // If tab-sep or comma-sep, just try first word in line
+    // If tab-sep, just try first word in line
     char *tab = strchr(line, '\t');
     if (tab)
         *tab = '\0';
-    else
-        {
-        char *comma = strchr(line, ',');
-        if (comma)
-            *comma = '\0';
-        }
     char *match = matchName(nameHash, line);
     if (match)
         slNameAddHead(&sampleIds, match);
     else
         {
         struct slName *rangeMatches = NULL, *rangeUnmatched = NULL;
         if (matchIdRange(nameHash, line, &rangeMatches, &rangeUnmatched))
             {
             sampleIds = slCat(rangeMatches, sampleIds);
             unmatched = slCat(rangeUnmatched, unmatched);
             }
         else
+            {
+            // If comma-sep, just try first word in line
+            char *comma = strchr(line, ',');
+            if (comma)
+                {
+                *comma = '\0';
+                match = matchName(nameHash, line);
+                if (match)
+                    slNameAddHead(&sampleIds, match);
+                else
+                    slNameAddHead(&unmatched, line);
+                }
+            else
                 slNameAddHead(&unmatched, line);
             }
         }
+    }
 if (unmatched)
     {
     struct dyString *firstFew = dyStringNew(0);
     int maxExamples = 5;
     struct slName *example;
     int i;
     for (i = 0, example = unmatched;  example != NULL && i < maxExamples;
          i++, example = example->next)
         {
         dyStringAppendSep(firstFew, ", ");
         dyStringPrintf(firstFew, "'%s'", example->name);
         }
     warn("Unable to find %d of your sequences in the tree, e.g. %s",
          slCount(unmatched), firstFew->string);
     dyStringFree(&firstFew);
@@ -2996,43 +3005,43 @@
     // Make Nextstrain/auspice JSON file for each subtree.
     char *bigGenePredFile = phyloPlaceDbSettingPath(db, "bigGenePredFile");
     struct geneInfo *geneInfoList = getGeneInfoList(bigGenePredFile, refGenome);
     struct seqWindow *gSeqWin = memSeqWindowNew(chrom, refGenome->dna);
     struct hash *sampleUrls = hashNew(0);
     struct tempName *jsonTns[subtreeCount];
     struct subtreeInfo *ti;
     int ix;
     for (ix = 0, ti = results->subtreeInfoList;  ti != NULL;  ti = ti->next, ix++)
         {
         AllocVar(jsonTns[ix]);
         char subtreeName[512];
         safef(subtreeName, sizeof(subtreeName), "subtreeAuspice%d", ix+1);
         trashDirFile(jsonTns[ix], "ct", subtreeName, ".json");
         treeToAuspiceJson(ti, db, geneInfoList, gSeqWin, sampleMetadata, NULL,
-                          jsonTns[ix]->forCgi, source);
+                          results->samplePlacements, jsonTns[ix]->forCgi, source);
         // Add a link for every sample to this subtree, so the single-subtree JSON can
         // link to subtree JSONs
         char *subtreeUrl = nextstrainUrlFromTn(jsonTns[ix]);
         struct slName *sample;
         for (sample = ti->subtreeUserSampleIds;  sample != NULL;  sample = sample->next)
             hashAdd(sampleUrls, sample->name, subtreeUrl);
         }
     struct tempName *singleSubtreeJsonTn;
     AllocVar(singleSubtreeJsonTn);
     trashDirFile(singleSubtreeJsonTn, "ct", "singleSubtreeAuspice", ".json");
     treeToAuspiceJson(results->singleSubtreeInfo, db, geneInfoList, gSeqWin, sampleMetadata,
-                      sampleUrls, singleSubtreeJsonTn->forCgi, source);
+                      sampleUrls, results->samplePlacements, singleSubtreeJsonTn->forCgi, source);
     reportTiming(&startTime, "make Auspice JSON");
     struct subtreeInfo *subtreeInfoForButtons = results->subtreeInfoList;
     if (subtreeCount > MAX_SUBTREE_BUTTONS)
         subtreeInfoForButtons = NULL;
     makeButtonRow(singleSubtreeJsonTn, jsonTns, subtreeInfoForButtons, subtreeSize, isFasta,
                   !subtreesOnly);
     printf("<p>If you have metadata you wish to display, click a 'view subtree in "
            "Nextstrain' button, and then you can drag on a CSV file to "
            "<a href='"NEXTSTRAIN_DRAG_DROP_DOC"' target=_blank>add it to the tree view</a>."
            "</p>\n");
     puts("<p><em>Note: "
          "The Nextstrain subtree views, and Download files below, are temporary files and will "
          "expire within two days.  "
          "Please download the Nextstrain subtree JSON files if you will want to view them "
          "again in the future.  The JSON files can be drag-dropped onto "