3fd9e9e8d0a977ee255486e60c785aef5689cc39
angie
  Tue Jul 13 09:42:47 2021 -0700
Add summary table for results from pasted/uploaded IDs, to show lineages & subtree numbers, requested by Russ.

diff --git src/hg/hgPhyloPlace/phyloPlace.c src/hg/hgPhyloPlace/phyloPlace.c
index 5ad13be..fb8b18e 100644
--- src/hg/hgPhyloPlace/phyloPlace.c
+++ src/hg/hgPhyloPlace/phyloPlace.c
@@ -1133,39 +1133,40 @@
                                                 int *retIx)
 /* Find the subtree that contains sample name and set *retIx to its index in the list.
  * If we can't find it, return NULL and set *retIx to -1. */
 {
 struct subtreeInfo *ti;
 int ix;
 for (ti = subtreeInfoList, ix = 0;  ti != NULL;  ti = ti->next, ix++)
     if (slNameInList(ti->subtreeUserSampleIds, name))
         break;
 if (ti == NULL)
     ix = -1;
 *retIx = ix;
 return ti;
 }
 
-static void lookForCladesAndLineages(struct seqInfo *seqInfoList, struct hash *samplePlacements,
+static void lookForCladesAndLineages(struct hash *samplePlacements,
                                      boolean *retGotClades, boolean *retGotLineages)
 /* See if UShER has annotated any clades and/or lineages for seqs. */
 {
 boolean gotClades = FALSE, gotLineages = FALSE;
-struct seqInfo *si;
-for (si = seqInfoList;  si != NULL;  si = si->next)
+struct hashEl *hel;
+struct hashCookie cookie = hashFirst(samplePlacements);  // iterate over all entries in the hash
+while ((hel = hashNext(&cookie)) != NULL)
     {
-    struct placementInfo *pi = hashFindVal(samplePlacements, si->seq->name);
+    struct placementInfo *pi = hel->val;  // value stored for this entry; NULL-checked below
     if (pi)
         {
         if (isNotEmpty(pi->nextClade))
             gotClades = TRUE;
         if (isNotEmpty(pi->pangoLineage))
             gotLineages = TRUE;
         if (gotClades && gotLineages)
             break;
         }
     }
 *retGotClades = gotClades;
 *retGotLineages = gotLineages;
 }
 
 static char *nextstrainHost()
@@ -1496,40 +1497,61 @@
 if (si->nCountEnd)
     dyStringPrintf(dy, "%d N bases at end", si->nCountEnd);
 }
 
 static void printLineageTd(char *lineage, char *alt)
 /* Print a table cell with lineage (& link to outbreak.info if not 'None') or alt if no lineage. */
 {
 if (lineage && differentString(lineage, "None"))
     printf("<td><a href='"OUTBREAK_INFO_URLBASE"%s' target=_blank>%s</a></td>", lineage, lineage);
 else if (lineage)
     printf("<td>%s</td>", lineage);
 else
     printf("<td>%s</td>", alt);
 }
 
+static void printSubtreeTd(struct subtreeInfo *subtreeInfoList, struct tempName *jsonTns[],
+                           char *seqName)
+/* Print a table cell with seqName's subtree number (& Nextstrain link if possible) or n/a. */
+{
+int ix;
+struct subtreeInfo *ti = subtreeInfoForSample(subtreeInfoList, seqName, &ix);
+if (ix < 0)
+    //#*** Probably an error.
+    printf("<td>n/a</td>");
+else
+    {
+    printf("<td>%d", ix+1);  // ix is a 0-based list index; display it 1-based
+    if (ti && nextstrainHost())
+        {
+        char *nextstrainUrl = nextstrainUrlFromTn(jsonTns[ix]);  // same ix indexes jsonTns
+        printf(" (<a href='%s' target=_blank>view in Nextstrain</a>)", nextstrainUrl);
+        }
+    printf("</td>");
+    }
+}
+
 static void summarizeSequences(struct seqInfo *seqInfoList, boolean isFasta,
                                struct usherResults *ur, struct tempName *jsonTns[],
                                struct hash *sampleMetadata, struct dnaSeq *refGenome)
 /* Show a table with composition & alignment stats for each sequence that passed basic QC. */
 {
 if (seqInfoList)
     {
     puts("<table class='seqSummary'>");
     boolean gotClades = FALSE, gotLineages = FALSE;
-    lookForCladesAndLineages(seqInfoList, ur->samplePlacements, &gotClades, &gotLineages);
+    lookForCladesAndLineages(ur->samplePlacements, &gotClades, &gotLineages);
     printSummaryHeader(isFasta, gotClades, gotLineages);
     puts("<tbody>");
     struct dyString *dy = dyStringNew(0);
     struct seqInfo *si;
     for (si = seqInfoList;  si != NULL;  si = si->next)
         {
         puts("<tr>");
         printf("<th>%s</td>", replaceChars(si->seq->name, "|", " | "));
         if (isFasta)
             {
             if (si->nCountStart || si->nCountEnd)
                 {
                 int effectiveLength = si->seq->size - (si->nCountStart + si->nCountEnd);
                 dyStringClear(dy);
                 dyStringPrintf(dy, "%d ", effectiveLength);
@@ -1670,50 +1692,96 @@
                 }
             printf("</td><td class='%s'>%d",
                    qcClassForPlacements(pi->bestNodeCount), pi->bestNodeCount);
             printf("</td><td class='%s'>%d",
                    qcClassForPScore(pi->parsimonyScore), pi->parsimonyScore);
             printf("</td>");
             }
         else
             {
             if (gotClades)
                 printf("<td>n/a></td>");
             if (gotLineages)
                 printf("<td>n/a></td>");
             printf("<td>n/a</td><td>n/a</td><td>n/a</td><td>n/a</td><td>n/a</td>");
             }
-        int ix;
-        struct subtreeInfo *ti = subtreeInfoForSample(ur->subtreeInfoList, si->seq->name, &ix);
-        if (ix < 0)
-            //#*** Probably an error.
-            printf("<td>n/a</td>");
-        else
+        printSubtreeTd(ur->subtreeInfoList, jsonTns, si->seq->name);
+        puts("</tr>");
+        }
+    puts("</tbody></table><p></p>");
+    }
+}
+
+static void summarizeSubtrees(struct slName *sampleIds, struct usherResults *results,
+                              struct hash *sampleMetadata, struct tempName *jsonTns[],
+                              struct mutationAnnotatedTree *bigTree)
+/* Print a table row per name in sampleIds: clade & lineage columns, subtree.  bigTree is unused. */
 {
-            printf("<td>%d", ix+1);
-            if (ti && nextstrainHost())
+boolean gotClades = FALSE, gotLineages = FALSE;  // which optional columns to show
+lookForCladesAndLineages(results->samplePlacements, &gotClades, &gotLineages);
+puts("<table class='seqSummary'><tbody>");
+puts("<tr><th>Sequence</th>");
+if (gotClades)
+    puts("<th>Nextstrain clade (UShER)"
+     TOOLTIP("The <a href='https://nextstrain.org/blog/2021-01-06-updated-SARS-CoV-2-clade-naming' "
+             "target=_blank>Nextstrain clade</a> "
+             "assigned to the sequence by UShER according to its place in the phylogenetic tree")
+         "</th>");
+if (gotLineages)
+    puts("<th>Pango lineage (UShER)"
+         TOOLTIP("The <a href='https://cov-lineages.org/' "
+                 "target=_blank>Pango lineage</a> "
+                 "assigned to the sequence by UShER according to its place in the phylogenetic tree")
+         "</th>");
+puts("<th>Pango lineage (pangolin)"
+     TOOLTIP("The <a href='https://cov-lineages.org/' target=_blank>"
+             "Pango lineage</a> assigned to the sequence by "
+             "<a href='https://github.com/cov-lineages/pangolin/' target=_blank>pangolin</a>")
+     "</th>"
+     "<th>subtree</th></tr>");
+struct slName *si;
+for (si = sampleIds;  si != NULL;  si = si->next)
     {
-                char *nextstrainUrl = nextstrainUrlFromTn(jsonTns[ix]);
-                printf(" (<a href='%s' target=_blank>view in Nextstrain<a>)", nextstrainUrl);
+    printf("<tr>\n<th>%s</th>", replaceChars(si->name, "|", " | "));  // row header: name
+    struct placementInfo *pi = hashFindVal(results->samplePlacements, si->name);  // may be NULL
+    if (pi)
+        {
+        if (gotClades)
+            printf("<td>%s</td>", pi->nextClade ? pi->nextClade : "n/a");
+        if (gotLineages)
+            printLineageTd(pi->pangoLineage, "n/a");
         }
-            printf("</td>");
+    else
+        {
+        if (gotClades)
+            printf("<td>n/a</td>");
+        if (gotLineages)
+            printf("<td>n/a</td>");
         }
-        puts("</tr>");
+    // pangolin-assigned lineage
+    char *lineage = lineageForSample(sampleMetadata, si->name);
+    if (isNotEmpty(lineage))
+        printf("<td><a href='"OUTBREAK_INFO_URLBASE"%s' target=_blank>%s</a></td>",
+               lineage, lineage);
+    else
+        printf("<td>n/a</td>");
+    // Maybe also #mutations with mouseover to show mutation path?
+    printSubtreeTd(results->subtreeInfoList, jsonTns, si->name);
+    puts("</tr>");
     }
 puts("</tbody></table><p></p>");
 }
-}
 
 static struct singleNucChange *sncListFromSampleMutsAndImputed(struct slName *sampleMuts,
                                                                struct baseVal *imputedBases,
                                                                struct seqWindow *gSeqWin)
 /* Convert a list of "<ref><pos><alt>" names to struct singleNucChange list.
  * However, if <alt> is ambiguous, skip it because variantProjector doesn't like it.
  * Add imputed base predictions. */
 {
 struct singleNucChange *sncList = NULL;
 struct slName *mut;
 for (mut = sampleMuts;  mut != NULL;  mut = mut->next)
     {
     char ref = mut->name[0];
     if (ref < 'A' || ref > 'Z')
         errAbort("sncListFromSampleMuts: expected ref base value, got '%c' in '%s'",
@@ -2631,31 +2699,36 @@
                       sampleUrls, singleSubtreeJsonTn->forCgi, source);
     struct subtreeInfo *subtreeInfoForButtons = results->subtreeInfoList;
     if (subtreeCount > MAX_SUBTREE_BUTTONS)
         subtreeInfoForButtons = NULL;
     makeButtonRow(singleSubtreeJsonTn, jsonTns, subtreeInfoForButtons, subtreeSize, isFasta,
                   !subtreesOnly);
     printf("<p>If you have metadata you wish to display, click a 'view subtree in "
            "Nextstrain' button, and then you can drag on a CSV file to "
            "<a href='"NEXTSTRAIN_DRAG_DROP_DOC"' target=_blank>add it to the tree view</a>."
            "</p>\n");
 
     struct tempName *tsvTn = NULL, *sTsvTn = NULL;
     struct tempName *zipTn = makeSubtreeZipFile(results, jsonTns, singleSubtreeJsonTn,
                                                 &startTime);
     struct tempName *ctTn = NULL;
-    if (! subtreesOnly)
+    if (subtreesOnly)
+        {
+        summarizeSubtrees(sampleIds, results, sampleMetadata, jsonTns, bigTree);
+        reportTiming(&startTime, "describe subtrees");
+        }
+    else
         {
         findNearestNeighbors(results->samplePlacements, sampleMetadata, bigTree);
 
         // Make custom tracks for uploaded samples and subtree(s).
         struct phyloTree *sampleTree = NULL;
         ctTn = writeCustomTracks(vcfTn, results, sampleIds, bigTree->tree,
                                  source, fontHeight, &sampleTree, &startTime);
 
         // Make a sample summary TSV file and accumulate S gene changes
         struct hash *spikeChanges = hashNew(0);
         tsvTn = writeTsvSummary(results, sampleTree, sampleIds, seqInfoList,
                                                  geneInfoList, gSeqWin, spikeChanges, &startTime);
         sTsvTn = writeSpikeChangeSummary(spikeChanges, slCount(sampleIds));
         downloadsRow(results->bigTreePlusTn->forHtml, tsvTn->forHtml, sTsvTn->forHtml,
                      zipTn->forHtml);