17cdc50389b482ee9f3f1ce649ed297201269d53
angie
  Thu Feb 25 20:41:01 2021 -0800
If usher's output includes clade and lineage assignments, parse them and include them in the summary table.

diff --git src/hg/hgPhyloPlace/phyloPlace.c src/hg/hgPhyloPlace/phyloPlace.c
index c63468d..7fc3fe5 100644
--- src/hg/hgPhyloPlace/phyloPlace.c
+++ src/hg/hgPhyloPlace/phyloPlace.c
@@ -1033,30 +1033,53 @@
                                                 int *retIx)
 /* Find the subtree that contains sample name and set *retIx to its index in the list.
  * If we can't find it, return NULL and set *retIx to -1. */
 {
 struct subtreeInfo *ti;
 int ix;
 for (ti = subtreeInfoList, ix = 0;  ti != NULL;  ti = ti->next, ix++)
     if (slNameInList(ti->subtreeUserSampleIds, name))
         break;
 if (ti == NULL)
     ix = -1;
 *retIx = ix;
 return ti;
 }
 
+static void lookForCladesAndLineages(struct seqInfo *seqInfoList, struct hash *samplePlacements,
+                                     boolean *retGotClades, boolean *retGotLineages)
+/* See if UShER has annotated any clades and/or lineages for seqs. */
+{
+boolean gotClades = FALSE, gotLineages = FALSE;
+struct seqInfo *si;
+for (si = seqInfoList;  si != NULL;  si = si->next)
+    {
+    struct placementInfo *pi = hashFindVal(samplePlacements, si->seq->name);
+    if (pi)
+        {
+        if (isNotEmpty(pi->nextClade))
+            gotClades = TRUE;
+        if (isNotEmpty(pi->pangoLineage))
+            gotLineages = TRUE;
+        if (gotClades && gotLineages)
+            break;
+        }
+    }
+*retGotClades = gotClades;
+*retGotLineages = gotLineages;
+}
+
 static char *nextstrainHost()
 /* Return the nextstrain hostname from an hg.conf param, or NULL if missing. */
 {
 return cfgOption("nextstrainHost");
 }
 
 static char *nextstrainUrlFromTn(struct tempName *jsonTn)
 /* Return a link to Nextstrain to view an annotated subtree. */
 {
 char *jsonUrlForNextstrain = urlFromTn(jsonTn);
 char *protocol = strstr(jsonUrlForNextstrain, "://");
 if (protocol)
     jsonUrlForNextstrain = protocol + strlen("://");
 struct dyString *dy = dyStringCreate("%s/fetch/%s", nextstrainHost(), jsonUrlForNextstrain);
 return dyStringCannibalize(&dy);
@@ -1107,31 +1130,31 @@
         }
     }
 if (0 && isFasta)
     {
     printf(" ");
     struct dyString *js = dyStringCreate("window.open('https://master.clades.nextstrain.org/"
                                          "?input-fasta=%s');",
                                          "needATn");  //#*** TODO: save FASTA to file
     cgiMakeOnClickButton("viewNextclade", js->string, "view sequences in Nextclade");
     }
 puts("</p>");
 }
 
 #define TOOLTIP(text) " <div class='tooltip'>(?)<span class='tooltiptext'>" text "</span></div>"
 
-static void printSummaryHeader(boolean isFasta)
+static void printSummaryHeader(boolean isFasta, boolean gotClades, boolean gotLineages)
 /* Print the summary table header row with tooltips explaining columns. */
 {
 puts("<thead><tr>");
 if (isFasta)
     puts("<th>Fasta Sequence</th>\n"
          "<th>Size"
          TOOLTIP("Length of uploaded sequence in bases, excluding runs of N bases at "
                  "beginning and/or end")
          "</th>\n<th>#Ns"
          TOOLTIP("Number of 'N' bases in uploaded sequence, excluding runs of N bases at "
                  "beginning and/or end")
          "</th>");
 else
     puts("<th>VCF Sample</th>\n"
          "<th>#Ns"
@@ -1148,37 +1171,45 @@
          "</th>\n<th>Inserted bases"
          TOOLTIP("Number of bases in aligned portion of uploaded sequence that are not present in "
                  "reference NC_045512.2 Wuhan/Hu-1")
          "</th>\n<th>Deleted bases"
          TOOLTIP("Number of bases in reference NC_045512.2 Wuhan/Hu-1 that are not "
                  "present in aligned portion of uploaded sequence")
          "</th>");
 puts("<th>#SNVs used for placement"
      TOOLTIP("Number of single-nucleotide variants in uploaded sample "
              "(does not include N's or mixed bases) used by UShER to place sample "
              "in phylogenetic tree")
      "</th>\n<th>#Masked SNVs"
      TOOLTIP("Number of single-nucleotide variants in uploaded sample that are masked "
              "(not used for placement) because they occur at known "
              "<a href='https://virological.org/t/issues-with-sars-cov-2-sequencing-data/473/12' "
-             "target=_blank>Problematic Sites</a>")
-     "</th>\n<th>Neighboring sample in tree"
+             "target=_blank>Problematic Sites</a>"));;
+if (gotClades)
+    puts("</th>\n<th>Nextstrain clade"
+     TOOLTIP("The <a href='https://nextstrain.org/blog/2021-01-06-updated-SARS-CoV-2-clade-naming' "
+             "target=_blank>Nextstrain clade</a> assigned to the sample by UShER"));
+if (gotLineages)
+    puts("</th>\n<th>Pango lineage"
+     TOOLTIP("The <a href='https://cov-lineages.org/' "
+             "target=_blank>Pango lineage</a> assigned to the sample by UShER"));
+puts("</th>\n<th>Neighboring sample in tree"
      TOOLTIP("A sample already in the tree that is a child of the node at which the uploaded "
              "sample was placed, to give an example of a closely related sample")
      "</th>\n<th>Lineage of neighbor"
-     TOOLTIP("The <a href='https://github.com/cov-lineages/pangolin' target=_blank>"
-             "Pangolin lineage</a> assigned to the nearest neighboring sample already in the tree")
+     TOOLTIP("The <a href='https://cov-lineages.org/' target=_blank>"
+             "Pango lineage</a> assigned to the nearest neighboring sample already in the tree")
      "</th>\n<th>#Imputed values for mixed bases"
      TOOLTIP("If the uploaded sequence contains mixed/ambiguous bases, then UShER may assign "
              "values based on maximum parsimony")
      "</th>\n<th>#Maximally parsimonious placements"
      TOOLTIP("Number of potential placements in the tree with minimal parsimony score; "
              "the higher the number, the less confident the placement")
      "</th>\n<th>Parsimony score"
      TOOLTIP("Number of mutations/changes that must be added to the tree when placing the "
              "uploaded sample; the higher the number, the more diverged the sample")
      "</th>\n<th>Subtree number"
      TOOLTIP("Sequence number of subtree that contains this sample")
      "</th></tr></thead>");
 }
 
 
@@ -1348,31 +1379,33 @@
 if (si->nCountStart && si->nCountEnd)
     dyStringAppend(dy, " and ");
 if (si->nCountEnd)
     dyStringPrintf(dy, "%d N bases at end", si->nCountEnd);
 }
 
 static void summarizeSequences(struct seqInfo *seqInfoList, boolean isFasta,
                                struct usherResults *ur, struct tempName *jsonTns[],
                                struct hash *sampleMetadata, struct mutationAnnotatedTree *bigTree,
                                struct dnaSeq *refGenome)
 /* Show a table with composition & alignment stats for each sequence that passed basic QC. */
 {
 if (seqInfoList)
     {
     puts("<table class='seqSummary'>");
-    printSummaryHeader(isFasta);
+    boolean gotClades = FALSE, gotLineages = FALSE;
+    lookForCladesAndLineages(seqInfoList, ur->samplePlacements, &gotClades, &gotLineages);
+    printSummaryHeader(isFasta, gotClades, gotLineages);
     puts("<tbody>");
     struct dyString *dy = dyStringNew(0);
     struct seqInfo *si;
     for (si = seqInfoList;  si != NULL;  si = si->next)
         {
         puts("<tr>");
         printf("<th>%s</td>", replaceChars(si->seq->name, "|", " | "));
         if (isFasta)
             {
             if (si->nCountStart || si->nCountEnd)
                 {
                 int effectiveLength = si->seq->size - (si->nCountStart + si->nCountEnd);
                 dyStringClear(dy);
                 dyStringPrintf(dy, "%d ", effectiveLength);
                 appendExcludingNs(dy, si);
@@ -1477,57 +1510,67 @@
                 dyStringPrintf(dy, "%c%d%c (%s",
                                snc->refBase, snc->chromStart+1, snc->newBase, reasonList->name);
                 for (reason = reasonList->next;  reason != NULL;  reason = reason->next)
                     {
                     replaceChar(reason->name, '_', ' ');
                     dyStringPrintf(dy, ", %s", reason->name);
                     }
                 dyStringAppendC(dy, ')');
                 }
             printTooltip(dy->string);
             }
         printf("</td>");
         struct placementInfo *pi = hashFindVal(ur->samplePlacements, si->seq->name);
         if (pi)
             {
+            if (gotClades)
+                printf("<td>%s</td>", pi->nextClade ? pi->nextClade : "n/a");
+            if (gotLineages)
+                printf("<td>%s</td>", pi->pangoLineage ? pi->pangoLineage : "n/a");
             struct slName *neighbor = findNearestNeighbor(bigTree, pi->sampleId, pi->variantPath);
             char *lineage = neighbor ?  lineageForSample(sampleMetadata, neighbor->name) : "?";
             printf("<td>%s</td><td>%s</td>",
                    neighbor ? replaceChars(neighbor->name, "|", " | ") : "?",
                    lineage ? lineage : "?");
             int imputedCount = slCount(pi->imputedBases);
             printf("<td class='%s'>%d",
                    qcClassForImputedBases(imputedCount), imputedCount);
             if (imputedCount > 0)
                 {
                 dyStringClear(dy);
                 struct baseVal *bv;
                 for (bv = pi->imputedBases;  bv != NULL;  bv = bv->next)
                     {
                     dyStringAppendSep(dy, ", ");
                     dyStringPrintf(dy, "%d: %s", bv->chromStart+1, bv->val);
                     }
                 printTooltip(dy->string);
                 }
             printf("</td><td class='%s'>%d",
                    qcClassForPlacements(pi->bestNodeCount), pi->bestNodeCount);
             printf("</td><td class='%s'>%d",
                    qcClassForPScore(pi->parsimonyScore), pi->parsimonyScore);
             printf("</td>");
             }
         else
+            {
+            if (gotClades)
+                printf("<td>n/a></td>");
+            if (gotLineages)
+                printf("<td>n/a></td>");
             printf("<td>n/a</td><td>n/a</td><td>n/a</td><td>n/a</td><td>n/a</td>");
+            }
         int ix;
         struct subtreeInfo *ti = subtreeInfoForSample(ur->subtreeInfoList, si->seq->name, &ix);
         if (ix < 0)
             //#*** Probably an error.
             printf("<td>n/a</td>");
         else
             {
             printf("<td>%d", ix+1);
             if (ti && nextstrainHost())
                 {
                 char *nextstrainUrl = nextstrainUrlFromTn(jsonTns[ix]);
                 printf(" (<a href='%s' target=_blank>view in Nextstrain<a>)", nextstrainUrl);
                 }
             printf("</td>");
             }