1813d94a8d9108b9709cb48166b4980d0f02d839
angie
  Wed Sep 8 12:02:05 2021 -0700
Add coloring options for Nextstrain clade and Pango lineage assigned by usher (really by matUtils annotate) if metadata includes columns for those.

diff --git src/hg/hgPhyloPlace/treeToAuspiceJson.c src/hg/hgPhyloPlace/treeToAuspiceJson.c
index 9d09250..978e603 100644
--- src/hg/hgPhyloPlace/treeToAuspiceJson.c
+++ src/hg/hgPhyloPlace/treeToAuspiceJson.c
@@ -48,31 +48,59 @@
       "        [ \"21D (Eta)\", \"#BEBB48\" ],"
       "        [ \"21E (Theta)\", \"#D9AE3E\" ],"
       "        [ \"21F (Iota)\", \"#E69036\" ],"
       "        [ \"21G (Lambda)\", \"#E35F2D\" ],"
       "        [ \"21H\", \"#DB2823\" ],"
       "        [ \"uploaded sample\", \"#FF0000\" ] ],"
       "    \"title\": \"Nextstrain Clade\", \"type\": \"categorical\" },"
       , outF);
 if (sameString(source, "GISAID"))
     fputs("  { \"key\": \"GISAID_clade\","
       "    \"scale\": [ [ \"S\", \"#EC676D\" ], [ \"L\", \"#F79E43\" ], [ \"O\", \"#F9D136\" ],"
       "        [ \"V\", \"#FAEA95\" ], [ \"G\", \"#B6D77A\" ], [ \"GH\", \"#8FD4ED\" ],"
       "        [ \"GR\", \"#A692C3\" ] ],"
       "    \"title\": \"GISAID Clade\", \"type\": \"categorical\" },"
           , outF);
-fprintf(outF, "  { \"key\": \"userOrOld\", "
+fprintf(outF,
+      "  { \"key\": \"pango_lineage_usher\", "
+      "    \"title\": \"Pango lineage assigned by UShER\", \"type\": \"categorical\" },"
+      "  { \"key\": \"Nextstrain_clade_usher\","
+      "    \"scale\": [ "
+      "        [ \"20H(Beta, V2)\", \"#3F47C9\" ],"
+      "        [ \"20I(Alpha, V1)\", \"#4274CE\" ],"
+      "        [ \"20J(Gamma, V3)\", \"#4F97BB\" ],"
+      "        [ \"21A(Delta)\", \"#64AC99\" ],"
+      "        [ \"21B(Kappa)\", \"#7EB976\" ],"
+      "        [ \"21C(Epsilon)\", \"#9EBE5A\" ],"
+      "        [ \"21D(Eta)\", \"#BEBB48\" ],"
+      "        [ \"21E(Theta)\", \"#D9AE3E\" ],"
+      "        [ \"21F(Iota)\", \"#E69036\" ],"
+      "        [ \"21G(Lambda)\", \"#E35F2D\" ],"
+      "        [ \"20H (Beta, V2)\", \"#3F47C9\" ],"
+      "        [ \"20I (Alpha, V1)\", \"#4274CE\" ],"
+      "        [ \"20J (Gamma, V3)\", \"#4F97BB\" ],"
+      "        [ \"21A (Delta)\", \"#64AC99\" ],"
+      "        [ \"21B (Kappa)\", \"#7EB976\" ],"
+      "        [ \"21C (Epsilon)\", \"#9EBE5A\" ],"
+      "        [ \"21D (Eta)\", \"#BEBB48\" ],"
+      "        [ \"21E (Theta)\", \"#D9AE3E\" ],"
+      "        [ \"21F (Iota)\", \"#E69036\" ],"
+      "        [ \"21G (Lambda)\", \"#E35F2D\" ],"
+      "        [ \"21H\", \"#DB2823\" ],"
+      "        [ \"uploaded sample\", \"#FF0000\" ] ],"
+      "    \"title\": \"Nextstrain Clade assigned by UShER\", \"type\": \"categorical\" },"
+        "  { \"key\": \"userOrOld\", "
         "    \"scale\": [ [ \"uploaded sample\", \"#CC0000\"] , [ \"%s\", \"#000000\"] ],"
         "    \"title\": \"Sample type\", \"type\": \"categorical\" },"
         "  {\"key\": \"gt\", \"title\": \"Genotype\", \"type\": \"categorical\"},"
         "  {\"key\": \"country\", \"title\": \"Country\", \"type\": \"categorical\"}"
         , source);
 fputs("  ] , "
 //#*** Filters didn't seem to work... maybe something about the new fetch feature, or do I need to spcify in some other way?
 //#***      "\"filters\": [ \"GISAID_clade\", \"region\", \"country\", \"division\", \"author\" ], "
       "\"filters\": [ ], "
       "\"genome_annotations\":"
       "{\"E\":{\"end\":26472,\"start\":26245,\"strand\":\"+\",\"type\":\"CDS\"},"
       " \"M\":{\"end\":27191,\"start\":26523,\"strand\":\"+\",\"type\":\"CDS\"},"
       " \"N\":{\"end\":29533,\"start\":28274,\"strand\":\"+\",\"type\":\"CDS\"},"
       " \"ORF1a\":{\"end\":13468,\"start\":266,\"strand\":\"+\",\"type\":\"CDS\"},"
       " \"ORF1b\":{\"end\":21555,\"start\":13468,\"strand\":\"+\",\"type\":\"CDS\"},"
@@ -106,116 +134,140 @@
 {
 jsonWriteObjectStart(jw, name);
 jsonWriteString(jw, "value", value);
 if (isNotEmpty(url))
     jsonWriteString(jw, "url", url);
 jsonWriteObjectEnd(jw);
 }
 
 static void jsonWriteObjectValue(struct jsonWrite *jw, char *name, char *value)
 /* Write an object with one member, "value", set to value, as most Auspice node attributes are
  * formatted. */
 {
 jsonWriteObjectValueUrl(jw, name, value, NULL);
 }
 
+static void makeLineageUrl(char *lineage, char *lineageUrl, size_t lineageUrlSize)
+/* If lineage is not "uploaded sample", make an outbreak.info link to it, otherwise just copy
+ * lineage. */
+{
+if (sameString(lineage, "uploaded sample"))
+    safecpy(lineageUrl, lineageUrlSize, lineage);
+else
+    safef(lineageUrl, lineageUrlSize, OUTBREAK_INFO_URLBASE "%s", lineage);
+}
+
 static void jsonWriteLeafNodeAttributes(struct jsonWrite *jw, char *name,
                                         struct sampleMetadata *met, boolean isUserSample,
                                         char *source, struct hash *sampleUrls,
                                         char **retUserOrOld, char **retNClade, char **retGClade,
-                                        char **retLineage)
+                                        char **retLineage,
+                                        char **retNCladeUsher, char **retLineageUsher)
 /* Write elements of node_attrs for a sample which may be preexisting and in our metadata hash,
  * or may be a new sample from the user.  Set rets for color categories so parent branches can
  * determine their color categories. */
 {
 *retUserOrOld = isUserSample ? "uploaded sample" : source;
 jsonWriteObjectValue(jw, "userOrOld", *retUserOrOld);
 if (met && met->date)
     jsonWriteObjectValue(jw, "date", met->date);
 if (met && met->author)
     {
     jsonWriteObjectValue(jw, "author", met->author);
     // Note: Nextstrain adds paper_url and title when available; they also add author and use
     // a uniquified value (e.g. "author": "Wenjie Tan et al" / "value": "Wenjie Tan et al A")
     }
 *retNClade = isUserSample ? "uploaded sample" : (met && met->nClade) ? met->nClade : NULL;
 if (isNotEmpty(*retNClade))
     jsonWriteObjectValue(jw, "Nextstrain_clade", *retNClade);
 *retGClade = isUserSample ? "uploaded sample" : (met && met->gClade) ? met->gClade : NULL;
 if (isNotEmpty(*retGClade))
     jsonWriteObjectValue(jw, "GISAID_clade", *retGClade);
 *retLineage = isUserSample ? "uploaded sample" :
                              (met && met->lineage) ? met->lineage : NULL;
 if (isNotEmpty(*retLineage))
     {
     char lineageUrl[1024];
-    if (sameString(*retLineage, "uploaded sample"))
-        safecpy(lineageUrl, sizeof lineageUrl, *retLineage);
-    else
-        safef(lineageUrl, sizeof lineageUrl, OUTBREAK_INFO_URLBASE "%s",
-              *retLineage);
+    makeLineageUrl(*retLineage, lineageUrl, sizeof lineageUrl);
     jsonWriteObjectValueUrl(jw, "pango_lineage", *retLineage, lineageUrl);
     }
 if (met && met->epiId)
     jsonWriteObjectValue(jw, "gisaid_epi_isl", met->epiId);
 if (met && met->gbAcc)
     jsonWriteObjectValue(jw, "genbank_accession", met->gbAcc);
 if (met && met->country)
     jsonWriteObjectValue(jw, "country", met->country);
 if (met && met->division)
     jsonWriteObjectValue(jw, "division", met->division);
 if (met && met->location)
     jsonWriteObjectValue(jw, "location", met->location);
 if (met && met->countryExp)
     jsonWriteObjectValue(jw, "country_exposure", met->countryExp);
 if (met && met->divExp)
     jsonWriteObjectValue(jw, "division_exposure", met->divExp);
 if (met && met->origLab)
     jsonWriteObjectValue(jw, "originating_lab", met->origLab);
 if (met && met->subLab)
     jsonWriteObjectValue(jw, "submitting_lab", met->subLab);
 if (met && met->region)
     jsonWriteObjectValue(jw, "region", met->region);
+*retNCladeUsher = isUserSample ? "uploaded sample" :
+                                 (met && met->nCladeUsher) ? met->nCladeUsher : NULL;
+if (isNotEmpty(*retNCladeUsher))
+    jsonWriteObjectValue(jw, "Nextstrain_clade_usher", *retNCladeUsher);
+*retLineageUsher = isUserSample ? "uploaded sample" :
+                                  (met && met->lineageUsher) ? met->lineageUsher : NULL;
+if (isNotEmpty(*retLineageUsher))
+    {
+    char lineageUrl[1024];
+    makeLineageUrl(*retLineageUsher, lineageUrl, sizeof lineageUrl);
+    jsonWriteObjectValueUrl(jw, "pango_lineage_usher", *retLineageUsher, lineageUrl);
+    }
 char *sampleUrl = (sampleUrls && name) ? hashFindVal(sampleUrls, name) : NULL;
 if (isNotEmpty(sampleUrl))
     {
     char *p = strstr(sampleUrl, "subtreeAuspice");
     char *subtreeNum = p + strlen("subtreeAuspice");
     if (p && isdigit(*subtreeNum))
         {
         int num = atoi(subtreeNum);
         char subtreeLabel[1024];
         safef(subtreeLabel, sizeof subtreeLabel, "view subtree %d", num);
         jsonWriteObjectValueUrl(jw, "subtree", subtreeLabel, sampleUrl);
         }
     else
         jsonWriteObjectValueUrl(jw, "subtree", sampleUrl, sampleUrl);
     }
 }
 
 static void jsonWriteBranchNodeAttributes(struct jsonWrite *jw, char *userOrOld,
-                                          char *nClade, char *gClade, char *lineage)
+                                          char *nClade, char *gClade, char *lineage,
+                                          char *nCladeUsher, char *lineageUsher)
 /* Write elements of node_attrs for a branch. */
 {
 if (userOrOld)
     jsonWriteObjectValue(jw, "userOrOld", userOrOld);
 if (nClade)
     jsonWriteObjectValue(jw, "Nextstrain_clade", nClade);
 if (gClade)
     jsonWriteObjectValue(jw, "GISAID_clade", gClade);
 if (lineage)
     jsonWriteObjectValue(jw, "pango_lineage", lineage);
+if (nCladeUsher)
+    jsonWriteObjectValue(jw, "Nextstrain_clade_usher", nCladeUsher);
+if (lineageUsher)
+    jsonWriteObjectValue(jw, "pango_lineage_usher", lineageUsher);
 }
 
 static boolean changesProtein(struct singleNucChange *snc, struct geneInfo *gi,
                               struct seqWindow *gSeqWin,
                               int *retAaStart, char *retOldAa, char *retNewAa)
 /* If snc changes the coding sequence of gene, return TRUE and set ret values accordingly
  * (note amino acid values are single-base not strings). */
 {
 boolean isCodingChange = FALSE;
 if (snc->chromStart < gi->psl->tEnd && snc->chromStart >= gi->psl->tStart)
     {
     struct bed3 gBed3 = { NULL, chrom, snc->chromStart, snc->chromStart+1 };
     char gAlt[2];
     safef(gAlt, sizeof(gAlt), "%c", snc->newBase);
     if (!sameString(gi->psl->strand, "+"))
@@ -411,81 +463,90 @@
         runLength++;
         if (runLength > maxRunLength)
             {
             maxRunLength = runLength;
             maxRunVal = array[i];
             }
         }
     else
         runLength = 1;
     }
 return (maxRunLength > (arraySize >> 1)) ? maxRunVal : NULL;
 }
 
 static void rTreeToAuspiceJson(struct phyloTree *node, int depth, struct auspiceJsonInfo *aji,
                                char **retUserOrOld, char **retNClade, char **retGClade,
-                               char **retLineage)
+                               char **retLineage, char **retNCladeUsher, char **retLineageUsher)
 /* Write Augur/Auspice V2 JSON for tree.  Enclosing object start and end are written by caller. */
 {
 if (node->priv)
     {
     struct singleNucChange *sncList = node->priv;
     depth += slCount(sncList);
     }
 boolean isUserSample = FALSE;
 if (node->ident->name)
     isUserSample = slNameInList(aji->subtreeUserSampleIds, node->ident->name);
 char *name = node->ident->name;
 struct sampleMetadata *met = name ? metadataForSample(aji->sampleMetadata, name) : NULL;
 if (name)
     jsonWriteString(aji->jw, "name", name);
 else
     jsonWriteStringf(aji->jw, "name", "NODE%d", aji->nodeNum++);
 jsonWriteBranchAttrs(aji->jw, node, aji->geneInfoList, aji->gSeqWin);
 if (node->numEdges > 0)
     {
     jsonWriteListStart(aji->jw, "children");
     char *kidUserOrOld[node->numEdges];
     char *kidNClade[node->numEdges];
     char *kidGClade[node->numEdges];
     char *kidLineage[node->numEdges];
+    char *kidNCladeUsher[node->numEdges];
+    char *kidLineageUsher[node->numEdges];
     // Step through children in reverse order because nextstrain/Auspice draws upside-down. :)
     int i;
     for (i = node->numEdges - 1;  i >= 0;  i--)
         {
         jsonWriteObjectStart(aji->jw, NULL);
         rTreeToAuspiceJson(node->edges[i], depth, aji,
-                           &kidUserOrOld[i], &kidNClade[i], &kidGClade[i], &kidLineage[i]);
+                           &kidUserOrOld[i], &kidNClade[i], &kidGClade[i], &kidLineage[i],
+                           &kidNCladeUsher[i], &kidLineageUsher[i]);
         jsonWriteObjectEnd(aji->jw);
         }
     jsonWriteListEnd(aji->jw);
     if (retUserOrOld)
         *retUserOrOld = majorityMaybe(kidUserOrOld, node->numEdges);
     if (retNClade)
         *retNClade = majorityMaybe(kidNClade, node->numEdges);
     if (retGClade)
         *retGClade = majorityMaybe(kidGClade, node->numEdges);
     if (retLineage)
         *retLineage = majorityMaybe(kidLineage, node->numEdges);
+    if (retNCladeUsher)
+        *retNCladeUsher = majorityMaybe(kidNCladeUsher, node->numEdges);
+    if (retLineageUsher)
+        *retLineageUsher = majorityMaybe(kidLineageUsher, node->numEdges);
     }
 jsonWriteObjectStart(aji->jw, "node_attrs");
 jsonWriteDouble(aji->jw, "div", depth);
 if (node->numEdges == 0)
     jsonWriteLeafNodeAttributes(aji->jw, name, met, isUserSample, aji->source, aji->sampleUrls,
-                                retUserOrOld, retNClade, retGClade, retLineage);
+                                retUserOrOld, retNClade, retGClade, retLineage,
+                                retNCladeUsher, retLineageUsher);
 else if (retUserOrOld && retGClade && retLineage)
-    jsonWriteBranchNodeAttributes(aji->jw, *retUserOrOld, *retNClade, *retGClade, *retLineage);
+    jsonWriteBranchNodeAttributes(aji->jw, *retUserOrOld, *retNClade, *retGClade, *retLineage,
+                                  *retNCladeUsher, *retLineageUsher);
 jsonWriteObjectEnd(aji->jw);
 }
 
 struct phyloTree *phyloTreeNewNode(char *name)
 /* Alloc & return a new node with no children. */
 {
 struct phyloTree *node;
 AllocVar(node);
 AllocVar(node->ident);
 node->ident->name = cloneString(name);
 return node;
 }
 
 struct geneInfo *getGeneInfoList(char *bigGenePredFile, struct dnaSeq *refGenome)
 /* If config.ra has a source of gene annotations, then return the gene list. */
@@ -538,23 +599,23 @@
 fputs("{ \"version\": \"v2\", ", outF);
 writeAuspiceMeta(outF, sti->subtreeUserSampleIds, source);
 // The meta part is mostly constant & easier to just write out, but jsonWrite is better for the
 // nested tree structure.
 struct jsonWrite *jw = jsonWriteNew();
 jsonWriteObjectStart(jw, "tree");
 int nodeNum = 10000; // Auspice.us starting node number for newick -> json
 int depth = 0;
 
 // Add an extra root node because otherwise Auspice won't draw branch from big tree root to subtree
 struct phyloTree *root = phyloTreeNewNode("wrapper");
 phyloAddEdge(root, tree);
 tree = root;
 struct auspiceJsonInfo aji = { jw, sti->subtreeUserSampleIds, geneInfoList, gSeqWin,
                                sampleMetadata, sampleUrls, nodeNum, source };
-rTreeToAuspiceJson(tree, depth, &aji, NULL, NULL, NULL, NULL);
+rTreeToAuspiceJson(tree, depth, &aji, NULL, NULL, NULL, NULL, NULL, NULL);
 jsonWriteObjectEnd(jw);
 fputs(jw->dy->string, outF);
 jsonWriteFree(&jw);
 fputs("}", outF);
 carefulClose(&outF);
 }