d354f7c4ff9867b3a64ecaadba87ba2381371e27
angie
  Mon Nov 7 16:46:38 2022 -0800
In Nextstrain view (JSON), show the Nextstrain clade and Pango lineage assigned by usher for uploaded samples.  Make Pango lineage the default coloring, and add a filter to highlight uploaded samples so that the user immediately sees the uploaded samples' lineage(s) and placement in the subtree.  Also, for uploaded names/IDs, don't truncate at the first comma until we have checked whether the line is a list of IDs / ID ranges.

diff --git src/hg/hgPhyloPlace/treeToAuspiceJson.c src/hg/hgPhyloPlace/treeToAuspiceJson.c
index 9aa310d..5665c58 100644
--- src/hg/hgPhyloPlace/treeToAuspiceJson.c
+++ src/hg/hgPhyloPlace/treeToAuspiceJson.c
@@ -98,31 +98,33 @@
     jsonWriteObjectEnd(jw);
     }
 jsonWriteObjectStart(jw, "nuc");
 jsonWriteNumber(jw, "start", 1);
 jsonWriteNumber(jw, "end", genomeSize);
 jsonWriteString(jw, "strand", "+");
 jsonWriteString(jw, "type", "source");
 jsonWriteObjectEnd(jw);
 jsonWriteObjectEnd(jw);
 }
 
 static char *getDefaultColor(struct slName *colorFields)
 /* Pick default color from available color fields from metadata.  Do not free returned string. */
 {
 char *colorDefault = NULL;
-if (slNameInList(colorFields, "Nextstrain_lineage"))
+if (slNameInList(colorFields, "pango_lineage_usher"))
+    colorDefault = "pango_lineage_usher";
+else if (slNameInList(colorFields, "Nextstrain_lineage"))
     colorDefault = "Nextstrain_lineage";
 else if (slNameInList(colorFields, "Nextstrain_clade"))
     colorDefault = "Nextstrain_clade";
 else if (colorFields != NULL)
     colorDefault = colorFields->name;
 else
     colorDefault = "userOrOld";
 return colorDefault;
 }
 
 static void auspiceMetaColorings(struct jsonWrite *jw, char *source, struct slName *colorFields)
 /* Write coloring specs for colorFields from metadata, locally added userOrOld, and
  * Auspice-automatic gt. */
 {
 jsonWriteListStart(jw, "colorings");
@@ -234,92 +236,96 @@
 }
 
 static void makeLineageUrl(char *lineage, char *lineageUrl, size_t lineageUrlSize)
 /* If lineage is not "uploaded sample", make an outbreak.info link to it, otherwise just copy
  * lineage. */
 {
 if (sameString(lineage, "uploaded sample"))
     safecpy(lineageUrl, lineageUrlSize, lineage);
 else
     safef(lineageUrl, lineageUrlSize, OUTBREAK_INFO_URLBASE "%s", lineage);
 }
 
 static void jsonWriteLeafNodeAttributes(struct jsonWrite *jw, char *name,
                                         struct sampleMetadata *met, boolean isUserSample,
                                         char *source, struct hash *sampleUrls,
+                                        struct hash *samplePlacements,
                                         char **retUserOrOld, char **retNClade, char **retGClade,
                                         char **retLineage, char **retNLineage,
                                         char **retNCladeUsher, char **retLineageUsher)
 /* Write elements of node_attrs for a sample which may be preexisting and in our metadata hash,
  * or may be a new sample from the user.  Set rets for color categories so parent branches can
  * determine their color categories. */
 {
 *retUserOrOld = isUserSample ? "uploaded sample" : source;
 jsonWriteObjectValue(jw, "userOrOld", *retUserOrOld);
 if (met && met->date)
     jsonWriteObjectValue(jw, "date", met->date);
 if (met && met->author)
     {
     jsonWriteObjectValue(jw, "author", met->author);
     // Note: Nextstrain adds paper_url and title when available; they also add author and use
     // a uniquified value (e.g. "author": "Wenjie Tan et al" / "value": "Wenjie Tan et al A")
     }
-*retNClade = isUserSample ? "uploaded sample" : (met && met->nClade) ? met->nClade : NULL;
+struct placementInfo *pi = (isUserSample && name) ? hashFindVal(samplePlacements, name) : NULL;
+
+*retNClade = (met && met->nClade) ? met->nClade : isUserSample ? "uploaded sample" : NULL;
 if (isNotEmpty(*retNClade))
     jsonWriteObjectValue(jw, "Nextstrain_clade", *retNClade);
-*retGClade = isUserSample ? "uploaded sample" : (met && met->gClade) ? met->gClade : NULL;
+*retGClade = (met && met->gClade) ? met->gClade : isUserSample ? "uploaded sample" : NULL;
 if (isNotEmpty(*retGClade))
     jsonWriteObjectValue(jw, "GISAID_clade", *retGClade);
-*retLineage = isUserSample ? "uploaded sample" :
-                             (met && met->lineage) ? met->lineage : NULL;
+*retLineage =  (met && met->lineage) ? met->lineage : isUserSample ? "uploaded sample" : NULL;
 if (isNotEmpty(*retLineage))
     {
     char lineageUrl[1024];
     makeLineageUrl(*retLineage, lineageUrl, sizeof lineageUrl);
     jsonWriteObjectValueUrl(jw, "pango_lineage", *retLineage, lineageUrl);
     }
-*retNLineage = isUserSample ? "uploaded sample" : (met && met->nLineage) ? met->nLineage : NULL;
+*retNLineage = (met && met->nLineage) ? met->nLineage : isUserSample ? "uploaded sample" : NULL;
 if (isNotEmpty(*retNLineage))
     {
     jsonWriteObjectValue(jw, "Nextstrain_lineage", *retNLineage);
     }
 if (met && met->epiId)
     jsonWriteObjectValue(jw, "gisaid_epi_isl", met->epiId);
 if (met && met->gbAcc)
     jsonWriteObjectValue(jw, "genbank_accession", met->gbAcc);
 if (met && met->country)
     jsonWriteObjectValue(jw, "country", met->country);
 if (met && met->division)
     jsonWriteObjectValue(jw, "division", met->division);
 if (met && met->location)
     jsonWriteObjectValue(jw, "location", met->location);
 if (met && met->countryExp)
     jsonWriteObjectValue(jw, "country_exposure", met->countryExp);
 if (met && met->divExp)
     jsonWriteObjectValue(jw, "division_exposure", met->divExp);
 if (met && met->origLab)
     jsonWriteObjectValue(jw, "originating_lab", met->origLab);
 if (met && met->subLab)
     jsonWriteObjectValue(jw, "submitting_lab", met->subLab);
 if (met && met->region)
     jsonWriteObjectValue(jw, "region", met->region);
-*retNCladeUsher = isUserSample ? "uploaded sample" :
-                                 (met && met->nCladeUsher) ? met->nCladeUsher : NULL;
+*retNCladeUsher = (pi && pi->nextClade) ? pi->nextClade :
+                  (met && met->nCladeUsher) ? met->nCladeUsher :
+                  isUserSample ? "uploaded sample" : NULL;
 if (isNotEmpty(*retNCladeUsher))
     jsonWriteObjectValue(jw, "Nextstrain_clade_usher", *retNCladeUsher);
-*retLineageUsher = isUserSample ? "uploaded sample" :
-                                  (met && met->lineageUsher) ? met->lineageUsher : NULL;
+*retLineageUsher = (pi && pi->pangoLineage) ? pi->pangoLineage :
+                   (met && met->lineageUsher) ? met->lineageUsher :
+                   isUserSample ? "uploaded sample" : NULL;
 if (isNotEmpty(*retLineageUsher))
     {
     char lineageUrl[1024];
     makeLineageUrl(*retLineageUsher, lineageUrl, sizeof lineageUrl);
     jsonWriteObjectValueUrl(jw, "pango_lineage_usher", *retLineageUsher, lineageUrl);
     }
 char *sampleUrl = (sampleUrls && name) ? hashFindVal(sampleUrls, name) : NULL;
 if (isNotEmpty(sampleUrl))
     {
     char *p = strstr(sampleUrl, "subtreeAuspice");
     char *subtreeNum = p + strlen("subtreeAuspice");
     if (p && isdigit(*subtreeNum))
         {
         int num = atoi(subtreeNum);
         char subtreeLabel[1024];
@@ -515,30 +521,31 @@
     jsonWriteObjectEnd(jw);  // mutations
     jsonWriteObjectEnd(jw); // branch_attrs
     }
 }
 
 struct auspiceJsonInfo
 /* Collection of a bunch of things used when writing out auspice JSON for a subtree, so the
  * recursive function doesn't need a dozen args. */
     {
     struct jsonWrite *jw;
     struct slName *subtreeUserSampleIds;  // Subtree node names for user samples (not from big tree)
     struct geneInfo *geneInfoList;        // Transcript seq & alignment for predicting AA change
     struct seqWindow *gSeqWin;            // Reference genome seq for predicting AA change
     struct hash *sampleMetadata;          // Sample metadata for decorating tree
     struct hash *sampleUrls;              // URLs for samples, if applicable
+    struct hash *samplePlacements;        // Sample placement info e.g. clade/lineage from usher
     int nodeNum;                          // For generating sequential node ID (in absence of name)
     char *source;                         // Source of non-user sequences in tree (GISAID or public)
     };
 
 static int cmpstringp(const void *p1, const void *p2)
 /* strcmp on pointers to strings, as in 'man qsort' but tolerate NULLs */
 {
 char *s1 = *(char * const *)p1;
 char *s2 = *(char * const *)p2;
 if (s1 && s2)
     return strcmp(s1, s2);
 else if (s1 && !s2)
     return 1;
 else if (s2 && !s1)
     return -1;
@@ -620,30 +627,31 @@
     if (retGClade)
         *retGClade = majorityMaybe(kidGClade, node->numEdges);
     if (retLineage)
         *retLineage = majorityMaybe(kidLineage, node->numEdges);
     if (retNCladeUsher)
         *retNCladeUsher = majorityMaybe(kidNCladeUsher, node->numEdges);
     if (retLineageUsher)
         *retLineageUsher = majorityMaybe(kidLineageUsher, node->numEdges);
     if (retNLineage)
         *retNLineage = majorityMaybe(kidNLineage, node->numEdges);
     }
 jsonWriteObjectStart(aji->jw, "node_attrs");
 jsonWriteDouble(aji->jw, "div", depth);
 if (node->numEdges == 0)
     jsonWriteLeafNodeAttributes(aji->jw, name, met, isUserSample, aji->source, aji->sampleUrls,
+                                aji->samplePlacements,
                                 retUserOrOld, retNClade, retGClade, retLineage, retNLineage,
                                 retNCladeUsher, retLineageUsher);
 else if (retUserOrOld && retGClade && retLineage)
     jsonWriteBranchNodeAttributes(aji->jw, *retUserOrOld, *retNClade, *retGClade, *retLineage,
                                   *retNLineage, *retNCladeUsher, *retLineageUsher);
 jsonWriteObjectEnd(aji->jw);
 }
 
 struct phyloTree *phyloTreeNewNode(char *name)
 /* Alloc & return a new node with no children. */
 {
 struct phyloTree *node;
 AllocVar(node);
 AllocVar(node->ident);
 node->ident->name = cloneString(name);
@@ -690,31 +698,32 @@
         AllocVar(gi);
         gi->psl = genePredToPsl((struct genePred *)gp, refGenome->size, txLen);
         gi->psl->qName = cloneString(gp->name2);
         gi->txSeq = newDnaSeq(seq, txLen, gp->name2);
         slAddHead(&geneInfoList, gi);
         }
     lmCleanup(&lm);
     bigBedFileClose(&bbi);
     }
 slReverse(&geneInfoList);
 return geneInfoList;
 }
 
 void treeToAuspiceJson(struct subtreeInfo *sti, char *db, struct geneInfo *geneInfoList,
                        struct seqWindow *gSeqWin, struct hash *sampleMetadata,
-                       struct hash *sampleUrls, char *jsonFile, char *source)
+                       struct hash *sampleUrls, struct hash *samplePlacements,
+                       char *jsonFile, char *source)
 /* Write JSON for tree in Nextstrain's Augur/Auspice V2 JSON format
  * (https://github.com/nextstrain/augur/blob/master/augur/data/schema-export-v2.json). */
 {
 struct phyloTree *tree = sti->subtree;
 FILE *outF = mustOpen(jsonFile, "w");
 struct jsonWrite *jw = jsonWriteNew();
 jsonWriteObjectStart(jw, NULL);
 jsonWriteString(jw, "version", "v2");
 //#*** FIXME: TODO: either pass in along with sampleMetadata, or better yet, compute while building
 //#*** tree object and then write the header object.
 struct slName *colorFields = NULL;
 if (sameString(db, "wuhCor1"))
     {
     slNameAddHead(&colorFields, "country");
     slNameAddHead(&colorFields, "Nextstrain_clade_usher");
@@ -727,24 +736,24 @@
     slNameAddHead(&colorFields, "country");
     slNameAddHead(&colorFields, "Nextstrain_lineage");
     }
 //#*** END FIXME
 writeAuspiceMeta(jw, sti->subtreeUserSampleIds, source, db, colorFields, geneInfoList,
                  gSeqWin->end);
 jsonWriteObjectStart(jw, "tree");
 int nodeNum = 10000; // Auspice.us starting node number for newick -> json
 int depth = 0;
 
 // Add an extra root node because otherwise Auspice won't draw branch from big tree root to subtree
 struct phyloTree *root = phyloTreeNewNode("wrapper");
 phyloAddEdge(root, tree);
 tree = root;
 struct auspiceJsonInfo aji = { jw, sti->subtreeUserSampleIds, geneInfoList, gSeqWin,
-                               sampleMetadata, sampleUrls, nodeNum, source };
+                               sampleMetadata, sampleUrls, samplePlacements, nodeNum, source };
 rTreeToAuspiceJson(tree, depth, &aji, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
 jsonWriteObjectEnd(jw); // tree
 jsonWriteObjectEnd(jw); // top-level object
 fputs(jw->dy->string, outF);
 jsonWriteFree(&jw);
 carefulClose(&outF);
 }