ea80dbd18523cfb2ad1efd1306ac81d7e994f9c4
angie
  Fri Dec 1 09:01:56 2023 -0800
Update RSV metadata for new clade system (RSV Genotyping Consensus Consortium lineages replace Ramaekers 2020 clades).

diff --git src/hg/hgPhyloPlace/treeToAuspiceJson.c src/hg/hgPhyloPlace/treeToAuspiceJson.c
index 3616afa..cc70972 100644
--- src/hg/hgPhyloPlace/treeToAuspiceJson.c
+++ src/hg/hgPhyloPlace/treeToAuspiceJson.c
@@ -121,32 +121,32 @@
 jsonWriteString(jw, "type", "source");
 jsonWriteObjectEnd(jw);
 jsonWriteObjectEnd(jw);
 }
 
 static char *getDefaultColor(struct slName *colorFields)
 /* Pick default color from available color fields from metadata.  Do not free returned string. */
 {
 char *colorDefault = NULL;
 if (slNameInList(colorFields, "pango_lineage_usher"))
     colorDefault = "pango_lineage_usher";
 else if (slNameInList(colorFields, "Nextstrain_lineage"))
     colorDefault = "Nextstrain_lineage";
 else if (slNameInList(colorFields, "Nextstrain_clade"))
     colorDefault = "Nextstrain_clade";
-else if (slNameInList(colorFields, "goya_usher"))
-    colorDefault = "goya_usher";
+else if (slNameInList(colorFields, "GCC_usher"))
+    colorDefault = "GCC_usher";
 else if (colorFields != NULL)
     colorDefault = colorFields->name;
 else
     colorDefault = "userOrOld";
 return colorDefault;
 }
 
 static void auspiceMetaColorings(struct jsonWrite *jw, char *source, struct slName *colorFields,
                                  char *db)
 /* Write coloring specs for colorFields from metadata, locally added userOrOld, and
  * Auspice-automatic gt. */
 {
 jsonWriteListStart(jw, "colorings");
 auspiceMetaColoringCategoricalStart(jw, "userOrOld", "Sample type");
 jsonWriteListStart(jw, "scale");
@@ -166,36 +166,36 @@
             auspiceMetaColoringCategorical(jw, col->name, "Clade assigned by nextclade");
         }
     else if (sameString(col->name, "Nextstrain_clade_usher"))
         auspiceMetaColoringSarsCov2Nextclade(jw, col->name, "Nextstrain Clade assigned by UShER");
     else if (sameString(col->name, "pango_lineage"))
         auspiceMetaColoringCategorical(jw, col->name, "Pango lineage");
     else if (sameString(col->name, "pango_lineage_usher"))
         auspiceMetaColoringCategorical(jw, col->name, "Pango lineage assigned by UShER");
     else if (sameString(col->name, "Nextstrain_lineage"))
         auspiceMetaColoringCategorical(jw, col->name, "Nextstrain lineage");
     //#*** RSV hacks -- colorings really should come from JSON file in config directory
     else if (sameString(col->name, "goya_nextclade"))
         auspiceMetaColoringCategorical(jw, col->name, "Goya 2020 clade assigned by nextclade");
     else if (sameString(col->name, "goya_usher"))
         auspiceMetaColoringCategorical(jw, col->name, "Goya 2020 clade assigned by UShER");
-    else if (sameString(col->name, "ramaekers_nextclade"))
-        auspiceMetaColoringCategorical(jw, col->name, "Ramaekers 2020 clade assigned by nextclade");
-    else if (sameString(col->name, "ramaekers_usher"))
-        auspiceMetaColoringCategorical(jw, col->name, "Ramaekers 2020 clade assigned by UShER");
-    else if (sameString(col->name, "ramaekers_tableS1"))
-        auspiceMetaColoringCategorical(jw, col->name, "Ramaekers 2020 Table S1 designation");
+    else if (sameString(col->name, "GCC_nextclade"))
+        auspiceMetaColoringCategorical(jw, col->name, "RGCC lineage assigned by nextclade");
+    else if (sameString(col->name, "GCC_usher"))
+        auspiceMetaColoringCategorical(jw, col->name, "RGCC lineage assigned by UShER");
+    else if (sameString(col->name, "GCC_assigned_2023-11"))
+        auspiceMetaColoringCategorical(jw, col->name, "RGCC designated lineage");
     else if (sameString(col->name, "country"))
         auspiceMetaColoringCategorical(jw, col->name, "Country");
     else
         auspiceMetaColoringCategorical(jw, col->name, col->name);
     }
 jsonWriteListEnd(jw);
 }
 
 static void writeAuspiceMeta(struct jsonWrite *jw, struct slName *subtreeUserSampleIds, char *source,
                              char *db, struct slName *colorFields, struct geneInfo *geneInfoList,
                              uint genomeSize)
 /* Write metadata to configure Auspice display. */
 {
 jsonWriteObjectStart(jw, "meta");
 // Title
@@ -233,37 +233,37 @@
 auspiceMetaColorings(jw, source, colorFields, db);
 // Filters didn't work when I tried them a long time ago... revisit someday.
 jsonWriteListStart(jw, "filters");
 jsonWriteString(jw, NULL, "userOrOld");
 jsonWriteString(jw, NULL, "country");
 //#*** FIXME: TODO: either pass in along with sampleMetadata, or take from JSON file specified
 //#*** in config, or better yet, compute while building tree object in memory, then write the
 //#*** header object, then write the tree.
 if (sameString(db, "wuhCor1"))
     {
     jsonWriteString(jw, NULL, "pango_lineage_usher");
     jsonWriteString(jw, NULL, "pango_lineage");
     jsonWriteString(jw, NULL, "Nextstrain_clade_usher");
     jsonWriteString(jw, NULL, "Nextstrain_clade");
     }
-else if (stringIn("GCF_000855545", db) || stringIn("GCF_002815475", db))
+else if (stringIn("GCF_000855545", db) || stringIn("GCF_002815475", db) || stringIn("RGCC", db))
     {
+    jsonWriteString(jw, NULL, "GCC_usher");
+    jsonWriteString(jw, NULL, "GCC_nextclade");
+    jsonWriteString(jw, NULL, "GCC_assigned_2023-11");
     jsonWriteString(jw, NULL, "goya_usher");
     jsonWriteString(jw, NULL, "goya_nextclade");
-    jsonWriteString(jw, NULL, "ramaekers_tableS1");
-    jsonWriteString(jw, NULL, "ramaekers_usher");
-    jsonWriteString(jw, NULL, "ramaekers_nextclade");
     }
 else if (stringIn("GCF_000865085", db) || stringIn("GCF_001343785", db))
     {
     jsonWriteString(jw, NULL, "Nextstrain_clade");
     }
 else
     {
     jsonWriteString(jw, NULL, "Nextstrain_lineage");
     }
 jsonWriteListEnd(jw);
 // Annotations for coloring/filtering by base
 writeAuspiceMetaGenomeAnnotations(jw, geneInfoList, genomeSize);
 jsonWriteObjectEnd(jw);
 }
 
@@ -310,37 +310,37 @@
 if (met && met->date)
     jsonWriteObjectValue(jw, "date", met->date);
 if (met && met->author)
     {
     jsonWriteObjectValue(jw, "author", met->author);
     // Note: Nextstrain adds paper_url and title when available; they also add author and use
     // a uniquified value (e.g. "author": "Wenjie Tan et al" / "value": "Wenjie Tan et al A")
     }
 struct placementInfo *pi = (isUserSample && name) ? hashFindVal(samplePlacements, name) : NULL;
 
 *retNClade = (met && met->nClade) ? met->nClade : isUserSample ? "uploaded sample" : NULL;
 if (isNotEmpty(*retNClade))
     jsonWriteObjectValue(jw, (isRsv ? "goya_nextclade" : "Nextstrain_clade"), *retNClade);
 *retGClade = (met && met->gClade) ? met->gClade : isUserSample ? "uploaded sample" : NULL;
 if (isNotEmpty(*retGClade))
-    jsonWriteObjectValue(jw, (isRsv ? "ramaekers_tableS1" : "GISAID_clade"), *retGClade);
+    jsonWriteObjectValue(jw, (isRsv ? "GCC_assigned_2023-11" : "GISAID_clade"), *retGClade);
 *retLineage =  (met && met->lineage) ? met->lineage : isUserSample ? "uploaded sample" : NULL;
 if (isNotEmpty(*retLineage))
     {
     char lineageUrl[1024];
     makeLineageUrl(*retLineage, lineageUrl, sizeof lineageUrl);
-    jsonWriteObjectValueUrl(jw, (isRsv ? "ramaekers_nextclade" : "pango_lineage"),
+    jsonWriteObjectValueUrl(jw, (isRsv ? "GCC_nextclade" : "pango_lineage"),
                             *retLineage, lineageUrl);
     }
 *retNLineage = (met && met->nLineage) ? met->nLineage : isUserSample ? "uploaded sample" : NULL;
 if (isNotEmpty(*retNLineage))
     {
     jsonWriteObjectValue(jw, "Nextstrain_lineage", *retNLineage);
     }
 if (met && met->epiId)
     jsonWriteObjectValue(jw, "gisaid_epi_isl", met->epiId);
 if (met && met->gbAcc)
     jsonWriteObjectValue(jw, "genbank_accession", met->gbAcc);
 if (met && met->country)
     jsonWriteObjectValue(jw, "country", met->country);
 if (met && met->division)
     jsonWriteObjectValue(jw, "division", met->division);
@@ -356,69 +356,69 @@
     jsonWriteObjectValue(jw, "submitting_lab", met->subLab);
 if (met && met->region)
     jsonWriteObjectValue(jw, "region", met->region);
 *retNCladeUsher = (pi && pi->nextClade) ? pi->nextClade :
                   (met && met->nCladeUsher) ? met->nCladeUsher :
                   isUserSample ? "uploaded sample" : NULL;
 if (isNotEmpty(*retNCladeUsher))
     jsonWriteObjectValue(jw, (isRsv ? "goya_usher" : "Nextstrain_clade_usher"), *retNCladeUsher);
 *retLineageUsher = (pi && pi->pangoLineage) ? pi->pangoLineage :
                    (met && met->lineageUsher) ? met->lineageUsher :
                    isUserSample ? "uploaded sample" : NULL;
 if (isNotEmpty(*retLineageUsher))
     {
     char lineageUrl[1024];
     makeLineageUrl(*retLineageUsher, lineageUrl, sizeof lineageUrl);
-    jsonWriteObjectValueUrl(jw, (isRsv ? "ramaekers_usher" : "pango_lineage_usher"),
+    jsonWriteObjectValueUrl(jw, (isRsv ? "GCC_usher" : "pango_lineage_usher"),
                             *retLineageUsher, lineageUrl);
     }
 char *sampleUrl = (sampleUrls && name) ? hashFindVal(sampleUrls, name) : NULL;
 if (isNotEmpty(sampleUrl))
     {
     char *p = strstr(sampleUrl, "subtreeAuspice");
     char *subtreeNum = p + strlen("subtreeAuspice");
     if (p && isdigit(*subtreeNum))
         {
         int num = atoi(subtreeNum);
         char subtreeLabel[1024];
         safef(subtreeLabel, sizeof subtreeLabel, "view subtree %d", num);
         jsonWriteObjectValueUrl(jw, "subtree", subtreeLabel, sampleUrl);
         }
     else
         jsonWriteObjectValueUrl(jw, "subtree", sampleUrl, sampleUrl);
     }
 }
 
 static void jsonWriteBranchNodeAttributes(struct jsonWrite *jw, boolean isRsv, char *userOrOld,
                                           char *nClade, char *gClade, char *lineage, char *nLineage,
                                           char *nCladeUsher, char *lineageUsher)
 /* Write elements of node_attrs for a branch. */
 {
 if (userOrOld)
     jsonWriteObjectValue(jw, "userOrOld", userOrOld);
 if (nClade)
     jsonWriteObjectValue(jw, (isRsv ? "goya_nextclade" : "Nextstrain_clade"), nClade);
 if (gClade)
-    jsonWriteObjectValue(jw, (isRsv ? "ramaekers_tableS1" : "GISAID_clade"), gClade);
+    jsonWriteObjectValue(jw, (isRsv ? "GCC_assigned_2023-11" : "GISAID_clade"), gClade);
 if (lineage)
-    jsonWriteObjectValue(jw, (isRsv ? "ramaekers_nextclade" : "pango_lineage"), lineage);
+    jsonWriteObjectValue(jw, (isRsv ? "GCC_nextclade" : "pango_lineage"), lineage);
 if (nLineage)
-    jsonWriteObjectValue(jw, "Nextstrain_lineage", lineage);
+    jsonWriteObjectValue(jw, "Nextstrain_lineage", nLineage);
 if (nCladeUsher)
     jsonWriteObjectValue(jw, (isRsv ? "goya_usher" : "Nextstrain_clade_usher"), nCladeUsher);
 if (lineageUsher)
-    jsonWriteObjectValue(jw, (isRsv ? "ramaekers_usher" : "pango_lineage_usher"), lineageUsher);
+    jsonWriteObjectValue(jw, (isRsv ? "GCC_usher" : "pango_lineage_usher"), lineageUsher);
 }
 
 INLINE char maybeComplement(char base, struct psl *psl)
 /* If psl is on '+' strand, return base, otherwise return the complement of base. */
 {
 return (pslOrientation(psl) > 0) ? base : ntCompTable[(int)base];
 }
 
 static struct slName *codonVpTxToAaChange(struct vpTx *codonVpTxList,
                                           struct singleNucChange *ancestorMuts,
                                           struct geneInfo *gi)
 /* Given a list of vpTx from the same codon, combine their changes with inherited mutations
  * in the same codon to get the amino acid change at this node.
  * Note: this assumes there is no UTR in transcript, only CDS.  True so far for pathogens... */
 {
@@ -824,46 +824,46 @@
 void treeToAuspiceJson(struct subtreeInfo *sti, char *db, struct geneInfo *geneInfoList,
                        struct seqWindow *gSeqWin, struct hash *sampleMetadata,
                        struct hash *sampleUrls, struct hash *samplePlacements,
                        char *jsonFile, char *source)
 /* Write JSON for tree in Nextstrain's Augur/Auspice V2 JSON format
  * (https://github.com/nextstrain/augur/blob/master/augur/data/schema-export-v2.json). */
 {
 struct phyloTree *tree = sti->subtree;
 FILE *outF = mustOpen(jsonFile, "w");
 struct jsonWrite *jw = jsonWriteNew();
 jsonWriteObjectStart(jw, NULL);
 jsonWriteString(jw, "version", "v2");
 //#*** FIXME: TODO: either pass in along with sampleMetadata, or take from JSON file specified
 //#*** in config, or better yet, compute while building tree object in memory, then write the
 //#*** header object, then write the tree.
-boolean isRsv = (stringIn("GCF_000855545", db) || stringIn("GCF_002815475", db));
+boolean isRsv = (stringIn("GCF_000855545", db) || stringIn("GCF_002815475", db) || stringIn("RGCC", db));
 struct slName *colorFields = NULL;
 if (sameString(db, "wuhCor1"))
     {
     slNameAddHead(&colorFields, "country");
     slNameAddHead(&colorFields, "Nextstrain_clade_usher");
     slNameAddHead(&colorFields, "pango_lineage_usher");
     slNameAddHead(&colorFields, "Nextstrain_clade");
     slNameAddHead(&colorFields, "pango_lineage");
     }
 else if (isRsv)
     {
     slNameAddHead(&colorFields, "country");
-    slNameAddHead(&colorFields, "ramaekers_nextclade");
-    slNameAddHead(&colorFields, "ramaekers_usher");
-    slNameAddHead(&colorFields, "ramaekers_tableS1");
+    slNameAddHead(&colorFields, "GCC_nextclade");
+    slNameAddHead(&colorFields, "GCC_usher");
+    slNameAddHead(&colorFields, "GCC_assigned_2023-11");
     slNameAddHead(&colorFields, "goya_nextclade");
     slNameAddHead(&colorFields, "goya_usher");
     }
 else if (stringIn("GCF_000865085", db) || stringIn("GCF_001343785", db))
     {
     slNameAddHead(&colorFields, "country");
     slNameAddHead(&colorFields, "Nextstrain_clade");
     }
 else
     {
     slNameAddHead(&colorFields, "country");
     slNameAddHead(&colorFields, "Nextstrain_lineage");
     }
 //#*** END FIXME
 writeAuspiceMeta(jw, sti->subtreeUserSampleIds, source, db, colorFields, geneInfoList,