ea80dbd18523cfb2ad1efd1306ac81d7e994f9c4 angie Fri Dec 1 09:01:56 2023 -0800 Update RSV metadata for new clade system (RSV Genotyping Consensus Consortium lineages replace Ramaekers 2020 clades). diff --git src/hg/hgPhyloPlace/treeToAuspiceJson.c src/hg/hgPhyloPlace/treeToAuspiceJson.c index 3616afa..cc70972 100644 --- src/hg/hgPhyloPlace/treeToAuspiceJson.c +++ src/hg/hgPhyloPlace/treeToAuspiceJson.c @@ -121,32 +121,32 @@ jsonWriteString(jw, "type", "source"); jsonWriteObjectEnd(jw); jsonWriteObjectEnd(jw); } static char *getDefaultColor(struct slName *colorFields) /* Pick default color from available color fields from metadata. Do not free returned string. */ { char *colorDefault = NULL; if (slNameInList(colorFields, "pango_lineage_usher")) colorDefault = "pango_lineage_usher"; else if (slNameInList(colorFields, "Nextstrain_lineage")) colorDefault = "Nextstrain_lineage"; else if (slNameInList(colorFields, "Nextstrain_clade")) colorDefault = "Nextstrain_clade"; -else if (slNameInList(colorFields, "goya_usher")) - colorDefault = "goya_usher"; +else if (slNameInList(colorFields, "GCC_usher")) + colorDefault = "GCC_usher"; else if (colorFields != NULL) colorDefault = colorFields->name; else colorDefault = "userOrOld"; return colorDefault; } static void auspiceMetaColorings(struct jsonWrite *jw, char *source, struct slName *colorFields, char *db) /* Write coloring specs for colorFields from metadata, locally added userOrOld, and * Auspice-automatic gt. */ { jsonWriteListStart(jw, "colorings"); auspiceMetaColoringCategoricalStart(jw, "userOrOld", "Sample type"); jsonWriteListStart(jw, "scale"); @@ -166,36 +166,36 @@ auspiceMetaColoringCategorical(jw, col->name, "Clade assigned by nextclade"); } else if (sameString(col->name, "Nextstrain_clade_usher")) auspiceMetaColoringSarsCov2Nextclade(jw, col->name, "Nextstrain Clade assigned by UShER"); else if (sameString(col->name, "pango_lineage")) auspiceMetaColoringCategorical(jw, col->name, "Pango lineage"); else if (sameString(col->name, "pango_lineage_usher")) auspiceMetaColoringCategorical(jw, col->name, "Pango lineage assigned by UShER"); else if (sameString(col->name, "Nextstrain_lineage")) auspiceMetaColoringCategorical(jw, col->name, "Nextstrain lineage"); //#*** RSV hacks -- colorings really should come from JSON file in config directory else if (sameString(col->name, "goya_nextclade")) auspiceMetaColoringCategorical(jw, col->name, "Goya 2020 clade assigned by nextclade"); else if (sameString(col->name, "goya_usher")) auspiceMetaColoringCategorical(jw, col->name, "Goya 2020 clade assigned by UShER"); - else if (sameString(col->name, "ramaekers_nextclade")) - auspiceMetaColoringCategorical(jw, col->name, "Ramaekers 2020 clade assigned by nextclade"); - else if (sameString(col->name, "ramaekers_usher")) - auspiceMetaColoringCategorical(jw, col->name, "Ramaekers 2020 clade assigned by UShER"); - else if (sameString(col->name, "ramaekers_tableS1")) - auspiceMetaColoringCategorical(jw, col->name, "Ramaekers 2020 Table S1 designation"); + else if (sameString(col->name, "GCC_nextclade")) + auspiceMetaColoringCategorical(jw, col->name, "RGCC lineage assigned by nextclade"); + else if (sameString(col->name, "GCC_usher")) + auspiceMetaColoringCategorical(jw, col->name, "RGCC lineage assigned by UShER"); + else if (sameString(col->name, "GCC_assigned_2023-11")) + auspiceMetaColoringCategorical(jw, col->name, "RGCC designated lineage"); else if (sameString(col->name, "country")) auspiceMetaColoringCategorical(jw, col->name, "Country"); else auspiceMetaColoringCategorical(jw, col->name, col->name); } jsonWriteListEnd(jw); } static void writeAuspiceMeta(struct jsonWrite *jw, struct slName *subtreeUserSampleIds, char *source, char *db, struct slName *colorFields, struct geneInfo *geneInfoList, uint genomeSize) /* Write metadata to configure Auspice display. */ { jsonWriteObjectStart(jw, "meta"); // Title @@ -233,37 +233,37 @@ auspiceMetaColorings(jw, source, colorFields, db); // Filters didn't work when I tried them a long time ago... revisit someday. jsonWriteListStart(jw, "filters"); jsonWriteString(jw, NULL, "userOrOld"); jsonWriteString(jw, NULL, "country"); //#*** FIXME: TODO: either pass in along with sampleMetadata, or take from JSON file specified //#*** in config, or better yet, compute while building tree object in memory, then write the //#*** header object, then write the tree. if (sameString(db, "wuhCor1")) { jsonWriteString(jw, NULL, "pango_lineage_usher"); jsonWriteString(jw, NULL, "pango_lineage"); jsonWriteString(jw, NULL, "Nextstrain_clade_usher"); jsonWriteString(jw, NULL, "Nextstrain_clade"); } -else if (stringIn("GCF_000855545", db) || stringIn("GCF_002815475", db)) +else if (stringIn("GCF_000855545", db) || stringIn("GCF_002815475", db) || stringIn("RGCC", db)) { + jsonWriteString(jw, NULL, "GCC_usher"); + jsonWriteString(jw, NULL, "GCC_nextclade"); + jsonWriteString(jw, NULL, "GCC_assigned_2023-11"); jsonWriteString(jw, NULL, "goya_usher"); jsonWriteString(jw, NULL, "goya_nextclade"); - jsonWriteString(jw, NULL, "ramaekers_tableS1"); - jsonWriteString(jw, NULL, "ramaekers_usher"); - jsonWriteString(jw, NULL, "ramaekers_nextclade"); } else if (stringIn("GCF_000865085", db) || stringIn("GCF_001343785", db)) { jsonWriteString(jw, NULL, "Nextstrain_clade"); } else { jsonWriteString(jw, NULL, "Nextstrain_lineage"); } jsonWriteListEnd(jw); // Annotations for coloring/filtering by base writeAuspiceMetaGenomeAnnotations(jw, geneInfoList, genomeSize); jsonWriteObjectEnd(jw); } @@ -310,37 +310,37 @@ if (met && met->date) jsonWriteObjectValue(jw, "date", met->date); if (met && met->author) { jsonWriteObjectValue(jw, "author", met->author); // Note: Nextstrain adds paper_url and title when available; they also add author and use // a uniquified value (e.g. "author": "Wenjie Tan et al" / "value": "Wenjie Tan et al A") } struct placementInfo *pi = (isUserSample && name) ? hashFindVal(samplePlacements, name) : NULL; *retNClade = (met && met->nClade) ? met->nClade : isUserSample ? "uploaded sample" : NULL; if (isNotEmpty(*retNClade)) jsonWriteObjectValue(jw, (isRsv ? "goya_nextclade" : "Nextstrain_clade"), *retNClade); *retGClade = (met && met->gClade) ? met->gClade : isUserSample ? "uploaded sample" : NULL; if (isNotEmpty(*retGClade)) - jsonWriteObjectValue(jw, (isRsv ? "ramaekers_tableS1" : "GISAID_clade"), *retGClade); + jsonWriteObjectValue(jw, (isRsv ? "GCC_assigned_2023-11" : "GISAID_clade"), *retGClade); *retLineage = (met && met->lineage) ? met->lineage : isUserSample ? "uploaded sample" : NULL; if (isNotEmpty(*retLineage)) { char lineageUrl[1024]; makeLineageUrl(*retLineage, lineageUrl, sizeof lineageUrl); - jsonWriteObjectValueUrl(jw, (isRsv ? "ramaekers_nextclade" : "pango_lineage"), + jsonWriteObjectValueUrl(jw, (isRsv ? "GCC_nextclade" : "pango_lineage"), *retLineage, lineageUrl); } *retNLineage = (met && met->nLineage) ? met->nLineage : isUserSample ? "uploaded sample" : NULL; if (isNotEmpty(*retNLineage)) { jsonWriteObjectValue(jw, "Nextstrain_lineage", *retNLineage); } if (met && met->epiId) jsonWriteObjectValue(jw, "gisaid_epi_isl", met->epiId); if (met && met->gbAcc) jsonWriteObjectValue(jw, "genbank_accession", met->gbAcc); if (met && met->country) jsonWriteObjectValue(jw, "country", met->country); if (met && met->division) jsonWriteObjectValue(jw, "division", met->division); @@ -356,69 +356,69 @@ jsonWriteObjectValue(jw, "submitting_lab", met->subLab); if (met && met->region) jsonWriteObjectValue(jw, "region", met->region); *retNCladeUsher = (pi && pi->nextClade) ? pi->nextClade : (met && met->nCladeUsher) ? met->nCladeUsher : isUserSample ? "uploaded sample" : NULL; if (isNotEmpty(*retNCladeUsher)) jsonWriteObjectValue(jw, (isRsv ? "goya_usher" : "Nextstrain_clade_usher"), *retNCladeUsher); *retLineageUsher = (pi && pi->pangoLineage) ? pi->pangoLineage : (met && met->lineageUsher) ? met->lineageUsher : isUserSample ? "uploaded sample" : NULL; if (isNotEmpty(*retLineageUsher)) { char lineageUrl[1024]; makeLineageUrl(*retLineageUsher, lineageUrl, sizeof lineageUrl); - jsonWriteObjectValueUrl(jw, (isRsv ? "ramaekers_usher" : "pango_lineage_usher"), + jsonWriteObjectValueUrl(jw, (isRsv ? "GCC_usher" : "pango_lineage_usher"), *retLineageUsher, lineageUrl); } char *sampleUrl = (sampleUrls && name) ? hashFindVal(sampleUrls, name) : NULL; if (isNotEmpty(sampleUrl)) { char *p = strstr(sampleUrl, "subtreeAuspice"); char *subtreeNum = p + strlen("subtreeAuspice"); if (p && isdigit(*subtreeNum)) { int num = atoi(subtreeNum); char subtreeLabel[1024]; safef(subtreeLabel, sizeof subtreeLabel, "view subtree %d", num); jsonWriteObjectValueUrl(jw, "subtree", subtreeLabel, sampleUrl); } else jsonWriteObjectValueUrl(jw, "subtree", sampleUrl, sampleUrl); } } static void jsonWriteBranchNodeAttributes(struct jsonWrite *jw, boolean isRsv, char *userOrOld, char *nClade, char *gClade, char *lineage, char *nLineage, char *nCladeUsher, char *lineageUsher) /* Write elements of node_attrs for a branch. */ { if (userOrOld) jsonWriteObjectValue(jw, "userOrOld", userOrOld); if (nClade) jsonWriteObjectValue(jw, (isRsv ? "goya_nextclade" : "Nextstrain_clade"), nClade); if (gClade) - jsonWriteObjectValue(jw, (isRsv ? "ramaekers_tableS1" : "GISAID_clade"), gClade); + jsonWriteObjectValue(jw, (isRsv ? "GCC_assigned_2023-11" : "GISAID_clade"), gClade); if (lineage) - jsonWriteObjectValue(jw, (isRsv ? "ramaekers_nextclade" : "pango_lineage"), lineage); + jsonWriteObjectValue(jw, (isRsv ? "GCC_nextclade" : "pango_lineage"), lineage); if (nLineage) jsonWriteObjectValue(jw, "Nextstrain_lineage", lineage); if (nCladeUsher) jsonWriteObjectValue(jw, (isRsv ? "goya_usher" : "Nextstrain_clade_usher"), nCladeUsher); if (lineageUsher) - jsonWriteObjectValue(jw, (isRsv ? "ramaekers_usher" : "pango_lineage_usher"), lineageUsher); + jsonWriteObjectValue(jw, (isRsv ? "GCC_usher" : "pango_lineage_usher"), lineageUsher); } INLINE char maybeComplement(char base, struct psl *psl) /* If psl is on '+' strand, return base, otherwise return the complement of base. */ { return (pslOrientation(psl) > 0) ? base : ntCompTable[(int)base]; } static struct slName *codonVpTxToAaChange(struct vpTx *codonVpTxList, struct singleNucChange *ancestorMuts, struct geneInfo *gi) /* Given a list of vpTx from the same codon, combine their changes with inherited mutations * in the same codon to get the amino acid change at this node. * Note: this assumes there is no UTR in transcript, only CDS. True so far for pathogens... */ { @@ -824,46 +824,46 @@ void treeToAuspiceJson(struct subtreeInfo *sti, char *db, struct geneInfo *geneInfoList, struct seqWindow *gSeqWin, struct hash *sampleMetadata, struct hash *sampleUrls, struct hash *samplePlacements, char *jsonFile, char *source) /* Write JSON for tree in Nextstrain's Augur/Auspice V2 JSON format * (https://github.com/nextstrain/augur/blob/master/augur/data/schema-export-v2.json). */ { struct phyloTree *tree = sti->subtree; FILE *outF = mustOpen(jsonFile, "w"); struct jsonWrite *jw = jsonWriteNew(); jsonWriteObjectStart(jw, NULL); jsonWriteString(jw, "version", "v2"); //#*** FIXME: TODO: either pass in along with sampleMetadata, or take from JSON file specified //#*** in config, or better yet, compute while building tree object in memory, then write the //#*** header object, then write the tree. -boolean isRsv = (stringIn("GCF_000855545", db) || stringIn("GCF_002815475", db)); +boolean isRsv = (stringIn("GCF_000855545", db) || stringIn("GCF_002815475", db) || stringIn("RGCC", db)); struct slName *colorFields = NULL; if (sameString(db, "wuhCor1")) { slNameAddHead(&colorFields, "country"); slNameAddHead(&colorFields, "Nextstrain_clade_usher"); slNameAddHead(&colorFields, "pango_lineage_usher"); slNameAddHead(&colorFields, "Nextstrain_clade"); slNameAddHead(&colorFields, "pango_lineage"); } else if (isRsv) { slNameAddHead(&colorFields, "country"); - slNameAddHead(&colorFields, "ramaekers_nextclade"); - slNameAddHead(&colorFields, "ramaekers_usher"); - slNameAddHead(&colorFields, "ramaekers_tableS1"); + slNameAddHead(&colorFields, "GCC_nextclade"); + slNameAddHead(&colorFields, "GCC_usher"); + slNameAddHead(&colorFields, "GCC_assigned_2023-11"); slNameAddHead(&colorFields, "goya_nextclade"); slNameAddHead(&colorFields, "goya_usher"); } else if (stringIn("GCF_000865085", db) || stringIn("GCF_001343785", db)) { slNameAddHead(&colorFields, "country"); slNameAddHead(&colorFields, "Nextstrain_clade"); } else { slNameAddHead(&colorFields, "country"); slNameAddHead(&colorFields, "Nextstrain_lineage"); } //#*** END FIXME writeAuspiceMeta(jw, sti->subtreeUserSampleIds, source, db, colorFields, geneInfoList,