d354f7c4ff9867b3a64ecaadba87ba2381371e27 angie Mon Nov 7 16:46:38 2022 -0800 In Nextstrain view (JSON), show the Nextstrain clade and Pango lineage assigned by usher for uploaded samples. Make Pango lineage the default coloring, and add a filter to highlight uploaded samples so user immediately sees the uploaded samples' lineage(s) and placement in the subtree. Also, for uploaded names/IDs, don't truncate at first comma until we check whether line is a list of IDs / ID ranges. diff --git src/hg/hgPhyloPlace/treeToAuspiceJson.c src/hg/hgPhyloPlace/treeToAuspiceJson.c index 9aa310d..5665c58 100644 --- src/hg/hgPhyloPlace/treeToAuspiceJson.c +++ src/hg/hgPhyloPlace/treeToAuspiceJson.c @@ -98,31 +98,33 @@ jsonWriteObjectEnd(jw); } jsonWriteObjectStart(jw, "nuc"); jsonWriteNumber(jw, "start", 1); jsonWriteNumber(jw, "end", genomeSize); jsonWriteString(jw, "strand", "+"); jsonWriteString(jw, "type", "source"); jsonWriteObjectEnd(jw); jsonWriteObjectEnd(jw); } static char *getDefaultColor(struct slName *colorFields) /* Pick default color from available color fields from metadata. Do not free returned string. */ { char *colorDefault = NULL; -if (slNameInList(colorFields, "Nextstrain_lineage")) +if (slNameInList(colorFields, "pango_lineage_usher")) + colorDefault = "pango_lineage_usher"; +else if (slNameInList(colorFields, "Nextstrain_lineage")) colorDefault = "Nextstrain_lineage"; else if (slNameInList(colorFields, "Nextstrain_clade")) colorDefault = "Nextstrain_clade"; else if (colorFields != NULL) colorDefault = colorFields->name; else colorDefault = "userOrOld"; return colorDefault; } static void auspiceMetaColorings(struct jsonWrite *jw, char *source, struct slName *colorFields) /* Write coloring specs for colorFields from metadata, locally added userOrOld, and * Auspice-automatic gt. */ { jsonWriteListStart(jw, "colorings"); @@ -234,92 +236,96 @@ } static void makeLineageUrl(char *lineage, char *lineageUrl, size_t lineageUrlSize) /* If lineage is not "uploaded sample", make an outbreak.info link to it, otherwise just copy * lineage. */ { if (sameString(lineage, "uploaded sample")) safecpy(lineageUrl, lineageUrlSize, lineage); else safef(lineageUrl, lineageUrlSize, OUTBREAK_INFO_URLBASE "%s", lineage); } static void jsonWriteLeafNodeAttributes(struct jsonWrite *jw, char *name, struct sampleMetadata *met, boolean isUserSample, char *source, struct hash *sampleUrls, + struct hash *samplePlacements, char **retUserOrOld, char **retNClade, char **retGClade, char **retLineage, char **retNLineage, char **retNCladeUsher, char **retLineageUsher) /* Write elements of node_attrs for a sample which may be preexisting and in our metadata hash, * or may be a new sample from the user. Set rets for color categories so parent branches can * determine their color categories. */ { *retUserOrOld = isUserSample ? "uploaded sample" : source; jsonWriteObjectValue(jw, "userOrOld", *retUserOrOld); if (met && met->date) jsonWriteObjectValue(jw, "date", met->date); if (met && met->author) { jsonWriteObjectValue(jw, "author", met->author); // Note: Nextstrain adds paper_url and title when available; they also add author and use // a uniquified value (e.g. "author": "Wenjie Tan et al" / "value": "Wenjie Tan et al A") } -*retNClade = isUserSample ? "uploaded sample" : (met && met->nClade) ? met->nClade : NULL; +struct placementInfo *pi = (isUserSample && name) ? hashFindVal(samplePlacements, name) : NULL; + +*retNClade = (met && met->nClade) ? met->nClade : isUserSample ? "uploaded sample" : NULL; if (isNotEmpty(*retNClade)) jsonWriteObjectValue(jw, "Nextstrain_clade", *retNClade); -*retGClade = isUserSample ? "uploaded sample" : (met && met->gClade) ? met->gClade : NULL; +*retGClade = (met && met->gClade) ? met->gClade : isUserSample ? "uploaded sample" : NULL; if (isNotEmpty(*retGClade)) jsonWriteObjectValue(jw, "GISAID_clade", *retGClade); -*retLineage = isUserSample ? "uploaded sample" : - (met && met->lineage) ? met->lineage : NULL; +*retLineage = (met && met->lineage) ? met->lineage : isUserSample ? "uploaded sample" : NULL; if (isNotEmpty(*retLineage)) { char lineageUrl[1024]; makeLineageUrl(*retLineage, lineageUrl, sizeof lineageUrl); jsonWriteObjectValueUrl(jw, "pango_lineage", *retLineage, lineageUrl); } -*retNLineage = isUserSample ? "uploaded sample" : (met && met->nLineage) ? met->nLineage : NULL; +*retNLineage = (met && met->nLineage) ? met->nLineage : isUserSample ? "uploaded sample" : NULL; if (isNotEmpty(*retNLineage)) { jsonWriteObjectValue(jw, "Nextstrain_lineage", *retNLineage); } if (met && met->epiId) jsonWriteObjectValue(jw, "gisaid_epi_isl", met->epiId); if (met && met->gbAcc) jsonWriteObjectValue(jw, "genbank_accession", met->gbAcc); if (met && met->country) jsonWriteObjectValue(jw, "country", met->country); if (met && met->division) jsonWriteObjectValue(jw, "division", met->division); if (met && met->location) jsonWriteObjectValue(jw, "location", met->location); if (met && met->countryExp) jsonWriteObjectValue(jw, "country_exposure", met->countryExp); if (met && met->divExp) jsonWriteObjectValue(jw, "division_exposure", met->divExp); if (met && met->origLab) jsonWriteObjectValue(jw, "originating_lab", met->origLab); if (met && met->subLab) jsonWriteObjectValue(jw, "submitting_lab", met->subLab); if (met && met->region) jsonWriteObjectValue(jw, "region", met->region); -*retNCladeUsher = isUserSample ? "uploaded sample" : - (met && met->nCladeUsher) ? met->nCladeUsher : NULL; +*retNCladeUsher = (pi && pi->nextClade) ? pi->nextClade : + (met && met->nCladeUsher) ? met->nCladeUsher : + isUserSample ? "uploaded sample" : NULL; if (isNotEmpty(*retNCladeUsher)) jsonWriteObjectValue(jw, "Nextstrain_clade_usher", *retNCladeUsher); -*retLineageUsher = isUserSample ? "uploaded sample" : - (met && met->lineageUsher) ? met->lineageUsher : NULL; +*retLineageUsher = (pi && pi->pangoLineage) ? pi->pangoLineage : + (met && met->lineageUsher) ? met->lineageUsher : + isUserSample ? "uploaded sample" : NULL; if (isNotEmpty(*retLineageUsher)) { char lineageUrl[1024]; makeLineageUrl(*retLineageUsher, lineageUrl, sizeof lineageUrl); jsonWriteObjectValueUrl(jw, "pango_lineage_usher", *retLineageUsher, lineageUrl); } char *sampleUrl = (sampleUrls && name) ? hashFindVal(sampleUrls, name) : NULL; if (isNotEmpty(sampleUrl)) { char *p = strstr(sampleUrl, "subtreeAuspice"); char *subtreeNum = p + strlen("subtreeAuspice"); if (p && isdigit(*subtreeNum)) { int num = atoi(subtreeNum); char subtreeLabel[1024]; @@ -515,30 +521,31 @@ jsonWriteObjectEnd(jw); // mutations jsonWriteObjectEnd(jw); // branch_attrs } } struct auspiceJsonInfo /* Collection of a bunch of things used when writing out auspice JSON for a subtree, so the * recursive function doesn't need a dozen args. */ { struct jsonWrite *jw; struct slName *subtreeUserSampleIds; // Subtree node names for user samples (not from big tree) struct geneInfo *geneInfoList; // Transcript seq & alignment for predicting AA change struct seqWindow *gSeqWin; // Reference genome seq for predicting AA change struct hash *sampleMetadata; // Sample metadata for decorating tree struct hash *sampleUrls; // URLs for samples, if applicable + struct hash *samplePlacements; // Sample placement info e.g. clade/lineage from usher int nodeNum; // For generating sequential node ID (in absence of name) char *source; // Source of non-user sequences in tree (GISAID or public) }; static int cmpstringp(const void *p1, const void *p2) /* strcmp on pointers to strings, as in 'man qsort' but tolerate NULLs */ { char *s1 = *(char * const *)p1; char *s2 = *(char * const *)p2; if (s1 && s2) return strcmp(s1, s2); else if (s1 && !s2) return 1; else if (s2 && !s1) return -1; @@ -620,30 +627,31 @@ if (retGClade) *retGClade = majorityMaybe(kidGClade, node->numEdges); if (retLineage) *retLineage = majorityMaybe(kidLineage, node->numEdges); if (retNCladeUsher) *retNCladeUsher = majorityMaybe(kidNCladeUsher, node->numEdges); if (retLineageUsher) *retLineageUsher = majorityMaybe(kidLineageUsher, node->numEdges); if (retNLineage) *retNLineage = majorityMaybe(kidNLineage, node->numEdges); } jsonWriteObjectStart(aji->jw, "node_attrs"); jsonWriteDouble(aji->jw, "div", depth); if (node->numEdges == 0) jsonWriteLeafNodeAttributes(aji->jw, name, met, isUserSample, aji->source, aji->sampleUrls, + aji->samplePlacements, retUserOrOld, retNClade, retGClade, retLineage, retNLineage, retNCladeUsher, retLineageUsher); else if (retUserOrOld && retGClade && retLineage) jsonWriteBranchNodeAttributes(aji->jw, *retUserOrOld, *retNClade, *retGClade, *retLineage, *retNLineage, *retNCladeUsher, *retLineageUsher); jsonWriteObjectEnd(aji->jw); } struct phyloTree *phyloTreeNewNode(char *name) /* Alloc & return a new node with no children. */ { struct phyloTree *node; AllocVar(node); AllocVar(node->ident); node->ident->name = cloneString(name); @@ -690,31 +698,32 @@ AllocVar(gi); gi->psl = genePredToPsl((struct genePred *)gp, refGenome->size, txLen); gi->psl->qName = cloneString(gp->name2); gi->txSeq = newDnaSeq(seq, txLen, gp->name2); slAddHead(&geneInfoList, gi); } lmCleanup(&lm); bigBedFileClose(&bbi); } slReverse(&geneInfoList); return geneInfoList; } void treeToAuspiceJson(struct subtreeInfo *sti, char *db, struct geneInfo *geneInfoList, struct seqWindow *gSeqWin, struct hash *sampleMetadata, - struct hash *sampleUrls, char *jsonFile, char *source) + struct hash *sampleUrls, struct hash *samplePlacements, + char *jsonFile, char *source) /* Write JSON for tree in Nextstrain's Augur/Auspice V2 JSON format * (https://github.com/nextstrain/augur/blob/master/augur/data/schema-export-v2.json). */ { struct phyloTree *tree = sti->subtree; FILE *outF = mustOpen(jsonFile, "w"); struct jsonWrite *jw = jsonWriteNew(); jsonWriteObjectStart(jw, NULL); jsonWriteString(jw, "version", "v2"); //#*** FIXME: TODO: either pass in along with sampleMetadata, or better yet, compute while building //#*** tree object and then write the header object. struct slName *colorFields = NULL; if (sameString(db, "wuhCor1")) { slNameAddHead(&colorFields, "country"); slNameAddHead(&colorFields, "Nextstrain_clade_usher"); @@ -727,24 +736,24 @@ slNameAddHead(&colorFields, "country"); slNameAddHead(&colorFields, "Nextstrain_lineage"); } //#*** END FIXME writeAuspiceMeta(jw, sti->subtreeUserSampleIds, source, db, colorFields, geneInfoList, gSeqWin->end); jsonWriteObjectStart(jw, "tree"); int nodeNum = 10000; // Auspice.us starting node number for newick -> json int depth = 0; // Add an extra root node because otherwise Auspice won't draw branch from big tree root to subtree struct phyloTree *root = phyloTreeNewNode("wrapper"); phyloAddEdge(root, tree); tree = root; struct auspiceJsonInfo aji = { jw, sti->subtreeUserSampleIds, geneInfoList, gSeqWin, - sampleMetadata, sampleUrls, nodeNum, source }; + sampleMetadata, sampleUrls, samplePlacements, nodeNum, source }; rTreeToAuspiceJson(tree, depth, &aji, NULL, NULL, NULL, NULL, NULL, NULL, NULL); jsonWriteObjectEnd(jw); // tree jsonWriteObjectEnd(jw); // top-level object fputs(jw->dy->string, outF); jsonWriteFree(&jw); carefulClose(&outF); }