fb0c581ba94fd1ca33d57e3253f732cd4c4ea62a angie Fri Aug 26 10:51:31 2022 -0700 Support Auspice JSON output for monkeypox: url settings not just files, Nextclade_lineage, non-hardcoded Auspice config header, correct amino acid changes on - strand genes. diff --git src/hg/hgPhyloPlace/treeToAuspiceJson.c src/hg/hgPhyloPlace/treeToAuspiceJson.c index f055fd5..dcea69f 100644 --- src/hg/hgPhyloPlace/treeToAuspiceJson.c +++ src/hg/hgPhyloPlace/treeToAuspiceJson.c @@ -1,142 +1,215 @@ /* Convert a (sub)tree with condensed nodes to JSON for Nextstrain to display, adding in sample * mutations, protein changes and metadata. */ /* Copyright (C) 2020 The Regents of the University of California */ #include "common.h" +#include "errCatch.h" #include "hash.h" #include "hui.h" #include "jsonWrite.h" #include "linefile.h" #include "parsimonyProto.h" #include "phyloPlace.h" #include "phyloTree.h" #include "variantProjector.h" -static void writeAuspiceMeta(FILE *outF, struct slName *subtreeUserSampleIds, char *source) +static void auspiceMetaColoringCategoricalStart(struct jsonWrite *jw, char *key, char *title) +/* Write key, title and type of a "categorical" coloring spec, but leave it open in case a + * scale list needs to be added. */ +{ +jsonWriteObjectStart(jw, NULL); +jsonWriteString(jw, "key", key); +jsonWriteString(jw, "title", title); +jsonWriteString(jw, "type", "categorical"); +} + +static void auspiceMetaColoringCategoricalEnd(struct jsonWrite *jw) +/* Close out a coloring spec that was opened with auspiceMetaColoringCategoricalStart. */ +{ +jsonWriteObjectEnd(jw); +} + +static void auspiceMetaColoringCategorical(struct jsonWrite *jw, char *key, char *title) +/* Write a "categorical" coloring spec with no scale component. */ +{ +auspiceMetaColoringCategoricalStart(jw, key, title); +auspiceMetaColoringCategoricalEnd(jw); +} + +static void jsonWritePair(struct jsonWrite *jw, char *valA, char *valB) +/* Write a list with two string values. */ +{ +jsonWriteListStart(jw, NULL); +jsonWriteString(jw, NULL, valA); +jsonWriteString(jw, NULL, valB); +jsonWriteListEnd(jw); +} + +static void auspiceMetaColoringSarsCov2Nextclade(struct jsonWrite *jw, char *key, char *title) +/* Write a coloring spec for SARS-CoV-2 Nextstrain clades. (Non-VoC clades are omitted and + * will be assigned grayscale values by Auspice. */ +{ +auspiceMetaColoringCategoricalStart(jw, key, title); +jsonWriteListStart(jw, "scale"); +// Color hex values are words from line N of +// https://github.com/nextstrain/ncov/blob/master/defaults/color_schemes.tsv +// where N = number of clade_membership lines in +// https://github.com/nextstrain/ncov/blob/master/defaults/color_ordering.tsv +jsonWritePair(jw, "20H (Beta, V2)", "#5E1D9D"); +jsonWritePair(jw, "20I (Alpha, V1)", "#492AB5"); +jsonWritePair(jw, "20J (Gamma, V3)", "#4042C7"); +jsonWritePair(jw, "21A (Delta)", "#3E5DD0"); +jsonWritePair(jw, "21I (Delta)", "#4377CD"); +jsonWritePair(jw, "21J (Delta)", "#4A8CC2"); +jsonWritePair(jw, "21B (Kappa)", "#549DB2"); +jsonWritePair(jw, "21C (Epsilon)", "#60AA9E"); +jsonWritePair(jw, "21D (Eta)", "#6EB389"); +jsonWritePair(jw, "21E (Theta)", "#80B974"); +jsonWritePair(jw, "21F (Iota)", "#92BC63"); +jsonWritePair(jw, "21G (Lambda)", "#A6BE55"); +jsonWritePair(jw, "21H (Mu)", "#B9BC4A"); +jsonWritePair(jw, "21K (Omicron)", "#CBB742"); +jsonWritePair(jw, "21L (Omicron)", "#D9AD3D"); +jsonWritePair(jw, "21M (Omicron)", "#E29D39"); +jsonWritePair(jw, "22A (Omicron)", "#E68634"); +jsonWritePair(jw, "22B (Omicron)", "#E56A2F"); +jsonWritePair(jw, "22C (Omicron)", "#E04929"); +jsonWritePair(jw, "22D (Omicron)", "#DB2823"); +jsonWritePair(jw, "uploaded sample", "#000000"); +jsonWriteListEnd(jw); +auspiceMetaColoringCategoricalEnd(jw); +} + +static void writeAuspiceMetaGenomeAnnotations(struct jsonWrite *jw, struct geneInfo *geneInfoList, + uint genomeSize) +/* Write out auspice genome annotations (protein-coding gene CDS and "nuc"). */ +{ +jsonWriteObjectStart(jw, "genome_annotations"); +struct geneInfo *gi; +for (gi = geneInfoList; gi != NULL; gi = gi->next) + { + jsonWriteObjectStart(jw, gi->psl->qName); + jsonWriteNumber(jw, "start", gi->psl->tStart+1); + jsonWriteNumber(jw, "end", gi->psl->tEnd); + jsonWriteString(jw, "strand", (pslOrientation(gi->psl) > 0) ? "+" : "-"); + jsonWriteString(jw, "type", "CDS"); + jsonWriteObjectEnd(jw); + } +jsonWriteObjectStart(jw, "nuc"); +jsonWriteNumber(jw, "start", 1); +jsonWriteNumber(jw, "end", genomeSize); +jsonWriteString(jw, "strand", "+"); +jsonWriteString(jw, "type", "source"); +jsonWriteObjectEnd(jw); +jsonWriteObjectEnd(jw); +} + +static char *getDefaultColor(struct slName *colorFields) +/* Pick default color from available color fields from metadata. Do not free returned string. */ +{ +char *colorDefault = NULL; +if (slNameInList(colorFields, "Nextstrain_lineage")) + colorDefault = "Nextstrain_lineage"; +else if (slNameInList(colorFields, "Nextstrain_clade")) + colorDefault = "Nextstrain_clade"; +else if (colorFields != NULL) + colorDefault = colorFields->name; +else + colorDefault = "userOrOld"; +return colorDefault; +} + +static void auspiceMetaColorings(struct jsonWrite *jw, char *source, struct slName *colorFields) +/* Write coloring specs for colorFields from metadata, locally added userOrOld, and + * Auspice-automatic gt. */ +{ +jsonWriteListStart(jw, "colorings"); +auspiceMetaColoringCategoricalStart(jw, "userOrOld", "Sample type"); +jsonWriteListStart(jw, "scale"); +jsonWritePair(jw, "uploaded sample", "#CC0000"); +jsonWritePair(jw, source, "#000000"); +jsonWriteListEnd(jw); +auspiceMetaColoringCategoricalEnd(jw); +auspiceMetaColoringCategorical(jw, "gt", "Genotype"); +struct slName *col; +for (col = colorFields; col != NULL; col = col->next) + { + if (sameString(col->name, "Nextstrain_clade")) + auspiceMetaColoringSarsCov2Nextclade(jw, col->name, "Nextstrain Clade"); + else if (sameString(col->name, "Nextstrain_clade_usher")) + auspiceMetaColoringSarsCov2Nextclade(jw, col->name, "Nextstrain Clade assigned by UShER"); + else if (sameString(col->name, "pango_lineage")) + auspiceMetaColoringCategorical(jw, col->name, "Pango lineage"); + else if (sameString(col->name, "pango_lineage_usher")) + auspiceMetaColoringCategorical(jw, col->name, "Pango lineage assigned by UShER"); + else if (sameString(col->name, "Nextstrain_lineage")) + auspiceMetaColoringCategorical(jw, col->name, "Nextstrain lineage"); + else if (sameString(col->name, "country")) + auspiceMetaColoringCategorical(jw, col->name, "Country"); + else + auspiceMetaColoringCategorical(jw, col->name, col->name); + } +jsonWriteListEnd(jw); +} + +static void writeAuspiceMeta(struct jsonWrite *jw, struct slName *subtreeUserSampleIds, char *source, + struct slName *colorFields, struct geneInfo *geneInfoList, + uint genomeSize) /* Write metadata to configure Auspice display. */ { -fprintf(outF, - "\"meta\": { " - "\"title\": \"Subtree with %s", subtreeUserSampleIds->name); +jsonWriteObjectStart(jw, "meta"); +// Title +struct dyString *dy = dyStringCreate("Subtree with %s", subtreeUserSampleIds->name); int sampleCount = slCount(subtreeUserSampleIds); if (sampleCount > 10) - fprintf(outF, " and %d other uploaded samples", sampleCount - 1); + dyStringPrintf(dy, " and %d other uploaded samples", sampleCount - 1); else { struct slName *sln; for (sln = subtreeUserSampleIds->next; sln != NULL; sln = sln->next) - fprintf(outF, ", %s", sln->name); - } -fputs("\", " - "\"panels\": [ \"tree\"] , " - "\"colorings\": [ " - " { \"key\": \"pango_lineage\", " - " \"title\": \"Pango lineage\", \"type\": \"categorical\" }," - " { \"key\": \"Nextstrain_clade\"," - " \"scale\": [ " - " [ \"20H (Beta, V2)\", \"#571EA2\" ]," - " [ \"20I (Alpha, V1)\", \"#4334BF\" ]," - " [ \"20J (Gamma, V3)\", \"#3F55CE\" ]," - " [ \"21A (Delta)\", \"#4376CD\" ]," - " [ \"21I (Delta)\", \"#4C91C0\" ]," - " [ \"21J (Delta)\", \"#59A4A9\" ]," - " [ \"21C (Epsilon)\", \"#7FB975\" ]," - " [ \"21D (Eta)\", \"#97BD5F\" ]," - " [ \"21F (Iota)\", \"#C7B944\" ]," - " [ \"21G (Lambda)\", \"#D9AD3D\" ]," - " [ \"21H (Mu)\", \"#E49838\" ]," - " [ \"21K (Omicron)\", \"#DB2823\" ]," - " [ \"21L (Omicron)\", \"#DB2823\" ]," - " [ \"21M (Omicron)\", \"#DB2823\" ]," - " [ \"uploaded sample\", \"#FF0000\" ] ]," - " \"title\": \"Nextstrain Clade\", \"type\": \"categorical\" }," - , outF); -if (sameString(source, "GISAID")) - fputs(" { \"key\": \"GISAID_clade\"," - " \"scale\": [ [ \"S\", \"#EC676D\" ], [ \"L\", \"#F79E43\" ], [ \"O\", \"#F9D136\" ]," - " [ \"V\", \"#FAEA95\" ], [ \"G\", \"#B6D77A\" ], [ \"GH\", \"#8FD4ED\" ]," - " [ \"GR\", \"#A692C3\" ] ]," - " \"title\": \"GISAID Clade\", \"type\": \"categorical\" }," - , outF); -fprintf(outF, - " { \"key\": \"pango_lineage_usher\", " - " \"title\": \"Pango lineage assigned by UShER\", \"type\": \"categorical\" }," - " { \"key\": \"Nextstrain_clade_usher\"," - " \"scale\": [ " - " [ \"20H (Beta, V2)\", \"#571EA2\" ]," - " [ \"20I (Alpha, V1)\", \"#4334BF\" ]," - " [ \"20J (Gamma, V3)\", \"#3F55CE\" ]," - " [ \"20H (Beta,V2)\", \"#571EA2\" ]," - " [ \"20I (Alpha,V1)\", \"#4334BF\" ]," - " [ \"20J (Gamma,V3)\", \"#3F55CE\" ]," - " [ \"21A (Delta)\", \"#4376CD\" ]," - " [ \"21I (Delta)\", \"#4C91C0\" ]," - " [ \"21J (Delta)\", \"#59A4A9\" ]," - " [ \"21C (Epsilon)\", \"#7FB975\" ]," - " [ \"21D (Eta)\", \"#97BD5F\" ]," - " [ \"21F (Iota)\", \"#C7B944\" ]," - " [ \"21G (Lambda)\", \"#D9AD3D\" ]," - " [ \"21H (Mu)\", \"#E49838\" ]," - " [ \"21K (Omicron)\", \"#DB2823\" ]," - " [ \"21L\", \"#DB2823\" ]," - " [ \"21L (Omicron)\", \"#DB2823\" ]," - " [ \"21M (Omicron)\", \"#DB2823\" ]," - " [ \"uploaded sample\", \"#FF0000\" ] ]," - " \"title\": \"Nextstrain Clade assigned by UShER\", \"type\": \"categorical\" }," - " { \"key\": \"userOrOld\", " - " \"scale\": [ [ \"uploaded sample\", \"#CC0000\"] , [ \"%s\", \"#000000\"] ]," - " \"title\": \"Sample type\", \"type\": \"categorical\" }," - " {\"key\": \"gt\", \"title\": \"Genotype\", \"type\": \"categorical\"}," - " {\"key\": \"country\", \"title\": \"Country\", \"type\": \"categorical\"}" - , source); -fputs(" ] , " -//#*** Filters didn't seem to work... maybe something about the new fetch feature, or do I need to spcify in some other way? -//#*** "\"filters\": [ \"GISAID_clade\", \"region\", \"country\", \"division\", \"author\" ], " - "\"filters\": [ ], " - "\"genome_annotations\":" - "{\"E\":{\"end\":26472,\"start\":26245,\"strand\":\"+\",\"type\":\"CDS\"}," - " \"M\":{\"end\":27191,\"start\":26523,\"strand\":\"+\",\"type\":\"CDS\"}," - " \"N\":{\"end\":29533,\"start\":28274,\"strand\":\"+\",\"type\":\"CDS\"}," - " \"ORF1ab\":{\"end\":21555,\"start\":266,\"strand\":\"+\",\"type\":\"CDS\"}," -// " \"ORF1a\":{\"end\":13468,\"start\":266,\"strand\":\"+\",\"type\":\"CDS\"}," -// " \"ORF1b\":{\"end\":21555,\"start\":13468,\"strand\":\"+\",\"type\":\"CDS\"}," - " \"ORF3a\":{\"end\":26220,\"start\":25393,\"strand\":\"+\",\"type\":\"CDS\"}," - " \"ORF6\":{\"end\":27387,\"start\":27202,\"strand\":\"+\",\"type\":\"CDS\"}," - " \"ORF7a\":{\"end\":27759,\"start\":27394,\"strand\":\"+\",\"type\":\"CDS\"}," - " \"ORF7b\":{\"end\":27887,\"start\":27756,\"strand\":\"+\",\"type\":\"CDS\"}," - " \"ORF8\":{\"end\":28259,\"start\":27894,\"strand\":\"+\",\"type\":\"CDS\"}," - " \"ORF9b\":{\"end\":28577,\"start\":28284,\"strand\":\"+\",\"type\":\"CDS\"}," - " \"S\":{\"end\":25384,\"start\":21563,\"strand\":\"+\",\"type\":\"CDS\"}," - " \"nuc\":{\"end\":29903,\"start\":1,\"strand\":\"+\",\"type\":\"source\"}}," - "\"display_defaults\": { " - " \"branch_label\": \"none\", " - " \"color_by\": \"Nextstrain_clade\" " - "}, " - , outF); -fprintf(outF, - "\"description\": \"Dataset generated by [UShER web interface]" + dyStringPrintf(dy, ", %s", sln->name); + } +jsonWriteString(jw, "title", dy->string); +// Description +jsonWriteStringf(jw, "description", "Dataset generated by [UShER web interface]" "(%shgPhyloPlace) using the " "[usher](https://github.com/yatisht/usher/) program. " //#*** TODO: describe input from which tree was generated: user sample, version of tree, etc. + "If you have metadata you wish to display, you can now drag on a CSV file and " + "it will be added into this view, [see here]("NEXTSTRAIN_DRAG_DROP_DOC") " + "for more info." , hLocalHostCgiBinUrl()); -fputs("If you have metadata you wish to display, you can now drag on a CSV file and it will be " - "added into this view, [see here]("NEXTSTRAIN_DRAG_DROP_DOC") " - "for more info.\"} ," - , outF); +// Panels: just the tree (no map, entropy etc.) +jsonWriteListStart(jw, "panels"); +jsonWriteString(jw, NULL, "tree"); +jsonWriteListEnd(jw); +// Default label & color +jsonWriteObjectStart(jw, "display_defaults"); +jsonWriteString(jw, "branch_label", "none"); +jsonWriteString(jw, "color_by", getDefaultColor(colorFields)); +jsonWriteObjectEnd(jw); +// Colorings: userOrOld, gt and whatever we got from metadata +auspiceMetaColorings(jw, source, colorFields); +// Filters didn't work when I tried them a long time ago... revisit someday. +jsonWriteListStart(jw, "filters"); +jsonWriteListEnd(jw); +// Annotations for coloring/filtering by base +writeAuspiceMetaGenomeAnnotations(jw, geneInfoList, genomeSize); +jsonWriteObjectEnd(jw); } static void jsonWriteObjectValueUrl(struct jsonWrite *jw, char *name, char *value, char *url) /* Write an object with member "value" set to value, and if url is non-empty, "url" set to url. */ { jsonWriteObjectStart(jw, name); jsonWriteString(jw, "value", value); if (isNotEmpty(url)) jsonWriteString(jw, "url", url); jsonWriteObjectEnd(jw); } static void jsonWriteObjectValue(struct jsonWrite *jw, char *name, char *value) /* Write an object with one member, "value", set to value, as most Auspice node attributes are * formatted. */ @@ -146,60 +219,65 @@ static void makeLineageUrl(char *lineage, char *lineageUrl, size_t lineageUrlSize) /* If lineage is not "uploaded sample", make an outbreak.info link to it, otherwise just copy * lineage. */ { if (sameString(lineage, "uploaded sample")) safecpy(lineageUrl, lineageUrlSize, lineage); else safef(lineageUrl, lineageUrlSize, OUTBREAK_INFO_URLBASE "%s", lineage); } static void jsonWriteLeafNodeAttributes(struct jsonWrite *jw, char *name, struct sampleMetadata *met, boolean isUserSample, char *source, struct hash *sampleUrls, char **retUserOrOld, char **retNClade, char **retGClade, - char **retLineage, + char **retLineage, char **retNLineage, char **retNCladeUsher, char **retLineageUsher) /* Write elements of node_attrs for a sample which may be preexisting and in our metadata hash, * or may be a new sample from the user. Set rets for color categories so parent branches can * determine their color categories. */ { *retUserOrOld = isUserSample ? "uploaded sample" : source; jsonWriteObjectValue(jw, "userOrOld", *retUserOrOld); if (met && met->date) jsonWriteObjectValue(jw, "date", met->date); if (met && met->author) { jsonWriteObjectValue(jw, "author", met->author); // Note: Nextstrain adds paper_url and title when available; they also add author and use // a uniquified value (e.g. "author": "Wenjie Tan et al" / "value": "Wenjie Tan et al A") } *retNClade = isUserSample ? "uploaded sample" : (met && met->nClade) ? met->nClade : NULL; if (isNotEmpty(*retNClade)) jsonWriteObjectValue(jw, "Nextstrain_clade", *retNClade); *retGClade = isUserSample ? "uploaded sample" : (met && met->gClade) ? met->gClade : NULL; if (isNotEmpty(*retGClade)) jsonWriteObjectValue(jw, "GISAID_clade", *retGClade); *retLineage = isUserSample ? "uploaded sample" : (met && met->lineage) ? met->lineage : NULL; if (isNotEmpty(*retLineage)) { char lineageUrl[1024]; makeLineageUrl(*retLineage, lineageUrl, sizeof lineageUrl); jsonWriteObjectValueUrl(jw, "pango_lineage", *retLineage, lineageUrl); } +*retNLineage = isUserSample ? "uploaded sample" : (met && met->nLineage) ? met->nLineage : NULL; +if (isNotEmpty(*retNLineage)) + { + jsonWriteObjectValue(jw, "Nextstrain_lineage", *retNLineage); + } if (met && met->epiId) jsonWriteObjectValue(jw, "gisaid_epi_isl", met->epiId); if (met && met->gbAcc) jsonWriteObjectValue(jw, "genbank_accession", met->gbAcc); if (met && met->country) jsonWriteObjectValue(jw, "country", met->country); if (met && met->division) jsonWriteObjectValue(jw, "division", met->division); if (met && met->location) jsonWriteObjectValue(jw, "location", met->location); if (met && met->countryExp) jsonWriteObjectValue(jw, "country_exposure", met->countryExp); if (met && met->divExp) jsonWriteObjectValue(jw, "division_exposure", met->divExp); if (met && met->origLab) @@ -226,82 +304,83 @@ char *p = strstr(sampleUrl, "subtreeAuspice"); char *subtreeNum = p + strlen("subtreeAuspice"); if (p && isdigit(*subtreeNum)) { int num = atoi(subtreeNum); char subtreeLabel[1024]; safef(subtreeLabel, sizeof subtreeLabel, "view subtree %d", num); jsonWriteObjectValueUrl(jw, "subtree", subtreeLabel, sampleUrl); } else jsonWriteObjectValueUrl(jw, "subtree", sampleUrl, sampleUrl); } } static void jsonWriteBranchNodeAttributes(struct jsonWrite *jw, char *userOrOld, - char *nClade, char *gClade, char *lineage, + char *nClade, char *gClade, char *lineage, char *nLineage, char *nCladeUsher, char *lineageUsher) /* Write elements of node_attrs for a branch. */ { if (userOrOld) jsonWriteObjectValue(jw, "userOrOld", userOrOld); if (nClade) jsonWriteObjectValue(jw, "Nextstrain_clade", nClade); if (gClade) jsonWriteObjectValue(jw, "GISAID_clade", gClade); if (lineage) jsonWriteObjectValue(jw, "pango_lineage", lineage); +if (nLineage) + jsonWriteObjectValue(jw, "Nextstrain_lineage", lineage); if (nCladeUsher) jsonWriteObjectValue(jw, "Nextstrain_clade_usher", nCladeUsher); if (lineageUsher) jsonWriteObjectValue(jw, "pango_lineage_usher", lineageUsher); } static boolean changesProtein(struct singleNucChange *snc, struct geneInfo *gi, struct seqWindow *gSeqWin, int *retAaStart, char *retOldAa, char *retNewAa) /* If snc changes the coding sequence of gene, return TRUE and set ret values accordingly * (note amino acid values are single-base not strings). */ { boolean isCodingChange = FALSE; if (snc->chromStart < gi->psl->tEnd && snc->chromStart >= gi->psl->tStart) { struct bed3 gBed3 = { NULL, gSeqWin->seqName, snc->chromStart, snc->chromStart+1 }; char gAlt[2]; safef(gAlt, sizeof(gAlt), "%c", snc->newBase); - if (!sameString(gi->psl->strand, "+")) - errAbort("changesProtein: only works for psl->strand='+', but gene '%s' has strand '%s'", - gi->psl->qName, gi->psl->strand); struct vpTx *vpTx = vpGenomicToTranscript(gSeqWin, &gBed3, gAlt, gi->psl, gi->txSeq); if (vpTx->start.region == vpExon) { int aaStart = vpTx->start.txOffset / 3; int codonStart = aaStart * 3; char newCodon[4]; safencpy(newCodon, sizeof newCodon, gi->txSeq->dna + codonStart, 3); int codonOffset = vpTx->start.txOffset - codonStart; assert(codonOffset < sizeof newCodon); - newCodon[codonOffset] = snc->newBase; + char newNt = (pslOrientation(gi->psl) > 0) ? snc->newBase : ntCompTable[(int)snc->newBase]; + newCodon[codonOffset] = newNt; char newAa = lookupCodon(newCodon); char oldAa; if (snc->parBase == snc->refBase) oldAa = lookupCodon(gi->txSeq->dna + codonStart); else { char oldCodon[4]; safencpy(oldCodon, sizeof oldCodon, gi->txSeq->dna + codonStart, 3); - oldCodon[codonOffset] = snc->parBase; + char oldNt = (pslOrientation(gi->psl) > 0) ? snc->parBase : ntCompTable[(int)snc->parBase]; + oldCodon[codonOffset] = oldNt; oldAa = lookupCodon(oldCodon); } // Watch out for lookupCodon's null-character return value for stop codon; we want '*'. if (newAa == '\0') newAa = '*'; if (oldAa == '\0') oldAa = '*'; if (newAa != oldAa) { isCodingChange = TRUE; *retAaStart = aaStart; *retOldAa = oldAa; *retNewAa = newAa; } } @@ -466,160 +545,190 @@ runLength++; if (runLength > maxRunLength) { maxRunLength = runLength; maxRunVal = array[i]; } } else runLength = 1; } return (maxRunLength > (arraySize >> 1)) ? maxRunVal : NULL; } static void rTreeToAuspiceJson(struct phyloTree *node, int depth, struct auspiceJsonInfo *aji, char **retUserOrOld, char **retNClade, char **retGClade, - char **retLineage, char **retNCladeUsher, char **retLineageUsher) + char **retLineage, char **retNLineage, + char **retNCladeUsher, char **retLineageUsher) /* Write Augur/Auspice V2 JSON for tree. Enclosing object start and end are written by caller. */ { if (node->priv) { struct singleNucChange *sncList = node->priv; depth += slCount(sncList); } boolean isUserSample = FALSE; if (node->ident->name) isUserSample = slNameInList(aji->subtreeUserSampleIds, node->ident->name); char *name = node->ident->name; struct sampleMetadata *met = name ? metadataForSample(aji->sampleMetadata, name) : NULL; if (name) jsonWriteString(aji->jw, "name", name); else jsonWriteStringf(aji->jw, "name", "NODE%d", aji->nodeNum++); jsonWriteBranchAttrs(aji->jw, node, aji->geneInfoList, aji->gSeqWin); if (node->numEdges > 0) { jsonWriteListStart(aji->jw, "children"); char *kidUserOrOld[node->numEdges]; char *kidNClade[node->numEdges]; char *kidGClade[node->numEdges]; char *kidLineage[node->numEdges]; char *kidNCladeUsher[node->numEdges]; char *kidLineageUsher[node->numEdges]; + char *kidNLineage[node->numEdges]; // Step through children in reverse order because nextstrain/Auspice draws upside-down. :) int i; for (i = node->numEdges - 1; i >= 0; i--) { jsonWriteObjectStart(aji->jw, NULL); rTreeToAuspiceJson(node->edges[i], depth, aji, &kidUserOrOld[i], &kidNClade[i], &kidGClade[i], &kidLineage[i], - &kidNCladeUsher[i], &kidLineageUsher[i]); + &kidNLineage[i], &kidNCladeUsher[i], &kidLineageUsher[i]); jsonWriteObjectEnd(aji->jw); } jsonWriteListEnd(aji->jw); if (retUserOrOld) *retUserOrOld = majorityMaybe(kidUserOrOld, node->numEdges); if (retNClade) *retNClade = majorityMaybe(kidNClade, node->numEdges); if (retGClade) *retGClade = majorityMaybe(kidGClade, node->numEdges); if (retLineage) *retLineage = majorityMaybe(kidLineage, node->numEdges); if (retNCladeUsher) *retNCladeUsher = majorityMaybe(kidNCladeUsher, node->numEdges); if (retLineageUsher) *retLineageUsher = majorityMaybe(kidLineageUsher, node->numEdges); + if (retNLineage) + *retNLineage = majorityMaybe(kidNLineage, node->numEdges); } jsonWriteObjectStart(aji->jw, "node_attrs"); jsonWriteDouble(aji->jw, "div", depth); if (node->numEdges == 0) jsonWriteLeafNodeAttributes(aji->jw, name, met, isUserSample, aji->source, aji->sampleUrls, - retUserOrOld, retNClade, retGClade, retLineage, + retUserOrOld, retNClade, retGClade, retLineage, retNLineage, retNCladeUsher, retLineageUsher); else if (retUserOrOld && retGClade && retLineage) jsonWriteBranchNodeAttributes(aji->jw, *retUserOrOld, *retNClade, *retGClade, *retLineage, - *retNCladeUsher, *retLineageUsher); + *retNLineage, *retNCladeUsher, *retLineageUsher); jsonWriteObjectEnd(aji->jw); } struct phyloTree *phyloTreeNewNode(char *name) /* Alloc & return a new node with no children. */ { struct phyloTree *node; AllocVar(node); AllocVar(node->ident); node->ident->name = cloneString(name); return node; } struct geneInfo *getGeneInfoList(char *bigGenePredFile, struct dnaSeq *refGenome) /* If config.ra has a source of gene annotations, then return the gene list. */ { struct geneInfo *geneInfoList = NULL; -if (isNotEmpty(bigGenePredFile) && fileExists(bigGenePredFile)) +struct bbiFile *bbi = NULL; +struct errCatch *errCatch = errCatchNew(); +if (isNotEmpty(bigGenePredFile)) + { + if (errCatchStart(errCatch)) + { + bbi = bigBedFileOpen(bigGenePredFile); + } + errCatchEnd(errCatch); + } +if (bbi) { - struct bbiFile *bbi = bigBedFileOpen(bigGenePredFile); struct lm *lm = lmInit(0); struct bigBedInterval *bb, *bbList = bigBedIntervalQuery(bbi, refGenome->name, 0, refGenome->size, 0, lm); for (bb = bbList; bb != NULL; bb = bb->next) { struct genePredExt *gp = genePredFromBigGenePred(refGenome->name, bb); - if (gp->strand[0] != '+') - errAbort("getGeneInfoList: strand must be '+' but got '%s' for gene %s", - gp->strand, gp->name); int txLen = 0; int ex; for (ex = 0; ex < gp->exonCount; ex++) txLen += (gp->exonEnds[ex] - gp->exonStarts[ex]); char *seq = needMem(txLen+1); int txOffset = 0; for (ex = 0; ex < gp->exonCount; ex++) { int exonLen = gp->exonEnds[ex] - gp->exonStarts[ex]; safencpy(seq+txOffset, txLen+1-txOffset, refGenome->dna+gp->exonStarts[ex], exonLen); txOffset += exonLen; } + if (sameString(gp->strand, "-")) + reverseComplement(seq, txLen); struct geneInfo *gi; AllocVar(gi); gi->psl = genePredToPsl((struct genePred *)gp, refGenome->size, txLen); + gi->psl->qName = cloneString(gp->name2); gi->txSeq = newDnaSeq(seq, txLen, gp->name2); slAddHead(&geneInfoList, gi); } lmCleanup(&lm); bigBedFileClose(&bbi); } slReverse(&geneInfoList); return geneInfoList; } void treeToAuspiceJson(struct subtreeInfo *sti, char *db, struct geneInfo *geneInfoList, struct seqWindow *gSeqWin, struct hash *sampleMetadata, struct hash *sampleUrls, char *jsonFile, char *source) /* Write JSON for tree in Nextstrain's Augur/Auspice V2 JSON format * (https://github.com/nextstrain/augur/blob/master/augur/data/schema-export-v2.json). */ { struct phyloTree *tree = sti->subtree; FILE *outF = mustOpen(jsonFile, "w"); -fputs("{ \"version\": \"v2\", ", outF); -writeAuspiceMeta(outF, sti->subtreeUserSampleIds, source); -// The meta part is mostly constant & easier to just write out, but jsonWrite is better for the -// nested tree structure. struct jsonWrite *jw = jsonWriteNew(); +jsonWriteObjectStart(jw, NULL); +jsonWriteString(jw, "version", "v2"); +//#*** FIXME: TODO: either pass in along with sampleMetadata, or better yet, compute while building +//#*** tree object and then write the header object. +struct slName *colorFields = NULL; +if (sameString(db, "wuhCor1")) + { + slNameAddHead(&colorFields, "country"); + slNameAddHead(&colorFields, "Nextstrain_clade_usher"); + slNameAddHead(&colorFields, "pango_lineage_usher"); + slNameAddHead(&colorFields, "Nextstrain_clade"); + slNameAddHead(&colorFields, "pango_lineage"); + } +else + { + slNameAddHead(&colorFields, "country"); + slNameAddHead(&colorFields, "Nextstrain_lineage"); + } +//#*** END FIXME +writeAuspiceMeta(jw, sti->subtreeUserSampleIds, source, colorFields, geneInfoList, + gSeqWin->end); jsonWriteObjectStart(jw, "tree"); int nodeNum = 10000; // Auspice.us starting node number for newick -> json int depth = 0; // Add an extra root node because otherwise Auspice won't draw branch from big tree root to subtree struct phyloTree *root = phyloTreeNewNode("wrapper"); phyloAddEdge(root, tree); tree = root; struct auspiceJsonInfo aji = { jw, sti->subtreeUserSampleIds, geneInfoList, gSeqWin, sampleMetadata, sampleUrls, nodeNum, source }; -rTreeToAuspiceJson(tree, depth, &aji, NULL, NULL, NULL, NULL, NULL, NULL); -jsonWriteObjectEnd(jw); +rTreeToAuspiceJson(tree, depth, &aji, NULL, NULL, NULL, NULL, NULL, NULL, NULL); +jsonWriteObjectEnd(jw); // tree +jsonWriteObjectEnd(jw); // top-level object fputs(jw->dy->string, outF); jsonWriteFree(&jw); -fputs("}", outF); carefulClose(&outF); }