705420e703b067fbcad43ab67ed3e131552e7ac8 angie Wed Apr 24 14:23:03 2024 -0700 Pathogen drop-down choices can now be groups of references/trees, for example 'Dengue (types 1 - 4)' instead of a separate choice for each type. Instead of config.ra, each group has an organism.ra and subdirectories named after reference accessions that contain reference.ra files. nextclade sort is used to match the user's uploaded sequences against available references for the selected pathogen. SARS-CoV-2, M. tuberculosis and hMPXV still have only one reference and still use config.ra, but RSV, Dengue and Influenza will become groups. Presentation is still kinda rough, just a loop on the original results output. The server commands part needs testing and will not work yet for groups (currently used only for SARS-CoV-2). diff --git src/hg/hgPhyloPlace/treeToAuspiceJson.c src/hg/hgPhyloPlace/treeToAuspiceJson.c index 1db6673..ebe22a0 100644 --- src/hg/hgPhyloPlace/treeToAuspiceJson.c +++ src/hg/hgPhyloPlace/treeToAuspiceJson.c @@ -1,19 +1,19 @@ /* Convert a (sub)tree with condensed nodes to JSON for Nextstrain to display, adding in sample * mutations, protein changes and metadata. */ -/* Copyright (C) 2020 The Regents of the University of California */ +/* Copyright (C) 2020-2024 The Regents of the University of California */ #include "common.h" #include "dnaseq.h" #include "errCatch.h" #include "hash.h" #include "hui.h" #include "jsonWrite.h" #include "linefile.h" #include "obscure.h" #include "parsimonyProto.h" #include "phyloPlace.h" #include "phyloTree.h" #include "variantProjector.h" @@ -235,62 +235,62 @@ else if (sameString(col->name, "GCC_nextclade")) auspiceMetaColoringCategorical(jw, col->name, "RGCC lineage assigned by nextclade"); else if (sameString(col->name, "GCC_usher")) auspiceMetaColoringCategorical(jw, col->name, "RGCC lineage assigned by UShER"); else if (sameString(col->name, "GCC_assigned_2023-11")) auspiceMetaColoringCategorical(jw, col->name, "RGCC designated lineage"); else if (sameString(col->name, "country")) auspiceMetaColoringCategorical(jw, col->name, "Country"); else auspiceMetaColoringCategorical(jw, col->name, col->name); } jsonWriteListEnd(jw); } static void writeAuspiceMeta(struct jsonWrite *jw, struct slName *subtreeUserSampleIds, char *source, - char *db, struct geneInfo *geneInfoList, + char *org, char *db, struct geneInfo *geneInfoList, uint genomeSize, boolean isRsv, boolean isFlu) /* Write metadata to configure Auspice display. */ { jsonWriteObjectStart(jw, "meta"); // Title struct dyString *dy = dyStringCreate("Subtree with %s", subtreeUserSampleIds->name); int sampleCount = slCount(subtreeUserSampleIds); if (sampleCount > 10) dyStringPrintf(dy, " and %d other uploaded samples", sampleCount - 1); else { struct slName *sln; for (sln = subtreeUserSampleIds->next; sln != NULL; sln = sln->next) dyStringPrintf(dy, ", %s", sln->name); } jsonWriteString(jw, "title", dy->string); // Description jsonWriteStringf(jw, "description", "Dataset generated by [UShER web interface]" "(%shgPhyloPlace) using the " "[usher](https://github.com/yatisht/usher/) program. " //#*** TODO: describe input from which tree was generated: user sample, version of tree, etc. "If you have metadata you wish to display, you can now drag on a CSV file and " "it will be added into this view, [see here]("NEXTSTRAIN_DRAG_DROP_DOC") " "for more info." , hLocalHostCgiBinUrl()); // Panels: just the tree and entropy (no map) jsonWriteListStart(jw, "panels"); jsonWriteString(jw, NULL, "tree"); jsonWriteString(jw, NULL, "entropy"); jsonWriteListEnd(jw); -char *metaJsonFile = phyloPlaceDbSettingPath(db, "auspiceMeta"); +char *metaJsonFile = phyloPlaceRefSettingPath(org, db, "auspiceMeta"); if (isNotEmpty(metaJsonFile) && fileExists(metaJsonFile)) { char *metaJson = NULL; size_t size = 0; readInGulp(metaJsonFile, &metaJson, &size); while (size > 0 && metaJson[size-1] == '\n') metaJson[--size] = '\0'; jsonWriteAppendData(jw, ", "); jsonWriteAppendData(jw, metaJson); freeMem(metaJson); } else { // Default label & color struct slName *colorFields = getColorFields(db, isRsv, isFlu); @@ -864,76 +864,76 @@ genePredToCds((struct genePred *)gp, gi->cds); int cdsLen = gi->cds->end - gi->cds->start; // Skip genes with no CDS (like RNA genes) or obviously incomplete/incorrect CDS. if (cdsLen > 0 && (cdsLen % 3) == 0) { slAddHead(&geneInfoList, gi); } } lmCleanup(&lm); bigBedFileClose(&bbi); } slReverse(&geneInfoList); return geneInfoList; } -static int getBranchAttrCols(char *db, char ***retBranchAttrCols) +static int getBranchAttrCols(char *org, char *db, char ***retBranchAttrCols) /* Alloc an array of metadata column names to use as branch attributes and return count. * There will always be at least 1 (userOrOld / Sample type); others come from config setting. */ { int branchAttrCount = 1; struct slName *attrList = NULL, *attr; -char *branchAttrSetting = phyloPlaceDbSetting(db, "branchAttributes"); +char *branchAttrSetting = phyloPlaceRefSetting(org, db, "branchAttributes"); if (isNotEmpty(branchAttrSetting)) { attrList = slNameListFromComma(branchAttrSetting); branchAttrCount += slCount(attrList); } char **branchAttrCols = NULL; AllocArray(branchAttrCols, branchAttrCount); branchAttrCols[0] = cloneString("userOrOld"); int i; for (i = 1, attr = attrList; i < branchAttrCount && attr != NULL; i++, attr = attr->next) branchAttrCols[i] = cloneString(trimSpaces(attr->name)); *retBranchAttrCols = branchAttrCols; return branchAttrCount; } -void treeToAuspiceJson(struct subtreeInfo *sti, char *db, struct geneInfo *geneInfoList, +void treeToAuspiceJson(struct subtreeInfo *sti, char *org, char *db, struct geneInfo *geneInfoList, struct seqWindow *gSeqWin, struct hash *sampleMetadata, struct hash *sampleUrls, struct hash *samplePlacements, char *jsonFile, char *source) /* Write JSON for tree in Nextstrain's Augur/Auspice V2 JSON format * (https://github.com/nextstrain/augur/blob/master/augur/data/schema-export-v2.json). */ { struct phyloTree *tree = sti->subtree; FILE *outF = mustOpen(jsonFile, "w"); struct jsonWrite *jw = jsonWriteNew(); jsonWriteObjectStart(jw, NULL); jsonWriteString(jw, "version", "v2"); boolean isRsv = (stringIn("GCF_000855545", db) || stringIn("GCF_002815475", db) || startsWith("RGCC", db)); boolean isFlu = (stringIn("GCF_000865085", db) || stringIn("GCF_001343785", db)); -writeAuspiceMeta(jw, sti->subtreeUserSampleIds, source, db, geneInfoList, +writeAuspiceMeta(jw, sti->subtreeUserSampleIds, source, org, db, geneInfoList, gSeqWin->end, isRsv, isFlu); jsonWriteObjectStart(jw, "tree"); int nodeNum = 10000; // Auspice.us starting node number for newick -> json int depth = 0; // Add an extra root node because otherwise Auspice won't draw branch from big tree root to subtree struct phyloTree *root = phyloTreeNewNode("wrapper"); phyloAddEdge(root, tree); tree = root; struct auspiceJsonInfo aji = { jw, sti->subtreeUserSampleIds, geneInfoList, gSeqWin, sampleMetadata, sampleUrls, samplePlacements, nodeNum, source }; char **branchAttrCols = NULL; -int branchAttrCount = getBranchAttrCols(db, &branchAttrCols); +int branchAttrCount = getBranchAttrCols(org, db, &branchAttrCols); rTreeToAuspiceJson(tree, depth, &aji, NULL, isRsv, branchAttrCount, branchAttrCols, NULL); jsonWriteObjectEnd(jw); // tree jsonWriteObjectEnd(jw); // top-level object fputs(jw->dy->string, outF); jsonWriteFree(&jw); carefulClose(&outF); }