e267c9ec78e654975dbcd0bb8d5b4bf393187269 angie Thu Mar 4 15:39:32 2021 -0800 Add ZIP file of subtree JSON & Newick files for download. Add subtree numbers to the filenames so it's not all trashDir soup. diff --git src/hg/hgPhyloPlace/runUsher.c src/hg/hgPhyloPlace/runUsher.c index 6987b4a..f208a1b 100644 --- src/hg/hgPhyloPlace/runUsher.c +++ src/hg/hgPhyloPlace/runUsher.c @@ -535,38 +535,39 @@ static void rSubstTreeNames(struct phyloTree *node, struct hash *nameSubstitutions) /* If node or descendants have names in nameSubstitutions, then substitute those names. */ { if (node->ident->name) { char *subst = hashFindVal(nameSubstitutions, node->ident->name); if (subst) node->ident->name = subst; } int i; for (i = 0; i < node->numEdges; i++) rSubstTreeNames(node->edges[i], nameSubstitutions); } -static struct tempName *substituteTreeNames(struct phyloTree *tree, struct hash *nameSubstitutions) +static struct tempName *substituteTreeNames(struct phyloTree *tree, char *treeName, + struct hash *nameSubstitutions) /* If tree has any nodes whose names are in nameSubstitutions, then substitute those names. * Write tree out to a trash file and return its location. */ { rSubstTreeNames(tree, nameSubstitutions); struct tempName *newTn; AllocVar(newTn); -trashDirFile(newTn, "ct", "treeNameSubst", ".nwk"); +trashDirFile(newTn, "ct", treeName, ".nwk"); FILE *f = mustOpen(newTn->forCgi, "w"); phyloPrintTree(tree, f); carefulClose(&f); return newTn; } static struct slName *substituteNameList(struct slName *idList, struct hash *nameSubstitutions) /* Return a new list that is just like idList, except if any item in idList has a value in * nameSubstitutions, then the item is replaced by the substitution. */ { struct slName *newList = NULL; struct slName *id; for (id = idList; id != NULL; id = id->next) { char *subst = hashFindVal(nameSubstitutions, id->name); @@ -581,80 +582,82 @@ { struct variantPathNode *nodeMuts = slPopHead(pNodeMutList); if (! nodeMuts) errAbort("addMutationsToTree: subtree mutation list has fewer nodes than subtree"); if (node->ident->name && ! sameString(nodeMuts->nodeName, node->ident->name)) errAbort("addMutationsToTree: subtree node name is '%s' but subtree mutation list item is '%s'", node->ident->name, nodeMuts->nodeName); if (node->priv != NULL) errAbort("addMutationsToTree: node already has mutations assigned"); node->priv = nodeMuts->sncList; int i; for (i = 0; i < node->numEdges; i++) addMutationsToTree(node->edges[i], pNodeMutList); } -static struct subtreeInfo *parseOneSubtree(struct tempName *subtreeTn, +static struct subtreeInfo *parseOneSubtree(struct tempName *subtreeTn, char *subtreeName, struct variantPathNode *subtreeMuts, struct slName *userSampleIds, struct hash *condensedNodes) /* Parse usher's subtree output, figure out which user samples are in subtree and expand names * of condensed nodes. */ { struct subtreeInfo *ti; AllocVar(ti); ti->subtreeTn = subtreeTn; ti->subtree = phyloOpenTree(ti->subtreeTn->forCgi); addMutationsToTree(ti->subtree, &subtreeMuts); if (subtreeMuts != NULL) errAbort("addMutationsToTree: subtreeMutationList has more nodes than subtree"); struct slName *subtreeIdList = phyloLeafNames(ti->subtree); // Don't do name substitutions on condensed node names in subtreeIdToIx since the IDs have to // match those in the original tree from protobuf. ti->subtreeIdToIx = slNameListToIxHash(subtreeIdList); ti->subtreeUserSampleIds = getSubtreeSampleIds(userSampleIds, ti->subtreeIdToIx); if (slCount(ti->subtreeUserSampleIds) == 0) errAbort("No user sample IDs found in subtree file %s", ti->subtreeTn->forCgi); // Substitute in nicer node names for condensed nodes for displaying to the user in // custom tracks and Nextstrain/auspice JSON. struct hash *nameSubstitutions = expandCondensedNodeNames(condensedNodes, subtreeIdList); if (nameSubstitutions->elCount > 0) - ti->subtreeTn = substituteTreeNames(ti->subtree, nameSubstitutions); + ti->subtreeTn = substituteTreeNames(ti->subtree, subtreeName, nameSubstitutions); ti->subtreeNameList = substituteNameList(subtreeIdList, nameSubstitutions); hashFree(&nameSubstitutions); slFreeList(&subtreeIdList); return ti; } static struct subtreeInfo *parseSubtrees(int subtreeCount, struct tempName *singleSubtreeTn, struct variantPathNode *singleSubtreeMuts, struct tempName *subtreeTns[], struct variantPathNode *subtreeMuts[], struct slName *userSampleIds, struct hash *condensedNodes) /* Parse usher's subtree output, figure out which user samples are in each subtree, expand names * of condensed nodes. Add parsed singleSubtree at head of list, followed by numbered subtrees. */ { struct subtreeInfo *subtreeInfoList = NULL; int sIx; for (sIx = 0; sIx < subtreeCount; sIx++) { - struct subtreeInfo *ti = parseOneSubtree(subtreeTns[sIx], subtreeMuts[sIx], userSampleIds, - condensedNodes); + char subtreeName[512]; + safef(subtreeName, sizeof(subtreeName), "subtree%d", sIx+1); + struct subtreeInfo *ti = parseOneSubtree(subtreeTns[sIx], subtreeName, subtreeMuts[sIx], + userSampleIds, condensedNodes); slAddHead(&subtreeInfoList, ti); } slReverse(&subtreeInfoList); -struct subtreeInfo *ti = parseOneSubtree(singleSubtreeTn, singleSubtreeMuts, userSampleIds, - condensedNodes); +struct subtreeInfo *ti = parseOneSubtree(singleSubtreeTn, "singleSubtree", singleSubtreeMuts, + userSampleIds, condensedNodes); slAddHead(&subtreeInfoList, ti); return subtreeInfoList; } static void parseClades(char *filename, struct hash *samplePlacements) /* Parse usher's clades.txt, which might have {sample, clade} or {sample, clade, lineage}. */ { struct hash *wordStore = hashNew(0); struct lineFile *lf = lineFileOpen(filename, TRUE); char *line; while (lineFileNext(lf, &line, NULL)) { char *words[3]; int wordCount = chopTabs(line, words); char *sampleId = words[0];