edd8fa03488ce1ee3fa7b12b64de39ad0e3fc216 angie Wed Oct 21 12:57:36 2020 -0700 New utils for making the public-sequences-only version of the SARS-CoV-2 Phylogeny track. Forgot to add these back in commit 0901c8be. refs #26364 diff --git src/hg/utils/phyloRenameAndPrune/phyloRenameAndPrune.c src/hg/utils/phyloRenameAndPrune/phyloRenameAndPrune.c new file mode 100644 index 0000000..0636628 --- /dev/null +++ src/hg/utils/phyloRenameAndPrune/phyloRenameAndPrune.c @@ -0,0 +1,84 @@ +/* phyloRenameAndPrune - Rename or remove leaves of phylogenetic tree and prune any branches with no remaining leaves. */ +#include "common.h" +#include "hash.h" +#include "linefile.h" +#include "obscure.h" +#include "options.h" +#include "phyloTree.h" + +void usage() +/* Explain usage and exit. */ +{ +errAbort( + "phyloRenameAndPrune - Rename or remove leaves of phylogenetic tree and prune any branches with no remaining leaves\n" + "usage:\n" + " phyloRenameAndPrune treeIn.nh renaming.tab treeOut.nh\n" +// "options:\n" +// " -xxx=XXX\n" + "renaming.tab has two columns: old name (must uniquely match some leaf in tree) and new name.\n" + ); +} + +/* Command line validation table. */ +static struct optionSpec options[] = { + {NULL, 0}, +}; + +struct phyloTree *rRenameAndPrune(struct phyloTree *node, struct hash *renaming) +{ +if (node->numEdges == 0) + { + char *newName = NULL; + if (node->ident->name && (newName = hashFindVal(renaming, node->ident->name)) != NULL) + node->ident->name = newName; + else + return NULL; + } +else + { + // Rename or prune children + struct phyloTree *newKids[node->numEdges]; + int newKidCount = 0; + int i; + for (i = 0; i < node->numEdges; i++) + { + struct phyloTree *kid = rRenameAndPrune(node->edges[i], renaming); + if (kid) + newKids[newKidCount++] = kid; + } + if (newKidCount == 0) + return NULL; + else if (newKidCount < node->numEdges) + { + // At least one kid was pruned; update node. + node->numEdges = newKidCount; + for (i = 0; i < newKidCount; i++) + node->edges[i] = newKids[i]; + } + } +return node; +} + +void phyloRenameAndPrune(char *treeInFile, char *renamingFile, char *treeOutFile) +/* phyloRenameAndPrune - Rename or remove leaves of phylogenetic tree and prune any branches with no remaining leaves. */ +{ +struct phyloTree *tree = phyloOpenTree(treeInFile); +struct hash *renaming = hashTwoColumnFile(renamingFile); +tree = rRenameAndPrune(tree, renaming); +FILE *outF = mustOpen(treeOutFile, "w"); +if (tree) + phyloPrintTree(tree, outF); +else + warn("No leaves were renamed, all were pruned; no tree to write to %s.", treeOutFile); +carefulClose(&outF); +} + +int main(int argc, char *argv[]) +/* Process command line. */ +{ +optionInit(&argc, argv, options); +if (argc != 4) + usage(); +phyloRenameAndPrune(argv[1], argv[2], argv[3]); +return 0; +}