edd8fa03488ce1ee3fa7b12b64de39ad0e3fc216
angie
  Wed Oct 21 12:57:36 2020 -0700
New utils for making the public-sequences-only version of the SARS-CoV-2
Phylogeny track.
Forgot to add these back in commit 0901c8be.  refs #26364

diff --git src/hg/utils/phyloRenameAndPrune/phyloRenameAndPrune.c src/hg/utils/phyloRenameAndPrune/phyloRenameAndPrune.c
new file mode 100644
index 0000000..0636628
--- /dev/null
+++ src/hg/utils/phyloRenameAndPrune/phyloRenameAndPrune.c
@@ -0,0 +1,84 @@
+/* phyloRenameAndPrune - Rename or remove leaves of phylogenetic tree and prune any branches with no remaining leaves. */
+#include "common.h"
+#include "hash.h"
+#include "linefile.h"
+#include "obscure.h"
+#include "options.h"
+#include "phyloTree.h"
+
+void usage()
+/* Explain usage and exit.  The whole synopsis is handed to errAbort as one
+ * string; there are no options to describe, only three positional files:
+ * the input tree, the two-column renaming table and the output tree. */
+{
+errAbort("phyloRenameAndPrune - Rename or remove leaves of phylogenetic tree "
+         "and prune any branches with no remaining leaves\n"
+         "usage:\n"
+         "   phyloRenameAndPrune treeIn.nh renaming.tab treeOut.nh\n"
+         "renaming.tab has two columns: "
+         "old name (must uniquely match some leaf in tree) and new name.\n");
+}
+
+/* Option specs handed to optionInit in main.  No options are defined, so
+ * the table holds only the {NULL, 0} entry. */
+static struct optionSpec options[] =
+    { {NULL, 0} };
+
+struct phyloTree *rRenameAndPrune(struct phyloTree *node, struct hash *renaming)
+/* Recursively rename the leaves under node that have an entry in renaming and
+ * prune the rest.  A leaf (numEdges == 0) whose name is found in renaming
+ * takes the stored value as its name (the pointer itself, not a copy); any
+ * other leaf is dropped.  An internal node keeps only its surviving children
+ * and is dropped when none survive.  Returns node, modified in place, or
+ * NULL if it was pruned.  Pruned nodes are not freed. */
+{
+if (node->numEdges == 0)
+    {
+    char *newName = NULL;
+    if (node->ident->name != NULL)
+        newName = hashFindVal(renaming, node->ident->name);
+    if (newName == NULL)
+        return NULL;
+    node->ident->name = newName;
+    return node;
+    }
+// Compact the surviving children toward the front of node->edges as we go.
+// keptCount never exceeds i, so each write lands on a slot that has already
+// been visited; this avoids a stack array sized by numEdges in every frame.
+int keptCount = 0;
+int i;
+for (i = 0;  i < node->numEdges;  i++)
+    {
+    struct phyloTree *kid = rRenameAndPrune(node->edges[i], renaming);
+    if (kid != NULL)
+        node->edges[keptCount++] = kid;
+    }
+if (keptCount == 0)
+    return NULL;
+node->numEdges = keptCount;
+return node;
+}
+
+void phyloRenameAndPrune(char *treeInFile, char *renamingFile, char *treeOutFile)
+/* Load the tree and the renaming table, rename/prune the tree, and write what is left (if anything) to treeOutFile. */
+{
+struct phyloTree *fullTree = phyloOpenTree(treeInFile);
+struct hash *oldToNew = hashTwoColumnFile(renamingFile);
+struct phyloTree *pruned = rRenameAndPrune(fullTree, oldToNew);
+FILE *out = mustOpen(treeOutFile, "w");  // opened even when nothing survived pruning
+if (pruned == NULL)
+    warn("No leaves were renamed, all were pruned; no tree to write to %s.", treeOutFile);
+else
+    phyloPrintTree(pruned, out);
+carefulClose(&out);
+}
+
+int main(int argc, char *argv[])
+/* Run optionInit, require exactly three remaining arguments, then rename and prune. */
+{
+optionInit(&argc, argv, options);
+if (argc - 1 != 3)  // treeIn.nh, renaming.tab, treeOut.nh after the program name
+    usage();
+phyloRenameAndPrune(argv[1], argv[2], argv[3]);
+return 0;
+}