src/hg/utils/phyloTrees/rerootTree.pl 1.1
1.1 2009/12/09 17:05:37 hiram
Initial contents
Index: src/hg/utils/phyloTrees/rerootTree.pl
===================================================================
RCS file: src/hg/utils/phyloTrees/rerootTree.pl
diff -N src/hg/utils/phyloTrees/rerootTree.pl
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ src/hg/utils/phyloTrees/rerootTree.pl 9 Dec 2009 17:05:37 -0000 1.1
@@ -0,0 +1,104 @@
+#!/usr/bin/env perl
+# rerootTree.pl - rearrange a phylogenetic tree in order from a given node
+
+# $Id$
+
+use strict;
+use warnings;
+no warnings 'recursion';
+
+my $argc = scalar(@ARGV);
+
+if ($argc != 2) {
+ printf STDERR "rerootTree.pl - rearrange a phylogenetic tree in order from a given node\n";
+ printf STDERR "usage:\n rerootTree.pl <nodeName> <dissect_output>\n";
+ printf STDERR "obtain dissect_output from tree-doctor with --dissect option\n";
+ printf STDERR "e.g. tree_doctor --dissect --name-ancestors 46way.nh \\\n";
+ printf STDERR " | rerootTree.pl monDom5 stdin\n";
+ exit 255;
+}
+
+my @nodes; # index is node number, value is name
+my @parent; # index is node number, value is parent node number
+my %nodeNames; # key is node name, value is node number
+my @lchild; # index is node number, value is node number of lchild
+my @rchild; # index is node number, value is node number of rchild
+my %nodesDone; # during tree printout, to mark nodes done
+
+# recursive routine, given a node number, show its tree
+sub outputTree($) {
+ my ($id) = @_;
+ return if (exists($nodesDone{$id}));
+ if ($rchild[$id] > -1) {
+ &outputTree($rchild[$id]);
+ }
+ if ($lchild[$id] > -1) {
+ &outputTree($lchild[$id]);
+ }
+ printf "%s\n", $nodes[$id] if ($id ne $nodes[$id]);
+ $nodesDone{$id} = 1;
+ &outputTree($parent[$id]) if ($parent[$id] > -1);
+ return;
+}
+
+my $findName = shift;
+my $file = shift;
+
+if ($file =~ m/stdin/) {
+ open (FH, "</dev/stdin") or die "can not read stdin";
+} else {
+ open (FH, "<$file") or die "can not read $file";
+}
+
+
+my $curNode = -1;
+
+while (my $line = <FH>) {
+# print $line;
+ chomp $line;
+ if ($line =~ m/^Node /) {
+ (my $dummy, $curNode) = split('\s+', $line);
+ $curNode =~ s/://;
+ } elsif ($line =~ m/\s+parent = /) {
+ $line =~ s/^\s+//;
+ my ($dummy, $equal, $id) = split('\s+', $line);
+ $parent[$curNode] = $id;
+ } elsif ($line =~ m/\s+lchild = /) {
+ $line =~ s/^\s+//;
+ my ($dummy, $equal, $id) = split('\s+', $line);
+ $lchild[$curNode] = $id;
+ } elsif ($line =~ m/\s+rchild = /) {
+ $line =~ s/^\s+//;
+ my ($dummy, $equal, $id) = split('\s+', $line);
+ $rchild[$curNode] = $id;
+ } elsif ($line =~ m/\s+name = /) {
+ $line =~ s/^\s+//;
+ my ($dummy, $equal, $name) = split('\s+', $line);
+ $name =~ s/'//g;
+ $name = $curNode if (length($name) < 1);
+ $nodes[$curNode] = $name;
+ $nodeNames{$name} = $curNode;
+ }
+}
+close (FH);
+
+if (exists($nodeNames{$findName})) {
+ $curNode = $nodeNames{$findName};
+ my $parentNode = $parent[$curNode];
+ my $parentName = $nodes[$parentNode];
+ printf STDERR "tree from: Node %d, name: %s, parent: %d, parent: %s\n", $curNode, $findName, $parentNode, $parentName;
+ &outputTree($curNode);
+} else {
+ printf STDERR "ERROR: can not find specified name: $findName\n";
+ exit 255;
+}
+
+__END__
+
+Node 89:
+ parent = 81
+ lchild = -1
+ rchild = -1
+ name = 'danRer6'
+ dparent = 0.731166
+