49eae9eda65782332874d75f644479b1a5b0b51b hiram Fri Mar 13 09:45:24 2020 -0700 add name quoting option refs #25090 diff --git src/hg/utils/phyloTrees/binaryTree.pl src/hg/utils/phyloTrees/binaryTree.pl index 455cce4..004bdbb 100755 --- src/hg/utils/phyloTrees/binaryTree.pl +++ src/hg/utils/phyloTrees/binaryTree.pl @@ -72,48 +72,50 @@ # 2. no pointers, this is a leaf, print name:length when parent right # pointer got here, else print ,name:length when left pointer got here use strict; use warnings; use Getopt::Long; ############################################################################## sub usage() { printf STDERR "usage: binaryTree.pl [options] file.phy options: -noInternal - do not output internal node names -defaultDistance=0.1 - use this distance when not given in input -allDistances=0.1 - use this distance for everything, default use input -lineOutput - output one line per leaf output, indented per depth + -quoteNames - add \"quotes\" on node names, default not quoted -nameTranslate= - two column file, translate names from input file, first column is name in input file, second column is output name tab separation columns -verbose=N - specify verbose debug printout, 0 nothing, 1 a bit, 2 more, etc reads 'phylip' file format from NCBI taxonomy and outputs binary newick tree format, resolving the polytomys common to NCBI output format. 
Output is to 'stdout'.\n"; exit 255; } ############################################################################## # globals and options my $noInternal = 0; # option -noInternal - do not output internal node names my $defaultDistance = "0.1"; # to set distances when not given in input my $verbose = 0; # verbose debug level, integer my $allDistances = ""; # to set all distances to this value, default use input my $lineOutput = 0; # one line per leaf output format +my $quoteNames = 0; # add "quotes" on node names my $nameTranslate = ""; # two column tab separated: inputName<tab>outputName my %translateName; # key is input name, value is output name # establish empty root branch parent my %root; my $root = \%root; # pointer handle to root node $root->{'parent'} = undef; $root->{'right'} = undef; $root->{'left'} = undef; $root->{'name'} = 'root'; $root->{'distance'} = $defaultDistance; $root->{'nextLeft'} = 0; # starts out false # the following two are only on this root node for global bookeeping $root->{'branchCount'} = 0; $root->{'leafCount'} = 0; @@ -153,33 +155,41 @@ if (defined($node->{'right'})) { printTree($node->{'right'}); printf "," if (defined($node->{'left'})); if ($lineOutput && defined($node->{'left'})) { printf "\n"; $printNewLine = 1; } } printTree($node->{'left'}) if (defined($node->{'left'})); printf ")" if defined($node->{'left'}); } my $distOut = sprintf("%.9f", $node->{'distance'}); $distOut =~ s/0+$//g; $distOut = "0.000001" if ($distOut eq "0."); if ( $node->{'isLeaf'} ) { + if ($quoteNames) { + printf "\"%s\":%s", $node->{'name'}, $distOut; + } else { printf "%s:%s", $node->{'name'}, $distOut; + } } elsif ( ! 
$noInternal) { + if ($quoteNames) { + printf "\"%s\":%s", $node->{'name'}, $distOut; + } else { printf "%s:%s", $node->{'name'}, $distOut; + } } else { printf ":%s", $distOut; } $printDepth -= 1; } # sub printTree($) ############################################################################## # start a new node element sub newNode($$$$$$) { my ($parent, $right, $left, $name, $distance, $nextLeft) = @_; my %node; $node{'parent'} = $parent; $node{'right'} = $right; $node{'left'} = $left; $node{'name'} = $name; @@ -308,40 +318,42 @@ } ############################################################################## # main starts here ############################################################################## my $argc = scalar(@ARGV); if ($argc < 1) { usage; } GetOptions ("noInternal" => \$noInternal, "defaultDistance=f" => \$defaultDistance, "verbose=i" => \$verbose, "nameTranslate=s" => \$nameTranslate, "lineOutput" => \$lineOutput, + "quoteNames" => \$quoteNames, "allDistances=f" => \$allDistances) or die "Error in command line arguments\n"; $defaultDistance = $allDistances if (length($allDistances)); printf STDERR "# noInternal: %s\n", $noInternal ? "TRUE" : "FALSE"; printf STDERR "# defaultDistance: %f\n", $defaultDistance; printf STDERR "# allDistances: %f\n", $allDistances if (length($allDistances)); printf STDERR "# nameTranslate from: %s\n", $nameTranslate if (length($nameTranslate)); printf STDERR "# lineOutput '%s'\n", $lineOutput ? "TRUE" : "FALSE"; +printf STDERR "# quoteNames '%s'\n", $quoteNames ? "TRUE" : "FALSE"; printf STDERR "# verbose: %d\n", $verbose; if (length($nameTranslate)) { open (FH, "<$nameTranslate") or die "can not read nameTranslate file '$nameTranslate'"; while (my $line = <FH>) { chomp $line; my ($inName, $outName) = split('\t', $line); $translateName{$inName} = $outName; } close (FH); } my $phyFile = shift; my $currentNode = $root; printf STDERR "# reading %s\n", $phyFile if ($verbose > 0);