2c221611ba1fe1da89314c8b9aad0bc9f5998e5c hiram Fri Mar 13 11:28:26 2020 -0700 add bothNames option to allow both names in output when translateName refs #25090 diff --git src/hg/utils/phyloTrees/binaryTree.pl src/hg/utils/phyloTrees/binaryTree.pl index 8f3bf04..ae6a11c 100755 --- src/hg/utils/phyloTrees/binaryTree.pl +++ src/hg/utils/phyloTrees/binaryTree.pl @@ -76,46 +76,48 @@ use warnings; use Getopt::Long; ############################################################################## sub usage() { printf STDERR "usage: binaryTree.pl [options] file.phy options: -noInternal - do not output internal node names -defaultDistance=0.1 - use this distance when not given in input -allDistances=0.1 - use this distance for everything, default use input -lineOutput - output one line per leaf output, indented per depth -quoteNames - add 'quotes' on node names, default not quoted -nameTranslate=<file> - two column file, translate names from input file, first column is name in input file, second column is output name tab separation columns + -bothNames - during nameTranslate, use both names in output: name1/name2 -verbose=N - specify verbose debug printout, 0 nothing, 1 a bit, 2 more, etc reads 'phylip' file format from NCBI taxonomy and outputs binary newick tree format, resolving the polytomys common to NCBI output format. Output is to 'stdout'.\n"; exit 255; } ############################################################################## # globals and options my $noInternal = 0; # option -noInternal - do not output internal node names my $defaultDistance = "0.1"; # to set distances when not given in input my $verbose = 0; # verbose debug level, integer my $allDistances = ""; # to set all distances to this value, default use input my $lineOutput = 0; # one line per leaf output format my $quoteNames = 0; # add "quotes" on node names +my $bothNames = 0; # during nameTranslate, use both names in output my $nameTranslate = ""; # two column tab separated: inputName<tab>outputName my %translateName; # key is input name, value is output name # establish empty root branch parent my %root; my $root = \%root; # pointer handle to root node $root->{'parent'} = undef; $root->{'right'} = undef; $root->{'left'} = undef; $root->{'name'} = 'root'; $root->{'distance'} = $defaultDistance; $root->{'nextLeft'} = 0; # starts out false # the following two are only on this root node for global bookeeping $root->{'branchCount'} = 0; $root->{'leafCount'} = 0; @@ -319,50 +321,56 @@ ############################################################################## # main starts here ############################################################################## my $argc = scalar(@ARGV); if ($argc < 1) { usage; } GetOptions ("noInternal" => \$noInternal, "defaultDistance=f" => \$defaultDistance, "verbose=i" => \$verbose, "nameTranslate=s" => \$nameTranslate, "lineOutput" => \$lineOutput, "quoteNames" => \$quoteNames, + "bothNames" => \$bothNames, "allDistances=f" => \$allDistances) or die "Error in command line arguments\n"; $defaultDistance = $allDistances if (length($allDistances)); printf STDERR "# noInternal: %s\n", $noInternal ? "TRUE" : "FALSE"; printf STDERR "# defaultDistance: %f\n", $defaultDistance; printf STDERR "# allDistances: %f\n", $allDistances if (length($allDistances)); printf STDERR "# nameTranslate from: %s\n", $nameTranslate if (length($nameTranslate)); printf STDERR "# lineOutput '%s'\n", $lineOutput ? "TRUE" : "FALSE"; printf STDERR "# quoteNames '%s'\n", $quoteNames ? "TRUE" : "FALSE"; +printf STDERR "# bothNames '%s'\n", $bothNames ? "TRUE" : "FALSE"; printf STDERR "# verbose: %d\n", $verbose; if (length($nameTranslate)) { open (FH, "<$nameTranslate") or die "can not read nameTranslate file '$nameTranslate'"; while (my $line = <FH>) { chomp $line; my ($inName, $outName) = split('\t', $line); + if ($bothNames) { + $translateName{$inName} = "$inName/$outName"; + } else { $translateName{$inName} = $outName; } + } close (FH); } my $phyFile = shift; my $currentNode = $root; printf STDERR "# reading %s\n", $phyFile if ($verbose > 0); ############################################################################# # processing the input file ############################################################################# open (FH, "<$phyFile") or die "can not read $phyFile"; while (my $line = <FH>) { chomp $line; $line =~ s/^\s+//; # eliminate leading space, just in case garbage $line =~ s/\s+$//; # eliminate trailing space, just in case garbage