775f8e290daefbb3312fd1d8962344690550c887
angie
  Mon Jun 8 16:20:24 2020 -0700
Extending and updating tree manipulation & SARS-CoV-2 lineage coloring Python scripts and modules that I've been working on for David et al.  sorta refs #25278, #25382

diff --git src/hg/utils/otto/nextstrainNcov/newick.py src/hg/utils/otto/nextstrainNcov/newick.py
index 3ae8281..fde0978 100644
--- src/hg/utils/otto/nextstrainNcov/newick.py
+++ src/hg/utils/otto/nextstrainNcov/newick.py
@@ -53,72 +53,73 @@
         if (not treeString[offset].isdigit()):
             die("Expected number to follow ':' but instead got '" +
                 treeString[offset:offset+100] + "'")
         lengthStart = offset
         while (offset != len(treeString) and
                (treeString[offset].isdigit() or treeString[offset] == '.' or
                 treeString[offset] == 'E' or treeString[offset] == 'e' or
                 treeString[offset] == '-')):
             offset += 1
         lengthStr = treeString[lengthStart:offset]
         offset = skipSpaces(treeString, offset)
         return (lengthStr, offset)
     else:
         return ('', offset)
 
-def parseBranch(treeString, offset):
+def parseBranch(treeString, offset, internalNode):
     """Recursively parse Newick branch (x, y, z)[label][:length] from treeString at offset"""
     if (treeString[offset] != '('):
         die("parseBranch called on treeString that doesn't begin with '(': '" +
             treeString + "'")
     branchStart = offset
-    branch = { 'kids': [],  'label': '', 'length': '' }
+    internalNode += 1
+    branch = { 'kids': [],  'label': '', 'length': '', 'inode': internalNode }
     offset = skipSpaces(treeString, offset + 1)
     while (offset != len(treeString) and treeString[offset] != ')' and treeString[offset] != ';'):
-        (child, offset) = parseString(treeString, offset)
+        (child, offset, internalNode) = parseString(treeString, offset, internalNode)
         branch['kids'].append(child)
         if (treeString[offset] == ','):
             offset = skipSpaces(treeString, offset + 1)
     if (offset == len(treeString)):
         die("Input ended before ')' for '" + treeString[branchStart:branchStart+100] + "'")
     if (treeString[offset] == ')'):
         offset = skipSpaces(treeString, offset + 1)
     else:
         die("Can't find ')' matching '" + treeString[branchStart:branchStart+100] + "', " +
             "instead got '" + treeString[offset:offset+100] + "'")
     (branch['label'], offset) = parseLabel(treeString, offset)
     (branch['length'], offset) = parseLength(treeString, offset)
-    return (branch, offset)
+    return (branch, offset, internalNode)
 
-def parseString(treeString, offset=0):
+def parseString(treeString, offset=0, internalNode=0):
     """Recursively parse Newick tree from treeString"""
     offset = skipSpaces(treeString, offset)
     if (treeString[offset] == '('):
-        return parseBranch(treeString, offset)
+        return parseBranch(treeString, offset, internalNode)
     else:
         (label, offset) = parseLabel(treeString, offset)
         (length, offset) = parseLength(treeString, offset)
         leaf = { 'kids': None, 'label': label, 'length': length }
-        return (leaf, offset)
+        return (leaf, offset, internalNode)
 
 def parseFile(treeFile):
     """Read Newick file, return tree object"""
     with open(treeFile, 'r') as treeF:
         line1 = treeF.readline().strip()
         if (line1 == ''):
             return None
-        (tree, offset) = parseString(line1)
+        (tree, offset, internalNode) = parseString(line1)
         if (offset != len(line1) and line1[offset] != ';'):
             die("Tree terminated without ';' before '" + line1[offset:offset+100] + "'")
         treeF.close()
     return tree
 
 def treeToString(node, pretty=False, indent=0):
     """Return a Newick string encoding node and its descendants, optionally pretty-printing with
     newlines and indentation.  String is not ';'-terminated, caller must do that."""
     labelLen = ''
     if (node['label']):
         labelLen += node['label']
     if (node['length']):
         labelLen += ':' + node['length']
     if (node['kids']):
         string = '('