2e6ddaec223b5b760cbba134eee52d14a327fda3
angie
  Fri May 29 15:22:45 2020 -0700
Adding tree manipulation python scripts and modules that I've been working on for David et al.  sorta refs #25278, #25382

diff --git src/hg/utils/otto/nextstrainNcov/newick.py src/hg/utils/otto/nextstrainNcov/newick.py
new file mode 100644
index 0000000..3ae8281
--- /dev/null
+++ src/hg/utils/otto/nextstrainNcov/newick.py
@@ -0,0 +1,148 @@
+# Parse Newick-formatted file into tree of { 'kids': [], 'label': '', 'length': '' }
+
+from utils import die
+
+def skipSpaces(treeString, offset):
+    while (offset != len(treeString) and treeString[offset].isspace()):
+        offset += 1
+    return offset
+
+def parseQuoted(treeString, offset):
+    """Read in a quoted, possibly \-escaped string in treeString at offset"""
+    label = ''
+    quoteChar = treeString[offset]
+    offset += 1
+    labelStart = offset
+    while (offset != len(treeString) and treeString[offset] != quoteChar):
+        if (treeString[offset] == '\\'):
+            offset += 1
+        offset += 1
+    if (treeString[offset] != quoteChar):
+        die("Missing end-" + quoteChar + " after '" + treeString + "'")
+    else:
+        label = treeString[labelStart:offset]
+        offset = skipSpaces(treeString, offset + 1)
+    return (label, offset)
+
+def terminatesLabel(treeString, offset):
+    """Return True if treeString+offset is empty or starts w/char that would terminate a label"""
+    return (offset == len(treeString) or treeString[offset] == ',' or treeString[offset] == ')' or
+            treeString[offset] == ';' or treeString[offset] == ':')
+
+def parseLabel(treeString, offset):
+    """Read in a possibly quoted, possibly \-escaped node label terminated by [,):;]"""
+    if (offset == len(treeString)):
+        label = ''
+    elif (treeString[offset] == "'" or treeString[offset] == '"'):
+        (label, offset) = parseQuoted(treeString, offset)
+    else:
+        labelStart = offset
+        while (not terminatesLabel(treeString, offset)):
+            if (treeString[offset] == '\\'):
+                offset += 1
+            offset += 1
+        label = treeString[labelStart:offset]
+        offset = skipSpaces(treeString, offset)
+    return(label, offset)
+
+def parseLength(treeString, offset):
+    """If treeString[offset] is ':', then parse the number that follows; otherwise return 0.0."""
+    if (offset != len(treeString) and treeString[offset] == ':'):
+        # Branch length
+        offset = skipSpaces(treeString, offset + 1)
+        if (not treeString[offset].isdigit()):
+            die("Expected number to follow ':' but instead got '" +
+                treeString[offset:offset+100] + "'")
+        lengthStart = offset
+        while (offset != len(treeString) and
+               (treeString[offset].isdigit() or treeString[offset] == '.' or
+                treeString[offset] == 'E' or treeString[offset] == 'e' or
+                treeString[offset] == '-')):
+            offset += 1
+        lengthStr = treeString[lengthStart:offset]
+        offset = skipSpaces(treeString, offset)
+        return (lengthStr, offset)
+    else:
+        return ('', offset)
+
+def parseBranch(treeString, offset):
+    """Recursively parse Newick branch (x, y, z)[label][:length] from treeString at offset"""
+    if (treeString[offset] != '('):
+        die("parseBranch called on treeString that doesn't begin with '(': '" +
+            treeString + "'")
+    branchStart = offset
+    branch = { 'kids': [],  'label': '', 'length': '' }
+    offset = skipSpaces(treeString, offset + 1)
+    while (offset != len(treeString) and treeString[offset] != ')' and treeString[offset] != ';'):
+        (child, offset) = parseString(treeString, offset)
+        branch['kids'].append(child)
+        if (treeString[offset] == ','):
+            offset = skipSpaces(treeString, offset + 1)
+    if (offset == len(treeString)):
+        die("Input ended before ')' for '" + treeString[branchStart:branchStart+100] + "'")
+    if (treeString[offset] == ')'):
+        offset = skipSpaces(treeString, offset + 1)
+    else:
+        die("Can't find ')' matching '" + treeString[branchStart:branchStart+100] + "', " +
+            "instead got '" + treeString[offset:offset+100] + "'")
+    (branch['label'], offset) = parseLabel(treeString, offset)
+    (branch['length'], offset) = parseLength(treeString, offset)
+    return (branch, offset)
+
+def parseString(treeString, offset=0):
+    """Recursively parse Newick tree from treeString"""
+    offset = skipSpaces(treeString, offset)
+    if (treeString[offset] == '('):
+        return parseBranch(treeString, offset)
+    else:
+        (label, offset) = parseLabel(treeString, offset)
+        (length, offset) = parseLength(treeString, offset)
+        leaf = { 'kids': None, 'label': label, 'length': length }
+        return (leaf, offset)
+
+def parseFile(treeFile):
+    """Read Newick file, return tree object"""
+    with open(treeFile, 'r') as treeF:
+        line1 = treeF.readline().strip()
+        if (line1 == ''):
+            return None
+        (tree, offset) = parseString(line1)
+        if (offset != len(line1) and line1[offset] != ';'):
+            die("Tree terminated without ';' before '" + line1[offset:offset+100] + "'")
+        treeF.close()
+    return tree
+
+def treeToString(node, pretty=False, indent=0):
+    """Return a Newick string encoding node and its descendants, optionally pretty-printing with
+    newlines and indentation.  String is not ';'-terminated, caller must do that."""
+    labelLen = ''
+    if (node['label']):
+        labelLen += node['label']
+    if (node['length']):
+        labelLen += ':' + node['length']
+    if (node['kids']):
+        string = '('
+        kidIndent = indent + 1
+        kidStrings = [ treeToString(kid, pretty, kidIndent) for kid in node['kids'] ]
+        sep = ','
+        if (pretty):
+            sep = ',\n' + ' ' * kidIndent
+        string += sep.join(kidStrings)
+        string += ')'
+        string += labelLen
+    else:
+        string = labelLen
+    return string;
+
+def printTree(tree, pretty=False, indent=0):
+    """Print out Newick text encoding tree, optionally pretty-printing with
+    newlines and indentation."""
+    print(treeToString(tree, pretty, indent) + ';')
+
+def leafNames(node):
+    """Return a list of labels of all leaf descendants of node"""
+    if (node['kids']):
+        return [ leaf for kid in node['kids'] for leaf in leafNames(kid) ]
+    else:
+        return [ node['label'] ]
+