5839fc5f68fc919fc61fad936929af799c52f577
angie
  Fri Apr 3 12:08:21 2020 -0700
Add RGB colors to Nextstrain trees as fake branch lengths.  Make hapClusterMethod treeFile use branch lengths as RGB colors.  refs #25278

diff --git src/hg/utils/otto/nextstrainNcov/nextstrain.py src/hg/utils/otto/nextstrainNcov/nextstrain.py
index 6a1dca5..25317bea 100755
--- src/hg/utils/otto/nextstrainNcov/nextstrain.py
+++ src/hg/utils/otto/nextstrainNcov/nextstrain.py
@@ -31,31 +31,31 @@
 
 clades = {}
 cladeNodes = {}
 variantCounts = {}
 variantAaChanges = {}
 samples = []
 
 cladeColors = { 'A1a': '73,75,225', 'A2': '75,131,233', 'A2a': '92,173,207',
                 'A3': '119,199,164', 'A6': '154,212,122', 'A7': '173,189,81',
                 'B': '233,205,74', 'B1': '255,176,65', 'B2': '255,122,53',
                 'B4': '249,53,41' }
 
 def cladeColorFromName(cladeName):
     color = cladeColors.get(cladeName);
     if (not color):
-        color = 'purple'
+        color = '0,0,0'
     return color
 
 def subtractStart(coord, start):
     return coord - start
 
 def cladeFromVariants(name, variants, varStr):
     """Extract bed12 info from an object whose keys are SNV variant names"""
     clade = {}
     snvEnds = []
     varNames = []
     for varName in variants:
         m = snvRe.match(varName)
         if (m):
             snvEnds.append(int(m.group(2)))
             varNames.append(varName)
@@ -322,37 +322,55 @@
                                        clade['thickStart'], clade['thickEnd'], clade['color'],
                                        len(clade['varSizes']) + 2,
                                        '1,' + ','.join(map(str, clade['varSizes'])) + ',1,',
                                        '0,' + ','.join(map(str, clade['varStarts'])) + ',29902,',
                                        clade['varNames'],
                                        numDateToYmdStr(clade['dateInferred']),
                                        numDateToYmdStr(clade['dateConfMin']),
                                        numDateToYmdStr(clade['dateConfMax']),
                                        clade['countryInferred'],
                                        clade['countryConf'],
                                        cladeSampleCounts[name],
                                        ', '.join(cladeSampleNames[name]) ])) + '\n')
     outC.close()
 
 # Newick-formatted tree of samples for VCF display
-def rNextstrainToNewick(node):
+def cladeRgbFromName(cladeName):
+    """Look up the r,g,b string color for clade; convert to int RGB."""
+    rgbCommaStr = cladeColorFromName(cladeName)
+    r, g, b = [ int(x) for x in rgbCommaStr.split(',') ]
+    rgb = (r << 16) | (g << 8) | b
+    return rgb
+
+def rNextstrainToNewick(node, parentColor=None):
     """Recursively descend ncov.tree and build Newick tree string of samples to file"""
     kids = node.get('children')
     if (kids):
-        treeString = '(' + ','.join([ rNextstrainToNewick(child) for child in kids ]) + ')'
+        nodeAttrs = node['node_attrs']
+        if (nodeAttrs.get('clade_membership')):
+            cladeName = nodeAttrs['clade_membership']['value']
+            color = str(cladeRgbFromName(cladeName))
+        elif (parentColor):
+            color = parentColor
+        else:
+            color = '0'
+        descendants = ','.join([ rNextstrainToNewick(child, color) for child in kids ])
+        treeString = '(' + descendants + ')' + ':' + color
     else:
         nodeAttrs = node['node_attrs']
         gId = nodeAttrs['gisaid_epi_isl']['value']
         name = node['name']
         date = numDateToMonthDay(nodeAttrs['num_date']['value'])
-        treeString = '|'.join([ gId, name, date ])
+        cladeName = nodeAttrs['clade_membership']['value']
+        color = str(cladeRgbFromName(cladeName))
+        treeString = '|'.join([ gId, name, date ]) + ':' + color
     return treeString
 
 with open('nextstrain.nh', 'w') as outF:
     outF.write(rNextstrainToNewick(ncov['tree']) + ';\n')
     outF.close
 
 for cladeName, node in cladeNodes.items():
     filename = 'nextstrain' + cladeName + '.nh'
     with open(filename, 'w') as outF:
         outF.write(rNextstrainToNewick(node) + ';\n')
         outF.close