5839fc5f68fc919fc61fad936929af799c52f577 angie Fri Apr 3 12:08:21 2020 -0700 Add RGB colors to Nextstrain trees as fake branch lengths. Make hapClusterMethod treeFile use branch lengths as RGB colors. refs #25278 diff --git src/hg/utils/otto/nextstrainNcov/nextstrain.py src/hg/utils/otto/nextstrainNcov/nextstrain.py index 6a1dca5..25317bea 100755 --- src/hg/utils/otto/nextstrainNcov/nextstrain.py +++ src/hg/utils/otto/nextstrainNcov/nextstrain.py @@ -31,31 +31,31 @@ clades = {} cladeNodes = {} variantCounts = {} variantAaChanges = {} samples = [] cladeColors = { 'A1a': '73,75,225', 'A2': '75,131,233', 'A2a': '92,173,207', 'A3': '119,199,164', 'A6': '154,212,122', 'A7': '173,189,81', 'B': '233,205,74', 'B1': '255,176,65', 'B2': '255,122,53', 'B4': '249,53,41' } def cladeColorFromName(cladeName): color = cladeColors.get(cladeName); if (not color): - color = 'purple' + color = '0,0,0' return color def subtractStart(coord, start): return coord - start def cladeFromVariants(name, variants, varStr): """Extract bed12 info from an object whose keys are SNV variant names""" clade = {} snvEnds = [] varNames = [] for varName in variants: m = snvRe.match(varName) if (m): snvEnds.append(int(m.group(2))) varNames.append(varName) @@ -322,37 +322,55 @@ clade['thickStart'], clade['thickEnd'], clade['color'], len(clade['varSizes']) + 2, '1,' + ','.join(map(str, clade['varSizes'])) + ',1,', '0,' + ','.join(map(str, clade['varStarts'])) + ',29902,', clade['varNames'], numDateToYmdStr(clade['dateInferred']), numDateToYmdStr(clade['dateConfMin']), numDateToYmdStr(clade['dateConfMax']), clade['countryInferred'], clade['countryConf'], cladeSampleCounts[name], ', '.join(cladeSampleNames[name]) ])) + '\n') outC.close() # Newick-formatted tree of samples for VCF display -def rNextstrainToNewick(node): +def cladeRgbFromName(cladeName): + """Look up the r,g,b string color for clade; convert to int RGB.""" + rgbCommaStr = cladeColorFromName(cladeName) + r, g, b = [ int(x) for x in rgbCommaStr.split(',') ] + rgb = (r << 16) | (g << 8) | b + return rgb + +def rNextstrainToNewick(node, parentColor=None): """Recursively descend ncov.tree and build Newick tree string of samples to file""" kids = node.get('children') if (kids): - treeString = '(' + ','.join([ rNextstrainToNewick(child) for child in kids ]) + ')' + nodeAttrs = node['node_attrs'] + if (nodeAttrs.get('clade_membership')): + cladeName = nodeAttrs['clade_membership']['value'] + color = str(cladeRgbFromName(cladeName)) + elif (parentColor): + color = parentColor + else: + color = '0' + descendants = ','.join([ rNextstrainToNewick(child, color) for child in kids ]) + treeString = '(' + descendants + ')' + ':' + color else: nodeAttrs = node['node_attrs'] gId = nodeAttrs['gisaid_epi_isl']['value'] name = node['name'] date = numDateToMonthDay(nodeAttrs['num_date']['value']) - treeString = '|'.join([ gId, name, date ]) + cladeName = nodeAttrs['clade_membership']['value'] + color = str(cladeRgbFromName(cladeName)) + treeString = '|'.join([ gId, name, date ]) + ':' + color return treeString with open('nextstrain.nh', 'w') as outF: outF.write(rNextstrainToNewick(ncov['tree']) + ';\n') outF.close for cladeName, node in cladeNodes.items(): filename = 'nextstrain' + cladeName + '.nh' with open(filename, 'w') as outF: outF.write(rNextstrainToNewick(node) + ';\n') outF.close