f070f343168fcb94a21b832d1cb20104b0670186 angie Fri Jun 30 15:08:56 2023 -0700 There really should never be a clade-defining mutation on the first or last base of the genome (low/no coverage in most genomes), but this morning it happened, so at least prevent illegal overlapping blocks in nextstrainClade bed output. diff --git src/hg/utils/otto/nextstrainNcov/nextstrain.py src/hg/utils/otto/nextstrainNcov/nextstrain.py index de8b47d..f941e52 100755 --- src/hg/utils/otto/nextstrainNcov/nextstrain.py +++ src/hg/utils/otto/nextstrainNcov/nextstrain.py @@ -514,37 +514,43 @@ for name, clade in clades.items(): if (not clade.get('thickStart')): # "Clade" 19A encompasses the entire tree (minus the parts assigned to # other "clades"). It has no identifying variants, and (as of June 7) # no dates assigned. clade['thickStart'] = clade['thickEnd'] = 0 clade['varStarts'] = clade['varSizes'] = [] clade['varNames'] = '' clade['dateInferred'] = clade['dateConfMin'] = clade['dateConfMax'] = 0 countryConf = clade.get('countryConf') if (not countryConf): countryConf = '' countryInferred = clade.get('countryInferred') if (not countryInferred): countryInferred = '' + # Add placeholder blocks at first and last base of genome, but don't duplicate + varStarts = clade['varStarts'] + if len(varStarts) == 0 or varStarts[0] != 0: + varStarts = [0] + varStarts + if varStarts[-1] != 29902: + varStarts = varStarts + [29902] outC.write('\t'.join(map(str, [ chrom, 0, 29903, name, 0, '.', clade['thickStart'], clade['thickEnd'], cladeColorFromName(name, cladeColors), - len(clade['varSizes']) + 2, - ','.join(map(str, ([1] + clade['varSizes']) + [1])), - ','.join(map(str, ([0] + clade['varStarts']) + [29902])), + len(varStarts), + ','.join(map(str, [1 for x in varStarts])), + ','.join(map(str, varStarts)), clade['varNames'], numDateToYmdStr(clade['dateInferred']), numDateToYmdStr(clade['dateConfMin']), numDateToYmdStr(clade['dateConfMax']), countryInferred, countryConf, cladeSampleCounts[name], ', '.join(cladeSampleNames[name]) ])) + '\n') newCladeTops = [ newClades[cladeName]['topNode'] for cladeName in newClades ] vcfForClades(newClades, newCladeTops) bedForClades('nextstrainClade.bed', newClades, newCladeColors) # Newick-formatted tree of samples for VCF display def cladeRgbFromName(cladeName, cladeColors):