f070f343168fcb94a21b832d1cb20104b0670186
angie
  Fri Jun 30 15:08:56 2023 -0700
There really should never be a clade-defining mutation on the first or last base of the genome (low/no coverage in most genomes), but this morning it happened, so at least prevent illegal overlapping blocks in nextstrainClade bed output.

diff --git src/hg/utils/otto/nextstrainNcov/nextstrain.py src/hg/utils/otto/nextstrainNcov/nextstrain.py
index de8b47d..f941e52 100755
--- src/hg/utils/otto/nextstrainNcov/nextstrain.py
+++ src/hg/utils/otto/nextstrainNcov/nextstrain.py
@@ -514,37 +514,43 @@
         for name, clade in clades.items():
             if (not clade.get('thickStart')):
                 # "Clade" 19A encompasses the entire tree (minus the parts assigned to
                 # other "clades").  It has no identifying variants, and (as of June 7)
                 # no dates assigned.
                 clade['thickStart'] = clade['thickEnd'] = 0
                 clade['varStarts'] = clade['varSizes'] = []
                 clade['varNames'] = ''
                 clade['dateInferred'] = clade['dateConfMin'] = clade['dateConfMax'] = 0
             countryConf = clade.get('countryConf')
             if (not countryConf):
                 countryConf = ''
             countryInferred = clade.get('countryInferred')
             if (not countryInferred):
                 countryInferred = ''
+            # Add 1-base placeholder blocks at genome start/end unless a variant block is already there
+            varStarts = clade['varStarts']
+            if len(varStarts) == 0 or varStarts[0] != 0:
+                varStarts = [0] + varStarts
+            if varStarts[-1] != 29902:
+                varStarts = varStarts + [29902]
             outC.write('\t'.join(map(str,
                                      [ chrom, 0, 29903, name, 0, '.',
                                        clade['thickStart'], clade['thickEnd'],
                                        cladeColorFromName(name, cladeColors),
-                                       len(clade['varSizes']) + 2,
-                                       ','.join(map(str, ([1] + clade['varSizes']) + [1])),
-                                       ','.join(map(str, ([0] + clade['varStarts']) + [29902])),
+                                       len(varStarts),
+                                       ','.join(map(str, [1 for x in varStarts])),
+                                       ','.join(map(str, varStarts)),
                                        clade['varNames'],
                                        numDateToYmdStr(clade['dateInferred']),
                                        numDateToYmdStr(clade['dateConfMin']),
                                        numDateToYmdStr(clade['dateConfMax']),
                                        countryInferred,
                                        countryConf,
                                        cladeSampleCounts[name],
                                        ', '.join(cladeSampleNames[name]) ])) + '\n')
 
 newCladeTops = [ newClades[cladeName]['topNode'] for cladeName in newClades ]
 vcfForClades(newClades, newCladeTops)
 bedForClades('nextstrainClade.bed', newClades, newCladeColors)
 
 # Newick-formatted tree of samples for VCF display
 def cladeRgbFromName(cladeName, cladeColors):