5793028c4b37b6cb8300580e70d7b078abc516b2 angie Wed Oct 4 17:46:58 2023 -0700 Mask deletion 21991-21993 in XBB. diff --git src/hg/utils/otto/sarscov2phylo/branchSpecificMask.yml src/hg/utils/otto/sarscov2phylo/branchSpecificMask.yml index cf04d29..0e3e5d7 100644 --- src/hg/utils/otto/sarscov2phylo/branchSpecificMask.yml +++ src/hg/utils/otto/sarscov2phylo/branchSpecificMask.yml @@ -129,31 +129,34 @@ representative: England/PHEP-YYGYEQS/2022 sites: [ 114, 117, 244, 256, 258, 261, 264 ] BQ.1.1: # Inherits from BQ.1 representative: England/DHSC-CYF1KSU/2022 reversions: [ C22893A, A22942T ] XBB: # Inherits from BA.2 # Lots of false reversions; I would also mask G405A except that would be wrong for XBN. representative: BGD/icddrb_TND_06_1053/2022 # Homopolymer run 21999-22005 causes big trouble for Ion Torrent & nanopore, leading to false # substitutions in 21994, 21995 and 21998 as discussed in pango-designation issues 1882, 1503, # 1999, 1918. 21998 in particular crops up a lot, all over XBB, interfering with many sublins. - sites: [ 21994, 21995, 21998 ] + # Also, XBB.1.9, XBB.1.16 and XBB.2.3 sequences all seem to have del21991-21993 though very few + # XBB.1.5 polytomy sequences seem to have it. Mask anyway, causing trouble. + ranges: [ [ 21991, 21995 ] ] + sites: [ 21998 ] reversions: [ T2790C, T3037C, A4184G, T4321C, T9344C, G9424A, T10198C, C17859T, G19326A, T21618C, C21810T, A22000C, G22109C, C22577G, A22578G, C22599G, A22664C, C22679T, T22686C, G22688A, T22813G, C22895G, C22896T, A22898G, G22942T, A22992G, A22995C, C23013A, C23019T, C23031T, T24424A, A24469T, T25000C, T26060C, G26577C, A26709G, T26858C, T27807C ] XBB.1: # Inherits from XBB # Here I'm going a bit past the technical start of XBB.1, to include G27915T to be more sure # that it's XBB.1 before we mask out the defining mutation of XBB.1 representative: England/LSPA-32578111/2022 reversions: [ T22317G ] XBB.1.5: # Inherits from XBB.1 # Don't believe reversions on 27915 once we're as far as XBB.1.*