c3ac1c095777767dff7ad31b94d4e71318a6db67
angie
  Tue Feb 22 10:55:09 2022 -0800
Use new scripts findDropoutContam.pl and findRefBackfill.pl to identify problematic sequences using nextclade annotations.  Exclude those sequences (with an exception for reported recombinants, via new file includeRecombinants.tsv) from the tree.

diff --git src/hg/utils/otto/sarscov2phylo/includeRecombinants.tsv src/hg/utils/otto/sarscov2phylo/includeRecombinants.tsv
new file mode 100644
index 0000000..be2853a
--- /dev/null
+++ src/hg/utils/otto/sarscov2phylo/includeRecombinants.tsv
@@ -0,0 +1,51 @@
+ID	pango-designation issue number or comment
+England/ALDP-2F86D33/2021	422
+England/QEUH-312A66E/2022	422
+England/ALDP-31AE19E/2022	422
+England/ALDP-325CFB2/2022	422
+OV626513.1	422
+OV628319.1	422
+OV720067.1	422
+OV721520.1	422
+EPI_ISL_8166781	422
+EPI_ISL_8610834	422
+EPI_ISL_8865709	422
+EPI_ISL_8898412	422
+OM392955.1	439
+OM393452.1	439
+OM372355.1	439
+OM344043.1	439
+OM344166.1	439
+OM344238.1	439
+OM272834.1	439
+EPI_ISL_8720194	439
+EPI_ISL_8981459	439
+EPI_ISL_8981712	439
+EPI_ISL_8981824	439
+EPI_ISL_9088187	439
+EPI_ISL_9147438	439
+EPI_ISL_9147935	439
+England/PLYM-32FEF76/2022	441
+England/PLYM-332E079/2022	441
+England/PLYM-332E510/2022	441
+England/PLYM-336A651/2022	441
+England/PLYM-33BB5CD/2022	441
+OV757286.1	441
+OV758622.1	441
+OV768982.1	441
+OV781287.1	441
+EPI_ISL_9125312	441
+EPI_ISL_9175215	441
+EPI_ISL_9026013	441
+EPI_ISL_9062592	441
+EPI_ISL_9296739	441
+EPI_ISL_9959921	444
+EPI_ISL_9879437	444
+EPI_ISL_9879436	444
+EPI_ISL_9863764	444
+EPI_ISL_9857381	444
+EPI_ISL_9791275	444
+EPI_ISL_9449070	444
+EPI_ISL_9166910	444
+EPI_ISL_9518370	444
+EPI_ISL_9791300	444