1715fcf5e6c59c033d9f6dfb341e62b1f1b7c629
angie
  Tue Dec 7 18:38:36 2021 -0800
Mask 21846 (S:95) in Delta branch - Cornelius Roemer and Chris Ruis pointed out phylogeny problems it caused.

diff --git src/hg/utils/otto/sarscov2phylo/maskDelta.sh src/hg/utils/otto/sarscov2phylo/maskDelta.sh
index 6d767e5..30cba66 100755
--- src/hg/utils/otto/sarscov2phylo/maskDelta.sh
+++ src/hg/utils/otto/sarscov2phylo/maskDelta.sh
@@ -1,48 +1,52 @@
 #!/bin/bash
 # Shell options for the whole script:
 #   -b  report background job termination immediately
 #   -e  exit as soon as a command fails
 #   -E  let functions and subshells inherit the ERR trap
 #   -u  treat expansion of an unset variable as an error
 #   -x  trace each command to stderr before running it
 #   -o pipefail  a pipeline fails if any of its stages fails
 set -beEu -x -o pipefail
 
 #	Do not modify this script, modify the source tree copy:
 #	kent/src/hg/utils/otto/sarscov2phylo/maskDelta.sh
 
 # Print a one-line usage summary (script name plus the two expected arguments) to stdout.
 usage() {
     printf '%s\n' "usage: $0 treeIn.pb treeOut.pb"
 }
 
 # Exactly two positional arguments are required; otherwise show usage and exit with status 1.
 if [[ $# -ne 2 ]]; then
     usage
     exit 1
 fi
 
 # Positional arguments: input tree path and output tree path.
 treeInPb="${1}"
 treeOutPb="${2}"
 
 # Not referenced anywhere else in this script.
 ottoDir='/hive/data/outside/otto/sarscov2phylo'
 
 # Locations of the usher build and the tools taken from it
 # (matOptimize is defined here but not invoked in this script).
 usherDir=~angie/github/usher
 matUtils="${usherDir}/build/matUtils"
 matOptimize="${usherDir}/build/matOptimize"
 
 # I wish there were a less hacky method to identify the node for Delta, but since mutation
 # paths can change as new samples are added, this is the most stable method I have at the moment:
 # make sample-paths, grep for basal sample IND/GBRC714b/2021 (USA/WI-CDC-FG-038252/2021 would also
 # work in case that one goes away for any reason), use the final node in path.
 samplePaths=$treeInPb.sample-paths
 "$matUtils" extract -i "$treeInPb" -S "$samplePaths"
 # Take the last whitespace-separated field of the matching line and strip everything from the
 # first ':' onward.  With -e and pipefail in effect, a grep that finds nothing aborts the script.
 deltaNode=$(grep -- IND/GBRC714b/2021 "$samplePaths" | awk '{print $NF;}' | sed -re 's/:.*//;')
 # A match that yields an empty node name would produce a mask file with no node column;
 # stop here with a clear message instead.
 if [[ -z "$deltaNode" ]]; then
     echo "$0: could not determine Delta node from $samplePaths" >&2
     exit 1
 fi
 
 # Delta has deletions at S:157-158 (22029-22034), ORF8:119-120 (28248-28253) and 28271.
 # Mask those locations and some adjacent bases where we get a ton of spurious "mutations".
 # The mask file is written to the current directory, named after the input tree with its
 # .pb suffix removed; each line is "N<position>N<TAB><node>".
 maskFile=$(basename "$treeInPb" .pb).maskForDelta.tsv
 # Turn command tracing off while the loops run, then back on afterwards.
 set +x
 # printf (rather than echo -e) keeps any backslashes in the node name literal; the single
 # group redirect creates/truncates the file once and collects all lines.
 {
     for ((i = 22027; i <= 22034; i++)); do
         printf 'N%dN\t%s\n' "$i" "$deltaNode"
     done
     for ((i = 28246; i <= 28253; i++)); do
         printf 'N%dN\t%s\n' "$i" "$deltaNode"
     done
     printf 'N28271N\t%s\n' "$deltaNode"
 } > "$maskFile"
 set -x
 
+# S:95 (21846) is also very unreliably detected in Delta.  Mask it off to avoid tree trouble,
+# like split AY.100.  Appended to the existing mask file in the same "N<position>N<TAB><node>"
+# format; printf keeps any backslashes in the node name literal.
+printf 'N21846N\t%s\n' "$deltaNode" >> "$maskFile"
+
 # Run matUtils mask with the input tree (-i), the mask file (-m) and the output path (-o);
 # `time` reports how long the command took.  Arguments are quoted so paths containing
 # whitespace or glob characters are passed through intact.
 time "$matUtils" mask -i "$treeInPb" \
     -m "$maskFile" \
     -o "$treeOutPb"