ed7e20316990e2e6e52e0baf73087222eb571b67
angie
Fri Jan 20 12:58:02 2023 -0800
Mask reversion G26530A in BA.1.1 because it made a mess of the branch leading to BA.1.1.1.

diff --git src/hg/utils/otto/sarscov2phylo/maskDelta.sh src/hg/utils/otto/sarscov2phylo/maskDelta.sh
index 056d44b..0a66b35 100755
--- src/hg/utils/otto/sarscov2phylo/maskDelta.sh
+++ src/hg/utils/otto/sarscov2phylo/maskDelta.sh
@@ -195,20 +195,36 @@ done >> $maskFile
 
 # BA.5 reversions causing big yuck branches
 for backMut in G670T T2790C T3037C A4184G T4321C G9424A T9534C T10198C A12160G T15714C T17410C \
     G18163A T19955C G20055A T21618C G22200T A22578G T22674C C22679T T22686C G22688A A22775G T22813G \
     G22882T G22917T A22992G A22995C C23013A G23018T G23055A T23063A C23075T G23403A T23525C G23599T \
     A23604C T25000C A26529G G26577C A26709G T27807C T27889C T28271A C29510A ; do
     echo -e "$backMut\t$BA5Node"
 done >> $maskFile
 
 # The path to XAV should have G12160A and C27889T, but those cause too much trouble in BA.5
 # so it will have to be a little incorrect.
 # Likewise, XAN path should have G12160A but oh well.
 
 set -x
 
+# BA.1.1 has a ton of false reversions of A26530G that cause many flip-flops on the branch
+# leading to BA.1.1.1, as well as a bunch of sequences with noise at 76/77.  As of Jan. 2023
+# I'm not worried about messing up potential recombinants and want a cleaner tree for assignment.
+# Although it does add a false A26530G to the path for XD, oh well.
+BA11Node=$(grep England/ALDP-2BEB0A0/2021 $samplePaths \
+    | awk '{print $NF;}' | sed -re 's/:.*//;')
+set +x
+for backMut in G26530A ; do
+    echo -e "$backMut\t$BA11Node"
+done >> $maskFile
+for ((i=76;  $i <= 77;  i++)); do
+    echo -e "N${i}N\t$BA11Node"
+done >> $maskFile
+
+set -x
+
 time $matUtils mask -i $treeInPb \
     -m $maskFile \
     -o $treeOutPb
 
 rm $samplePaths