8cab21c8b8a5e84a3491b796bd6590c5d6905325 angie Thu Dec 4 15:45:54 2025 -0800 Added a couple samples to spare for reversion-riddled LF.7.6.5 diff --git src/hg/utils/otto/sarscov2phylo/updateLineageTreePb.sh src/hg/utils/otto/sarscov2phylo/updateLineageTreePb.sh index 9459bda28b0..9128b4b4a25 100755 --- src/hg/utils/otto/sarscov2phylo/updateLineageTreePb.sh +++ src/hg/utils/otto/sarscov2phylo/updateLineageTreePb.sh @@ -19,58 +19,61 @@ startingTree=$2 else startingTree=gisaidAndPublic.$buildDate.masked.pb.gz fi ottoDir=/hive/data/outside/otto/sarscov2phylo usherDir=~angie/github/usher matUtils=$usherDir/build/matUtils today=$(date +%F) cd $ottoDir/$buildDate # Remove sequences that have two or more reversions relative to their assigned clade/lineage. $matUtils summary -i $startingTree --node-stats node-stats -#*** Until BA.2.3.22 gets more samples that don't have bogus reversion on 25000 and 26577, -#*** exempt some samples; also, most of JP.1 has rev on 27383,27384: +# Until BA.2.3.22 gets more samples that don't have bogus reversion on 25000 and 26577, +# exempt some samples; also, most of JP.1 has rev on 27383,27384 and 2 of LF.7.6.5's 4 +# samples have revs. cat > pruneRevsExemptions < 1 {print $1;}' node-stats \ | grep -vFwf pruneRevsExemptions \ > pruneRevs $matUtils extract -i $startingTree \ -p -s pruneRevs -O -o gisaidAndPublic.$buildDate.masked.pruneRevs.pb.gz # Get node ID for root of lineage A, used as reference/root by Pangolin: $matUtils extract -i gisaidAndPublic.$buildDate.masked.pruneRevs.pb.gz -C clade-paths.prunedRevs lineageARoot=$(grep ^A$'\t' clade-paths.prunedRevs | cut -f 2) # Reroot protobuf to lineage A and restrict to low mutation density (highly supported nodes): $matUtils extract -i gisaidAndPublic.$buildDate.masked.pruneRevs.pb.gz \ --reroot $lineageARoot \ --max-mutation-density 2 \