009620a961ec07ce597829ebac077d1920fae8f1
angie
  Mon Oct 10 15:47:58 2022 -0700
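Loosen matUtils extract filtering parameters (--max-branch-length 50 -> 60, --max-path-length 150 -> 175, in both pruning passes) so we don't lop off XAW.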

diff --git src/hg/utils/otto/sarscov2phylo/updateCombinedTree.sh src/hg/utils/otto/sarscov2phylo/updateCombinedTree.sh
index aba7f70..2a25eb1 100755
--- src/hg/utils/otto/sarscov2phylo/updateCombinedTree.sh
+++ src/hg/utils/otto/sarscov2phylo/updateCombinedTree.sh
@@ -45,45 +45,45 @@
 if [ ! -s gisaidAndPublic.$today.masked.pb ]; then
     # $scriptDir/usherClusterRun.sh $today
     # Instead of the cluster, use Cheng's blazingly fast new usher-sampled:
     time $usher \
         -T 64 -A -e 5 \
         -i prevRenamed.pb \
         -v new.masked.vcf.gz \
         -o merged.pb \
         --optimization_radius 0 --batch_size_per_process 10 \
         > usher.addNew.log 2>usher-sampled.stderr
     # Branch-specific masking
     time $scriptDir/maskDelta.sh merged.pb merged.deltaMasked.pb
     # Prune samples with too many private mutations and internal branches that are too long.
     $matUtils extract -i merged.deltaMasked.pb \
         --max-parsimony 20 \
-        --max-branch-length 50 \
-        --max-path-length 150 \
+        --max-branch-length 60 \
+        --max-path-length 175 \
         -O -o merged.deltaMasked.filtered.pb
     # Improved matOptimize from branch
     time $matOptimize \
         -T 80 -r 8 -M 2 -S move_log.filtered \
         -i merged.deltaMasked.filtered.pb \
         -o gisaidAndPublic.$today.masked.preTrim.pb \
         >& matOptimize.filtered.log
 
     # Again prune samples with too many private mutations and internal branches that are too long.
     $matUtils extract -i gisaidAndPublic.$today.masked.preTrim.pb \
         --max-parsimony 20 \
-        --max-branch-length 50 \
-        --max-path-length 150 \
+        --max-branch-length 60 \
+        --max-path-length 175 \
         -O -o gisaidAndPublic.$today.masked.pb
 fi
 
 # Exclude sequences with a very high number of EPPs from future runs
 grep ^Current usher.addNew.log \
 | awk '$16 >= 10 {print $8;}' \
 | awk -F\| '{ if ($3 == "") { print $1; } else { print $2; } }' \
     > tooManyEpps.ids
 cat tooManyEpps.ids >> ../tooManyEpps.ids
 
 $matUtils extract -i gisaidAndPublic.$today.masked.pb -u samples.$today
 
 $scriptDir/combineMetadata.sh $prevDate $today
 
 # version/description files