009620a961ec07ce597829ebac077d1920fae8f1
angie
Mon Oct 10 15:47:58 2022 -0700
Loosen filtering parameters so we don't lop off XAW.
diff --git src/hg/utils/otto/sarscov2phylo/updateCombinedTree.sh src/hg/utils/otto/sarscov2phylo/updateCombinedTree.sh
index aba7f70..2a25eb1 100755
--- src/hg/utils/otto/sarscov2phylo/updateCombinedTree.sh
+++ src/hg/utils/otto/sarscov2phylo/updateCombinedTree.sh
@@ -45,45 +45,45 @@ if [ ! -s gisaidAndPublic.$today.masked.pb ]; then
     # $scriptDir/usherClusterRun.sh $today
     # Instead of the cluster, use Cheng's blazingly fast new usher-sampled:
     time $usher \
         -T 64 -A -e 5 \
         -i prevRenamed.pb \
         -v new.masked.vcf.gz \
         -o merged.pb \
         --optimization_radius 0 --batch_size_per_process 10 \
         > usher.addNew.log 2>usher-sampled.stderr
     # Branch-specific masking
     time $scriptDir/maskDelta.sh merged.pb merged.deltaMasked.pb
     # Prune samples with too many private mutations and internal branches that are too long.
     $matUtils extract -i merged.deltaMasked.pb \
         --max-parsimony 20 \
-        --max-branch-length 50 \
-        --max-path-length 150 \
+        --max-branch-length 60 \
+        --max-path-length 175 \
         -O -o merged.deltaMasked.filtered.pb
     # Improved matOptimize from branch
     time $matOptimize \
         -T 80 -r 8 -M 2 -S move_log.filtered \
         -i merged.deltaMasked.filtered.pb \
         -o gisaidAndPublic.$today.masked.preTrim.pb \
         >& matOptimize.filtered.log
     # Again prune samples with too many private mutations and internal branches that are too long.
     $matUtils extract -i gisaidAndPublic.$today.masked.preTrim.pb \
         --max-parsimony 20 \
-        --max-branch-length 50 \
-        --max-path-length 150 \
+        --max-branch-length 60 \
+        --max-path-length 175 \
         -O -o gisaidAndPublic.$today.masked.pb
 fi
 # Exclude sequences with a very high number of EPPs from future runs
 grep ^Current usher.addNew.log \
 | awk '$16 >= 10 {print $8;}' \
 | awk -F\| '{ if ($3 == "") { print $1; } else { print $2; } }' \
     > tooManyEpps.ids
 cat tooManyEpps.ids >> ../tooManyEpps.ids
 $matUtils extract -i gisaidAndPublic.$today.masked.pb -u samples.$today
 $scriptDir/combineMetadata.sh $prevDate $today
 # version/description files