eeafd7b8b2ea5f34ee78bfe73b51ea7a1be5bce4
angie
Thu Sep 2 17:51:46 2021 -0700
Add the same filters that we've been using on the combined tree because when GISAID samples are removed, some branches become very long. (Russ & Yatish request)

diff --git src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh
index 3ee55c2..5a8a750 100755
--- src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh
+++ src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh
@@ -18,30 +18,32 @@
 ottoDir=/hive/data/outside/otto/sarscov2phylo
 cncbDir=$ottoDir/cncb.latest
 scriptDir=$(dirname "${BASH_SOURCE[0]}")
 source $scriptDir/util.sh
 usherDir=~angie/github/usher
 matUtils=$usherDir/build/matUtils
 cd $ottoDir/$today
 # Extract public samples from tree
 grep -v EPI_ISL_ samples.$today > newPublicNames
 $matUtils extract -i gisaidAndPublic.$today.masked.pb \
     -s newPublicNames \
+    --max-parsimony 20 \
+    --max-branch-length 30 \
     -O -o public-$today.all.masked.pb
 # Add nextclade annotations to public protobuf
 if [ -s cladeToName ]; then
     # Use combined tree's clade assignments to annotate clades on public tree
     grep -v EPI_ISL cladeToName > cladeToPublicName
     time $matUtils annotate -T 50 \
         -l \
         -i public-$today.all.masked.pb \
         -c cladeToPublicName \
         -f 0.95 \
         -D details.nextclade.public \
         -o public-$today.all.masked.nextclade.pb \
         >& annotate.nextclade.public
 else