ecbbcd78daf00901ca5eac7cde487effd342a376 angie Thu Jan 13 11:33:31 2022 -0800 Increase max-path-length so we don't lop off all of BA.2 and BA.3, which due to Delta contamination and back-mutations are on super-long paths. diff --git src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh index 73717c8..1f22096 100755 --- src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh +++ src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh @@ -24,31 +24,31 @@ usherDir=~angie/github/usher matUtils=$usherDir/build/matUtils cd $ottoDir/$today # Extract public samples from tree grep -v EPI_ISL_ samples.$today > newPublicNames # Dunno why, but when I tried using -s together with the filtering params, it ran for 3 hours # and I killed it -- stuck in a loop? Run two commands: $matUtils extract -i gisaidAndPublic.$today.masked.pb \ -s newPublicNames \ -o public-$today.all.masked.preTrim.pb $matUtils extract -i public-$today.all.masked.preTrim.pb \ --max-parsimony 20 \ --max-branch-length 45 \ - --max-path-length 100 \ + --max-path-length 150 \ -O -o public-$today.all.masked.pb # Add nextclade annotations to protobuf (completely specified by nextstrain.clade-mutations.tsv) grep -v EPI_ISL cladeToName > cladeToPublicName time $matUtils annotate -T 50 \ -l \ -i public-$today.all.masked.pb \ -M $scriptDir/nextstrain.clade-mutations.tsv \ -D details.nextclade.public \ -o public-$today.all.masked.nextclade.pb \ >& annotate.nextclade.public # Add pangolin lineage annotations to public protobuf grep -v EPI_ISL lineageToName > lineageToPublicName time $matUtils annotate -T 50 \