75d8863314800ba0199291f101308dc2e52435be angie Tue Nov 2 12:53:58 2021 -0700 Add the new -M (--clade-mutations) option to matUtils annotate for precise (yet hopefully more stable than --clade-paths) identification of lineage root nodes. diff --git src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh index 055389f..2d8d85b 100755 --- src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh +++ src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh @@ -33,48 +33,50 @@ $matUtils extract -i gisaidAndPublic.$today.masked.pb \ -s newPublicNames \ -o public-$today.all.masked.preTrim.pb $matUtils extract -i public-$today.all.masked.preTrim.pb \ --max-parsimony 20 \ --max-branch-length 30 \ -O -o public-$today.all.masked.pb # Add nextclade annotations to public protobuf if [ -s cladeToName ]; then # Use combined tree's clade assignments to annotate clades on public tree grep -v EPI_ISL cladeToName > cladeToPublicName time $matUtils annotate -T 50 \ -l \ -i public-$today.all.masked.pb \ + -M $scriptDir/nextstrain.clade-mutations.tsv \ -c cladeToPublicName \ -f 0.95 \ -D details.nextclade.public \ -o public-$today.all.masked.nextclade.pb \ >& annotate.nextclade.public else time $matUtils annotate -T 50 \ -l \ -i public-$today.all.masked.pb \ -P ../nextstrain.clade-paths.public.tsv \ -o public-$today.all.masked.nextclade.pb fi # Add pangolin lineage annotations to public protobuf if [ -s lineageToName ]; then grep -v EPI_ISL lineageToName > lineageToPublicName time $matUtils annotate -T 50 \ -i public-$today.all.masked.nextclade.pb \ + -M $scriptDir/pango.clade-mutations.tsv \ -c lineageToPublicName \ -f 0.95 \ -D details.pango.public \ -o public-$today.all.masked.nextclade.pangolin.pb \ >& annotate.pango.public else time $matUtils annotate -T 50 \ -i public-$today.all.masked.nextclade.pb \ -P ../pango.clade-paths.public.tsv \ -o public-$today.all.masked.nextclade.pangolin.pb fi # Extract Newick and VCF from public-only tree time $matUtils extract -i public-$today.all.masked.pb \ -t public-$today.all.nwk \