75d8863314800ba0199291f101308dc2e52435be
angie
  Tue Nov 2 12:53:58 2021 -0700
Add the new -M (--clade-mutations) option to matUtils annotate for precise (yet hopefully more stable than --clade-paths) identification of lineage root nodes.

diff --git src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh
index 055389f..2d8d85b 100755
--- src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh
+++ src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh
@@ -33,48 +33,50 @@
 $matUtils extract -i gisaidAndPublic.$today.masked.pb \
     -s newPublicNames \
     -o public-$today.all.masked.preTrim.pb
 $matUtils extract -i public-$today.all.masked.preTrim.pb \
     --max-parsimony 20 \
     --max-branch-length 30 \
     -O -o public-$today.all.masked.pb
 
 # Add nextclade annotations to public protobuf
 if [ -s cladeToName ]; then
     # Use combined tree's clade assignments to annotate clades on public tree
     grep -v EPI_ISL cladeToName > cladeToPublicName
     time $matUtils annotate -T 50 \
         -l \
         -i public-$today.all.masked.pb \
+        -M $scriptDir/nextstrain.clade-mutations.tsv \
         -c cladeToPublicName \
         -f 0.95 \
         -D details.nextclade.public \
         -o public-$today.all.masked.nextclade.pb \
         >& annotate.nextclade.public
 else
     time $matUtils annotate -T 50 \
         -l \
         -i public-$today.all.masked.pb \
         -P ../nextstrain.clade-paths.public.tsv \
         -o public-$today.all.masked.nextclade.pb
 fi
 
 # Add pangolin lineage annotations to public protobuf
 if [ -s lineageToName ]; then
     grep -v EPI_ISL lineageToName > lineageToPublicName
     time $matUtils annotate -T 50 \
         -i public-$today.all.masked.nextclade.pb \
+        -M $scriptDir/pango.clade-mutations.tsv \
         -c lineageToPublicName \
         -f 0.95 \
         -D details.pango.public \
         -o public-$today.all.masked.nextclade.pangolin.pb \
         >& annotate.pango.public
 else
     time $matUtils annotate -T 50 \
         -i public-$today.all.masked.nextclade.pb \
         -P ../pango.clade-paths.public.tsv \
         -o public-$today.all.masked.nextclade.pangolin.pb
 fi
 
 # Extract Newick and VCF from public-only tree
 time $matUtils extract -i public-$today.all.masked.pb \
     -t public-$today.all.nwk \