18aba4871dfccf70dabe6fc328338c1442acb2bc
angie
  Mon Jul 10 11:59:23 2023 -0700
Have been using different matOptimize args since mid-May, see comment.

diff --git src/hg/utils/otto/sarscov2phylo/updateCombinedTree.sh src/hg/utils/otto/sarscov2phylo/updateCombinedTree.sh
index c7d3410..b2045ef 100755
--- src/hg/utils/otto/sarscov2phylo/updateCombinedTree.sh
+++ src/hg/utils/otto/sarscov2phylo/updateCombinedTree.sh
@@ -48,33 +48,41 @@
     time $usher \
         -T 64 -A -e 5 \
         -i prevRenamed.pb \
         -v new.masked.vcf.gz \
         -o merged.pb \
         --optimization_radius 0 --batch_size_per_process 10 \
         > usher.addNew.log 2>usher-sampled.stderr
     # Branch-specific masking
     time $scriptDir/maskDelta.sh merged.pb merged.deltaMasked.pb
     # Prune samples with too many private mutations and internal branches that are too long.
     $matUtils extract -i merged.deltaMasked.pb \
         --max-parsimony 20 \
         --max-branch-length 60 \
         --max-path-length 175 \
         -O -o merged.deltaMasked.filtered.pb
-    # Improved matOptimize from branch
+    # matOptimize: used -r 8 -M 2 until 2023-05-12, then switched to Cheng's recommended
+    # -m 0.00000001 -M 4 (to avoid the identical-child-node problem described in
+    #  https://github.com/sars-cov-2-variants/lineage-proposals/issues/40).
+    # The -M 4 allowed up to radius 32, and crazy things started happening, all while I was
+    # trying to get the tree cleaned up for pango-designation release 1.20 --> lineageTree.
+    # On 2023-05-20 I found that matOptimize had moved a big chunk of B.1 onto a
+    # B.1.1.7 garbage branch, causing big trouble for lineageTree (23_05_18_updateLineageTreePb.txt).
+    # After that I changed it back to -M 2 for my sanity. If the identical-child-node problem happens
+    # again, then I'll probably just run matOptimize twice, with a small radius the second time.
     time $matOptimize \
-        -T 80 -r 8 -M 2 -S move_log.filtered \
+        -T 80 -m 0.00000001 -M 2 -S move_log.filtered \
         -i merged.deltaMasked.filtered.pb \
         -o gisaidAndPublic.$today.masked.preTrim.pb \
         >& matOptimize.filtered.log
 
     # Again prune samples with too many private mutations and internal branches that are too long.
     $matUtils extract -i gisaidAndPublic.$today.masked.preTrim.pb \
         --max-parsimony 20 \
         --max-branch-length 60 \
         --max-path-length 175 \
         -O -o gisaidAndPublic.$today.masked.pb
 fi
 
 # Exclude sequences with a very high number of EPPs from future runs
 grep ^Current usher.addNew.log \
 | awk '$16 >= 10 {print $8;}' \