ecbbcd78daf00901ca5eac7cde487effd342a376 angie Thu Jan 13 11:33:31 2022 -0800 Increase max-path-length so we don't lop off all of BA.2 and BA.3, which are on super-long paths due to Delta contamination and back-mutations. diff --git src/hg/utils/otto/sarscov2phylo/updateCombinedTree.sh src/hg/utils/otto/sarscov2phylo/updateCombinedTree.sh index 958a9ff..659623d 100755 --- src/hg/utils/otto/sarscov2phylo/updateCombinedTree.sh +++ src/hg/utils/otto/sarscov2phylo/updateCombinedTree.sh @@ -35,31 +35,31 @@ usherDir=~angie/github/usher usher=$usherDir/build/usher matUtils=$usherDir/build/matUtils if [ ! -s new.masked.vcf.gz ]; then $scriptDir/makeNewMaskedVcf.sh $prevDate $today $problematicSitesVcf $baseProtobuf fi if [ ! -s gisaidAndPublic.$today.masked.pb ]; then $scriptDir/usherClusterRun.sh $today # Prune samples with too many private mutations and internal branches that are too long. $matUtils extract -i gisaidAndPublic.$today.masked.preTrim.pb \ --max-parsimony 20 \ --max-branch-length 45 \ - --max-path-length 100 \ + --max-path-length 150 \ -O -o gisaidAndPublic.$today.masked.pb fi # Exclude sequences with a very high number of EPPs from future runs grep ^Current usher.addNew.log \ | awk '$16 >= 10 {print $8;}' \ | awk -F\| '{ if ($3 == "") { print $1; } else { print $2; } }' \ > tooManyEpps.ids cat tooManyEpps.ids >> ../tooManyEpps.ids $matUtils extract -i gisaidAndPublic.$today.masked.pb -u samples.$today $scriptDir/combineMetadata.sh $prevDate $today # version/description files