6b7b5c359f41944103a1209a09b076cdade0044b
angie
  Fri Dec 2 11:25:11 2022 -0800
Add check for unannotated lineages.

diff --git src/hg/utils/otto/sarscov2phylo/updatePublic.sh src/hg/utils/otto/sarscov2phylo/updatePublic.sh
index 7a3e9df..e74c4e7 100755
--- src/hg/utils/otto/sarscov2phylo/updatePublic.sh
+++ src/hg/utils/otto/sarscov2phylo/updatePublic.sh
@@ -40,19 +40,39 @@
 buildDir=$ottoDir/$today
 mkdir -p $buildDir
 cd $buildDir
 
 prevDate=$(date -d yesterday +%F)
 time $scriptDir/updateCombinedTree.sh $prevDate $today $problematicSitesVcf \
     >& updateCombinedTree.log
 
 echo ""
 cat hgPhyloPlace.description.txt
 cat hgPhyloPlace.plusGisaid.description.txt
 
 set +o pipefail
 grep skip annotate.pango annotate.nextclade | cat
 grep 'Could not' annotate.pango annotate.nextclade | cat
+
+# Check for newly designated lineages that are missing from pango.clade-mutations.tsv
+set +x
+lineages=~angie/github/pango-designation/lineages.csv
+tail -n+2 $lineages | cut -d, -f 2 | uniq | grep -E '^(AY|[B-Z][A-Z])' | sort -u \
+    > $TMPDIR/designatedDoubleLetters
+cut -f 1 $scriptDir/pango.clade-mutations.tsv  \
+| grep -E '^(AY|[B-Z][A-Z])' | grep -v _ | sort -u \
+    > $TMPDIR/cladeMutDoubleLetters
+missingLineages=$(comm -23 $TMPDIR/designatedDoubleLetters $TMPDIR/cladeMutDoubleLetters)
+if [[ "$missingLineages" != "" ]]; then
+    echo "LINEAGES MISSING FROM pango.clade-mutations.tsv:"
+    echo $missingLineages
+fi
+extraLineages=$(comm -13 $TMPDIR/designatedDoubleLetters $TMPDIR/cladeMutDoubleLetters)
+if [[ "$extraLineages" != "" ]]; then
+    echo "EXTRA LINEAGES (withdrawn?) in pango.clade-mutations.tsv:"
+    echo $extraLineages
+fi
 set -o pipefail
+set -x
 
 # Clean up
 nice xz -f new*fa &