# Provenance: commit 6b7b5c359f41944103a1209a09b076cdade0044b by angie,
# Fri Dec 2 11:25:11 2022 -0800 -- "Add check for unannotated lineages."
# File: src/hg/utils/otto/sarscov2phylo/updatePublic.sh
# (index 7a3e9df..e74c4e7 100755, hunk @@ -40,19 +40,39 @@).
# Below is the post-commit text of that hunk, reformatted one statement per
# line.  $ottoDir, $today, $scriptDir and $problematicSitesVcf are set earlier
# in the script, outside this hunk.

# Work in a per-day build directory.
buildDir=$ottoDir/$today
mkdir -p "$buildDir"
cd "$buildDir"

# Run the tree update, passing yesterday's date and today's; stdout and stderr
# both go to the log file.
prevDate=$(date -d yesterday +%F)
time "$scriptDir"/updateCombinedTree.sh "$prevDate" "$today" "$problematicSitesVcf" \
    >& updateCombinedTree.log

echo ""
cat hgPhyloPlace.description.txt
cat hgPhyloPlace.plusGisaid.description.txt

# pipefail is switched off for the report section below: with it off, a
# pipeline's status is that of its last command, so a grep that matches
# nothing does not make the pipeline fail.
set +o pipefail
grep skip annotate.pango annotate.nextclade | cat
grep 'Could not' annotate.pango annotate.nextclade | cat

# Check for newly designated lineages that are missing from
# pango.clade-mutations.tsv (and for entries there that are no longer
# designated).
# Tracing is turned off for this section and turned back on afterwards.
set +x
# Keep this assignment unquoted: quoting would disable ~user expansion.
lineages=~angie/github/pango-designation/lineages.csv
# Fall back to /tmp so an unset or empty TMPDIR cannot put the scratch files
# in the filesystem root.
designatedFile=${TMPDIR:-/tmp}/designatedDoubleLetters
cladeMutFile=${TMPDIR:-/tmp}/cladeMutDoubleLetters
# Skip the header line and take the second comma-separated column (presumably
# the lineage name -- confirm against lineages.csv).  uniq collapses adjacent
# repeats before the grep; sort -u then yields the sorted unique list that
# comm requires.  Only names starting with AY or with two capital letters
# whose first is B-Z are kept.
tail -n+2 "$lineages" | cut -d, -f 2 | uniq | grep -E '^(AY|[B-Z][A-Z])' | sort -u \
    > "$designatedFile"
# Same name filter on column 1 of the clade-mutations table, additionally
# dropping any name that contains an underscore.
cut -f 1 "$scriptDir"/pango.clade-mutations.tsv \
| grep -E '^(AY|[B-Z][A-Z])' | grep -v _ | sort -u \
    > "$cladeMutFile"
# comm -23: lines only in the first file, i.e. designated lineages that have
# no entry in pango.clade-mutations.tsv.
missingLineages=$(comm -23 "$designatedFile" "$cladeMutFile")
if [[ -n "$missingLineages" ]]; then
    echo "LINEAGES MISSING FROM pango.clade-mutations.tsv:"
    # Left unquoted so word splitting prints the whole list on one line.
    # NOTE(review): confirm the single-line format is what is wanted.
    # shellcheck disable=SC2086
    echo $missingLineages
fi
# comm -13: lines only in the second file, i.e. entries in
# pango.clade-mutations.tsv that are not among the designated lineages.
extraLineages=$(comm -13 "$designatedFile" "$cladeMutFile")
if [[ -n "$extraLineages" ]]; then
    echo "EXTRA LINEAGES (withdrawn?) in pango.clade-mutations.tsv:"
    # Unquoted for the same one-line output as above.
    # shellcheck disable=SC2086
    echo $extraLineages
fi
set -o pipefail
set -x

# Clean up
nice xz -f new*fa &