d091ea9bacb0841f1aa4b17b5f1e8a5219b43929 angie Sat Apr 22 18:25:31 2023 -0700 Get new sequences from CNCB. diff --git src/hg/utils/otto/sarscov2phylo/updatePublic.sh src/hg/utils/otto/sarscov2phylo/updatePublic.sh index e74c4e7..bd3c79c 100755 --- src/hg/utils/otto/sarscov2phylo/updatePublic.sh +++ src/hg/utils/otto/sarscov2phylo/updatePublic.sh @@ -20,52 +20,56 @@ gisaidDir=/hive/users/angie/gisaid today=$(date +%F) scriptDir=$(dirname "${BASH_SOURCE[0]}") $scriptDir/gisaidFromChunks.sh & cogUkDir=$ottoDir/cogUk.$today mkdir -p $cogUkDir cd $cogUkDir && time $scriptDir/getCogUk.sh >& getCogUk.log & ncbiDir=$ottoDir/ncbi.$today mkdir -p $ncbiDir cd $ncbiDir && time $scriptDir/getNcbi.sh >& getNcbi.log & +cncbDir=$ottoDir/cncb.$today +mkdir -p $cncbDir +cd $cncbDir && time $scriptDir/getCncb.sh >& getCncb.log & + wait time $scriptDir/updateIdMapping.sh \ $gisaidDir/{metadata_batch_$today.tsv.gz,sequences_batch_$today.fa.xz} buildDir=$ottoDir/$today mkdir -p $buildDir cd $buildDir prevDate=$(date -d yesterday +%F) time $scriptDir/updateCombinedTree.sh $prevDate $today $problematicSitesVcf \ >& updateCombinedTree.log echo "" cat hgPhyloPlace.description.txt cat hgPhyloPlace.plusGisaid.description.txt set +o pipefail grep skip annotate.pango annotate.nextclade | cat grep 'Could not' annotate.pango annotate.nextclade | cat -# Check for newly lineages that are missing from pango.clade-mutations.tsv +# Check for newly added lineages that are missing from pango.clade-mutations.tsv set +x lineages=~angie/github/pango-designation/lineages.csv tail -n+2 $lineages | cut -d, -f 2 | uniq | grep -E '^(AY|[B-Z][A-Z])' | sort -u \ > $TMPDIR/designatedDoubleLetters cut -f 1 $scriptDir/pango.clade-mutations.tsv \ | grep -E '^(AY|[B-Z][A-Z])' | grep -v _ | sort -u \ > $TMPDIR/cladeMutDoubleLetters missingLineages=$(comm -23 $TMPDIR/designatedDoubleLetters $TMPDIR/cladeMutDoubleLetters) if [[ "$missingLineages" != "" ]]; then echo "LINEAGES MISSING FROM lineages.csv:" echo $missingLineages fi extraLineages=$(comm -13 $TMPDIR/designatedDoubleLetters $TMPDIR/cladeMutDoubleLetters) if [[ "$extraLineages" != "" ]]; then echo "EXTRA LINEAGES (withdrawn?) in pango.clade-mutations.tsv:"