7f46acdf8f879de28c3ba40534e58a881941e168 angie Sun Feb 4 09:24:53 2024 -0800 CNCB is updated daily so make version.txt more concise. diff --git src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh index 6ebf17f..29b1d0c 100755 --- src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh +++ src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh @@ -74,32 +74,31 @@ > public-$today.metadata.tsv.gz rm public-$today.all.masked.pb ln -f public-$today.all.masked.nextclade.pangolin.pb public-$today.all.masked.pb # Save paths for use tomorrow. $matUtils extract -i public-$today.all.masked.pb -C clade-paths.public tail -n+2 clade-paths.public \ | grep -E '^[12]' \ | cut -f 1,3 > cladeToPath.public tail -n+2 clade-paths.public \ | grep -E '^[A-Za-z]' \ | cut -f 1,3 > lineageToPath.public -cncbDate=$(ls -l $cncbDir | sed -re 's/.*cncb\.([0-9]{4}-[0-9][0-9]-[0-9][0-9]).*/\1/') -echo "sarscov2phylo release 13-11-20; NCBI and COG-UK sequences downloaded $today; CNCB sequences downloaded $cncbDate" \ +echo "sarscov2phylo release 13-11-20; NCBI, COG-UK and CNCB sequences downloaded $today" \ > version.txt $matUtils extract -i public-$today.all.masked.pb -u samples.public.$today sampleCountComma=$(echo $(wc -l < samples.public.$today) \ | sed -re 's/([0-9]+)([0-9]{3})$/\1,\2/; s/([0-9]+)([0-9]{3},[0-9]{3})$/\1,\2/;') echo "$sampleCountComma genomes from GenBank, COG-UK and CNCB ($today); sarscov2phylo 13-11-20 tree with newer sequences added by UShER" \ > hgPhyloPlace.description.txt # Make Taxonium V2 .jsonl.gz protobuf for display usher_to_taxonium --input public-$today.all.masked.pb \ --metadata public-$today.metadata.tsv.gz \ --genbank ~angie/github/taxonium/taxoniumtools/test_data/hu1.gb \ --columns genbank_accession,country,date,pangolin_lineage,pango_lineage_usher \ --clade_types=nextstrain,pango \ --name_internal_nodes \