8a1149c22482aaa4787bb02799a62f348c5dc2b2 angie Thu Jun 22 10:17:38 2023 -0700 Send verbose usher_to_taxonium output to log diff --git src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh index 50a84d3..3e5f83a 100755 --- src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh +++ src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh @@ -92,31 +92,31 @@ $matUtils extract -i public-$today.all.masked.pb -u samples.public.$today sampleCountComma=$(echo $(wc -l < samples.public.$today) \ | sed -re 's/([0-9]+)([0-9]{3})$/\1,\2/; s/([0-9]+)([0-9]{3},[0-9]{3})$/\1,\2/;') echo "$sampleCountComma genomes from GenBank, COG-UK and CNCB ($today); sarscov2phylo 13-11-20 tree with newer sequences added by UShER" \ > hgPhyloPlace.description.txt # Make Taxonium V2 .jsonl.gz protobuf for display usher_to_taxonium --input public-$today.all.masked.pb \ --metadata public-$today.metadata.tsv.gz \ --genbank ~angie/github/taxonium/taxoniumtools/test_data/hu1.gb \ --columns genbank_accession,country,date,pangolin_lineage,pango_lineage_usher \ --clade_types=nextstrain,pango \ --name_internal_nodes \ --title "$today tree with sequences from GISAID, INSDC, COG-UK and CNCB" \ - --output public-$today.all.masked.taxonium.jsonl.gz + --output public-$today.all.masked.taxonium.jsonl.gz >& utt.log # Make a size-limited public tree for ShUShER so it doesn't exceed browser memory limits $matUtils extract -i public-$today.all.masked.pb --set-size 6000000 \ -o public-$today.all.masked.ShUShER.pb.gz # Link to public trees download directory hierarchy archiveRoot=/hive/users/angie/publicTrees read y m d < <(echo $today | sed -re 's/-/ /g') archive=$archiveRoot/$y/$m/$d mkdir -p $archive gzip -c public-$today.all.nwk > $archive/public-$today.all.nwk.gz ln -f `pwd`/public-$today.all.masked.{pb,vcf.gz} $archive/ gzip -c public-$today.all.masked.pb > $archive/public-$today.all.masked.pb.gz ln -f `pwd`/public-$today.metadata.tsv.gz $archive/ gzip -c public-$today.all.masked.nextclade.pangolin.pb \