23036e8b97ae73f56eeee54d68a7668d4881e77b angie Mon Jan 9 18:00:29 2023 -0800 Don't make old taxodium/v1 protobuf anymore, everyone has moved on to taxonium/v2. Make names file for big tree so hgPhyloPlace doesn't have to read in protobuf to get names for matching uploaded names/IDs. diff --git src/hg/utils/otto/sarscov2phylo/updateCombinedTree.sh src/hg/utils/otto/sarscov2phylo/updateCombinedTree.sh index 2a25eb1..ccec350 100755 --- src/hg/utils/otto/sarscov2phylo/updateCombinedTree.sh +++ src/hg/utils/otto/sarscov2phylo/updateCombinedTree.sh @@ -144,50 +144,39 @@ tail -n+2 sample-clades \ | sort > tmp2 paste <(zcat gisaidAndPublic.$today.metadata.tsv.gz | cut -f 1-9 | head -1) \ <(echo -e "Nextstrain_clade_usher\tpango_lineage_usher") \ > gisaidAndPublic.$today.metadata.tsv join -t$'\t' tmp1 tmp2 \ >> gisaidAndPublic.$today.metadata.tsv pigz -p 8 -f gisaidAndPublic.$today.metadata.tsv rm tmp1 tmp2 # EPI_ISL_ ID to public sequence name mapping, so if users upload EPI_ISL IDs for which we have # public names & IDs, we can match them. cut -f 1,3 $epiToPublic > epiToPublic.latest # Update links to latest public+GISAID protobuf and metadata in hgwdev cgi-bin directories +pigz -p 8 -c samples.$today > samples.$today.gz for dir in /usr/local/apache/cgi-bin{-angie,-beta,}/hgPhyloPlaceData/wuhCor1; do ln -sf `pwd`/gisaidAndPublic.$today.masked.pb $dir/public.plusGisaid.latest.masked.pb ln -sf `pwd`/gisaidAndPublic.$today.metadata.tsv.gz \ $dir/public.plusGisaid.latest.metadata.tsv.gz ln -sf `pwd`/hgPhyloPlace.plusGisaid.description.txt $dir/public.plusGisaid.latest.version.txt ln -sf `pwd`/epiToPublic.latest $dir/ + ln -sf `pwd`/samples.$today.gz $dir/public.plusGisaid.names.gz done -# Make Taxonium v1 protobuf for display -zcat /hive/data/genomes/wuhCor1/goldenPath/bigZips/genes/ncbiGenes.gtf.gz \ -| grep -v '"ORF1a"' > ncbiGenes.gtf -zcat /hive/data/genomes/wuhCor1/wuhCor1.fa.gz > wuhCor1.fa -zcat gisaidAndPublic.$today.metadata.tsv.gz > metadata.tmp.tsv -time $matUtils extract -i gisaidAndPublic.$today.masked.pb \ - -f wuhCor1.fa \ - -g ncbiGenes.gtf \ - -M metadata.tmp.tsv \ - --extra-fields pango_lineage_usher \ - --include-nt \ - --write-taxodium gisaidAndPublic.$today.masked.taxodium.pb -rm metadata.tmp.tsv wuhCor1.fa -pigz -p 8 -f gisaidAndPublic.$today.masked.taxodium.pb - # Make Taxonium v2 protobuf for display usher_to_taxonium --input gisaidAndPublic.$today.masked.pb \ --metadata gisaidAndPublic.$today.metadata.tsv.gz \ --genbank ~angie/github/taxonium/taxoniumtools/test_data/hu1.gb \ --columns genbank_accession,country,date,pangolin_lineage,pango_lineage_usher \ --clade_types=nextstrain,pango \ + --name_internal_nodes \ + --title "$today tree with sequences from GISAID, INSDC, COG-UK and CNCB" \ --output gisaidAndPublic.$today.masked.taxonium.jsonl.gz $scriptDir/extractPublicTree.sh $today $prevDate -grep skipping annotate* - +set +o pipefail +grep skipping annotate* | cat