src/hg/utils/otto/sarscov2phylo/updateCombinedTree.sh 23036e8b97ae73f56eeee54d68a7668d4881e77b

23036e8b97ae73f56eeee54d68a7668d4881e77b
angie
  Mon Jan 9 18:00:29 2023 -0800
Don't make the old taxodium/v1 protobuf anymore; everyone has moved on to taxonium/v2.  Make a names file for the big tree so hgPhyloPlace doesn't have to read in the protobuf to get names for matching uploaded names/IDs.

diff --git src/hg/utils/otto/sarscov2phylo/updateCombinedTree.sh src/hg/utils/otto/sarscov2phylo/updateCombinedTree.sh
index 2a25eb1..ccec350 100755
--- src/hg/utils/otto/sarscov2phylo/updateCombinedTree.sh
+++ src/hg/utils/otto/sarscov2phylo/updateCombinedTree.sh
@@ -144,50 +144,39 @@
 tail -n+2 sample-clades \
 | sort > tmp2
 paste <(zcat gisaidAndPublic.$today.metadata.tsv.gz | cut -f 1-9 | head -1) \
       <(echo -e "Nextstrain_clade_usher\tpango_lineage_usher") \
     > gisaidAndPublic.$today.metadata.tsv
 join -t$'\t' tmp1 tmp2 \
     >> gisaidAndPublic.$today.metadata.tsv
 pigz -p 8 -f gisaidAndPublic.$today.metadata.tsv
 rm tmp1 tmp2
 
 # EPI_ISL_ ID to public sequence name mapping, so if users upload EPI_ISL IDs for which we have
 # public names & IDs, we can match them.
 cut -f 1,3 $epiToPublic > epiToPublic.latest
 
 # Update links to latest public+GISAID protobuf and metadata in hgwdev cgi-bin directories
+pigz -p 8 -c samples.$today > samples.$today.gz
 for dir in /usr/local/apache/cgi-bin{-angie,-beta,}/hgPhyloPlaceData/wuhCor1; do
     ln -sf `pwd`/gisaidAndPublic.$today.masked.pb $dir/public.plusGisaid.latest.masked.pb
     ln -sf `pwd`/gisaidAndPublic.$today.metadata.tsv.gz \
         $dir/public.plusGisaid.latest.metadata.tsv.gz
     ln -sf `pwd`/hgPhyloPlace.plusGisaid.description.txt $dir/public.plusGisaid.latest.version.txt
     ln -sf `pwd`/epiToPublic.latest $dir/
+    ln -sf `pwd`/samples.$today.gz $dir/public.plusGisaid.names.gz
 done
 
-# Make Taxonium v1 protobuf for display
-zcat /hive/data/genomes/wuhCor1/goldenPath/bigZips/genes/ncbiGenes.gtf.gz \
-| grep -v '"ORF1a"' > ncbiGenes.gtf
-zcat /hive/data/genomes/wuhCor1/wuhCor1.fa.gz > wuhCor1.fa
-zcat gisaidAndPublic.$today.metadata.tsv.gz > metadata.tmp.tsv
-time $matUtils extract -i gisaidAndPublic.$today.masked.pb \
-    -f wuhCor1.fa \
-    -g ncbiGenes.gtf \
-    -M metadata.tmp.tsv \
-    --extra-fields pango_lineage_usher \
-    --include-nt \
-    --write-taxodium gisaidAndPublic.$today.masked.taxodium.pb
-rm metadata.tmp.tsv wuhCor1.fa
-pigz -p 8 -f gisaidAndPublic.$today.masked.taxodium.pb
-
 # Make Taxonium v2 protobuf for display
 usher_to_taxonium --input gisaidAndPublic.$today.masked.pb \
     --metadata gisaidAndPublic.$today.metadata.tsv.gz \
     --genbank ~angie/github/taxonium/taxoniumtools/test_data/hu1.gb \
     --columns genbank_accession,country,date,pangolin_lineage,pango_lineage_usher \
     --clade_types=nextstrain,pango \
+    --name_internal_nodes \
+    --title "$today tree with sequences from GISAID, INSDC, COG-UK and CNCB" \
     --output gisaidAndPublic.$today.masked.taxonium.jsonl.gz
 
 $scriptDir/extractPublicTree.sh $today $prevDate
 
-grep skipping annotate*
-
+set +o pipefail
+grep skipping annotate* | cat