4e46cbfb5a3e3c506b82234e1dbb774b99bb9168
angie
  Wed Mar 3 18:42:02 2021 -0800
Add protobufs with nextclade and pangolin annotations to public tree archive dir.  Update public-latest.* links in more cgi-bin* locations.  Clean up a bit.

diff --git src/hg/utils/otto/sarscov2phylo/updatePublicTree.sh src/hg/utils/otto/sarscov2phylo/updatePublicTree.sh
index b108dfe..32c13ae 100755
--- src/hg/utils/otto/sarscov2phylo/updatePublicTree.sh
+++ src/hg/utils/otto/sarscov2phylo/updatePublicTree.sh
@@ -182,32 +182,31 @@
     > public-$today.all.minAf.001.vcf
 wc -l public-$today.all.minAf.001.vcf
 bgzip -f public-$today.all.minAf.001.vcf
 tabix -p vcf public-$today.all.minAf.001.vcf.gz
 
 minAc01=$(( (($sampleCount + 99) / 100) ))
 vcfFilter -minAc=$minAc01 -rename public-$today.all.minAf.001.vcf.gz \
     > public-$today.all.minAf.01.vcf
 wc -l public-$today.all.minAf.01.vcf
 bgzip -f public-$today.all.minAf.01.vcf
 tabix -p vcf public-$today.all.minAf.01.vcf.gz
 
 # Parsimony scores on collapsed tree
 time $find_parsimonious_assignments --tree public-$today.all.nwk \
     --vcf <(gunzip -c public-$today.all.vcf.gz) \
-> fpa.out
-tail -n+2 fpa.out \
+| tail -n+2 \
 | sed -re 's/^[A-Z]([0-9]+)[A-Z,]+.*parsimony_score=([0-9]+).*/\1\t\2/;' \
 | tawk '{print "NC_045512v2", $1-1, $1, $2;}' \
 | sort -k2n,2n \
     > public-$today.all.parsimony.bg
 bedGraphToBigWig public-$today.all.parsimony.bg /hive/data/genomes/wuhCor1/chrom.sizes \
     public-$today.all.parsimony.bw
 
 # Metadata for hgPhyloPlace:
 # Header names same as nextmeta (with strain first) so hgPhyloPlace recognizes them:
 echo -e "strain\tgenbank_accession\tdate\tcountry\thost\tcompleteness\tlength\tNextstrain_clade\tpangolin_lineage" \
     > public-$today.metadata.tsv
 # It's not always possible to recreate both old and new names correctly from metadata,
 # so make a file to translate accession or COG-UK to the name used in VCF, tree and protobufs.
 cut -f 2 $renaming \
 | awk -F\| '{ if ($3 == "") { print $1 "\t" $0; } else { print $2 "\t" $0; } }' \
@@ -347,28 +346,35 @@
 ln -sf `pwd`/public-$today.nextstrainColors.gz \
     /gbdb/wuhCor1/sarsCov2PhyloPub/public.all.nextstrainColors.gz
 ln -sf `pwd`/version.txt /gbdb/wuhCor1/sarsCov2PhyloPub/public.all.version.txt
 fi
 
 # Link to public trees download directory hierarchy
 y=$(date +%Y)
 m=$(date +%m)
 d=$(date +%d)
 archiveRoot=/hive/users/angie/publicTrees
 archive=$archiveRoot/$y/$m/$d
 mkdir -p $archive
 ln `pwd`/public-$today.all.nwk $archive/
 ln `pwd`/public-$today.all.masked.{pb,vcf.gz} $archive/
 ln `pwd`/public-$today.metadata.tsv.gz $archive/
+ln `pwd`/public-$today.all.masked.nextclade.pangolin.pb $archive/
+ln `pwd`/cladeToPublicName $archive/
+ln `pwd`/lineageToPublicName $archive/
+
 # Update 'latest' in $archiveRoot
 ln -f `pwd`/public-$today.all.nwk $archiveRoot/public-latest.all.nwk
 ln -f `pwd`/public-$today.all.masked.pb $archiveRoot/public-latest.all.masked.pb
 ln -f `pwd`/public-$today.all.masked.vcf.gz $archiveRoot/public-latest.all.masked.vcf.gz
 ln -f `pwd`/public-$today.metadata.tsv.gz $archiveRoot/public-latest.metadata.tsv.gz
 ln -f `pwd`/hgPhyloPlace.description.txt $archiveRoot/public-latest.version.txt
 
 # Update 'latest' protobuf, metadata and desc in and cgi-bin{,-angie}/hgPhyloPlaceData/wuhCor1/
-for dir in /usr/local/apache/cgi-bin{-angie,}/hgPhyloPlaceData/wuhCor1; do
+for dir in /usr/local/apache/cgi-bin{-angie,-demo-angie,-beta,}/hgPhyloPlaceData/wuhCor1; do
     ln -sf `pwd`/public-$today.all.masked.pb $dir/public-latest.all.masked.pb
     ln -sf `pwd`/public-$today.metadata.tsv.gz $dir/public-latest.metadata.tsv.gz
     ln -sf `pwd`/hgPhyloPlace.description.txt $dir/public-latest.version.txt
 done
+
+# Clean up
+nice xz new*fa &