655174351c1c50009ea8e8af21ad0b15d8231e55
angie
  Tue Apr 13 23:15:03 2021 -0700
gzip-compress the public tree download files.  Leave around an uncompressed copy of public-*.masked.pb in case somebody's script depends on it.

diff --git src/hg/utils/otto/sarscov2phylo/updatePublicTree.sh src/hg/utils/otto/sarscov2phylo/updatePublicTree.sh
index 1a5b826..585eb51 100755
--- src/hg/utils/otto/sarscov2phylo/updatePublicTree.sh
+++ src/hg/utils/otto/sarscov2phylo/updatePublicTree.sh
@@ -20,30 +20,33 @@
 prevVcf=$ottoDir/$prevDate/public-$prevDate.all.vcf.gz
 prevProtobufMasked=$ottoDir/$prevDate/public-$prevDate.all.masked.pb
 prevProtobufUnmasked=$ottoDir/$prevDate/public-$prevDate.all.notMasked.pb
 prevMeta=$ottoDir/$prevDate/public-$prevDate.metadata.tsv.gz
 
 echo "prevVcf=$prevVcf"
 echo "prevProtobufMasked=$prevProtobufMasked"
 echo "prevProtobufUnmasked=$prevProtobufUnmasked"
 echo "prevMeta=$prevMeta"
 echo "problematicSitesVcf=$problematicSitesVcf"
 
 ncbiDir=$ottoDir/ncbi.latest
 cogUkDir=$ottoDir/cogUk.latest
 cncbDir=$ottoDir/cncb.latest
 today=$(date +%F)
+y=$(date +%Y)
+m=$(date +%m)
+d=$(date +%d)
 
 minReal=20000
 ref2bit=/hive/data/genomes/wuhCor1/wuhCor1.2bit
 
 usherDir=~angie/github/usher
 usher=$usherDir/build/usher
 matUtils=$usherDir/build/matUtils
 find_parsimonious_assignments=~angie/github/strain_phylogenetics/build/find_parsimonious_assignments
 
 scriptDir=$(dirname "${BASH_SOURCE[0]}")
 
 source $scriptDir/util.sh
 
 # Before we get started, make sure cog_metadata has the columns we're expecting:
 expectedHeaderStart='sequence_name,country,adm1,pillar_2,sample_date,epi_week,lineage,'
@@ -378,42 +381,43 @@
     /gbdb/wuhCor1/sarsCov2PhyloPub/public.all.parsimony.bw
 ln -sf `pwd`/public-$today.lineageColors.gz \
     /gbdb/wuhCor1/sarsCov2PhyloPub/public.all.lineageColors.gz
 ln -sf `pwd`/public-$today.nextstrainColors.gz \
     /gbdb/wuhCor1/sarsCov2PhyloPub/public.all.nextstrainColors.gz
 ln -sf `pwd`/version.txt /gbdb/wuhCor1/sarsCov2PhyloPub/public.all.version.txt
 for q in 20Q1 20Q2 20Q3 20Q4 21Q1; do
     for af in 01 001; do
         ln -sf `pwd`/public-$today.$q.minAf.$af.vcf.gz \
             /gbdb/wuhCor1/sarsCov2PhyloPub/public.$q.minAf.$af.vcf.gz
     done
 done
 fi
 
 # Link to public trees download directory hierarchy
-y=$(date +%Y)
-m=$(date +%m)
-d=$(date +%d)
 archiveRoot=/hive/users/angie/publicTrees
 archive=$archiveRoot/$y/$m/$d
 mkdir -p $archive
-ln `pwd`/public-$today.all.nwk $archive/
+gzip -c public-$today.all.nwk > $archive/public-$today.all.nwk.gz
 ln `pwd`/public-$today.all.masked.{pb,vcf.gz} $archive/
+gzip -c public-$today.all.masked.pb > $archive/public-$today.all.masked.pb.gz
 ln `pwd`/public-$today.metadata.tsv.gz $archive/
-ln `pwd`/public-$today.all.masked.nextclade.pangolin.pb $archive/
-ln `pwd`/cladeToPublicName $archive/
-ln `pwd`/lineageToPublicName $archive/
+gzip -c public-$today.all.masked.nextclade.pangolin.pb \
+    > $archive/public-$today.all.masked.nextclade.pangolin.pb.gz
+gzip -c cladeToPublicName > $archive/cladeToPublicName.gz  # -c writes to stdout, so redirect
+gzip -c lineageToPublicName > $archive/lineageToPublicName.gz
+ln `pwd`/hgPhyloPlace.description.txt $archive/public-$today.version.txt
 
 # Update 'latest' in $archiveRoot
-ln -f `pwd`/public-$today.all.nwk $archiveRoot/public-latest.all.nwk
-ln -f `pwd`/public-$today.all.masked.pb $archiveRoot/public-latest.all.masked.pb
-ln -f `pwd`/public-$today.all.masked.vcf.gz $archiveRoot/public-latest.all.masked.vcf.gz
-ln -f `pwd`/public-$today.metadata.tsv.gz $archiveRoot/public-latest.metadata.tsv.gz
-ln -f `pwd`/hgPhyloPlace.description.txt $archiveRoot/public-latest.version.txt
+ln -f $archive/public-$today.all.nwk.gz $archiveRoot/public-latest.all.nwk.gz
+ln -f $archive/public-$today.all.masked.pb $archiveRoot/public-latest.all.masked.pb
+ln -f $archive/public-$today.all.masked.pb.gz $archiveRoot/public-latest.all.masked.pb.gz
+ln -f $archive/public-$today.all.masked.vcf.gz $archiveRoot/public-latest.all.masked.vcf.gz
+ln -f $archive/public-$today.metadata.tsv.gz $archiveRoot/public-latest.metadata.tsv.gz
+ln -f $archive/public-$today.version.txt $archiveRoot/public-latest.version.txt  # archived name of hgPhyloPlace.description.txt
 
 # Update 'latest' protobuf, metadata and desc in and cgi-bin{,-angie}/hgPhyloPlaceData/wuhCor1/
 for dir in /usr/local/apache/cgi-bin{-angie,-beta,}/hgPhyloPlaceData/wuhCor1; do
     ln -sf `pwd`/public-$today.all.masked.pb $dir/public-latest.all.masked.pb
     ln -sf `pwd`/public-$today.metadata.tsv.gz $dir/public-latest.metadata.tsv.gz
     ln -sf `pwd`/hgPhyloPlace.description.txt $dir/public-latest.version.txt
 done