655174351c1c50009ea8e8af21ad0b15d8231e55 angie Tue Apr 13 23:15:03 2021 -0700
gzip-compress the public tree download files. Leave around an uncompressed copy of public-*.masked.pb in case somebody's script depends on it.

diff --git src/hg/utils/otto/sarscov2phylo/updatePublicTree.sh src/hg/utils/otto/sarscov2phylo/updatePublicTree.sh
index 1a5b826..585eb51 100755
--- src/hg/utils/otto/sarscov2phylo/updatePublicTree.sh
+++ src/hg/utils/otto/sarscov2phylo/updatePublicTree.sh
@@ -20,30 +20,33 @@
 prevVcf=$ottoDir/$prevDate/public-$prevDate.all.vcf.gz
 prevProtobufMasked=$ottoDir/$prevDate/public-$prevDate.all.masked.pb
 prevProtobufUnmasked=$ottoDir/$prevDate/public-$prevDate.all.notMasked.pb
 prevMeta=$ottoDir/$prevDate/public-$prevDate.metadata.tsv.gz
 echo "prevVcf=$prevVcf"
 echo "prevProtobufMasked=$prevProtobufMasked"
 echo "prevProtobufUnmasked=$prevProtobufUnmasked"
 echo "prevMeta=$prevMeta"
 echo "problematicSitesVcf=$problematicSitesVcf"
 ncbiDir=$ottoDir/ncbi.latest
 cogUkDir=$ottoDir/cogUk.latest
 cncbDir=$ottoDir/cncb.latest
 today=$(date +%F)
+y=$(date +%Y)
+m=$(date +%m)
+d=$(date +%d)
 minReal=20000
 ref2bit=/hive/data/genomes/wuhCor1/wuhCor1.2bit
 usherDir=~angie/github/usher
 usher=$usherDir/build/usher
 matUtils=$usherDir/build/matUtils
 find_parsimonious_assignments=~angie/github/strain_phylogenetics/build/find_parsimonious_assignments
 scriptDir=$(dirname "${BASH_SOURCE[0]}")
 source $scriptDir/util.sh
 # Before we get started, make sure cog_metadata has the columns we're expecting:
 expectedHeaderStart='sequence_name,country,adm1,pillar_2,sample_date,epi_week,lineage,'
@@ -378,42 +381,43 @@
         /gbdb/wuhCor1/sarsCov2PhyloPub/public.all.parsimony.bw
     ln -sf `pwd`/public-$today.lineageColors.gz \
         /gbdb/wuhCor1/sarsCov2PhyloPub/public.all.lineageColors.gz
     ln -sf `pwd`/public-$today.nextstrainColors.gz \
         /gbdb/wuhCor1/sarsCov2PhyloPub/public.all.nextstrainColors.gz
     ln -sf `pwd`/version.txt /gbdb/wuhCor1/sarsCov2PhyloPub/public.all.version.txt
     for q in 20Q1 20Q2 20Q3 20Q4 21Q1; do
         for af in 01 001; do
             ln -sf `pwd`/public-$today.$q.minAf.$af.vcf.gz \
                 /gbdb/wuhCor1/sarsCov2PhyloPub/public.$q.minAf.$af.vcf.gz
         done
     done
 fi
 # Link to public trees download directory hierarchy
-y=$(date +%Y)
-m=$(date +%m)
-d=$(date +%d)
 archiveRoot=/hive/users/angie/publicTrees
 archive=$archiveRoot/$y/$m/$d
 mkdir -p $archive
-ln `pwd`/public-$today.all.nwk $archive/
+gzip -c public-$today.all.nwk > $archive/public-$today.all.nwk.gz
 ln `pwd`/public-$today.all.masked.{pb,vcf.gz} $archive/
+gzip -c public-$today.all.masked.pb > $archive/public-$today.all.masked.pb.gz
 ln `pwd`/public-$today.metadata.tsv.gz $archive/
-ln `pwd`/public-$today.all.masked.nextclade.pangolin.pb $archive/
-ln `pwd`/cladeToPublicName $archive/
-ln `pwd`/lineageToPublicName $archive/
+gzip -c public-$today.all.masked.nextclade.pangolin.pb \
+    > $archive/public-$today.all.masked.nextclade.pangolin.pb.gz
+gzip -c cladeToPublicName $archive/cladeToPublicName.gz
+gzip -c lineageToPublicName $archive/lineageToPublicName.gz
+ln `pwd`/hgPhyloPlace.description.txt $archive/public-$today.version.txt
 # Update 'latest' in $archiveRoot
-ln -f `pwd`/public-$today.all.nwk $archiveRoot/public-latest.all.nwk
-ln -f `pwd`/public-$today.all.masked.pb $archiveRoot/public-latest.all.masked.pb
-ln -f `pwd`/public-$today.all.masked.vcf.gz $archiveRoot/public-latest.all.masked.vcf.gz
-ln -f `pwd`/public-$today.metadata.tsv.gz $archiveRoot/public-latest.metadata.tsv.gz
-ln -f `pwd`/hgPhyloPlace.description.txt $archiveRoot/public-latest.version.txt
+ln -f $y/$m/$d/public-$today.all.nwk.gz $archiveRoot/public-latest.all.nwk.gz
+ln -f $y/$m/$d/public-$today.all.masked.pb $archiveRoot/public-latest.all.masked.pb
+ln -f $y/$m/$d/public-$today.all.masked.pb.gz $archiveRoot/public-latest.all.masked.pb.gz
+ln -f $y/$m/$d/public-$today.all.masked.vcf.gz $archiveRoot/public-latest.all.masked.vcf.gz
+ln -f $y/$m/$d/public-$today.metadata.tsv.gz $archiveRoot/public-latest.metadata.tsv.gz
+ln -f $y/$m/$d/hgPhyloPlace.description.txt $archiveRoot/public-latest.version.txt
 # Update 'latest' protobuf, metadata and desc in and cgi-bin{,-angie}/hgPhyloPlaceData/wuhCor1/
 for dir in /usr/local/apache/cgi-bin{-angie,-beta,}/hgPhyloPlaceData/wuhCor1; do
     ln -sf `pwd`/public-$today.all.masked.pb $dir/public-latest.all.masked.pb
     ln -sf `pwd`/public-$today.metadata.tsv.gz $dir/public-latest.metadata.tsv.gz
     ln -sf `pwd`/hgPhyloPlace.description.txt $dir/public-latest.version.txt
 done