d898714060e14c8a185e3fde3d84dafe32f93baa
angie
  Tue Feb 22 11:05:07 2022 -0800
Speed up compression by using more threads.

diff --git src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh
index 1f22096..11bca43 100755
--- src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh
+++ src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh
@@ -52,34 +52,34 @@
 # Add pangolin lineage annotations to public protobuf
 grep -v EPI_ISL lineageToName > lineageToPublicName
 time $matUtils annotate -T 50 \
     -i public-$today.all.masked.nextclade.pb \
     -M $scriptDir/pango.clade-mutations.tsv \
     -c lineageToPublicName \
     -f 0.95 \
     -D details.pango.public \
     -o public-$today.all.masked.nextclade.pangolin.pb \
     >& annotate.pango.public
 
 # Extract Newick and VCF from public-only tree
 time $matUtils extract -i public-$today.all.masked.pb \
     -t public-$today.all.nwk \
     -v public-$today.all.masked.vcf
-time gzip -f public-$today.all.masked.vcf
+time pigz -p 8 -f public-$today.all.masked.vcf
 zcat gisaidAndPublic.$today.metadata.tsv.gz \
 | grep -v EPI_ISL_ \
-| gzip -c \
+| pigz -p 8 \
     > public-$today.metadata.tsv.gz
 
 rm public-$today.all.masked.pb
 ln -f public-$today.all.masked.nextclade.pangolin.pb public-$today.all.masked.pb
 
 cncbDate=$(ls -l $cncbDir | sed -re 's/.*cncb\.([0-9]{4}-[0-9][0-9]-[0-9][0-9]).*/\1/')
 echo "sarscov2phylo release 13-11-20; NCBI and COG-UK sequences downloaded $today; CNCB sequences downloaded $cncbDate" \
     > version.txt
 
 $matUtils extract -i public-$today.all.masked.pb -u samples.public.$today
 sampleCountComma=$(echo $(wc -l < samples.public.$today) \
                    | sed -re 's/([0-9]+)([0-9]{3})$/\1,\2/; s/([0-9]+)([0-9]{3},[0-9]{3})$/\1,\2/;')
 echo "$sampleCountComma genomes from GenBank, COG-UK and CNCB ($today); sarscov2phylo 13-11-20 tree with newer sequences added by UShER" \
     > hgPhyloPlace.description.txt