4e46cbfb5a3e3c506b82234e1dbb774b99bb9168 angie Wed Mar 3 18:42:02 2021 -0800 Add protobufs with nextclade and pangolin annotations to public tree archive dir. Update public-latest.* links in more cgi-bin* locations. Clean up a bit. diff --git src/hg/utils/otto/sarscov2phylo/updatePublicTree.sh src/hg/utils/otto/sarscov2phylo/updatePublicTree.sh index b108dfe..32c13ae 100755 --- src/hg/utils/otto/sarscov2phylo/updatePublicTree.sh +++ src/hg/utils/otto/sarscov2phylo/updatePublicTree.sh @@ -182,32 +182,31 @@ > public-$today.all.minAf.001.vcf wc -l public-$today.all.minAf.001.vcf bgzip -f public-$today.all.minAf.001.vcf tabix -p vcf public-$today.all.minAf.001.vcf.gz minAc01=$(( (($sampleCount + 99) / 100) )) vcfFilter -minAc=$minAc01 -rename public-$today.all.minAf.001.vcf.gz \ > public-$today.all.minAf.01.vcf wc -l public-$today.all.minAf.01.vcf bgzip -f public-$today.all.minAf.01.vcf tabix -p vcf public-$today.all.minAf.01.vcf.gz # Parsimony scores on collapsed tree time $find_parsimonious_assignments --tree public-$today.all.nwk \ --vcf <(gunzip -c public-$today.all.vcf.gz) \ -> fpa.out -tail -n+2 fpa.out \ +| tail -n+2 \ | sed -re 's/^[A-Z]([0-9]+)[A-Z,]+.*parsimony_score=([0-9]+).*/\1\t\2/;' \ | tawk '{print "NC_045512v2", $1-1, $1, $2;}' \ | sort -k2n,2n \ > public-$today.all.parsimony.bg bedGraphToBigWig public-$today.all.parsimony.bg /hive/data/genomes/wuhCor1/chrom.sizes \ public-$today.all.parsimony.bw # Metadata for hgPhyloPlace: # Header names same as nextmeta (with strain first) so hgPhyloPlace recognizes them: echo -e "strain\tgenbank_accession\tdate\tcountry\thost\tcompleteness\tlength\tNextstrain_clade\tpangolin_lineage" \ > public-$today.metadata.tsv # It's not always possible to recreate both old and new names correctly from metadata, # so make a file to translate accession or COG-UK to the name used in VCF, tree and protobufs. cut -f 2 $renaming \ | awk -F\| '{ if ($3 == "") { print $1 "\t" $0; } else { print $2 "\t" $0; } }' \ @@ -347,28 +346,35 @@ ln -sf `pwd`/public-$today.nextstrainColors.gz \ /gbdb/wuhCor1/sarsCov2PhyloPub/public.all.nextstrainColors.gz ln -sf `pwd`/version.txt /gbdb/wuhCor1/sarsCov2PhyloPub/public.all.version.txt fi # Link to public trees download directory hierarchy y=$(date +%Y) m=$(date +%m) d=$(date +%d) archiveRoot=/hive/users/angie/publicTrees archive=$archiveRoot/$y/$m/$d mkdir -p $archive ln `pwd`/public-$today.all.nwk $archive/ ln `pwd`/public-$today.all.masked.{pb,vcf.gz} $archive/ ln `pwd`/public-$today.metadata.tsv.gz $archive/ +ln `pwd`/public-$today.all.masked.nextclade.pangolin.pb $archive/ +ln `pwd`/cladeToPublicName $archive/ +ln `pwd`/lineageToPublicName $archive/ + # Update 'latest' in $archiveRoot ln -f `pwd`/public-$today.all.nwk $archiveRoot/public-latest.all.nwk ln -f `pwd`/public-$today.all.masked.pb $archiveRoot/public-latest.all.masked.pb ln -f `pwd`/public-$today.all.masked.vcf.gz $archiveRoot/public-latest.all.masked.vcf.gz ln -f `pwd`/public-$today.metadata.tsv.gz $archiveRoot/public-latest.metadata.tsv.gz ln -f `pwd`/hgPhyloPlace.description.txt $archiveRoot/public-latest.version.txt # Update 'latest' protobuf, metadata and desc in and cgi-bin{,-angie}/hgPhyloPlaceData/wuhCor1/ -for dir in /usr/local/apache/cgi-bin{-angie,}/hgPhyloPlaceData/wuhCor1; do +for dir in /usr/local/apache/cgi-bin{-angie,-demo-angie,-beta,}/hgPhyloPlaceData/wuhCor1; do ln -sf `pwd`/public-$today.all.masked.pb $dir/public-latest.all.masked.pb ln -sf `pwd`/public-$today.metadata.tsv.gz $dir/public-latest.metadata.tsv.gz ln -sf `pwd`/hgPhyloPlace.description.txt $dir/public-latest.version.txt done + +# Clean up +nice xz new*fa &