2ba814d9a2b0df7bdc2cce0bb3e52464288a61bc angie Tue Jul 13 09:44:37 2021 -0700 no need to run matUtils extract -u twice. diff --git src/hg/utils/otto/sarscov2phylo/updateCombinedTree.sh src/hg/utils/otto/sarscov2phylo/updateCombinedTree.sh index dcca06e..bc8966b 100755 --- src/hg/utils/otto/sarscov2phylo/updateCombinedTree.sh +++ src/hg/utils/otto/sarscov2phylo/updateCombinedTree.sh @@ -466,32 +466,31 @@ # EPI_ISL_ ID to public sequence name mapping, so if users upload EPI_ISL IDs for which we have # public names & IDs, we can match them. cut -f 1,3 $epiToPublic > epiToPublic.latest # Update links to latest public+GISAID protobuf and metadata in hgwdev cgi-bin directories for dir in /usr/local/apache/cgi-bin{-angie,-beta,}/hgPhyloPlaceData/wuhCor1; do ln -sf `pwd`/gisaidAndPublic.$today.masked.pb $dir/public.plusGisaid.latest.masked.pb ln -sf `pwd`/gisaidAndPublic.$today.metadata.tsv.gz \ $dir/public.plusGisaid.latest.metadata.tsv.gz ln -sf `pwd`/hgPhyloPlace.plusGisaid.description.txt $dir/public.plusGisaid.latest.version.txt ln -sf `pwd`/epiToPublic.latest $dir/ done # Extract public samples from tree -$matUtils extract -i gisaidAndPublic.$today.masked.pb -u newNames -grep -v EPI_ISL_ newNames > newPublicNames +grep -v EPI_ISL_ samples.$today > newPublicNames $matUtils extract -i gisaidAndPublic.$today.masked.pb \ -s newPublicNames \ -O -o public-$today.all.masked.pb # Extract Newick and VCF from public-only tree $matUtils extract -i public-$today.all.masked.pb \ -t public-$today.all.nwk \ -v public-$today.all.masked.vcf gzip -f public-$today.all.masked.vcf zcat gisaidAndPublic.$today.metadata.tsv.gz \ | grep -v EPI_ISL_ \ | gzip -c \ > public-$today.metadata.tsv.gz grep -v EPI_ISL_ cladeToName > cladeToPublicName grep -v EPI_ISL_ lineageToName > lineageToPublicName