e931fd3e5fdab83eeb110b2450a0f365eff170e2 angie Mon Oct 4 17:04:17 2021 -0700 Omit ORF1a from taxodium amino acid annotations (redundant with ORF1ab). Add optional baseProtobuf arg to updateCombinedTree.sh / makeNewMaskedVcf.sh to start with a custom protobuf (e.g. optimized) instead of yesterday's protobuf. diff --git src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh index dd6e7e7..055389f 100755 --- src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh +++ src/hg/utils/otto/sarscov2phylo/extractPublicTree.sh @@ -87,31 +87,32 @@ rm public-$today.all.masked.pb ln -f public-$today.all.masked.nextclade.pangolin.pb public-$today.all.masked.pb cncbDate=$(ls -l $cncbDir | sed -re 's/.*cncb\.([0-9]{4}-[0-9][0-9]-[0-9][0-9]).*/\1/') echo "sarscov2phylo release 13-11-20; NCBI and COG-UK sequences downloaded $today; CNCB sequences downloaded $cncbDate" \ > version.txt $matUtils extract -i public-$today.all.masked.pb -u samples.public.$today sampleCountComma=$(echo $(wc -l < samples.public.$today) \ | sed -re 's/([0-9]+)([0-9]{3})$/\1,\2/; s/([0-9]+)([0-9]{3},[0-9]{3})$/\1,\2/;') echo "$sampleCountComma genomes from GenBank, COG-UK and CNCB ($today); sarscov2phylo 13-11-20 tree with newer sequences added by UShER" \ > hgPhyloPlace.description.txt # Make Taxodium-formatted protobuf for display -zcat /hive/data/genomes/wuhCor1/goldenPath/bigZips/genes/ncbiGenes.gtf.gz > ncbiGenes.gtf +zcat /hive/data/genomes/wuhCor1/goldenPath/bigZips/genes/ncbiGenes.gtf.gz \ +| grep -v '"ORF1a"' > ncbiGenes.gtf zcat /hive/data/genomes/wuhCor1/wuhCor1.fa.gz > wuhCor1.fa zcat public-$today.metadata.tsv.gz > metadata.tmp.tsv time $matUtils extract -i public-$today.all.masked.pb \ -f wuhCor1.fa \ -g ncbiGenes.gtf \ -M metadata.tmp.tsv \ --write-taxodium public-$today.all.masked.taxodium.pb rm metadata.tmp.tsv wuhCor1.fa gzip -f public-$today.all.masked.taxodium.pb # Link to public trees download directory hierarchy 
archiveRoot=/hive/users/angie/publicTrees read y m d < <(echo $today | sed -re 's/-/ /g') archive=$archiveRoot/$y/$m/$d mkdir -p $archive