ba4425b7d509f2d52bec87b22c3cd8a6802ef3db
angie
Thu Jun 22 10:17:25 2023 -0700
Not making full nextalign.fa.xz at this point, just incrementals.

diff --git src/hg/utils/otto/sarscov2phylo/getCncb.sh src/hg/utils/otto/sarscov2phylo/getCncb.sh
index 49dba58..abf4910 100755
--- src/hg/utils/otto/sarscov2phylo/getCncb.sh
+++ src/hg/utils/otto/sarscov2phylo/getCncb.sh
@@ -89,31 +89,31 @@
 cat <(xzcat ../cncb.latest/cncb.nonGenBank.acc.fasta.xz) new.accs.fa \
 | xz -T 20 > cncb.nonGenBank.acc.fasta.new.xz
 mv cncb.nonGenBank.acc.fasta.new.xz cncb.nonGenBank.acc.fasta.xz
 
 xzcat cncb.nonGenBank.acc.fasta.xz \
 | faSomeRecords stdin <(cut -f 1 accToNameBarAcc.tsv) stdout \
 | faRenameRecords stdin accToNameBarAcc.tsv cncb.nonGenBank.fasta
 
 # Run nextclade
 cp ../cncb.latest/nextclade.full.tsv.gz .
 cp ../cncb.latest/nextclade.tsv .
 if [ -s new.accs.fa ]; then
     nDataDir=~angie/github/nextclade/data/sars-cov-2
     time nextclade run -j 20 new.accs.fa \
         --input-dataset $nDataDir \
-        --output-fasta nextalign.fa.xz \
+        --output-fasta nextalign.new.fa.xz \
         --output-tsv nextclade.new.full.tsv.gz >& nextclade.log
     zcat nextclade.new.full.tsv.gz | cut -f 1,2 | tail -n+2 >> nextclade.tsv
     sort -u nextclade.tsv > tmp
     mv tmp nextclade.tsv
     cat nextclade.new.full.tsv.gz >> nextclade.full.tsv.gz
 fi
 
 # Run pangolin
 cp ../cncb.latest/pangolin.tsv .
 if [ -s new.accs.fa ]; then
     set +x
     . ~/.bashrc
     conda activate pangolin
     set -x
     set -e