71a5bf4d42948792ba96729c01bec373eed86918 angie Sun Feb 4 09:16:38 2024 -0800 Use nextclade v3 output column indices. diff --git src/hg/utils/otto/sarscov2phylo/getCncb.sh src/hg/utils/otto/sarscov2phylo/getCncb.sh index c06af03..44976b8 100755 --- src/hg/utils/otto/sarscov2phylo/getCncb.sh +++ src/hg/utils/otto/sarscov2phylo/getCncb.sh @@ -93,31 +93,31 @@ xzcat cncb.nonGenBank.acc.fasta.xz \ | faSomeRecords stdin <(cut -f 1 accToNameBarAcc.tsv) stdout \ | faRenameRecords stdin accToNameBarAcc.tsv stdout \ | xz -T 20 > cncb.nonGenBank.fasta.xz # Run nextclade cp ../cncb.latest/nextclade.full.tsv.gz . cp ../cncb.latest/nextclade.tsv . if [ -s new.accs.fa ]; then nDataDir=~angie/github/nextclade/data/sars-cov-2 time nextclade run -j 20 new.accs.fa \ --input-dataset $nDataDir \ --output-fasta nextalign.new.fa.xz \ --output-tsv nextclade.new.full.tsv.gz >& nextclade.log - zcat nextclade.new.full.tsv.gz | cut -f 1,7 | tail -n+2 >> nextclade.tsv + zcat nextclade.new.full.tsv.gz | cut -f 2,8 | tail -n+2 >> nextclade.tsv sort -u nextclade.tsv > tmp mv tmp nextclade.tsv cat nextclade.new.full.tsv.gz >> nextclade.full.tsv.gz fi # Run pangolin cp ../cncb.latest/pangolin.tsv . if [ -s new.accs.fa ]; then set +x . ~/.bashrc conda activate pangolin set -x set -e time pangolin -t 20 new.accs.fa --skip-scorpio --outfile lineages.csv \ >& pangolin.log