71a5bf4d42948792ba96729c01bec373eed86918
angie
Sun Feb 4 09:16:38 2024 -0800
Use nextclade v3 output column indices.

diff --git src/hg/utils/otto/sarscov2phylo/getCogUk.sh src/hg/utils/otto/sarscov2phylo/getCogUk.sh
index be1c1af..85ee6c0 100755
--- src/hg/utils/otto/sarscov2phylo/getCogUk.sh
+++ src/hg/utils/otto/sarscov2phylo/getCogUk.sh
@@ -42,31 +42,31 @@
 
 curlRetry $cogUrlBase/cog_all.fasta.gz
 curlRetry $cogUrlBase/cog_metadata.csv.gz
 curlRetry $cogUrlBase/cog_global_tree.newick
 
 zcat cog_all.fasta.gz | xz -T 20 > cog_all.fasta.xz
 rm cog_all.fasta.gz
 
 zcat cog_metadata.csv.gz \
 | tail -n +2 \
 | awk -F, '{print $1 "\t" $5;}' | sort > cogUkToDate
 
 # Reuse nextclade assignments for older sequences; compute nextclade assignments for new seqs.
 zcat $ottoDir/cogUk.latest/nextclade.full.tsv.gz > nextclade.full.tsv
 cp $ottoDir/cogUk.latest/nextalign.fa.xz .
-comm -13 <(cut -f 1 nextclade.full.tsv | sort) <(fastaNames cog_all.fasta.xz | sort) \
+comm -13 <(cut -f 2 nextclade.full.tsv | sort) <(fastaNames cog_all.fasta.xz | sort) \
     > seqsForNextclade
 if [ -s seqsForNextclade ]; then
     nDataDir=~angie/github/nextclade/data/sars-cov-2
     outTsv=$(mktemp)
     outFa=$(mktemp)
     faSomeRecords <(xzcat cog_all.fasta.xz) seqsForNextclade stdout \
     | nextclade run -j 30 \
         --input-dataset $nDataDir \
         --output-fasta $outFa \
         --output-tsv $outTsv >& nextclade.log
     tail -n+2 $outTsv | sed -re 's/"//g;' >> nextclade.full.tsv
     xz -T 20 < $outFa >> nextalign.fa.xz
     rm -f $outTsv $outFa
 fi
 pigz -f -p 8 nextclade.full.tsv