4da885c024760ff033a7bf8a3a5ea38a3eb37852 angie Mon Mar 1 14:08:12 2021 -0800 Instead of running nextclade on all COG-UK sequences every night, run it only on new sequences. diff --git src/hg/utils/otto/sarscov2phylo/nextcladeCogUk.sh src/hg/utils/otto/sarscov2phylo/nextcladeCogUk.sh deleted file mode 100755 index b55195a..0000000 --- src/hg/utils/otto/sarscov2phylo/nextcladeCogUk.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash -# Don't exit on error, just attempt to run nextclade on cogUk.latest - -# Do not modify this script, modify the source tree copy: -# kent/src/hg/utils/otto/sarscov2phylo/nextcladeCogUk.sh - -source ~/.bashrc - -ottoDir=/hive/data/outside/otto/sarscov2phylo -cogUkDir=$ottoDir/cogUk.latest - -cd $cogUkDir - -# Nextclade needs input to be split into reasonably sized chunks (as of Jan. 2021). -splitDir=splitForNextclade -rm -rf $splitDir -mkdir $splitDir -faSplit about <(xzcat cog_all.fasta.xz) 30000000 $splitDir/chunk - -rm -f nextclade.log nextclade.tsv -for chunkFa in $splitDir/chunk*.fa; do - nextclade -j 50 -i $chunkFa -t >(cut -f 1,2 | tail -n+2 >> nextclade.tsv) >& nextclade.log -done - -rm -rf $splitDir -exit 0