#!/bin/bash
# nextcladeCogUk.sh - best-effort run of nextclade on the sequences in cogUk.latest.
#
# Provenance: added in commit acd9854d3c6537ff237c597534abd5b9afbb7a75
# (angie, Thu Feb 25 10:40:45 2021 -0800): "Adding cron scripts for nightly
# update of public tree, VCFs, metadata and protobufs for hgPhyloPlace."
#
# Don't exit on error, just attempt to run nextclade on cogUk.latest.
# The script always exits 0 so that a nextclade failure is not reported as a
# failure of this script.
#
# Outputs (in $cogUkDir):
#   nextclade.tsv - columns 1 and 2 of nextclade's TSV output for every chunk,
#                   header lines removed
#   nextclade.log - stdout and stderr of every nextclade invocation
#
# Do not modify this script, modify the source tree copy:
# kent/src/hg/utils/otto/sarscov2phylo/nextcladeCogUk.sh

source ~/.bashrc

ottoDir=/hive/data/outside/otto/sarscov2phylo
cogUkDir=$ottoDir/cogUk.latest

# Everything below (including rm -rf) uses paths relative to $cogUkDir, so if
# we can't get there, do nothing rather than operate on the wrong directory.
if ! cd "$cogUkDir"; then
    echo "nextcladeCogUk.sh: cannot cd to $cogUkDir; skipping nextclade run" >&2
    exit 0
fi

# Nextclade needs input to be split into reasonably sized chunks (as of Jan. 2021).
splitDir=splitForNextclade
rm -rf "$splitDir"
mkdir "$splitDir"
faSplit about <(xzcat cog_all.fasta.xz) 30000000 "$splitDir/chunk"

# Start from fresh, empty outputs; both files exist afterwards even if no chunk
# could be processed.
rm -f nextclade.log nextclade.tsv
touch nextclade.log nextclade.tsv

for chunkFa in "$splitDir"/chunk*.fa; do
    # If faSplit produced nothing, the glob stays literal; skip it.
    [[ -e "$chunkFa" ]] || continue

    # Have nextclade write a per-chunk TSV file instead of an asynchronous
    # process substitution, so the extraction below is finished before the next
    # chunk starts and before the script exits.
    chunkTsv=${chunkFa%.fa}.tsv

    # Append to the log so output from every chunk is kept, not just the last.
    nextclade -j 50 -i "$chunkFa" -t "$chunkTsv" >> nextclade.log 2>&1

    # Keep the first two columns and drop each chunk's header line.
    if [[ -e "$chunkTsv" ]]; then
        cut -f 1,2 "$chunkTsv" | tail -n+2 >> nextclade.tsv
    fi
done

# Removes the chunk FASTA files and the per-chunk TSV files.
rm -rf "$splitDir"
exit 0