a501a677b1027e7103b547e54b0e2ee286ca09e7 angie Fri Mar 5 15:39:35 2021 -0800
Make getNcbi.sh more efficient: fetch only new IDs from BioSample; run nextclade and pangolin only on new sequences.

diff --git src/hg/utils/otto/sarscov2phylo/nextcladeNcbi.sh src/hg/utils/otto/sarscov2phylo/nextcladeNcbi.sh
deleted file mode 100755
index 4e56ea4..0000000
--- src/hg/utils/otto/sarscov2phylo/nextcladeNcbi.sh
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/bin/bash
-# Don't exit on error, just attempt to run nextclade on ncbi.latest
-
-# Do not modify this script, modify the source tree copy:
-# kent/src/hg/utils/otto/sarscov2phylo/nextcladeNcbi.sh
-
-source ~/.bashrc
-
-ottoDir=/hive/data/outside/otto/sarscov2phylo
-ncbiDir=$ottoDir/ncbi.latest
-
-cd $ncbiDir
-
-# Nextclade needs input to be split into reasonably sized chunks (as of Jan. 2021).
-splitDir=splitForNextclade
-rm -rf $splitDir
-mkdir $splitDir
-faSplit about <(xzcat genbank.fa.xz) 30000000 $splitDir/chunk
-
-rm -f nextclade.log nextclade.tsv
-for chunkFa in $splitDir/chunk*.fa; do
-    nextclade -j 50 -i $chunkFa -t >(cut -f 1,2 | tail -n+2 >> nextclade.tsv) >& nextclade.log
-done
-
-rm -rf $splitDir
-exit 0
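
Note: this commit deletes the standalone nextcladeNcbi.sh and folds incremental nextclade/pangolin handling into getNcbi.sh, whose changes are not shown in this hunk. Below is a minimal sketch of the "run nextclade only on new sequences" idea described in the commit message, assuming the same file layout as the deleted script; prevDir, prevIds, newIds, and newSeqs.fa are illustrative names, not taken from getNcbi.sh.

#!/bin/bash
ottoDir=/hive/data/outside/otto/sarscov2phylo
ncbiDir=$ottoDir/ncbi.latest
prevDir=$ottoDir/ncbi.prev        # assumption: previous run's results live here
cd $ncbiDir

# IDs already called by nextclade in the previous run (column 1 of its TSV output):
cut -f 1 $prevDir/nextclade.tsv | sort > prevIds

# IDs in the current GenBank download (first word of each fasta header, without '>'):
xzcat genbank.fa.xz | grep '^>' | awk '{print substr($1, 2)}' | sort > currentIds

# IDs that have not been run through nextclade yet:
comm -13 prevIds currentIds > newIds

# Pull out only the new sequences (faSomeRecords is a kent utility: in.fa listFile out.fa):
xzcat genbank.fa.xz > genbank.fa
faSomeRecords genbank.fa newIds newSeqs.fa

# Carry over previous calls and append calls for the new sequences only.
# The nextclade invocation matches the one in the deleted script; if newSeqs.fa
# were very large it could still be split with faSplit as the deleted script did.
cp $prevDir/nextclade.tsv nextclade.tsv
if [ -s newSeqs.fa ]; then
    nextclade -j 50 -i newSeqs.fa -t >(cut -f 1,2 | tail -n+2 >> nextclade.tsv) >& nextclade.log
fi
rm -f genbank.fa prevIds currentIds newIds newSeqs.fa

The same diff-the-ID-lists pattern would apply to pangolin.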