179775232ff5fe31323f984619b75cbc77204c00
angie
  Sun Jun 27 20:22:29 2021 -0700
Update the nextclade invocation for nextclade 1.0 (new required command-line arguments).

diff --git src/hg/utils/otto/sarscov2phylo/getCogUk.sh src/hg/utils/otto/sarscov2phylo/getCogUk.sh
index f652693..92d1645 100755
--- src/hg/utils/otto/sarscov2phylo/getCogUk.sh
+++ src/hg/utils/otto/sarscov2phylo/getCogUk.sh
@@ -20,23 +20,34 @@
 curl -S -s $cogUrlBase/cog_global_tree.newick > cog_global_tree.newick
 
 tail -n +2 cog_metadata.csv \
 | awk -F, '{print $1 "\t" $5;}' | sort > cogUkToDate
 
 # Reuse nextclade assignments for older sequences; compute nextclade assignments for new seqs.
 cp $ottoDir/cogUk.latest/nextclade.tsv .
 comm -13 <(cut -f 1 nextclade.tsv | sort) <(fastaNames cog_all.fasta.xz | sort) \
     > seqsForNextclade
 if [ -s seqsForNextclade ]; then
     faSomeRecords <(xzcat cog_all.fasta.xz) seqsForNextclade seqsForNextclade.fa
     splitDir=splitForNextclade
     rm -rf $splitDir
     mkdir $splitDir
     faSplit about seqsForNextclade.fa 30000000 $splitDir/chunk
+    nDataDir=~angie/github/nextclade/data/sars-cov-2
+    outDir=$(mktemp -d)
+    outTsv=$(mktemp)
     for chunkFa in $splitDir/chunk*.fa; do
-        nextclade -j 50 -i $chunkFa -t >(cut -f 1,2 | tail -n+2 >> nextclade.tsv) >& nextclade.log
+        nextclade -j 50 -i $chunkFa \
+            --input-root-seq $nDataDir/reference.fasta \
+            --input-tree $nDataDir/tree.json \
+            --input-qc-config $nDataDir/qc.json \
+            --output-dir $outDir \
+            --output-tsv $outTsv >& nextclade.log
+        cut -f 1,2 $outTsv | tail -n+2 >> nextclade.tsv
+        rm $outTsv
     done
+    rm -rf $outDir
     rm -rf $splitDir
 fi
 
 rm -f $ottoDir/cogUk.latest
 ln -s cogUk.$today $ottoDir/cogUk.latest