5b376b2547f4a5018823202b0379aad6a5d06c69
angie
  Wed Dec 15 19:27:39 2021 -0800
The latest nextclade adds double quotes around names with spaces; strip those quotes out of the output appended to nextclade.tsv.

diff --git src/hg/utils/otto/sarscov2phylo/getNcbi.sh src/hg/utils/otto/sarscov2phylo/getNcbi.sh
index 7119c51..17b617b 100755
--- src/hg/utils/otto/sarscov2phylo/getNcbi.sh
+++ src/hg/utils/otto/sarscov2phylo/getNcbi.sh
@@ -85,31 +85,31 @@
 else
     cp /dev/null nextclade.tsv
     faSplit about <(xzcat genbank.fa.xz) 30000000 $splitDir/chunk
 fi
 if (( $(ls -1 splitForNextclade | wc -l) > 0 )); then
     nDataDir=~angie/github/nextclade/data/sars-cov-2
     outDir=$(mktemp -d)
     outTsv=$(mktemp)
     for chunkFa in $splitDir/chunk*.fa; do
         nextclade -j 50 -i $chunkFa \
             --input-root-seq $nDataDir/reference.fasta \
             --input-tree $nDataDir/tree.json \
             --input-qc-config $nDataDir/qc.json \
             --output-dir $outDir \
             --output-tsv $outTsv >& nextclade.log
-        cut -f 1,2 $outTsv | tail -n+2 >> nextclade.tsv
+        cut -f 1,2 $outTsv | tail -n+2 | sed -re 's/"//g;' >> nextclade.tsv
         rm $outTsv
     done
     rm -rf $outDir
 fi
 wc -l nextclade.tsv
 rm -rf $splitDir nextclade.fa
 
 conda activate pangolin
 runPangolin() {
     fa=$1
     out=$fa.pangolin.csv
     logfile=$(mktemp)
     pangolin $fa --outfile $out > $logfile 2>&1
     rm $logfile
 }