5b376b2547f4a5018823202b0379aad6a5d06c69
angie
Wed Dec 15 19:27:39 2021 -0800
The latest nextclade adds quotes around names with spaces; strip those out.

diff --git src/hg/utils/otto/sarscov2phylo/getNcbi.sh src/hg/utils/otto/sarscov2phylo/getNcbi.sh
index 7119c51..17b617b 100755
--- src/hg/utils/otto/sarscov2phylo/getNcbi.sh
+++ src/hg/utils/otto/sarscov2phylo/getNcbi.sh
@@ -85,31 +85,31 @@
 else
     cp /dev/null nextclade.tsv
     faSplit about <(xzcat genbank.fa.xz) 30000000 $splitDir/chunk
 fi
 if (( $(ls -1 splitForNextclade | wc -l) > 0 )); then
     nDataDir=~angie/github/nextclade/data/sars-cov-2
     outDir=$(mktemp -d)
     outTsv=$(mktemp)
     for chunkFa in $splitDir/chunk*.fa; do
         nextclade -j 50 -i $chunkFa \
             --input-root-seq $nDataDir/reference.fasta \
             --input-tree $nDataDir/tree.json \
             --input-qc-config $nDataDir/qc.json \
             --output-dir $outDir \
             --output-tsv $outTsv >& nextclade.log
-        cut -f 1,2 $outTsv | tail -n+2 >> nextclade.tsv
+        cut -f 1,2 $outTsv | tail -n+2 | sed -re 's/"//g;' >> nextclade.tsv
         rm $outTsv
     done
     rm -rf $outDir
 fi
 wc -l nextclade.tsv
 rm -rf $splitDir nextclade.fa
 
 conda activate pangolin
 runPangolin() {
     fa=$1
     out=$fa.pangolin.csv
     logfile=$(mktemp)
     pangolin $fa --outfile $out > $logfile 2>&1
     rm $logfile
 }