src/hg/utils/otto/sarscov2phylo/gisaidFromChunks.sh 1e6e36834dceedd5c2cb79834bcc84ee0bac4be5

1e6e36834dceedd5c2cb79834bcc84ee0bac4be5
angie
  Mon Jun 28 08:44:36 2021 -0700
Use make -j for pangolin (no multithreading at this point).

diff --git src/hg/utils/otto/sarscov2phylo/gisaidFromChunks.sh src/hg/utils/otto/sarscov2phylo/gisaidFromChunks.sh
index 1bd7e1a..ffe1eb9 100755
--- src/hg/utils/otto/sarscov2phylo/gisaidFromChunks.sh
+++ src/hg/utils/otto/sarscov2phylo/gisaidFromChunks.sh
@@ -1,31 +1,32 @@
 #!/bin/bash
 source ~/.bashrc
 set -beEu -o pipefail
 
 #	Do not modify this script, modify the source tree copy:
 #	kent/src/hg/utils/otto/sarscov2phylo/gisaidFromChunks.sh
 
 # Make nextfasta and nextmeta substitute files from chunks of downloaded GISAID sequences
 
 lastRealNextmeta=metadata_2020-12-08_20-35.tsv.gz
 
 today=$(date +%F)
 
 # Run pangolin and nextclade on any chunks that need it
 cd /hive/users/angie/gisaid/chunks
-make
+make nextclade.tsv
+make -j10
 
 cd /hive/users/angie/gisaid
 # Glom all the chunks together.
 # Remove initial "hCoV-19/" and remove spaces a la nextmeta (e.g. "Hong Kong" -> "HongKong").
 # Strip single quotes (e.g. "Cote d'Ivoire" --> "CotedIvoire").
 # Also remove commas and parentheses from names; a stray comma caused a Newick parsing error ("Hungary/US-32533w,/2020").
 # Keep the strain|epiId|date "full names".
 time xzcat chunks/gisaid_epi_isl_*.fa.xz \
 | sed -re 's@^>hCo[Vv]-19/+@>@;  s/[ '"'"',()]//g;  s/\r$//;' \
 | xz -T 50 \
     > gisaid_fullNames_$today.fa.xz
 
 # Make tmp files with a fullName key and various columns that we'll join together.
 fastaNames gisaid_fullNames_$today.fa.xz \
 | awk -F\| -vOFS="\t" '{print $0, $1, $2, $3;}' \