src/hg/utils/otto/sarscov2phylo/gisaidFromChunks.sh 3c3aea0b7973d9497b04647d7c8ed4ebb0f64f5c

3c3aea0b7973d9497b04647d7c8ed4ebb0f64f5c
angie
  Fri Dec 20 15:40:01 2024 -0800
Tweak to run one more command in parallel: move `wait` to after the backgrounded sequences_batch xz pipeline.

diff --git src/hg/utils/otto/sarscov2phylo/gisaidFromChunks.sh src/hg/utils/otto/sarscov2phylo/gisaidFromChunks.sh
index b4f3e58..7c0369c 100755
--- src/hg/utils/otto/sarscov2phylo/gisaidFromChunks.sh
+++ src/hg/utils/otto/sarscov2phylo/gisaidFromChunks.sh
@@ -39,38 +39,38 @@
 faSize -detailed  <(xzcat gisaid_fullNames_$today.fa.xz) | sort -u > tmp.lengths &
 # Lineage & clade assignments
 sort -u chunks/pangolin.tsv \
     > tmp.lineage &
 sed -re 's/"//g;' chunks/nextclade.tsv \
 | sort -u \
     > tmp.clade &
 # Countries -- go back to unstripped sequence names:
 xzcat chunks/gisaid_epi_isl_*.fa.xz \
 | grep ^\> \
 | sed -re 's@^>hCo[Vv]-19/+@@;' \
 | $scriptDir/gisaidNameToCountry.pl \
 | sort -u \
     > tmp.country &
 
-wait
-
 # Make fasta with strain-name headers a la nextfasta.
 xzcat gisaid_fullNames_$today.fa.xz \
 | sed -re '/^>/ s/\|.*//' \
 | xz -T 20 \
     > sequences_batch_$today.fa.xz &
 
+wait
+
 # Join locally computed fields and sort by EPI ID for joining with latest real nextmeta
 join -t$'\t' -a 1 tmp.first3 tmp.lengths \
 | join -t$'\t' -a 1 -o 1.1,1.2,1.3,1.4,1.5,2.2 - tmp.clade \
 | join -t$'\t' -a 1 -o 1.1,1.2,1.3,1.4,1.5,1.6,2.2 - tmp.lineage \
 | join -t$'\t' -a 1 -o 1.1,1.2,1.3,1.4,1.5,1.6,1.7,2.2 - tmp.country \
 | tawk '{print $3, $2, $4, $5, $6, $7, $8;}' \
 | sort -u \
     > tmp.epiToLocalMeta
 # Join with latest real nextmeta and put locally computed fields in nextmeta column positions.
 # Last real nextmeta has 27 columns.  These are the columns we can fill in:
 #1       strain
 #3       gisaid_epi_isl
 #4       genbank_accession # fold in later, after updating mapping
 #5       date
 #7       country