404482949921223c604b2361aeaa5d2e37b5d990
angie
  Fri Dec 2 11:17:57 2022 -0800
Use multiple cores in pangolin for cluster run -- the number of jobs per machine is limited by RAM.

diff --git src/hg/utils/otto/sarscov2phylo/runPangolin.sh src/hg/utils/otto/sarscov2phylo/runPangolin.sh
index 9488261..b10947c 100755
--- src/hg/utils/otto/sarscov2phylo/runPangolin.sh
+++ src/hg/utils/otto/sarscov2phylo/runPangolin.sh
@@ -1,13 +1,16 @@
 #!/bin/bash
 source ~/.bashrc
 conda activate pangolin
 set -beEu -x -o pipefail
 
 # Run pangolin/pangoLEARN on a file (not pipe) and output full CSV
 # (suitable for cluster run on faSplit sequence chunks)
 
 fa=$1
 out=$fa.pangolin.csv
+
+threadCount=5
+
 logfile=$(mktemp)
-pangolin --analysis-mode pangolearn $fa --outfile $out > $logfile 2>&1
+pangolin -t $threadCount --analysis-mode pangolearn $fa --outfile $out > $logfile 2>&1
 rm $logfile