404482949921223c604b2361aeaa5d2e37b5d990
angie
Fri Dec 2 11:17:57 2022 -0800
Use multiple cores in pangolin for cluster run -- the number of jobs per machine is limited by RAM.

diff --git src/hg/utils/otto/sarscov2phylo/runPangolin.sh src/hg/utils/otto/sarscov2phylo/runPangolin.sh
index 9488261..b10947c 100755
--- src/hg/utils/otto/sarscov2phylo/runPangolin.sh
+++ src/hg/utils/otto/sarscov2phylo/runPangolin.sh
@@ -1,13 +1,16 @@
 #!/bin/bash
 source ~/.bashrc
 conda activate pangolin
 set -beEu -x -o pipefail
 
 # Run pangolin/pangoLEARN on a file (not pipe) and output full CSV
 # (suitable for cluster run on faSplit sequence chunks)
 
 fa=$1
 out=$fa.pangolin.csv
+
+threadCount=5
+
 logfile=$(mktemp)
-pangolin --analysis-mode pangolearn $fa --outfile $out > $logfile 2>&1
+pangolin -t $threadCount --analysis-mode pangolearn $fa --outfile $out > $logfile 2>&1
 rm $logfile