3ac59e25cc8fb9d681fcb5d52224151b41f36ce5
angie
  Mon Feb 14 12:15:00 2022 -0800
Align, mask and compute md5sum in the same way as pangolin, for constructing the cache file.

diff --git src/hg/utils/otto/sarscov2phylo/pangoHash.sh src/hg/utils/otto/sarscov2phylo/pangoHash.sh
new file mode 100755
index 0000000..8756977
--- /dev/null
+++ src/hg/utils/otto/sarscov2phylo/pangoHash.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+source ~/.bashrc
+conda activate pangolin
+set -beEu -o pipefail
+
+inputFasta=$1
+outputHash=$2
+if [ $# -ge 3 ]; then
+    threads=$3
+else
+    threads=1
+fi
+
+pangoReference=~angie/github/pangolin/pangolin/data/reference.fasta
+trimStart=265
+trimEnd=29674
+
+tmpSam=$(mktemp)
+tmpLog=$(mktemp)
+tmpAliMaskedFasta=$(mktemp)
+
+minimap2 -a -x asm5 --sam-hit-only --secondary=no  -t $threads \
+    $pangoReference $inputFasta -o $tmpSam &> $tmpLog
+gofasta sam toMultiAlign \
+    -s $tmpSam \
+    -t $threads \
+    --reference $pangoReference \
+    --trimstart $trimStart \
+    --trimend $trimEnd \
+    --trim \
+    --pad > $tmpAliMaskedFasta 2>>$tmpLog
+faMd5 -threads=$threads $tmpAliMaskedFasta $outputHash
+rm $tmpSam $tmpLog $tmpAliMaskedFasta