#!/bin/bash
# pangoHash.sh (src/hg/utils/otto/sarscov2phylo/pangoHash.sh)
# Added in commit 3ac59e25cc8fb9d681fcb5d52224151b41f36ce5
# (angie, Mon Feb 14 12:15:00 2022 -0800).
#
# Align, mask and compute md5sum in the same way as pangolin, for
# constructing cache file.
#
# Usage: pangoHash.sh inputFasta outputHash [threads]
#   inputFasta  FASTA handed to minimap2 as the query sequences
#   outputHash  output path handed to faMd5 as its last argument
#   threads     thread count handed to minimap2, gofasta and faMd5 (default 1)
#
# Exits non-zero if any step fails; in that case the captured tool log is
# replayed on stderr before the scratch files are removed.

# These two run ahead of `set`, so they are not subject to -e/-u.
source ~/.bashrc
conda activate pangolin
set -beEu -o pipefail

if (( $# < 2 )); then
  printf 'usage: %s inputFasta outputHash [threads]\n' "${0##*/}" >&2
  exit 1
fi

inputFasta=$1
outputHash=$2
# An omitted (or empty) third argument falls back to a single thread.
threads=${3:-1}

# Tilde must stay unquoted so that ~angie expands to angie's home directory.
pangoReference=~angie/github/pangolin/pangolin/data/reference.fasta
# Coordinates given to gofasta's --trimstart/--trimend
# (presumably the same values pangolin itself uses -- confirm against pangolin).
readonly trimStart=265
readonly trimEnd=29674

# All scratch files live in one private directory so a single trap can
# remove them on every exit path, including failures under `set -e`.
tmpDir=$(mktemp -d)
tmpSam=$tmpDir/aligned.sam
tmpLog=$tmpDir/tools.log
tmpAliMaskedFasta=$tmpDir/aliMasked.fasta

cleanup() {
  local status=$?
  # The tools' stderr is only captured in $tmpLog; surface it when something
  # went wrong so the failure is not silent once the scratch dir is gone.
  if (( status != 0 )) && [[ -s "$tmpLog" ]]; then
    cat -- "$tmpLog" >&2 || true
  fi
  rm -rf -- "$tmpDir"
}
trap cleanup EXIT

# Align the input sequences to the pangolin reference; stdout and stderr of
# minimap2 both go to the log, the alignment itself goes to $tmpSam via -o.
minimap2 -a -x asm5 --sam-hit-only --secondary=no -t "$threads" \
  "$pangoReference" "$inputFasta" -o "$tmpSam" &> "$tmpLog"

# Convert the SAM to a multiple alignment, trimmed and padded by gofasta.
gofasta sam toMultiAlign \
  -s "$tmpSam" \
  -t "$threads" \
  --reference "$pangoReference" \
  --trimstart "$trimStart" \
  --trimend "$trimEnd" \
  --trim \
  --pad > "$tmpAliMaskedFasta" 2>> "$tmpLog"

# Hash the aligned, masked sequences into the requested output file.
faMd5 -threads="$threads" "$tmpAliMaskedFasta" "$outputHash"