428e9023e880b56314b2a1b9725415f269f1e54a angie Fri Aug 26 11:42:45 2022 -0700
Use minimap2 -x asm20 --score-N=0 as pangolin does, to ensure consistent alignment to reference before masking & hashing.

diff --git src/hg/utils/otto/sarscov2phylo/pangoHash.sh src/hg/utils/otto/sarscov2phylo/pangoHash.sh
index 8756977..9cc8253 100755
--- src/hg/utils/otto/sarscov2phylo/pangoHash.sh
+++ src/hg/utils/otto/sarscov2phylo/pangoHash.sh
@@ -1,33 +1,33 @@
 #!/bin/bash
 source ~/.bashrc
 conda activate pangolin
 
 set -beEu -o pipefail
 
 inputFasta=$1
 outputHash=$2
 if [ $# -ge 3 ]; then
     threads=$3
 else
     threads=1
 fi
 
 pangoReference=~angie/github/pangolin/pangolin/data/reference.fasta
 trimStart=265
 trimEnd=29674
 
 tmpSam=$(mktemp)
 tmpLog=$(mktemp)
 tmpAliMaskedFasta=$(mktemp)
-minimap2 -a -x asm5 --sam-hit-only --secondary=no -t $threads \
+minimap2 -a -x asm20 --sam-hit-only --secondary=no --score-N=0 -t $threads \
     $pangoReference $inputFasta -o $tmpSam &> $tmpLog
 gofasta sam toMultiAlign \
     -s $tmpSam \
     -t $threads \
     --reference $pangoReference \
     --trimstart $trimStart \
     --trimend $trimEnd \
     --trim \
     --pad > $tmpAliMaskedFasta 2>>$tmpLog
 faMd5 -threads=$threads $tmpAliMaskedFasta $outputHash
 rm $tmpSam $tmpLog $tmpAliMaskedFasta