b81e6071edc73b0d973b29689c9d2dd6c84b8e8d angie Mon Oct 10 15:48:55 2022 -0700 Tighten -maxDiff since it is still a lot less stringent than the --max-path-length filtering applied after placement. diff --git src/hg/utils/otto/sarscov2phylo/makeNewMaskedVcf.sh src/hg/utils/otto/sarscov2phylo/makeNewMaskedVcf.sh index a5af428..3de9790 100755 --- src/hg/utils/otto/sarscov2phylo/makeNewMaskedVcf.sh +++ src/hg/utils/otto/sarscov2phylo/makeNewMaskedVcf.sh @@ -300,23 +300,23 @@ >> $renaming set -o pipefail fi if [ -s newGisaid.filtered.fa ]; then zcat $gisaidDir/metadata_batch_$today.tsv.gz \ | grep -Fwf <(fastaNames newGisaid.filtered.fa) \ | tawk '$3 != "" {print $3 "\t" $1 "|" $3 "|" $5;}' \ >> $renaming fi wc -l $renaming # Make masked VCF tawk '{ if ($1 ~ /^#/) { print; } else if ($7 == "mask") { $1 = "NC_045512v2"; print; } }' \ $problematicSitesVcf > mask.vcf time cat <(twoBitToFa $ref2bit stdout) $alignedFa \ -| faToVcf -maxDiff=1000 \ +| faToVcf -maxDiff=200 \ -excludeFile=exclude.ids \ -verbose=2 stdin stdout \ | vcfRenameAndPrune stdin $renaming stdout \ | vcfFilter -excludeVcf=mask.vcf stdin \ | gzip -c \ > new.masked.vcf.gz