c7b6da0a433f53fdf3c4137a67598d099d9ce1a5
max
  Wed Jan 14 10:52:53 2026 -0800
adding SFARI SPARK to varFreqs supertrack, refs #36642

diff --git src/hg/makeDb/scripts/sparkMergeVcfAddCounts.sh src/hg/makeDb/scripts/sparkMergeVcfAddCounts.sh
new file mode 100755
index 00000000000..b983f8e8c0d
--- /dev/null
+++ src/hg/makeDb/scripts/sparkMergeVcfAddCounts.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+# Merge all per-chromosome VCFs into a single sites-only VCF
+# Uses bcftools fill-tags plugin to calculate AC/AN/AF from genotypes
+# Output INFO fields: AC, AF, AN, AQ
+
+set -euo pipefail
+
+#INBASE="vcf/SPARK.iWES_v3.2024_08.deepvariant"
+INBASE=$1
+OUTFILE=`basename $INBASE.sites.vcf.gz`
+TMPDIR="tmp_merge_$$"
+BCFTOOLS=~/software/bcftools/bin/bcftools
+export BCFTOOLS_PLUGINS=~/software/bcftools/libexec/bcftools
+export INBASE
+
+# Number of parallel jobs (adjust based on available CPUs/memory)
+NJOBS=${2:-4}
+
+mkdir -p "$TMPDIR"
+
+echo "Processing chromosomes with fill-tags plugin (parallel=$NJOBS)..."
+echo "Started at $(date)"
+
+# Function to process one chromosome
+process_chr() {
+    local chr=$1
+    local infile="${INBASE}.${chr}.vcf.gz"
+    local outfile="$TMPDIR/${chr}.sites.vcf.gz"
+
+    if [[ ! -f "$infile" ]]; then
+        echo "WARNING: $infile not found, skipping" >&2
+        return
+    fi
+
+    echo "Processing $chr..."
+
+    # Fill AC/AN/AF tags, then drop samples
+    $BCFTOOLS +fill-tags "$infile" -- -t AC,AN,AF 2>/dev/null | \
+        $BCFTOOLS view -G -Oz -o "$outfile" - 2>/dev/null
+    $BCFTOOLS index -t "$outfile"
+
+    echo "  Done: $chr"
+}
+
+export -f process_chr
+export TMPDIR BCFTOOLS BCFTOOLS_PLUGINS
+
+# Run chromosomes in parallel
+printf '%s\n' chr{1..22} chrX chrY | xargs -P "$NJOBS" -I {} bash -c 'process_chr "$@"' _ {}
+
+echo ""
+echo "Concatenating all chromosomes..."
+
+# Create file list in chromosome order
+for chr in chr{1..22} chrX chrY; do
+    echo "$TMPDIR/${chr}.sites.vcf.gz"
+done > "$TMPDIR/files.txt"
+
+# Concatenate
+$BCFTOOLS concat -f "$TMPDIR/files.txt" -Oz -o "$OUTFILE"
+$BCFTOOLS index -t "$OUTFILE"
+
+echo "Cleaning up..."
+rm -rf "$TMPDIR"
+
+echo ""
+echo "Done! Output: $OUTFILE"
+echo "Finished at $(date)"
+echo ""
+echo "Verifying output..."
+$BCFTOOLS view -h "$OUTFILE" | grep "^##INFO"
+echo ""
+echo "Sample variants:"
+$BCFTOOLS view "$OUTFILE" 2>/dev/null | grep -v "^#" | head -5