a37d4ffb473b6ebf6c66545f2b7d5f7eef35ef4b
max
  Fri Apr 10 03:00:33 2026 -0700
Color strVar subtracks by expected heterozygosity instead of motif period, fix hgTrackUi filter label truncation, refs #36652

Change all four strVar subtracks (webstr, tommoStr, trexplorer, viennaVntr) from
motif-period-based coloring to expected heterozygosity (het = 1 - sum(p_i^2)),
using a blue-to-red heat map: dark blue (het<0.1) through medium blue, light
purple, salmon, to dark red (het>=0.7). Add het as a filterable bigBed field with
scoreFilter and filterByRange on each track. Update mouseOver, track docs, and
makedoc. Also fix hgTrackUi to strip the "|..." suffix from autoSql comments
when displaying numeric filter labels.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

diff --git src/hg/makeDb/scripts/viennaVntr/viennaVntrHet.py src/hg/makeDb/scripts/viennaVntr/viennaVntrHet.py
new file mode 100644
index 00000000000..638454d6726
--- /dev/null
+++ src/hg/makeDb/scripts/viennaVntr/viennaVntrHet.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+"""
+Extract expected heterozygosity from Vienna ONT VAMOS multi-sample VCF.
+
+For each locus, counts allele occurrences from GT fields across all samples,
+then computes het = 1 - sum(p_i^2).
+
+Output: TSV with columns chrom, pos (1-based, matching VCF), het
+
+Usage:
+    python3 viennaVntrHet.py vamos-multisample.vcf > het.tsv
+"""
+
+import sys
+from collections import Counter
+
+
+def main():
+    if len(sys.argv) < 2:
+        print(__doc__, file=sys.stderr)
+        sys.exit(1)
+
+    vcf_path = sys.argv[1]
+    count = 0
+
+    with open(vcf_path) as f:
+        for line in f:
+            if line.startswith("#"):
+                continue
+
+            parts = line.split("\t", 10)  # only need first fields + genotypes block
+            chrom = parts[0]
+            pos = parts[1]
+
+            # Parse all GT fields (field 9 onward)
+            gt_block = line.rstrip("\n").split("\t")[9:]
+            allele_counts = Counter()
+            for gt in gt_block:
+                if gt == "./." or gt == ".|.":
+                    continue
+                sep = "|" if "|" in gt else "/"
+                for a in gt.split(sep):
+                    if a != ".":
+                        allele_counts[a] += 1
+
+            total = sum(allele_counts.values())
+            if total == 0:
+                het = -1.0
+            else:
+                sum_p_sq = sum((c / total) ** 2 for c in allele_counts.values())
+                het = round(1.0 - sum_p_sq, 3)
+
+            print(f"{chrom}\t{pos}\t{het}")
+            count += 1
+            if count % 100000 == 0:
+                print(f"  Processed {count} loci...", file=sys.stderr)
+
+    print(f"Done. Wrote {count} loci.", file=sys.stderr)
+
+
+if __name__ == "__main__":
+    main()