9a11061ca6b40fe16bdfd09b1af53192f6c7c85b
max
  Tue Apr 21 08:13:02 2026 -0700
lrSv: add HTML doc pages and conversion scripts for recent subtracks, + hs1 HGSVC3

Subtrack stanzas for these SV callsets landed in earlier commits but
the conversion scripts and per-track HTML description pages were
never added; trackDb therefore had no doc to serve. This commit
catches up.

Docs (new):
- colorsDbSv.html     CoLoRSdb 1,427-sample long-read SVs
- gustafsonSv.html    1KG ONT 100 (Gustafson 2024, PMID 39358015)
- hgsvc2Sv.html       HGSVC2 (Ebert 2021, PMID 33632895)
- hprc2Sv.html        HPRC release-2 pangenome SVs (no PMID yet;
                      see humanpangenome.org/hprc-data-release-2/)
- onekg3202Sr.html    1KG 3202 Illumina SHORT-READ GATK-SV
                      (Byrska-Bishop 2022, PMID 36055201)

Scripts (new):
- lrSvGustafson.as / lrSvGustafsonVcfToBed.py
- lrSvHgsvc2.as / lrSvHgsvc2TsvToBed.py  (merges insdel + inv tables)
- lrSvHprc2.as / lrSvHprc2VcfToBed.py    (streams wave-decomposed VCF,
                                          explodes multi-allelic rows,
                                          filters to SV-sized or INV)
- lrSv1kg3202Sr.as / lrSv1kg3202SrVcfToBed.py

HGSVC3 also on hs1:
- hgsvc3Sv.html: note that the hs1 build is native (not lifted):
  HGSVC3 aligned all assemblies to both GRCh38 and T2T-CHM13 and
  released separate annotation tables per reference. Added the
  T2T-CHM13 source URL to the Methods section and the hs1 hgsvc3.bb
  download link to Data Access.
- doc/hs1/lrSv.txt (new): hs1-specific wget + build steps; refers
  back to doc/hg38/lrSv.txt for the full process.

refs #36258

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

diff --git src/hg/makeDb/scripts/lrSv/lrSv1kg3202SrVcfToBed.py src/hg/makeDb/scripts/lrSv/lrSv1kg3202SrVcfToBed.py
new file mode 100644
index 00000000000..71f0f061da7
--- /dev/null
+++ src/hg/makeDb/scripts/lrSv/lrSv1kg3202SrVcfToBed.py
@@ -0,0 +1,126 @@
+#!/usr/bin/env python3
+"""Convert 1KG 3,202-sample GATK-SV short-read VCF to BED9+.
+
+Short-read comparator track for the lrSv collection.
+
+Source:
+    https://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20210124.SV_Illumina_Integration/1KGP_3202.gatksv_svtools_novelins.freeze_V3.wAF.vcf.gz
+Paper:
+    Byrska-Bishop et al. 2022, Cell, PMID 36055201.
+
+Usage:
+    lrSv1kg3202SrVcfToBed.py input.vcf.gz output.bed
+"""
+
+import gzip
+import sys
+
+# Maps a VCF SVTYPE code to the "R,G,B" string written to the BED color
+# column. Types not listed here are drawn in grey ("100,100,100") by the
+# lookup fallback in the conversion loop.
+SV_COLORS = {
+    "DEL": "200,0,0",      # red
+    "INS": "0,0,200",      # blue
+    "DUP": "0,160,0",      # green
+    "INV": "230,140,0",    # orange
+    "CPX": "140,0,200",    # purple
+    "CTX": "100,100,100",  # grey
+    "CNV": "150,80,0",     # brown
+}
+
+
+def parseInfo(infoStr):
+    """Parse a VCF INFO column into a dict.
+
+    ``key=value`` entries map the key to the raw value string (split on the
+    first ``=`` only, so values may themselves contain ``=``). Bare flag
+    entries map to True. Empty entries (from a stray or trailing ``;``) and
+    the ``.`` missing-value placeholder are skipped, so an absent INFO
+    column yields an empty dict instead of bogus ``""`` / ``"."`` keys.
+    """
+    info = {}
+    for item in infoStr.split(";"):
+        if not item or item == ".":
+            continue
+        key, sep, value = item.partition("=")
+        # No "=" present means a flag-style entry.
+        info[key] = value if sep else True
+    return info
+
+
+def toInt(s):
+    """Convert a VCF value string to int, returning 0 when it is unusable.
+
+    Empty/None input gives 0. Plain integer strings are converted exactly;
+    anything else is parsed as a float and truncated toward zero (so
+    "1e3" -> 1000, "2.7" -> 2). Unparseable or non-finite values
+    ("abc", "nan", "inf") give 0.
+    """
+    if not s:
+        return 0
+    try:
+        # Exact path first: going through float would lose precision for
+        # integers beyond 2**53.
+        return int(s)
+    except ValueError:
+        pass
+    try:
+        return int(float(s))
+    except (ValueError, OverflowError):
+        # ValueError: not numeric, or NaN; OverflowError: +/- infinity.
+        return 0
+
+
+def toFloat(s):
+    """Convert a VCF value string to float.
+
+    Returns 0.0 for empty/None input and for strings that float() rejects.
+    """
+    try:
+        return float(s) if s else 0.0
+    except ValueError:
+        return 0.0
+
+
+def _vcfFieldsToBedRow(fields):
+    """Build one BED9+ output row (a list of str) from tab-split VCF columns."""
+    chrom = fields[0]
+    pos = int(fields[1])
+    name = fields[2]
+    filt = fields[6]
+    info = parseInfo(fields[7])
+
+    svType = info.get("SVTYPE", ".")
+    end = int(info.get("END", pos))
+    svLen = abs(toInt(info.get("SVLEN", "0")))
+
+    # VCF POS is 1-based; BED chromStart is 0-based and chromEnd exclusive.
+    chromStart = pos - 1
+    chromEnd = end
+    # Never emit an empty or negative-width item.
+    if chromEnd <= chromStart:
+        chromEnd = chromStart + 1
+
+    # Translocations: the END is on chr2; cap the item width to 1 bp
+    # on the chromosome-1 side.
+    chr2 = info.get("CHR2", "")
+    isCtx = svType == "CTX"
+    if isCtx and chr2 and chr2 != chrom:
+        chromEnd = chromStart + 1
+
+    def count(key):
+        """Integer INFO value as text; missing/unparseable becomes "0"."""
+        return str(toInt(info.get(key, "0")))
+
+    def freq(key):
+        """Float INFO value with 6 decimals; missing becomes "0.000000"."""
+        return f"{toFloat(info.get(key, '0')):.6f}"
+
+    # NOTE(review): a key absent from INFO silently yields 0.000000 below;
+    # confirm ASN_AF / SAN_AF match the ##INFO IDs in the source VCF header.
+    return [
+        chrom,
+        str(chromStart),
+        str(chromEnd),
+        name,
+        "0",                 # score
+        ".",                 # strand
+        str(chromStart),     # thickStart
+        str(chromEnd),       # thickEnd
+        SV_COLORS.get(svType, "100,100,100"),
+        svType,
+        str(svLen),
+        count("AC"),
+        count("AN"),
+        freq("AF"),
+        freq("POPMAX_AF"),
+        freq("AFR_AF"),
+        freq("AMR_AF"),
+        freq("ASN_AF"),
+        freq("EUR_AF"),
+        freq("SAN_AF"),
+        count("N_HET"),
+        count("N_HOMALT"),
+        info.get("ALGORITHMS", ""),
+        info.get("SOURCE", ""),
+        filt,
+        chr2 if isCtx else "",
+    ]
+
+
+def main():
+    """Command-line entry point: convert a (optionally gzipped) VCF to BED9+.
+
+    Expects exactly two arguments, the input VCF path and the output BED
+    path. With any other argument count it prints the module usage text to
+    stderr and exits with status 1. Lines starting with "#" and blank lines
+    are skipped; every other line produces one tab-separated output row.
+    """
+    if len(sys.argv) != 3:
+        print(__doc__, file=sys.stderr)
+        sys.exit(1)
+
+    inPath, outPath = sys.argv[1], sys.argv[2]
+    opener = gzip.open if inPath.endswith(".gz") else open
+
+    with opener(inPath, "rt") as fIn, open(outPath, "w") as fOut:
+        for line in fIn:
+            # Strip CR as well as LF so CRLF input does not leak "\r" into
+            # the last column.
+            line = line.rstrip("\r\n")
+            # Blank lines (e.g. at end of file) would otherwise fail on
+            # the column lookups.
+            if not line or line.startswith("#"):
+                continue
+            row = _vcfFieldsToBedRow(line.split("\t"))
+            fOut.write("\t".join(row) + "\n")
+
+
+# Run the conversion only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()