9eb4e0937782954c19d664e7d384d210bffb3b25
max
  Sat Jun 13 16:01:42 2026 -0700
lrSv: QA fixes from Lou's review - dedup, shared color palette, deCODE/AoU cleanup

- Drop kwanhoSv (KimPD) from the lrSvAll merge in databases.tsv; it stays on
dev/alpha until published. Dropping it also removes its >5 Mb breakend
artifacts from the merged track.
- Remove searchIndex from colorsDbSv, lrSv1kLin and lrSvAll (and the merge
generator): the bigBeds were built without a name index, so by-name search
never worked.
- Single shared per-SV-type color palette in lrSvCommon.py (svColor), used by
every converter and the merge. CPX is purple everywhere (was orange in
1kgOnt/apr/cpc1, colliding with INV's orange), colorsDb DEL is 200,0,0 like
the rest, and TRA/INSDEL get their own colors.
- deCODE: drop byte-identical duplicate rows and blank the fake AC=50
placeholder (AC is now a string field, omitted from the name and mouseOver).
- AoU: numeric-entity-encode non-ASCII gene/trait text and drop duplicate rows.
- gustafson, chirmade101, hprc2v21: drop byte-identical duplicate rows.
- lrSvMergeAll.py: skip byte-identical duplicate source rows instead of summing
their allele counts, which had inflated the per-database and total AC.

refs #36258

diff --git src/hg/makeDb/scripts/lrSv/lrSv1kgOntVcfToBed.py src/hg/makeDb/scripts/lrSv/lrSv1kgOntVcfToBed.py
index 7859c2cedb7..cb2cfe65186 100644
--- src/hg/makeDb/scripts/lrSv/lrSv1kgOntVcfToBed.py
+++ src/hg/makeDb/scripts/lrSv/lrSv1kgOntVcfToBed.py
@@ -2,38 +2,31 @@
 """Convert 1KG ONT SVAN VCF to BED9+ for bigBed, adding allele counts from phased VCF.
 
 Usage:
     lrSv1kgOntVcfToBed.py svan.vcf.gz output.bed chrom.sizes [phased.vcf.gz]
 
 The optional phased VCF provides AC, AN, and AF per variant (matched by ID).
 SVs without a match get AC=-1 (written to AC column), alleleNumber=-1, alleleFreq=-1.
 The canonical `name` column omits the :AC suffix when AC is -1.
 """
 
 import gzip
 import os
 import sys
 
 sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
-from lrSvCommon import svName, normalizeSvType
-
-# Colors by SV class (R,G,B)
-SV_COLORS = {
-    "INS": "0,0,200",      # blue
-    "DEL": "200,0,0",      # red
-    "CPX": "230,140,0",    # orange
-}
+from lrSvCommon import svName, normalizeSvType, svColor
 
 def parseInfo(infoStr):
     """Parse INFO field into a dict."""
     d = {}
     for item in infoStr.split(";"):
         if "=" in item:
             k, v = item.split("=", 1)
             d[k] = v
         else:
             d[item] = True
     return d
 
 def getSvClass(varId):
     """Extract raw SV class from the variant ID. Returns raw string;
     will be normalized by normalizeSvType (COMPLEX -> CPX)."""
@@ -172,31 +165,31 @@
 
             # Number of exons retrotransposed
             try:
                 nbExons = int(info.get("NB_EXONS", "0"))
             except ValueError:
                 nbExons = 0
 
             # Non-canonical MEI flag
             notCanonical = "Yes" if info.get("NOT_CANONICAL") else ""
 
             # Strand
             strand = info.get("STRAND", ".")
             if strand not in ("+", "-"):
                 strand = "."
 
-            color = SV_COLORS.get(svType, "100,100,100")
+            color = svColor(svType)
 
             # Allele counts from phased VCF
             if varId in acMap:
                 ac, an, af = acMap[varId]
                 matched += 1
                 acForName = ac
             else:
                 ac, an, af = -1, -1, -1.0
                 unmatched += 1
                 acForName = None  # drop :AC suffix for SVAN-only rows
 
             # Clip to chrom sizes; skip records on unknown chroms
             if chrom not in chromSizes:
                 skipped += 1
                 continue