9eb4e0937782954c19d664e7d384d210bffb3b25
max
  Sat Jun 13 16:01:42 2026 -0700
lrSv: QA fixes from Lou's review - dedup, shared color palette, deCODE/AoU cleanup

- Drop kwanhoSv (KimPD) from the lrSvAll merge in databases.tsv; it stays on
dev/alpha until published, which also removes its >5 Mb breakend artifacts
from the merged track.
- Remove searchIndex from colorsDbSv, lrSv1kLin and lrSvAll (and the merge
generator): the bigBeds were built without a name index, so by-name search
never worked.
- Single shared per-SV-type color palette in lrSvCommon.py (svColor), used by
every converter and the merge. CPX is purple everywhere (was orange in
1kgOnt/apr/cpc1, colliding with INV's orange), colorsDb DEL is 200,0,0 like
the rest, and TRA/INSDEL get their own colors.
- deCODE: drop byte-identical duplicate rows and blank the fake AC=50
placeholder (AC is now a string field, omitted from the name and mouseOver).
- AoU: numeric-entity-encode non-ASCII gene/trait text and drop duplicate rows.
- gustafson, chirmade101, hprc2v21: drop byte-identical duplicate rows.
- lrSvMergeAll.py: skip byte-identical duplicate source rows instead of summing
their allele counts, which had inflated the per-database and total AC.

refs #36258

diff --git src/hg/makeDb/scripts/lrSv/lrSvCommon.py src/hg/makeDb/scripts/lrSv/lrSvCommon.py
index 0cdfdd61014..eadd7979ff6 100644
--- src/hg/makeDb/scripts/lrSv/lrSvCommon.py
+++ src/hg/makeDb/scripts/lrSv/lrSvCommon.py
@@ -43,41 +43,76 @@
     "TRA":     "TRA",
     "BND":     "BND",
     "MEI":     "MEI",
     "INSDEL":  "INSDEL",
     "MIXED":   "MIXED",
     "LOSS":    "LOSS",
     "GAIN":    "GAIN",
     "BOTH":    "BOTH",
 }
 
 # Types for which the length segment in the name is dropped:
 # CTX/BND have no meaningful single-position length.
 NO_LEN_TYPES = {"CTX", "BND"}
 
 
+# Canonical SV-type -> itemRgb color. Every lrSv subtrack converter (and the
+# lrSvAll merge) MUST use this single map so a given SV type has one color
+# across the whole supertrack. The scheme is the long-standing flat-color set
+# (red/blue/green/orange/purple); the rule that is easy to get wrong is that
+# CPX is purple (NOT orange) so it never collides with INV's orange. INSDEL
+# is slate and TRA/BND/CTX share one dark grey, so all stay distinct from
+# CPX in the merged track. Pass a raw or canonical type to svColor().
+SV_COLORS = {
+    "DEL":    "200,0,0",      # red
+    "INS":    "0,0,200",      # blue
+    "DUP":    "0,160,0",      # green
+    "INV":    "230,140,0",    # orange
+    "CPX":    "140,0,200",    # purple
+    "MIXED":  "120,120,120",  # grey
+    "INSDEL": "100,100,150",  # slate (deCODE combined ins/del)
+    "MEI":    "0,160,160",    # teal
+    "CNV":    "200,0,160",    # magenta
+    "TRA":    "90,90,90",     # dark grey (translocation)
+    "BND":    "90,90,90",     # dark grey (breakend)
+    "CTX":    "90,90,90",     # dark grey (chromosomal translocation)
+}
+
+# Fallback color svColor() returns for any type not in SV_COLORS.
+DEFAULT_SV_COLOR = "100,100,100"
+
+
 def normalizeSvType(raw: Optional[str]) -> str:
     """Return the canonical upper-case svType string for a raw VCF/TSV value."""
     if raw is None:
         return "UNK"
     s = str(raw).strip()
     if not s or s == "." or s == "?":
         return "UNK"
     upper = s.upper()
     return TYPE_ALIASES.get(upper, upper)
 
 
+def svColor(svType: Optional[str]) -> str:
+    """Look up the itemRgb color string for a raw or canonical SV type.
+
+    Aliases (e.g. COMPLEX -> CPX) resolve first; unknown -> DEFAULT_SV_COLOR.
+    """
+    canonType = normalizeSvType(svType)
+    return SV_COLORS.get(canonType, DEFAULT_SV_COLOR)
+
+
 def shortLen(lenBp: Optional[int]) -> str:
     """Short text form for a length in bp.
 
     <1000 bp  -> integer bp as string, e.g. "200"
     >=1000 bp -> "Xk" or "X.Xk" (at most one decimal, no trailing .0),
                   e.g. "1k", "5.5k", "15k", "1.5k"
     None or <=0 -> empty string.
     """
     if lenBp is None:
         return ""
     try:
         n = int(lenBp)
     except (TypeError, ValueError):
         return ""
     if n <= 0: