9eb4e0937782954c19d664e7d384d210bffb3b25
max
  Sat Jun 13 16:01:42 2026 -0700
lrSv: QA fixes from Lou's review - dedup, shared color palette, deCODE/AoU cleanup

- Drop kwanhoSv (KimPD) from the lrSvAll merge in databases.tsv; it stays on
dev/alpha until published. Dropping it also removes its >5 Mb breakend
artifacts from the merged track.
- Remove searchIndex from colorsDbSv, lrSv1kLin and lrSvAll (and the merge
generator): the bigBeds were built without a name index, so by-name search
never worked.
- Use a single shared per-SV-type color palette in lrSvCommon.py (svColor) for
every converter and the merge. CPX is now purple everywhere (was orange in
1kgOnt/apr/cpc1, colliding with INV's orange), colorsDb DEL is now 200,0,0
like the rest, and TRA/INSDEL get their own colors.
- deCODE: drop byte-identical duplicate rows and blank the fake AC=50
placeholder (AC is now a string field, omitted from the name and mouseOver).
- AoU: numeric-entity-encode non-ASCII gene/trait text and drop duplicate rows.
- gustafson, chirmade101, hprc2v21: drop byte-identical duplicate rows.
- lrSvMergeAll.py: skip byte-identical duplicate source rows instead of summing
their allele counts, which had inflated the per-database and total AC.

refs #36258

diff --git src/hg/makeDb/scripts/lrSv/lrSvDecode.as src/hg/makeDb/scripts/lrSv/lrSvDecode.as
index 5f4fa8abd5a..e84904c0819 100644
--- src/hg/makeDb/scripts/lrSv/lrSvDecode.as
+++ src/hg/makeDb/scripts/lrSv/lrSvDecode.as
@@ -1,19 +1,19 @@
 table lrSvDecode
 "deCODE Icelandic long-read structural variants"
     (
     string chrom;       "Chromosome"
     uint chromStart;    "Start position"
     uint chromEnd;      "End position"
     string name;        "Variant ID"
     uint score;         "Score"
     char[1] strand;     "Strand"
     uint thickStart;    "Thick start (same as chromStart)"
     uint thickEnd;      "Thick end (same as chromEnd)"
     uint reserved;      "Item color"
     string svType;      "SV Type|DEL, INS, or INSDEL (combined insertion/deletion)"
     int svLen;          "SV Length|Length of the variant on the reference in base pairs"
     int insLen;         "Insertion Length|Length of inserted sequence, 0 for DEL/INV/CPX"
-    int AC;             "Allele Count|Placeholder value (50); awaiting real values from deCODE (#35059)"
+    string AC;          "Allele Count|Not published by deCODE for this site-only callset"
     string trrBegin;    "Tandem Repeat Region Start|Start of the surrounding tandem repeat region, if any (TRRBEGIN)"
     string trrEnd;      "Tandem Repeat Region End|End of the surrounding tandem repeat region, if any (TRREND)"
     )