9eb4e0937782954c19d664e7d384d210bffb3b25
max
  Sat Jun 13 16:01:42 2026 -0700
lrSv: QA fixes from Lou's review - dedup, shared color palette, deCODE/AoU cleanup

- Drop kwanhoSv (KimPD) from the lrSvAll merge in databases.tsv, which also
removes its >5 Mb breakend artifacts from the merged track. The track itself
stays on dev/alpha until published.
- Remove searchIndex from colorsDbSv, lrSv1kLin and lrSvAll (and the merge
generator): the bigBeds were built without a name index, so by-name search
never worked.
- Single shared per-SV-type color palette in lrSvCommon.py (svColor), used by
every converter and the merge. CPX is purple everywhere (was orange in
1kgOnt/apr/cpc1, colliding with INV's orange), colorsDb DEL is 200,0,0 like
the rest, and TRA/INSDEL get their own colors.
- deCODE: drop byte-identical duplicate rows and blank the fake AC=50
placeholder (AC is now a string field, omitted from the name and mouseOver).
- AoU: numeric-entity-encode non-ASCII gene/trait text and drop duplicate rows.
- gustafson, chirmade101, hprc2v21: drop byte-identical duplicate rows.
- lrSvMergeAll.py: skip byte-identical duplicate source rows instead of summing
their allele counts, which had inflated the per-database and total AC.

refs #36258

diff --git src/hg/makeDb/trackDb/human/lrSv.ra src/hg/makeDb/trackDb/human/lrSv.ra
index 6296dc37174..0a2042555f0 100644
--- src/hg/makeDb/trackDb/human/lrSv.ra
+++ src/hg/makeDb/trackDb/human/lrSv.ra
@@ -23,31 +23,30 @@
 filterLabel.AC Allele Count
 noScoreFilter on
 
     include lrSvAll.ra
 
     track colorsDbSv
     parent lrSv
     bigDataUrl /gbdb/$D/lrSv/colorsDb/sv.$D.bb
     shortLabel CoLoRSdb 1427 SVs
     longLabel Structural Variants from CoLoRSdb (Consortium of Long-Read Sequencing, 1,427 Samples)
     type bigBed 9 +
     itemRgb on
     visibility dense
     dataVersion v1.2.0
     mouseOver <b>$name</b> ($svType) svLen=$svLen insLen=$insLen AF=$AF AC=$AC/$AN (Hom:$acHom Het:$acHet Hemi:$acHemi) samples=$NS
-    searchIndex name
     filterValues.svType DEL,INS,INV,DUP
     filterType.svType multipleListOr
     filterLabel.svType SV Type
     filter.svLen 0:101381
     filterByRange.svLen on
     filterLabel.svLen SV Length (bp)
     filter.insLen 0:18724
     filterByRange.insLen on
     filterLabel.insLen Insertion Length (bp)
     filter.AC 0:2854
     filterByRange.AC on
     filterLabel.AC Alt Allele Count (AC)
     filter.AF 0:1
     filterByRange.AF on
     filterLimits.AF 0:1
@@ -230,43 +229,40 @@
     filterByRange.alleleFreq on
     filterLimits.alleleFreq 0:1
     filterLabel.alleleFreq Allele Frequency
     filter.carrierCount 1:498
     filterByRange.carrierCount on
     filterLabel.carrierCount Number of Carrier Samples
 
     track decodeSv
     parent lrSv
     bigDataUrl /gbdb/$D/lrSv/decodeSv.bb
     shortLabel deCODE 3622 SVs
     longLabel High-confidence Structural Variants from 3,622 Icelanders (deCODE, Oxford Nanopore)
     type bigBed 9 +
     itemRgb on
     visibility dense
-    mouseOver <b>$name</b> ($svType) svLen=$svLen insLen=$insLen AC=$AC (placeholder)
+    mouseOver <b>$name</b> ($svType) svLen=$svLen insLen=$insLen
     filterValues.svType DEL,INS,INSDEL
     filterType.svType multipleListOr
     filterLabel.svType SV Type
     filter.svLen 0:861080
     filterByRange.svLen on
     filterLabel.svLen SV Length
     filter.insLen 0:22130
     filterByRange.insLen on
     filterLabel.insLen Insertion Length
-    filter.AC 0:50
-    filterByRange.AC on
-    filterLabel.AC Allele Count (placeholder 50)
     skipEmptyFields on
 
     track hprc2v21Sv
     parent lrSv
     bigDataUrl /gbdb/$D/lrSv/hprc2v21.bb
     shortLabel HPRC v2.1 233 SVs
     longLabel Structural Variants from the HPRC v2.1 Pangenome Graph (233 samples, minigraph-cactus, raw deconstruct)
     type bigBed 9 +
     itemRgb on
     visibility dense
     mouseOver <b>$name</b> ($svType) svLen=$svLen insLen=$insLen AF=$alleleFreq AC=$AC/$alleleNumber samples=$nSamples
     filterValues.svType INS,DEL
     filterType.svType multipleListOr
     filterLabel.svType SV Type
     filter.svLen 0:99835
@@ -377,30 +373,34 @@
     filter.AC 1:136
     filterByRange.AC on
     filterLabel.AC Allele Count (carrier haplotypes)
     filter.sampleCount 1:65
     filterByRange.sampleCount on
     filterLabel.sampleCount Sample Count
     filterValues.refTrf True,False
     filterType.refTrf multipleListOr
     filterLabel.refTrf In Tandem Repeat
     filter.refSd 0:1
     filterByRange.refSd on
     filterLimits.refSd 0:1
     filterLabel.refSd Segmental Duplication Overlap
     skipEmptyFields on
 
+    # NOT FOR RELEASE: preliminary and unpublished (Kim et al. 2026 not out yet),
+    # and the callset has breakend artifacts up to 190 Mb (e.g. a single "INV"
+    # spanning all of chr4). Held on dev/alpha and kept out of the lrSvAll merge
+    # (removed from databases.tsv) until the data is published and cleaned.
     track kwanhoSv
     parent lrSv
     bigDataUrl /gbdb/$D/lrSv/kwanho.bb
     shortLabel Kim PD 100 prelim
     longLabel Structural Variants from 100 Post-mortem Brains (Parkinson's disease, ILBD, Control; Kim et al. 2026, PacBio HiFi) - PRELIMINARY, data to be updated, contact the authors before using
     type bigBed 9 +
     itemRgb on
     visibility dense
     mouseOver <b>$name</b> ($svType) svLen=$svLen insLen=$insLen AC=$AC AF_PD=$afPd AF_HC=$afHc diff=$differentialRate
     filterValues.svType DEL,INS,DUP,INV
     filterType.svType multipleListOr
     filterLabel.svType SV Type
     filter.svLen 0:200000000
     filterByRange.svLen on
     filterLabel.svLen SV Length
@@ -505,31 +505,30 @@
     filterByRange.geneCount on
     filterLabel.geneCount Gene Count
     skipEmptyFields on
 
     # NOT FOR RELEASE: data received from Eichler lab via email, not yet published.
     # Do not add to lrSvAll merged track until a preprint or paper is available.
     track lrSv1kLin
     parent lrSv
     bigDataUrl /gbdb/$D/lrSv/lin1218.bb
     shortLabel 1KG Linear 1218 SVs
     longLabel Structural Variants from 1,218 Individuals (1000 Genomes, Linear Long-read Sequencing)
     type bigBed 9 +
     itemRgb on
     visibility dense
     mouseOver <b>$name</b> ($svType) svLen=$svLen insLen=$insLen AC=$AC/$AN AF=$AF AF_AFR=$afAfr AF_EUR=$afEur samples=$NS
-    searchIndex name
     filterValues.svType DEL,INS
     filterType.svType multipleListOr
     filterLabel.svType SV Type
     filter.svLen 0:99565
     filterByRange.svLen on
     filterLabel.svLen SV Length (bp)
     filter.insLen 0:99968
     filterByRange.insLen on
     filterLabel.insLen Insertion Length (bp)
     filter.AC 0:2436
     filterByRange.AC on
     filterLabel.AC Allele Count
     filter.AF 0:1
     filterByRange.AF on
     filterLimits.AF 0:1