9eb4e0937782954c19d664e7d384d210bffb3b25
max
Sat Jun 13 16:01:42 2026 -0700
lrSv: QA fixes from Lou's review - dedup, shared color palette, deCODE/AoU cleanup
- Drop kwanhoSv (KimPD) from the lrSvAll merge in databases.tsv, which also
removes its >5 Mb breakend artifacts from the merged track. The track itself
stays on dev/alpha until published.
- Remove searchIndex from colorsDbSv, lrSv1kLin and lrSvAll (and the merge
generator): the bigBeds were built without a name index, so by-name search
never worked.
- Single shared per-SV-type color palette in lrSvCommon.py (svColor), used by
every converter and the merge. CPX is purple everywhere (was orange in
1kgOnt/apr/cpc1, colliding with INV's orange), colorsDb DEL is 200,0,0 like
the rest, and TRA/INSDEL get their own colors.
- deCODE: drop byte-identical duplicate rows and blank the fake AC=50
placeholder (AC is now a string field, omitted from the name and mouseOver).
- AoU: numeric-entity-encode non-ASCII gene/trait text and drop duplicate rows.
- gustafson, chirmade101, hprc2v21: drop byte-identical duplicate rows.
- lrSvMergeAll.py: skip byte-identical duplicate source rows instead of summing
their allele counts, which had inflated the per-database and total AC.
refs #36258
diff --git src/hg/makeDb/trackDb/human/lrSv.ra src/hg/makeDb/trackDb/human/lrSv.ra
index 6296dc37174..0a2042555f0 100644
--- src/hg/makeDb/trackDb/human/lrSv.ra
+++ src/hg/makeDb/trackDb/human/lrSv.ra
@@ -23,31 +23,30 @@
filterLabel.AC Allele Count
noScoreFilter on
include lrSvAll.ra
track colorsDbSv
parent lrSv
bigDataUrl /gbdb/$D/lrSv/colorsDb/sv.$D.bb
shortLabel CoLoRSdb 1427 SVs
longLabel Structural Variants from CoLoRSdb (Consortium of Long-Read Sequencing, 1,427 Samples)
type bigBed 9 +
itemRgb on
visibility dense
dataVersion v1.2.0
mouseOver $name ($svType) svLen=$svLen insLen=$insLen AF=$AF AC=$AC/$AN (Hom:$acHom Het:$acHet Hemi:$acHemi) samples=$NS
- searchIndex name
filterValues.svType DEL,INS,INV,DUP
filterType.svType multipleListOr
filterLabel.svType SV Type
filter.svLen 0:101381
filterByRange.svLen on
filterLabel.svLen SV Length (bp)
filter.insLen 0:18724
filterByRange.insLen on
filterLabel.insLen Insertion Length (bp)
filter.AC 0:2854
filterByRange.AC on
filterLabel.AC Alt Allele Count (AC)
filter.AF 0:1
filterByRange.AF on
filterLimits.AF 0:1
@@ -230,43 +229,40 @@
filterByRange.alleleFreq on
filterLimits.alleleFreq 0:1
filterLabel.alleleFreq Allele Frequency
filter.carrierCount 1:498
filterByRange.carrierCount on
filterLabel.carrierCount Number of Carrier Samples
track decodeSv
parent lrSv
bigDataUrl /gbdb/$D/lrSv/decodeSv.bb
shortLabel deCODE 3622 SVs
longLabel High-confidence Structural Variants from 3,622 Icelanders (deCODE, Oxford Nanopore)
type bigBed 9 +
itemRgb on
visibility dense
- mouseOver $name ($svType) svLen=$svLen insLen=$insLen AC=$AC (placeholder)
+ mouseOver $name ($svType) svLen=$svLen insLen=$insLen
filterValues.svType DEL,INS,INSDEL
filterType.svType multipleListOr
filterLabel.svType SV Type
filter.svLen 0:861080
filterByRange.svLen on
filterLabel.svLen SV Length
filter.insLen 0:22130
filterByRange.insLen on
filterLabel.insLen Insertion Length
- filter.AC 0:50
- filterByRange.AC on
- filterLabel.AC Allele Count (placeholder 50)
skipEmptyFields on
track hprc2v21Sv
parent lrSv
bigDataUrl /gbdb/$D/lrSv/hprc2v21.bb
shortLabel HPRC v2.1 233 SVs
longLabel Structural Variants from the HPRC v2.1 Pangenome Graph (233 samples, minigraph-cactus, raw deconstruct)
type bigBed 9 +
itemRgb on
visibility dense
mouseOver $name ($svType) svLen=$svLen insLen=$insLen AF=$alleleFreq AC=$AC/$alleleNumber samples=$nSamples
filterValues.svType INS,DEL
filterType.svType multipleListOr
filterLabel.svType SV Type
filter.svLen 0:99835
@@ -377,30 +373,34 @@
filter.AC 1:136
filterByRange.AC on
filterLabel.AC Allele Count (carrier haplotypes)
filter.sampleCount 1:65
filterByRange.sampleCount on
filterLabel.sampleCount Sample Count
filterValues.refTrf True,False
filterType.refTrf multipleListOr
filterLabel.refTrf In Tandem Repeat
filter.refSd 0:1
filterByRange.refSd on
filterLimits.refSd 0:1
filterLabel.refSd Segmental Duplication Overlap
skipEmptyFields on
+ # NOT FOR RELEASE: preliminary and unpublished (Kim et al. 2026 not out yet),
+ # and the callset has breakend artifacts up to 190 Mb (e.g. a single "INV"
+ # spanning all of chr4). Held on dev/alpha and kept out of the lrSvAll merge
+ # (removed from databases.tsv) until the data is published and cleaned.
track kwanhoSv
parent lrSv
bigDataUrl /gbdb/$D/lrSv/kwanho.bb
shortLabel Kim PD 100 prelim
longLabel Structural Variants from 100 Post-mortem Brains (Parkinson's disease, ILBD, Control; Kim et al. 2026, PacBio HiFi) - PRELIMINARY, data to be updated, contact the authors before using
type bigBed 9 +
itemRgb on
visibility dense
mouseOver $name ($svType) svLen=$svLen insLen=$insLen AC=$AC AF_PD=$afPd AF_HC=$afHc diff=$differentialRate
filterValues.svType DEL,INS,DUP,INV
filterType.svType multipleListOr
filterLabel.svType SV Type
filter.svLen 0:200000000
filterByRange.svLen on
filterLabel.svLen SV Length
@@ -505,31 +505,30 @@
filterByRange.geneCount on
filterLabel.geneCount Gene Count
skipEmptyFields on
# NOT FOR RELEASE: data received from Eichler lab via email, not yet published.
# Do not add to lrSvAll merged track until a preprint or paper is available.
track lrSv1kLin
parent lrSv
bigDataUrl /gbdb/$D/lrSv/lin1218.bb
shortLabel 1KG Linear 1218 SVs
longLabel Structural Variants from 1,218 Individuals (1000 Genomes, Linear Long-read Sequencing)
type bigBed 9 +
itemRgb on
visibility dense
mouseOver $name ($svType) svLen=$svLen insLen=$insLen AC=$AC/$AN AF=$AF AF_AFR=$afAfr AF_EUR=$afEur samples=$NS
- searchIndex name
filterValues.svType DEL,INS
filterType.svType multipleListOr
filterLabel.svType SV Type
filter.svLen 0:99565
filterByRange.svLen on
filterLabel.svLen SV Length (bp)
filter.insLen 0:99968
filterByRange.insLen on
filterLabel.insLen Insertion Length (bp)
filter.AC 0:2436
filterByRange.AC on
filterLabel.AC Allele Count
filter.AF 0:1
filterByRange.AF on
filterLimits.AF 0:1