9eb4e0937782954c19d664e7d384d210bffb3b25 max Sat Jun 13 16:01:42 2026 -0700 lrSv: QA fixes from Lou's review - dedup, shared color palette, deCODE/AoU cleanup - Drop kwanhoSv (KimPD) from the lrSvAll merge in databases.tsv; it stays on dev/alpha until published, which also removes its >5 Mb breakend artifacts from the merged track. - Remove searchIndex from colorsDbSv, lrSv1kLin and lrSvAll (and the merge generator): the bigBeds were built without a name index, so by-name search never worked. - Single shared per-SV-type color palette in lrSvCommon.py (svColor), used by every converter and the merge. CPX is purple everywhere (was orange in 1kgOnt/apr/cpc1, colliding with INV's orange), colorsDb DEL is 200,0,0 like the rest, and TRA/INSDEL get their own colors. - deCODE: drop byte-identical duplicate rows and blank the fake AC=50 placeholder (AC is now a string field, omitted from the name and mouseOver). - AoU: numeric-entity-encode non-ASCII gene/trait text and drop duplicate rows. - gustafson, chirmade101, hprc2v21: drop byte-identical duplicate rows. - lrSvMergeAll.py: skip byte-identical duplicate source rows instead of summing their allele counts, which had inflated the per-database and total AC. refs #36258 diff --git src/hg/makeDb/trackDb/human/lrSv.ra src/hg/makeDb/trackDb/human/lrSv.ra index 6296dc37174..0a2042555f0 100644 --- src/hg/makeDb/trackDb/human/lrSv.ra +++ src/hg/makeDb/trackDb/human/lrSv.ra @@ -23,31 +23,30 @@ filterLabel.AC Allele Count noScoreFilter on include lrSvAll.ra track colorsDbSv parent lrSv bigDataUrl /gbdb/$D/lrSv/colorsDb/sv.$D.bb shortLabel CoLoRSdb 1427 SVs longLabel Structural Variants from CoLoRSdb (Consortium of Long-Read Sequencing, 1,427 Samples) type bigBed 9 + itemRgb on visibility dense dataVersion v1.2.0 mouseOver $name ($svType) svLen=$svLen insLen=$insLen AF=$AF AC=$AC/$AN (Hom:$acHom Het:$acHet Hemi:$acHemi) samples=$NS - searchIndex name filterValues.svType DEL,INS,INV,DUP filterType.svType multipleListOr filterLabel.svType SV Type filter.svLen 0:101381 filterByRange.svLen on filterLabel.svLen SV Length (bp) filter.insLen 0:18724 filterByRange.insLen on filterLabel.insLen Insertion Length (bp) filter.AC 0:2854 filterByRange.AC on filterLabel.AC Alt Allele Count (AC) filter.AF 0:1 filterByRange.AF on filterLimits.AF 0:1 @@ -230,43 +229,40 @@ filterByRange.alleleFreq on filterLimits.alleleFreq 0:1 filterLabel.alleleFreq Allele Frequency filter.carrierCount 1:498 filterByRange.carrierCount on filterLabel.carrierCount Number of Carrier Samples track decodeSv parent lrSv bigDataUrl /gbdb/$D/lrSv/decodeSv.bb shortLabel deCODE 3622 SVs longLabel High-confidence Structural Variants from 3,622 Icelanders (deCODE, Oxford Nanopore) type bigBed 9 + itemRgb on visibility dense - mouseOver $name ($svType) svLen=$svLen insLen=$insLen AC=$AC (placeholder) + mouseOver $name ($svType) svLen=$svLen insLen=$insLen filterValues.svType DEL,INS,INSDEL filterType.svType multipleListOr filterLabel.svType SV Type filter.svLen 0:861080 filterByRange.svLen on filterLabel.svLen SV Length filter.insLen 0:22130 filterByRange.insLen on filterLabel.insLen Insertion Length - filter.AC 0:50 - filterByRange.AC on - filterLabel.AC Allele Count (placeholder 50) skipEmptyFields on track hprc2v21Sv parent lrSv bigDataUrl /gbdb/$D/lrSv/hprc2v21.bb shortLabel HPRC v2.1 233 SVs longLabel Structural Variants from the HPRC v2.1 Pangenome Graph (233 samples, minigraph-cactus, raw deconstruct) type bigBed 9 + itemRgb on visibility dense mouseOver $name ($svType) svLen=$svLen insLen=$insLen AF=$alleleFreq AC=$AC/$alleleNumber samples=$nSamples filterValues.svType INS,DEL filterType.svType multipleListOr filterLabel.svType SV Type filter.svLen 0:99835 @@ -377,30 +373,34 @@ filter.AC 1:136 filterByRange.AC on filterLabel.AC Allele Count (carrier haplotypes) filter.sampleCount 1:65 filterByRange.sampleCount on filterLabel.sampleCount Sample Count filterValues.refTrf True,False filterType.refTrf multipleListOr filterLabel.refTrf In Tandem Repeat filter.refSd 0:1 filterByRange.refSd on filterLimits.refSd 0:1 filterLabel.refSd Segmental Duplication Overlap skipEmptyFields on + # NOT FOR RELEASE: preliminary and unpublished (Kim et al. 2026 not out yet), + # and the callset has breakend artifacts up to 190 Mb (e.g. a single "INV" + # spanning all of chr4). Held on dev/alpha and kept out of the lrSvAll merge + # (removed from databases.tsv) until the data is published and cleaned. track kwanhoSv parent lrSv bigDataUrl /gbdb/$D/lrSv/kwanho.bb shortLabel Kim PD 100 prelim longLabel Structural Variants from 100 Post-mortem Brains (Parkinson's disease, ILBD, Control; Kim et al. 2026, PacBio HiFi) - PRELIMINARY, data to be updated, contact the authors before using type bigBed 9 + itemRgb on visibility dense mouseOver $name ($svType) svLen=$svLen insLen=$insLen AC=$AC AF_PD=$afPd AF_HC=$afHc diff=$differentialRate filterValues.svType DEL,INS,DUP,INV filterType.svType multipleListOr filterLabel.svType SV Type filter.svLen 0:200000000 filterByRange.svLen on filterLabel.svLen SV Length @@ -505,31 +505,30 @@ filterByRange.geneCount on filterLabel.geneCount Gene Count skipEmptyFields on # NOT FOR RELEASE: data received from Eichler lab via email, not yet published. # Do not add to lrSvAll merged track until a preprint or paper is available. track lrSv1kLin parent lrSv bigDataUrl /gbdb/$D/lrSv/lin1218.bb shortLabel 1KG Linear 1218 SVs longLabel Structural Variants from 1,218 Individuals (1000 Genomes, Linear Long-read Sequencing) type bigBed 9 + itemRgb on visibility dense mouseOver $name ($svType) svLen=$svLen insLen=$insLen AC=$AC/$AN AF=$AF AF_AFR=$afAfr AF_EUR=$afEur samples=$NS - searchIndex name filterValues.svType DEL,INS filterType.svType multipleListOr filterLabel.svType SV Type filter.svLen 0:99565 filterByRange.svLen on filterLabel.svLen SV Length (bp) filter.insLen 0:99968 filterByRange.insLen on filterLabel.insLen Insertion Length (bp) filter.AC 0:2436 filterByRange.AC on filterLabel.AC Allele Count filter.AF 0:1 filterByRange.AF on filterLimits.AF 0:1