9eb4e0937782954c19d664e7d384d210bffb3b25 max Sat Jun 13 16:01:42 2026 -0700 lrSv: QA fixes from Lou's review - dedup, shared color palette, deCODE/AoU cleanup - Drop kwanhoSv (KimPD) from the lrSvAll merge in databases.tsv; it stays on dev/alpha until published, which also removes its >5 Mb breakend artifacts from the merged track. - Remove searchIndex from colorsDbSv, lrSv1kLin and lrSvAll (and the merge generator): the bigBeds were built without a name index, so by-name search never worked. - Single shared per-SV-type color palette in lrSvCommon.py (svColor), used by every converter and the merge. CPX is purple everywhere (was orange in 1kgOnt/apr/cpc1, colliding with INV's orange), colorsDb DEL is 200,0,0 like the rest, and TRA/INSDEL get their own colors. - deCODE: drop byte-identical duplicate rows and blank the fake AC=50 placeholder (AC is now a string field, omitted from the name and mouseOver). - AoU: numeric-entity-encode non-ASCII gene/trait text and drop duplicate rows. - gustafson, chirmade101, hprc2v21: drop byte-identical duplicate rows. - lrSvMergeAll.py: skip byte-identical duplicate source rows instead of summing their allele counts, which had inflated the per-database and total AC. refs #36258 diff --git src/hg/makeDb/scripts/lrSv/lrSvDecode.as src/hg/makeDb/scripts/lrSv/lrSvDecode.as index 5f4fa8abd5a..e84904c0819 100644 --- src/hg/makeDb/scripts/lrSv/lrSvDecode.as +++ src/hg/makeDb/scripts/lrSv/lrSvDecode.as @@ -1,19 +1,19 @@ table lrSvDecode "deCODE Icelandic long-read structural variants" ( string chrom; "Chromosome" uint chromStart; "Start position" uint chromEnd; "End position" string name; "Variant ID" uint score; "Score" char[1] strand; "Strand" uint thickStart; "Thick start (same as chromStart)" uint thickEnd; "Thick end (same as chromEnd)" uint reserved; "Item color" string svType; "SV Type|DEL, INS, or INSDEL (combined insertion/deletion)" int svLen; "SV Length|Length of the variant on the reference in base pairs" int insLen; "Insertion Length|Length of inserted sequence, 0 for DEL/INV/CPX" - int AC; "Allele Count|Placeholder value (50); awaiting real values from deCODE (#35059)" + string AC; "Allele Count|Not published by deCODE for this site-only callset" string trrBegin; "Tandem Repeat Region Start|Start of the surrounding tandem repeat region, if any (TRRBEGIN)" string trrEnd; "Tandem Repeat Region End|End of the surrounding tandem repeat region, if any (TRREND)" )