9a11061ca6b40fe16bdfd09b1af53192f6c7c85b max Tue Apr 21 08:13:02 2026 -0700 lrSv: add HTML doc pages and conversion scripts for recent subtracks, + hs1 HGSVC3 Subtrack stanzas for these SV callsets landed in earlier commits but the conversion scripts and per-track HTML description pages were never added; trackDb therefore had no doc to serve. This commit catches up. Docs (new): - colorsDbSv.html CoLoRSdb 1,427-sample long-read SVs - gustafsonSv.html 1KG ONT 100 (Gustafson 2024, PMID 39358015) - hgsvc2Sv.html HGSVC2 (Ebert 2021, PMID 33632895) - hprc2Sv.html HPRC release-2 pangenome SVs (no PMID yet; see humanpangenome.org/hprc-data-release-2/) - onekg3202Sr.html 1KG 3202 Illumina SHORT-READ GATK-SV (Byrska-Bishop 2022, PMID 36055201) Scripts (new): - lrSvGustafson.as / lrSvGustafsonVcfToBed.py - lrSvHgsvc2.as / lrSvHgsvc2TsvToBed.py (merges insdel + inv tables) - lrSvHprc2.as / lrSvHprc2VcfToBed.py (streams wave-decomposed VCF, explodes multi-allelic rows, filters to SV-sized or INV) - lrSv1kg3202Sr.as / lrSv1kg3202SrVcfToBed.py HGSVC3 also on hs1: - hgsvc3Sv.html: note that the hs1 build is native (not lifted): HGSVC3 aligned all assemblies to both GRCh38 and T2T-CHM13 and released separate annotation tables per reference. Added the T2T-CHM13 source URL to the Methods section and the hs1 hgsvc3.bb download link to Data Access. - doc/hs1/lrSv.txt (new): hs1-specific wget + build steps; refers back to doc/hg38/lrSv.txt for the full process. 
refs #36258
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git src/hg/makeDb/scripts/lrSv/lrSvHgsvc2.as src/hg/makeDb/scripts/lrSv/lrSvHgsvc2.as
new file mode 100644
index 00000000000..24578a4dda4
--- /dev/null
+++ src/hg/makeDb/scripts/lrSv/lrSvHgsvc2.as
@@ -0,0 +1,40 @@
+table lrSvHgsvc2  # autoSql schema for the HGSVC2 SV rows; 36 fields, declared in the order below
+"HGSVC2 long-read structural variants (32 haplotype-resolved genomes)"
+    (
+    string chrom; "Chromosome"
+    uint chromStart; "Start position"
+    uint chromEnd; "End position"
+    string name; "Variant ID"
+    uint score; "Score"
+    char[1] strand; "Strand"
+    uint thickStart; "Thick start (same as chromStart)"
+    uint thickEnd; "Thick end (same as chromEnd)"
+    uint reserved; "Item color"  # presumably the itemRgb slot closing the nine leading BED-style fields - TODO confirm
+    string svType; "SV Type|DEL, INS, or INV"  # text before '|' is the short label, text after is the longer description
+    int svLen; "SV Length|Absolute length of the SV in base pairs"  # NOTE(review): declared signed although described as an absolute length - confirm no negative values are written
+    uint alleleCount; "Allele Count|MERGE_AC - carrier-haplotype count from the callerset merge"
+    uint sampleCount; "Sample Count|Distinct samples carrying the variant (ignoring haplotype suffix)"
+    string cytoband; "Cytoband (BAND)"
+    float refSd; "Segmental Duplication|Fraction of variant overlapping reference segmental duplications"
+    string refTrf; "In Tandem Repeat|Whether the variant falls in a Tandem Repeat Finder region (True/False)"  # boolean carried as the text True/False, not as a numeric flag
+    uint refseqCds; "RefSeq CDS Overlap|Base pairs overlapping CDS"
+    uint refseqUtr3; "RefSeq 3' UTR Overlap"
+    uint refseqUtr5; "RefSeq 5' UTR Overlap"
+    uint refseqIntron; "RefSeq Intron Overlap"
+    uint refseqNcrna; "RefSeq ncRNA Overlap"
+    uint refseqUp5k; "RefSeq +/- 5kb Upstream"  # NOTE(review): label says "+/-" for a one-sided (upstream) flank - confirm intended wording
+    uint refseqDn5k; "RefSeq +/- 5kb Downstream"  # NOTE(review): same "+/-" wording question as refseqUp5k
+    string pliMax; "Max gnomAD pLI|Max pLI of genes overlapping this SV"  # numeric-looking value declared as string - presumably so it can be empty; verify against the converter
+    string loeufMin; "Min gnomAD LOEUF|Minimum LOEUF upper bound of overlapping genes"
+    string popAllAf; "Allele Frequency (all)|Population-level AF (insdel only)"  # the pop*Af fields are described as insdel-only; presumably blank on INV rows - TODO confirm
+    string popAfrAf; "AF African (insdel only)"
+    string popAmrAf; "AF Admixed American (insdel only)"
+    string popEasAf; "AF East Asian (insdel only)"
+    string popEurAf; "AF European (insdel only)"
+    string popSasAf; "AF South Asian (insdel only)"
+    string regionRefInner; "Inner Inversion Region|Inner coordinate range of the inversion (INV only)"  # described as INV-only; presumably blank on DEL/INS rows - TODO confirm
+    lstring mergeSamples; "Carrier Haplotypes|Comma-separated haplotype IDs carrying this variant"  # the only lstring field; every other text field here is string
+    string discClass; "Discovery Class|SV discovery class from the HGSVC2 pipeline"
+    string win500; "Flanking +/- 500 bp"
+    string win2k; "Flanking +/- 2000 bp"
+    )