aa61ebc800429515f9ced7e28f669c6042219f43 max Wed Mar 18 09:09:13 2026 -0700 varFreqs supertrack: add GREGoR track, update all HTML docs, move scripts to varFreqs/, refs #36642 Add GREGoR R04 WGS track to varFreqs superTrack. Update Data Access and Methods sections for all 20+ subtrack HTML files with consistent formatting, sequencing methods from source papers, and links to makeDoc and Github scripts. Move all varFreqs conversion scripts into scripts/varFreqs/ subdirectory and update makeDoc paths accordingly. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> diff --git src/hg/makeDb/trackDb/human/varFreqs.ra src/hg/makeDb/trackDb/human/varFreqs.ra index 887d2f7890d..d04d2a4cdc9 100644 --- src/hg/makeDb/trackDb/human/varFreqs.ra +++ src/hg/makeDb/trackDb/human/varFreqs.ra @@ -1,306 +1,359 @@ track varFreqs shortLabel Variant Frequencies longLabel Variant Frequencies from various cohorts or national projects group varRep type bed 12 visibility hide superTrack on + track varFreqsAll + shortLabel All Databases Combined + longLabel Variant Frequencies: All Databases Combined with Consequence Annotations + type bigBed 9 + + parent varFreqs on + bigDataUrl /gbdb/$D/varFreqs/varFreqsAll.bb + visibility dense + itemRgb on + maxWindowToDraw 5000000 + # Variant type and consequence filters + filterValues.varType SNV|SNV,INS|Insertion,DEL|Deletion,MNV|MNV + filterLabel.varType Variant Type + filterValues.consequence missense|Missense,synonymous|Synonymous,stop_gained|Stop Gained,frameshift|Frameshift,splice_donor|Splice Donor,splice_acceptor|Splice Acceptor,intron|Intron,.|Intergenic + filterLabel.consequence Consequence + filterValues.sources aou|AllOfUs,spk|SPARK_iWES,wgs|SFARI_WGS,ga|GenomeAsia100K,kov|KOVA,tom|ToMMo,mcp|MCPS,ind|IndiGenomes,fin|FinnGen,sau|Saudi,swe|SweGen,top|TOPMed,abr|ABraOM + filterType.sources multipleListOr + filterLabel.sources Source Database + # Length filters + filterByRange.refLen on + filterLabel.refLen Reference Length + filterByRange.altLen on + filterLabel.altLen Alternate Length + filterByRange.varLen on + filterLabel.varLen Length Change + # Max AF filter + filterByRange.maxAF on + filterLabel.maxAF Max Allele Frequency + filterLimits.maxAF 0:1 + # Per-database AF filters + filterByRange.aouAF on + filterLabel.aouAF AllOfUs AF + filterByRange.spkAF on + filterLabel.spkAF SPARK AF + filterByRange.wgsAF on + filterLabel.wgsAF SFARI WGS AF + filterByRange.gaAF on + filterLabel.gaAF GenomeAsia AF + filterByRange.kovAF on + filterLabel.kovAF KOVA AF + filterByRange.tomAF on + filterLabel.tomAF ToMMo AF + filterByRange.mcpAF on + filterLabel.mcpAF MCPS AF + filterByRange.indAF on + filterLabel.indAF IndiGenomes AF + filterByRange.finAF on + filterLabel.finAF FinnGen AF + filterByRange.sauAF on + filterLabel.sauAF Saudi AF + filterByRange.sweAF on + filterLabel.sweAF SweGen AF + filterByRange.topAF on + filterLabel.topAF TOPMed AF + filterByRange.abrAF on + filterLabel.abrAF ABraOM AF + # Per-database AC filters + filterByRange.aouAC on + filterLabel.aouAC AllOfUs AC + filterByRange.spkAC on + filterLabel.spkAC SPARK AC + filterByRange.wgsAC on + filterLabel.wgsAC SFARI WGS AC + filterByRange.gaAC on + filterLabel.gaAC GenomeAsia AC + filterByRange.kovAC on + filterLabel.kovAC KOVA AC + filterByRange.tomAC on + filterLabel.tomAC ToMMo AC + filterByRange.mcpAC on + filterLabel.mcpAC MCPS AC + filterByRange.indAC on + filterLabel.indAC IndiGenomes AC + filterByRange.finAC on + filterLabel.finAC FinnGen AC + filterByRange.sauAC on + filterLabel.sauAC Saudi AC + filterByRange.sweAC on + filterLabel.sweAC SweGen AC + filterByRange.topAC on + filterLabel.topAC TOPMed AC + filterByRange.abrAC on + filterLabel.abrAC ABraOM AC + # Population-specific AF filters (AllOfUs) + filterByRange.aouAF_AFR on + filterLabel.aouAF_AFR AllOfUs AFR AF + filterByRange.aouAF_AMR on + filterLabel.aouAF_AMR AllOfUs AMR AF + filterByRange.aouAF_EAS on + filterLabel.aouAF_EAS AllOfUs EAS AF + filterByRange.aouAF_EUR on + filterLabel.aouAF_EUR AllOfUs EUR AF + filterByRange.aouAF_SAS on + filterLabel.aouAF_SAS AllOfUs SAS AF + # Population-specific AF filters (GenomeAsia) + filterByRange.gaAF_NEA on + filterLabel.gaAF_NEA GenomeAsia NEA AF + filterByRange.gaAF_SEA on + filterLabel.gaAF_SEA GenomeAsia SEA AF + filterByRange.gaAF_SAS on + filterLabel.gaAF_SAS GenomeAsia SAS AF + # Population-specific AF filters (MCPS) + filterByRange.mcpAF_AFR on + filterLabel.mcpAF_AFR MCPS AFR AF + filterByRange.mcpAF_EUR on + filterLabel.mcpAF_EUR MCPS EUR AF + filterByRange.mcpAF_IMX on + filterLabel.mcpAF_IMX MCPS Indigenous MX AF + priority 0.1 + track allofus shortLabel AllOfUs v7 245k WGS longLabel Variant Frequencies: AllOfUs - v7 245k WGS, only with allele count >= 20 type vcfTabix parent varFreqs on bigDataUrl /gbdb/$D/varFreqs/allofus/allOfUs.locAncFreq.vcf.gz dataVersion V7 visibility pack priority 0.5 #track me #shortLabel Regeneron Million Exomes 983k WES #longLabel Variant Frequencies: Regeneron One Million Exomes (ME) Project - 983k WGS #parent varFreqs on #bigDataUrl /gbdb/$D/varFreqs/mcps/mcps.freq.vcf.gz #visibility pack #type vcfTabix #hapClusterEnabled true #dataVersion 10/04/2023, v1.1.3 #tableBrowser off #priority 1 track topmed shortLabel NHLBI TOPMED 10 151k WGS longLabel Variant Frequencies: NHLBI TOPMED - 151k WGS type vcfTabix parent varFreqs on bigDataUrl /gbdb/$D/varFreqs/topmed/topmed10.vcf.gz dataVersion Freeze 10 visibility pack priority 2 track sfariSparkExomes shortLabel SFARI Spark 140k WES longLabel Variant Frequencies: SFARI SPARK - 140k WES type vcfTabix parent varFreqs on bigDataUrl /gbdb/$D/varFreqs/sfari/SPARK.iWES_v3.2024_08.deepvariant.norm.vcf.gz dataVersion iWES v3 2024_08 visibility pack priority 2.5 track sfariSparkWgs shortLabel SFARI Spark 12k WGS longLabel Variant Frequencies: SFARI SPARK - 12,519 WGS type vcfTabix parent varFreqs on bigDataUrl /gbdb/$D/varFreqs/sfari/wgs_12519_genome.deepvariant.norm.vcf.gz dataVersion iWGS v1.1 visibility pack priority 2.5 + html sfariSparkExomes #track mcps #shortLabel Mexico City Prospective Study 10k WGS+141k WES #longLabel Variant Frequencies: Mexico City Prospective Study (MCPS) #tableBrowser off #parent varFreqs on #bigDataUrl /gbdb/$D/varFreqs/mcps/mcps.freq.vcf.gz #visibility pack #type vcfTabix #dataVersion May 2023 (v1.2.0) #priority 3 track tommo60kjpn shortLabel Japan ToMMO 61k WGS longLabel Variant Frequencies: Japan 61k - ToMMO SNV+Indels type vcfTabix parent varFreqs on bigDataUrl /gbdb/$D/varFreqs/tommo61kjpn/tommo-61kjpn-20250616-GRCh38-snvindel-af-autosome.vcf.gz visibility pack dataVersion 2025-06-16 priority 5 track alfaVcf shortLabel NCBI ALFA 408k WGS/WES/array longLabel Variant Frequencies: NCBI ALFA (dbGaP data) - 408k mixed WGS/WES/array, 163M variants type vcfTabix parent varFreqs on bigDataUrl /gbdb/$D/varFreqs/alfa/ALFA.vcf.gz visibility pack dataVersion R4 priority 4.1 url https://www.ncbi.nlm.nih.gov/snp/$$#frequency_tab urlLabel NCBI Variation Page track finngen parent varFreqs on visibility pack type vcfTabix shortLabel Finnland Finngen 500k imputed longLabel Variant Frequencies: Finnland Finngen - 500k samples, arrays, imputation used 8.5k WGS priority 4.5 bigDataUrl /gbdb/$D/varFreqs/finngen/finnge_R12_annotated_variants_v1.vcf.gz dataVersion R12 tableBrowser off track swefreq parent varFreqs on visibility pack type vcfTabix shortLabel Sweden SweGen 1k WGS longLabel Variant Frequencies: Sweden SweGen - 1k WGS priority 4.7 bigDataUrl /gbdb/$D/varFreqs/swefreq/swegen_frequencies_fixploidy_GRCh38_20190204.vcf.gz dataVersion 20251201 tableBrowser off track mgrb shortLabel Australia MGRB 4k WGS longLabel Variant Frequencies: Australia Medical Genome Reference Bank - 4,011 WGS type vcfTabix parent varFreqs on bigDataUrl /gbdb/$D/varFreqs/mgrb/MGRB.phase3.GRCh38.norm.vcf.gz dataVersion Phase 3 visibility pack # no downloads as per Matt Hobbs email Jan 28 2026 tableBrowser off track gasp shortLabel GenomeAsia Pilot Subs 1.7k WGS longLabel Variant Frequencies: GenomeAsia Pilot - Substitutions type vcfTabix parent varFreqs on bigDataUrl /gbdb/$D/varFreqs/ga100k/ga100k.subst.vcf.gz visibility pack track gaspIndel shortLabel GenomeAsia Pilot Indels 1.7k WGS longLabel Variant Frequencies: GenomeAsia Pilot - Indels type vcfTabix parent varFreqs on bigDataUrl /gbdb/$D/varFreqs/ga100k/All.indels.annot.cont_withmaf.vcf.gz visibility pack + html gasp track abraom shortLabel Brazil ABraOM 1k WGS longLabel Variant Frequencies: ABraOM Brazil - 1,171 unrelated individuals type vcfTabix parent varFreqs on bigDataUrl /gbdb/$D/varFreqs/abraom/abraom.vcf.gz visibility pack dataVersion SABE-WGS-1171 Sep 2020 track indigenomes shortLabel India IndiGenomes 1k WGS longLabel Variant Frequencies: IndiGenomes India - 1,029 samples type vcfTabix parent varFreqs on bigDataUrl /gbdb/$D/varFreqs/indigenomes/IndiGenomes_Variants.vcf.gz visibility pack track kova shortLabel Korea KOVA 1.9k WGS+3.5k WES longLabel Variant Frequencies: KOVA Korea - 5305 samples, 1.9k WGS+3.5k WES type vcfTabix parent varFreqs on bigDataUrl /gbdb/$D/varFreqs/kova/kova.v7.vcf.gz visibility pack tableBrowser off dataVersion V7 track npm shortLabel Singapore NPM 9.7k WGS longLabel Variant Frequencies: NPM Singapore - 9,770 WGS samples type vcfTabix parent varFreqs on bigDataUrl /gbdb/$D/varFreqs/npm/SG10K_Health_r5.3.2.sites.vcf.bgz visibility pack tableBrowser off dataVersion r5.3.2 + track hrc + shortLabel HRC 30k WGS + longLabel Variant Frequencies: Haplotype Reference Consortium - 30k WGS (excl. 1000 Genomes) + type vcfTabix + parent varFreqs on + bigDataUrl /gbdb/$D/varFreqs/hrc/hrc.vcf.gz + visibility pack + dataVersion r1.1 + track saudi shortLabel Saudi Genome Project 302 WGS longLabel Variant Frequencies: Saudi Genome Project - 302 WGS samples type vcfTabix parent varFreqs on bigDataUrl /gbdb/$D/varFreqs/saudi/saudi.vcf.gz visibility pack track schema shortLabel SCHEMA Schizophrenia 121k WES longLabel Variant Frequencies: SCHEMA Schizophrenia Exome Meta-Analysis - WES 24k cases, 97k controls type vcfTabix parent varFreqs on bigDataUrl /gbdb/$D/varFreqs/schema/SCHEMA_variant_results_withAF.vcf.gz visibility pack dataVersion 2022 priority 4.9 url https://schema.broadinstitute.org/ urlLabel SCHEMA Browser - track varFreqsAll - shortLabel All Databases Combined - longLabel Variant Frequencies: All Databases Combined with Consequence Annotations - type bigBed 9 + + track mxbFreq + shortLabel Mexico Biobank 6k Array + longLabel Variant Frequencies: Mexico Biobank - 6,011 individuals, genotyping array + type vcfTabix parent varFreqs on - bigDataUrl /gbdb/$D/varFreqs/varFreqsAll.bb - visibility dense - itemRgb on - maxWindowToDraw 5000000 - # Variant type and consequence filters - filterValues.varType SNV|SNV,INS|Insertion,DEL|Deletion,MNV|MNV - filterLabel.varType Variant Type - filterValues.consequence missense|Missense,synonymous|Synonymous,stop_gained|Stop Gained,frameshift|Frameshift,splice_donor|Splice Donor,splice_acceptor|Splice Acceptor,intron|Intron,.|Intergenic - filterLabel.consequence Consequence - filterValues.sources aou|AllOfUs,spk|SPARK_iWES,wgs|SFARI_WGS,ga|GenomeAsia100K,kov|KOVA,tom|ToMMo,mcp|MCPS,ind|IndiGenomes,fin|FinnGen,sau|Saudi,swe|SweGen,top|TOPMed,abr|ABraOM - filterType.sources multipleListOr - filterLabel.sources Source Database - # Length filters - filterByRange.refLen on - filterLabel.refLen Reference Length - filterByRange.altLen on - filterLabel.altLen Alternate Length - filterByRange.varLen on - filterLabel.varLen Length Change - # Max AF filter - filterByRange.maxAF on - filterLabel.maxAF Max Allele Frequency - filterLimits.maxAF 0:1 - # Per-database AF filters - filterByRange.aouAF on - filterLabel.aouAF AllOfUs AF - filterByRange.spkAF on - filterLabel.spkAF SPARK AF - filterByRange.wgsAF on - filterLabel.wgsAF SFARI WGS AF - filterByRange.gaAF on - filterLabel.gaAF GenomeAsia AF - filterByRange.kovAF on - filterLabel.kovAF KOVA AF - filterByRange.tomAF on - filterLabel.tomAF ToMMo AF - filterByRange.mcpAF on - filterLabel.mcpAF MCPS AF - filterByRange.indAF on - filterLabel.indAF IndiGenomes AF - filterByRange.finAF on - filterLabel.finAF FinnGen AF - filterByRange.sauAF on - filterLabel.sauAF Saudi AF - filterByRange.sweAF on - filterLabel.sweAF SweGen AF - filterByRange.topAF on - filterLabel.topAF TOPMed AF - filterByRange.abrAF on - filterLabel.abrAF ABraOM AF - # Per-database AC filters - filterByRange.aouAC on - filterLabel.aouAC AllOfUs AC - filterByRange.spkAC on - filterLabel.spkAC SPARK AC - filterByRange.wgsAC on - filterLabel.wgsAC SFARI WGS AC - filterByRange.gaAC on - filterLabel.gaAC GenomeAsia AC - filterByRange.kovAC on - filterLabel.kovAC KOVA AC - filterByRange.tomAC on - filterLabel.tomAC ToMMo AC - filterByRange.mcpAC on - filterLabel.mcpAC MCPS AC - filterByRange.indAC on - filterLabel.indAC IndiGenomes AC - filterByRange.finAC on - filterLabel.finAC FinnGen AC - filterByRange.sauAC on - filterLabel.sauAC Saudi AC - filterByRange.sweAC on - filterLabel.sweAC SweGen AC - filterByRange.topAC on - filterLabel.topAC TOPMed AC - filterByRange.abrAC on - filterLabel.abrAC ABraOM AC - # Population-specific AF filters (AllOfUs) - filterByRange.aouAF_AFR on - filterLabel.aouAF_AFR AllOfUs AFR AF - filterByRange.aouAF_AMR on - filterLabel.aouAF_AMR AllOfUs AMR AF - filterByRange.aouAF_EAS on - filterLabel.aouAF_EAS AllOfUs EAS AF - filterByRange.aouAF_EUR on - filterLabel.aouAF_EUR AllOfUs EUR AF - filterByRange.aouAF_SAS on - filterLabel.aouAF_SAS AllOfUs SAS AF - # Population-specific AF filters (GenomeAsia) - filterByRange.gaAF_NEA on - filterLabel.gaAF_NEA GenomeAsia NEA AF - filterByRange.gaAF_SEA on - filterLabel.gaAF_SEA GenomeAsia SEA AF - filterByRange.gaAF_SAS on - filterLabel.gaAF_SAS GenomeAsia SAS AF - # Population-specific AF filters (MCPS) - filterByRange.mcpAF_AFR on - filterLabel.mcpAF_AFR MCPS AFR AF - filterByRange.mcpAF_EUR on - filterLabel.mcpAF_EUR MCPS EUR AF - filterByRange.mcpAF_IMX on - filterLabel.mcpAF_IMX MCPS Indigenous MX AF + bigDataUrl /gbdb/$D/varFreqs/mxb/mxb.freq.vcf.gz + visibility pack + dataVersion Nov 2025 (hg38 lift) + priority 6 + + track sgdpFreq + shortLabel SGDP 279 WGS + longLabel Variant Frequencies: Simons Genome Diversity Project - 279 WGS, 142 populations + type vcfTabix + parent varFreqs on + bigDataUrl /gbdb/$D/varFreqs/sgdpFreq/sgdp.freq.vcf.gz + visibility pack + dataVersion 2016-12-07 (hg38 lift) + priority 7 + + track gregor + shortLabel GREGoR R4 3.6k WGS + longLabel Variant Frequencies: GREGoR Consortium - Release 4, 3,624 WGS samples, rare disease families + type vcfTabix + parent varFreqs on + bigDataUrl /gbdb/$D/varFreqs/gregor/gregor.vcf.gz + visibility pack + dataVersion R04 (Oct 2025) + priority 8 + + track hgdp1kFreq + shortLabel gnomAD HGDP+1kG 4k WGS + longLabel Variant Frequencies: gnomAD HGDP + 1000 Genomes - 4,094 WGS, 80 populations + type vcfTabix + parent varFreqs on + bigDataUrl /gbdb/$D/varFreqs/hgdp1kFreq/hgdp1k.freq.vcf.gz + visibility pack + dataVersion v3.1.2 + priority 8 +