13dc32436ac510521c3366ea68973d3ec4940937 max Wed Jan 28 04:48:20 2026 -0800 adding gnomad3 coverage track, refs #37036 diff --git src/hg/makeDb/doc/hg38/gnomad.txt src/hg/makeDb/doc/hg38/gnomad.txt index 96c2bd84182..b7b0aab8073 100644 --- src/hg/makeDb/doc/hg38/gnomad.txt +++ src/hg/makeDb/doc/hg38/gnomad.txt @@ -688,15 +688,73 @@ cd /hive/data/outside/gnomAD.4/sv for f in gnomad.v4.1.sv.non_neuro_controls.sites.bed.gz; do out=${f/.bed.gz/}; zcat $f | tail -n +2 | tawk '{print $1, $2, $3, $4, $26, $29, $27, $28, $30, $31, $32, $33, $34, $39, $41, $44, $45, $47, $48, $49, $52, $53, $627, $626, $628, $631, $632, $803, $802, $804, $807, $808, $978}' > $out.bed4Plus_FILTER; done # Remade the bed9Plus using the updated awk file gnomadSvToUcsc_mod.FILTER.awk to add the FILTER field and update the mouseOver: for f in gnomad.v4.1.sv.non_neuro_controls.sites.bed4Plus_FILTER; do out=${f/.bed4Plus_FILTER/}; bedClip -truncate $f /hive/data/genomes/hg38/chrom.sizes stdout | ./gnomadSvToUcsc_mod.FILTER.awk | sort -k1,1 -k2,2n > $out.bed9Plus_FILTER; done # Added the FILTER field to the as file and named it gnomadSvMod_FILTER.as # Changed the name of the previous bigBed and Remade the bigBed with the FILTER: mv gnomad.v4.1.sv.non_neuro_controls.sites.bb gnomad.v4.1.sv.non_neuro_controls.sites_old_Aug_1_2024_.bb for f in gnomad.v4.1.sv.non_neuro_controls.sites.bed9Plus_FILTER; do out=${f/.bed9Plus_FILTER/}; bedToBigBed -tab -type=bed9+30 -as=gnomadSvMod_FILTER.as -extraIndex=name $f /hive/data/genomes/hg38/chrom.sizes $out.bb; done # Added the filter trackDb setting to the gnomadStructuralVariants stanza in human/hg38/gnomad.ra: filterValues.FILTER PASS,HIGH_NCR,IGH_MHC_OVERLAP,UNRESOLVED,REFERENCE_ARTIFACT filterValuesDefault.FILTER PASS filterType.FILTER multipleListAnd + +############################################################################## +# gnomAD Coverage v3.0.1 - Jan 28, 2026 - max +############################################################################## +# 
Coverage data from gnomAD genomes v3.0.1 +# Source: https://gnomad.broadinstitute.org/downloads#v3-coverage + +cd /hive/data/genomes/hg38/bed/gnomad/coverage + +# Download coverage summary file +wget https://storage.googleapis.com/gcp-public-data--gnomad/release/3.0.1/coverage/genomes/gnomad.genomes.r3.0.1.coverage.summary.tsv.bgz + +# Create chromosome sizes file (gnomAD uses only main chromosomes) +cat > gnomad.chrom.sizes << 'EOF' +chr1 248956422 +chr2 242193529 +chr3 198295559 +chr4 190214555 +chr5 181538259 +chr6 170805979 +chr7 159345973 +chr8 145138636 +chr9 138394717 +chr10 133797422 +chr11 135086622 +chr12 133275309 +chr13 114364328 +chr14 107043718 +chr15 101991189 +chr16 90338345 +chr17 83257441 +chr18 80373285 +chr19 58617616 +chr20 64444167 +chr21 46709983 +chr22 50818468 +chrX 156040895 +EOF + +# Convert TSV to bigWig files using Python script +python3 ~/kent/src/hg/makeDb/scripts/gnomadCovToBw.py +# Creates 11 bigWig files: +# gnomad.coverage.mean.bw - Mean coverage +# gnomad.coverage.median.bw - Median coverage +# gnomad.coverage.over_1.bw - Fraction of samples with >= 1X coverage +# gnomad.coverage.over_5.bw - Fraction of samples with >= 5X coverage +# gnomad.coverage.over_10.bw - Fraction of samples with >= 10X coverage +# gnomad.coverage.over_15.bw - Fraction of samples with >= 15X coverage +# gnomad.coverage.over_20.bw - Fraction of samples with >= 20X coverage +# gnomad.coverage.over_25.bw - Fraction of samples with >= 25X coverage +# gnomad.coverage.over_30.bw - Fraction of samples with >= 30X coverage +# gnomad.coverage.over_50.bw - Fraction of samples with >= 50X coverage +# gnomad.coverage.over_100.bw - Fraction of samples with >= 100X coverage + +# Create symlinks in /gbdb +mkdir -p /gbdb/hg38/gnomAD/coverage +cd /gbdb/hg38/gnomAD/coverage +for f in /hive/data/genomes/hg38/bed/gnomad/coverage/*.bw; do ln -sf "$f" .; done