b2223b223d36a5a9fa31014dab284d6212cfb4e8 gperez2 Wed Aug 7 16:36:26 2024 -0700 Adding the gnomAD v4.1 Structural Variants track, refs #33823 diff --git src/hg/makeDb/doc/hg38/gnomad.txt src/hg/makeDb/doc/hg38/gnomad.txt index 5acbe10..93a6d59 100644 --- src/hg/makeDb/doc/hg38/gnomad.txt +++ src/hg/makeDb/doc/hg38/gnomad.txt @@ -512,15 +512,71 @@ # Copy the old autosql file: cp ../{missense,pli}Metrics.as . # Turn into a bigBed and link sizes=/hive/data/genomes/hg38/chrom.sizes bedToBigBed -type=bed12+6 -as=pliMetrics.as -tab -extraIndex=name,geneName pliByTranscript.tab $sizes pliByTranscript.bb pass1 - making usageList (376 chroms): 443 millis pass2 - checking and writing primary data (168326 records, 18 fields): 3529 millis Sorting and writing extra index 0: 91 millis Sorting and writing extra index 1: 83 millis bedToBigBed -type=bed12+5 -as=missenseMetrics.as -tab -extraIndex=name,geneName missenseByTranscript.tab $sizes missenseByTranscript.bb pass1 - making usageList (376 chroms): 505 millis pass2 - checking and writing primary data (168326 records, 17 fields): 2841 millis Sorting and writing extra index 0: 171 millis Sorting and writing extra index 1: 89 millis +############################################################################## +# gnomAD Structural Variants v4 - Gerardo +# Redmine #33823 + + cd /hive/data/outside/gnomAD.4/sv + wget https://storage.googleapis.com/gcp-public-data--gnomad/release/4.1/genome_sv/gnomad.v4.1.sv.sites.bed.gz + wget https://storage.googleapis.com/gcp-public-data--gnomad/release/4.1/genome_sv/gnomad.v4.1.sv.non_neuro_controls.sites.bed.gz + #Used the gnomad.v4.1.sv.non_neuro_controls.sites.bed.gz file to build the track since it has + #additional annotations of frequencies among non_neuro samples and control samples, and the + #two bed files have the same records. 
+ for f in gnomad.v4.1.sv.non_neuro_controls.sites.bed.gz; do out=${f/.bed.gz/}; zcat $f | tail -n +2 | tawk '{print $1, $2, $3, $4, $26, $29, $27, $28, $30, $31, $32, $33, $34, $39, $41, $44, $45, $47, $48, $49, $52, $53}' > $out.bed4Plus; done + # variant types: + + zcat gnomad.v4.1.sv.non_neuro_controls.sites.bed.gz | cut -f45 | sort | uniq -c + 356035 BND + 721 CNV + 15189 CPX + 99 CTX + 1206278 DEL + 269326 DUP + 304645 INS + 2193 INV + 1 SVTYPE + + # add colors based on gnomad website and get into proper bed9+ + cp /hive/data/outside/gnomAD.2/structuralVariants/gnomadSvToUcsc.awk . + #Modified the gnomadSvToUcsc.awk script and named it gnomadSvToUcsc_mod.awk to get the following + #fields: + #Allele Count for Non-neuro (field #627) + #Allele Number for Non-neuro (field #626) + #Allele Frequency for Non-neuro (field #628) + #Number of heterozygous variant carriers for Non-neuro (field #631) + #Number of homozygous alternate variant carriers for Non-neuro (field #632) + #Allele Count Controls (field #803) + #Allele Number Controls (field #802) + #Allele Frequency Controls (field #804) + #Number of heterozygous variant carriers for Controls (field #807) + #Number of homozygous alternate variant carriers for Controls (field #808) + + for f in gnomad.v4.1.sv.non_neuro_controls.sites.bed4Plus; do out=${f/.bed4Plus/}; bedClip -truncate $f /hive/data/genomes/hg38/chrom.sizes stdout | ./gnomadSvToUcsc_mod.awk | sort -k1,1 -k2,2n > $out.bed9Plus; done + + cp /hive/data/outside/gnomAD.2/structuralVariants/gnomadSv.as . 
+ #Modified the gnomadSv.as file to include the non_neuro samples, and the control samples, and + #named the file gnomadSvMod.as + for f in gnomad.v4.1.sv.non_neuro_controls.sites.bed9Plus; do out=${f/.bed9Plus/}; bedToBigBed -tab -type=bed9+19 -as=gnomadSvMod.as -extraIndex=name $f /hive/data/genomes/hg38/chrom.sizes $out.bb; done + cd /gbdb/hg38/gnomAD/v4 + mkdir structuralVariants; cd structuralVariants + cp -s /hive/data/outside/gnomAD.4/sv/gnomad.v4.1.sv.non_neuro_controls.sites.bb . + cd ~/kent/src/hg/makeDb/trackDb/human/hg38 + cp gnomad.alpha.ra gnomadSV.alpha.ra + #Copied the gnomadSvFull track stanza from ~/kent/src/hg/makeDb/trackDb/human/hg19/trackDb.gnomad.ra + # Added filters for non-neurological and control Allele Frequencies + cp ../hg19/gnomadSv.html . + #Updated the gnomadSv.html + +##############################################################################