edcdcb9dd80eba31d99e159e27e6db9fe360cece gperez2 Sat Sep 21 17:05:59 2024 -0700 Adding the gnomAD v4.1 CNV track, refs 34253 diff --git src/hg/makeDb/doc/hg38/gnomad.txt src/hg/makeDb/doc/hg38/gnomad.txt index 93a6d59..2911d24 100644 --- src/hg/makeDb/doc/hg38/gnomad.txt +++ src/hg/makeDb/doc/hg38/gnomad.txt @@ -568,15 +568,78 @@ cp /hive/data/outside/gnomAD.2/structuralVariants/gnomadSv.as . #Modified the gnomadSv.as file to include the non_neuro samples, and the control samples, and #named the file gnomadSvMod.as for f in gnomad.v4.1.sv.non_neuro_controls.sites.bed9Plus; do out=${f/.bed9Plus/}; bedToBigBed -tab -type=bed9+19 -as=gnomadSvMod.as -extraIndex=name $f /hive/data/genomes/hg38/chrom.sizes $out.bb; done cd /gbdb/hg38/gnomAD/v4 mkdir structuralVariants; cd structuralVariants cp -s /hive/data/outside/gnomAD.4/sv/gnomad.v4.1.sv.non_neuro_controls.sites.bb . cd ~/kent/src/hg/makeDb/trackDb/human/hg38 cp gnomad.alpha.ra gnomadSV.alpha.ra #Copied the gnomadSvFull track stanza from ~/kent/src/hg/makeDb/trackDb/human/hg19/trackDb.gnomad.ra # Added filters for non-neurological and control Allele Frequencies cp ../hg19/gnomadSv.html . #Updated the gnomadSv.html ############################################################################## +# gnomAD CNVs v4.1 - Gerardo +# Redmine #34253 + + + cd /hive/data/outside/gnomAD.4 + mkdir cnv; cd cnv + wget https://storage.googleapis.com/gcp-public-data--gnomad/release/4.1/exome_cnv/gnomad.v4.1.cnv.non_neuro_controls.bed + + #Looked at the bed file and decided to get the following columns + #chrom chromStart chromEnd name score strand thickStart thickEnd itemRgb SVLEN SVTYPE Genes SC SN SF SC_XX SN_XX SF_XX SC_XY SN_XY SF_XY + #Need to remove the "variant_is_80_" substring from the 4th column string to make the values into gnomAD variant IDs. + #Need to add colors to the 9th column. Variant Types. 
Deletion (DEL):"255,0,0" and Duplication (DUP):"0,0,255"
+
+    #Loop through each file matching the pattern (in this case, only gnomad.v4.1.cnv.non_neuro_controls.bed).
+    for f in gnomad.v4.1.cnv.non_neuro_controls.bed; do
+
+        #Create an output base name by stripping the ".bed" extension.
+        #The ".bed9Plus" suffix is appended to it when the result is written below.
+        out=${f%.bed}
+
+        #Process the file with one pipeline. All of the explanation is kept here, above the
+        #pipeline, on purpose: a trailing "\" followed by a blank or comment line ends the
+        #command, so a final "> file" placed after such lines would run as a separate command
+        #that only truncates the output file while the awk output goes to the terminal.
+        #
+        #Stage 1, tail:
+        #  Skip the first 100 lines using `tail -n +101`.
+        #  NOTE(review): presumably these are header lines; confirm against the downloaded file.
+        #
+        #Stage 2, first awk, makes a bed9plus:
+        #  Set both the input and output field separators to tabs (FS=OFS="\t").
+        #  Use gsub() to remove the "variant_is_80_" substring from column 4.
+        #  Print columns 1, 2, 3, 4. A zero constant for column 5. A period constant for column 6.
+        #  Columns 2 and 3 again for columns 7 and 8.
+        #  Then input columns 9, 13, 14, 19, 22, 32, 42, 52, 62, 72, 82, 92, and 102.
+        #
+        #Stage 3, second awk, adds colors to column 9 based on output column 11 (input column 14):
+        #  If column 11 is exactly "DEL", set column 9 to "255,0,0" (red color).
+        #  If column 11 is exactly "DUP", set column 9 to "0,0,255" (blue color).
+        #  Any other value leaves column 9 unchanged. Print the line.
+        #
+        #The result is redirected to a new file named $out.bed9Plus.
+        tail -n +101 "$f" \
+            | awk 'BEGIN {FS=OFS="\t"} {gsub("variant_is_80_", "", $4); print $1, $2, $3, $4, 0, ".", $2, $3, $9, $13, $14, $19, $22, $32, $42, $52, $62, $72, $82, $92, $102}' \
+            | awk 'BEGIN {FS=OFS="\t"} {if ($11 == "DEL") $9="255,0,0"; else if ($11 == "DUP") $9="0,0,255"; print}' \
+            > "$out.bed9Plus"
+
+    # End of the loop.
+    done
+
+    #Count the items of each variant type (column 11 of the bed9Plus file).
+    cut -f11 gnomad.v4.1.cnv.non_neuro_controls.bed9Plus | sort | uniq -c
+      20989 DEL
+      25026 DUP
+
+    #Made the as file and the file is located in the following directory: /hive/data/outside/gnomAD.4/cnv/gnomadCNV.as
+
+    #Made a bigBed9plus
+    for f in gnomad.v4.1.cnv.non_neuro_controls.bed9Plus; do
+        out=${f%.bed9Plus}
+        bedToBigBed -tab -type=bed9+12 -as=gnomadCNV.as -extraIndex=name "$f" /hive/data/genomes/hg38/chrom.sizes "$out.bb"
+    done
+
+    cd /gbdb/hg38/gnomAD/v4
+    mkdir cnv; cd cnv
+    ln -s /hive/data/outside/gnomAD.4/cnv/gnomad.v4.1.cnv.non_neuro_controls.bb
+    cd ~/kent/src/hg/makeDb/trackDb/human/hg38
+    #Edited the gnomad.alpha.ra file
+    cp gnomadSv.html gnomadCNV.html
+    #Updated the gnomadCNV.html
+##############################################################################