5fcddbe40be61db85506092f6d7fb206f4fa90b1 jnavarr5 Tue Nov 5 15:29:13 2024 -0800 Adding steps to create symlinks, refs #34097 diff --git src/hg/makeDb/doc/hg38/decipher.txt src/hg/makeDb/doc/hg38/decipher.txt index 02a635b..0190c43 100644 --- src/hg/makeDb/doc/hg38/decipher.txt +++ src/hg/makeDb/doc/hg38/decipher.txt @@ -1,17 +1,17 @@ ######################################################################################## -# DECIPHER Developmental Disorders panel in the Gene2Phenotype database (DDG2P) +# DECIPHER Developmental Disorders panel in the Gene2Phenotype database (DDG2P), hg38/hg19 # November 5, 2024 - Yesenia Puga, Jairo Navarro, Gerardo Perez # Download required files wget https://www.deciphergenomics.org/files/downloads/population_cnv_grch38.txt.gz # Downloads the CNV data file for hg38 wget http://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.chrom.sizes # Downloads chromosome size data for hg38 wget https://genome.ucsc.edu/goldenPath/help/examples/bedExample2.as # Downloads the .as file defining custom track fields wget http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/bedToBigBed # Downloads the bedToBigBed utility for converting BED to bigBed format chmod 700 bedToBigBed # Changes permissions to make bedToBigBed executable # Prepare the BED file: # Decompresses and trims the CNV data to the first 15 fields zcat population_cnv_grch38.txt.gz | cut -f1-15 > population_cnv_grch38.bed # reorders columns to fit BED format awk 'BEGIN {OFS="\t"} {print $2, $3, $4, $1, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15}' population_cnv_grch38.bed > population_cnv_grch38_reordered.bed @@ -22,30 +22,38 @@ # Adjust the BED file for bigBed conversion: # prepends 'chr' to chromosome numbers and adjusts fields for bigBed awk 'BEGIN{OFS="\t"} {print "chr"$1, $2, $3, $4, 0, ".", $2, $3, 0, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15}' population_cnv_grch38_final.bed > population_cnv_grch38_final_chr.bed # Sort the BED file: # sorts lexicographically by chromosome and numerically by start position LC_ALL=C sort -k1,1 -k2,2n population_cnv_grch38_final_chr.bed > population_cnv_grch38_final_sorted.bed # Add RGB colors to the BED file using a Python script: python3 ../assign_rgb_to_bed.py population_cnv_grch38_final_sorted.bed output_population_cnv_grch38.bed # Convert the BED file to bigBed format, indexing by gene name for faster lookups: ./bedToBigBed -type=bed9+ -as=bedExample2.as -tab -extraIndex=name output_population_cnv_grch38.bed hg38.chrom.sizes population_cnv_grch38.bb +# Add files to hive +cp hg19/population_cnv_grch37.bb /hive/data/genomes/hg19/bed/ddg2p/ddg2pSyndromes.bb +cp hg38/population_cnv_grch38.bb /hive/data/genomes/hg38/bed/ddg2p/ddg2pSyndromes.bb + +# Create symlinks from hive to /gbdb +ln -s /hive/data/genomes/hg19/bed/ddg2p/ddg2pSyndromes.bb /gbdb/hg19/decipher/ddg2pSyndromes.bb +ln -s /hive/data/genomes/hg38/bed/ddg2p/ddg2pSyndromes.bb /gbdb/hg38/decipher/ddg2pSyndromes.bb + ####################################################### Author: Yesenia Puga Program: assign_rgb_to_bed.py ####################################################### # The Python script 'assign_rgb_to_bed.py' is crucial in the data processing pipeline. # It assigns RGB color values based on CNV type ('loss', 'gain', 'del/dup') # to each entry, enhancing visual differentiation on the Genome Browser. It also updates the # 'type' column from numeric identifiers to descriptive text, # which is used in the browser for informative mouseover tooltips, aiding in quick and clear # variant identification. import csv import sys # Define the color mappings for the RGB column based on the CNV type def get_rgb_color(cnv_type):