2f75ea14748d867f3ab9f56ccaa69bc74e9486a3 gperez2 Tue Jan 14 22:35:06 2025 -0800 Renaming/updating DDG2P track to DECIPHER Population CNVs, refs #35053 diff --git src/hg/makeDb/doc/hg38/decipher.txt src/hg/makeDb/doc/hg38/decipher.txt index 0190c43..219bd88 100644 --- src/hg/makeDb/doc/hg38/decipher.txt +++ src/hg/makeDb/doc/hg38/decipher.txt @@ -96,15 +96,52 @@ row[8] = rgb_value # Update RGB value in the 9th column writer.writerow(row) print(f"Updated file successfully. Output saved to {output_bed_file}.") if __name__ == "__main__": if len(sys.argv) != 3: print("Usage: python update_rgb.py ") else: input_bed_file = sys.argv[1] output_bed_file = sys.argv[2] process_file(input_bed_file, output_bed_file) ######################################################################################## +# Renaming/updating DDG2P track to DECIPHER Population CNVs #35053 +# January 14, 2025 - Gerardo Perez + +# Downloaded files +wget https://www.deciphergenomics.org/files/downloads/population_cnv_grch38.txt.gz +zcat population_cnv_grch38.txt.gz | cut -f1-15 > population_cnv_grch38.bed +wget http://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.chrom.sizes +cp /cluster/home/yepuga/public_html/trackHubs/ddg2p_syndromes/hg38/bedExample2.as /hive/users/gperez2/tracks/decipher/hg38 +cp /cluster/home/yepuga/public_html/trackHubs/ddg2p_syndromes/hg38/assign_rgb_to_bed.py /hive/users/gperez2/tracks/decipher/hg38 + +# Working directory +cd /hive/users/gperez2/tracks/decipher/hg38 + +# Commands +zcat population_cnv_grch38.txt.gz | cut -f1-15 > population_cnv_grch38.bed + +# reorders columns to fit BED format +awk 'BEGIN {OFS="\t"} {print $2, $3, $4, $1, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15}' population_cnv_grch38.bed > population_cnv_grch38_reordered.bed + +# removes the header line +tail -n +2 population_cnv_grch38_reordered.bed > population_cnv_grch38_final.bed + +# Adjust the BED file for bigBed conversion: +# prepends 'chr' to chromosome numbers, subtracts 1 from 
the start position, and adjusts fields for bigBed +awk 'BEGIN{OFS="\t"} {print "chr"$1, $2-1, $3, $4, 0, ".", $2, $3, 0, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15}' population_cnv_grch38_final.bed > population_cnv_grch38_final_chr.bed + +bedSort population_cnv_grch38_final_chr.bed population_cnv_grch38_sorted.bed + +# Add RGB colors to the BED file using a Python script: +python3 assign_rgb_to_bed.py population_cnv_grch38_sorted.bed output_population_cnv_grch38.bed + +bedToBigBed -type=bed9+ -as=bedExample2.as -tab -extraIndex=name output_population_cnv_grch38.bed hg38.chrom.sizes population_cnv_grch38.bb + +# Moving files +cp hg38/population_cnv_grch38.bb /hive/data/genomes/hg38/bed/decipher/population_cnv.bb + +ln -s /hive/data/genomes/hg38/bed/decipher/population_cnv.bb /gbdb/hg38/decipher/population_cnv.bb