#!/usr/bin/env perl
#
# toUcsc.pl - convert a ClinGen dosage sensitivity 'bed 5' file into the
# tab-separated bed 9+2 layout that is loaded with
# "bedToBigBed -type=bed9+2 -as=doseSensitivity.as".
#
# usage: ./toUcsc.pl originalFile.bed > newFile.bed
#
# Output columns, one row per input row:
#    1-4  chrom, start, end, name    copied from the input
#    5    score                      column 5 of the input, 0 for "Not ..." rows
#    6    strand                     always "+"
#    7-8  start, end                 repeated from columns 2 and 3
#    9    color                      "r,g,b" string selected by the score
#   10    original column 5 text     unmodified
#   11    mouse over text            "<score> - <description>"
#
# Provenance: commit bbeaefa58e83db818c7e57f778510e1802aa32ea
#   hiram  Thu Feb 6 15:17:34 2020 -0800
#   "add toUcsc.pl script refs #24818"
#   recorded in src/hg/makeDb/doc/hg19.txt, section
#   "doseSensitivity (WORKING - 2020-02-06)", which follows these lines:
#     hgLoadGenePred -genePredExt hg19 wgEncodeGencodeCompV19 wgEncodeGencodeCompV19.gp
#     hgLoadGenePred -genePredExt hg19 wgEncodeGencodeBasicV19 wgEncodeGencodeBasicV19.gp
#
# Workflow this script is part of (from that hg19.txt section):
#
#   mkdir /hive/data/genomes/hg19/bed/doseSensitivity
#   cd /hive/data/genomes/hg19/bed/doseSensitivity
#
#   ftp original files from ClinGen
#     -rw-rw-rw- 1 202784 Jan 28 16:21 ClinGen_gene_curation_list_GRCh37.tsv
#     -rw-rw-rw- 1  45142 Jan 28 16:21 ClinGen_haploinsufficiency_gene_GRCh37.bed
#     -rw-rw-rw- 1  48750 Jan 28 16:22 ClinGen_triplosensitivity_gene_GRCh37.bed
#
#   # convert the original 'bed 5' data to bed 9+2 with perl script:
#   ./toUcsc.pl ClinGen_haploinsufficiency_gene_GRCh37.bed \
#     | sort -k1,1 -k2,2n > haploInsufficiency.bed
#   ./toUcsc.pl ClinGen_triplosensitivity_gene_GRCh37.bed \
#     | sort -k1,1 -k2,2n > triploSensitivity.bed
#
#   # convert bed to bigBed:
#   bedToBigBed -tab -type=bed9+2 -as=doseSensitivity.as \
#     haploInsufficiency.bed ../../chrom.sizes haploInsufficiency.bb
#   bedToBigBed -tab -type=bed9+2 -as=doseSensitivity.as \
#     triploSensitivity.bed ../../chrom.sizes triploSensitivity.bb
#
#   # measure data in bigBed files:

use strict;
use warnings;

my $argc = scalar(@ARGV);

if ($argc != 1) {
    printf STDERR "usage: ./toUcsc.pl originalFile.bed > newFile.bed\n";
    exit 255;
}

# Descriptions used in the mouse over column; the index is chosen per row.
my @mouseOver;

$mouseOver[0] = "not yet evaluated";
$mouseOver[1] = "no evidence for dosage pathogenicity";
$mouseOver[2] = "little evidence for dosage pathogenicity";
$mouseOver[3] = "some evidence for dosage pathogenicity";
$mouseOver[4] = "sufficient evidence for dosage pathogenicity";
$mouseOver[5] = "gene associated with autosomal recessive phenotype";
$mouseOver[6] = "haploinsufficiency unlikely";

# Recognized numeric column 5 values => [ color, index into @mouseOver ].
# A table with an explicit existence check is used instead of a chain of
# numeric '==' tests: with '==', any non-numeric or missing value numifies
# to 0 and would silently be reported as score 0 rather than rejected.
my %scoreStyle = (
    0  => [ "252,79,89", 1 ],
    1  => [ "209,45,51", 2 ],
    2  => [ "160,48,51", 3 ],
    3  => [ "109,51,43", 4 ],
    30 => [ "109,51,43", 5 ],
    40 => [ "0,0,255",   6 ],
);

my $inFile = shift;

# Open the file directly (three-arg open, lexical handle) instead of piping
# through "grep -v track $inFile": no shell is involved, so unusual file
# names are safe, and a missing or unreadable file is reported here instead
# of silently producing empty output.
open(my $fh, '<', $inFile) or die "can not read $inFile: $!";
while (my $line = <$fh>) {
    chomp $line;
    # same filter as the former 'grep -v track': drop any line containing
    # the word, which removes the track definition line(s)
    next if ($line =~ m/track/);
    next if ($line !~ m/\S/);    # nothing to convert on a blank line
#   printf STDERR "# %s\n", $line if ($line =~ m/Not/);
    # limit of 5 keeps column 5 whole even when it contains blanks,
    # which is the case for the "Not ..." rows
    my @a = split(/\s+/, $line, 5);
    if (scalar(@a) < 5) {
        printf STDERR "%s\n", $line;
        die "expected 5 columns ?";
    }
    my $color;
    my $score;
    my $selectMouseOver;
    if ($a[4] =~ m/Not/) {
        # not evaluated: gray item with score 0
        $color = "128,128,128";
        $score = 0;
        $selectMouseOver = 0;
    } elsif ($a[4] =~ m/\A(\d+)\s*\z/ && exists $scoreStyle{$1 + 0}) {
        $score = $1 + 0;
        ($color, $selectMouseOver) = @{ $scoreStyle{$score} };
    } else {
        printf STDERR "%s\n", $line;
        die "unrecognized column 5 value ?";
    }
    printf "%s\t%d\t%d\t%s\t%d\t+\t%d\t%d\t%s\t%s\t%d - %s\n",
        $a[0], $a[1], $a[2], $a[3], $score, $a[1], $a[2], $color, $a[4],
        $score, $mouseOver[$selectMouseOver];
}
close($fh);