9d63cc04f4da43dc2f454a55c1144b1604c29ab1 hiram Thu Feb 6 15:23:56 2020 -0800 add .as file and ftp location script refs #24818 diff --git src/hg/makeDb/doc/hg19.txt src/hg/makeDb/doc/hg19.txt index 5d8d8ad..bf70be9 100644 --- src/hg/makeDb/doc/hg19.txt +++ src/hg/makeDb/doc/hg19.txt @@ -34007,36 +34007,46 @@ ############################################################################## 2020-01-21: manually update GENCODE V19 to include chrMT (markd) cd /hive/data/genomes/hg19/bed/gencodeV19/chrMT # convert chrM annotation in gff3 to chrMT zcat ../data/release_19/gencode.v19.annotation.gff3.gz | tawk 'NR==1{print;next} $1=="chrM"{$1="chrMT"; print}' >chrMT.gff3 # comprehensive and basic are the same on chrM and there are no pseudogenes, so this is # easy cat ../tables/wgEncodeGencodeCompV19.gp chrMT.gp >wgEncodeGencodeCompV19.gp cat ../tables/wgEncodeGencodeBasicV19.gp chrMT.gp >wgEncodeGencodeBasicV19.gp hgLoadGenePred -genePredExt hg19 wgEncodeGencodeCompV19 wgEncodeGencodeCompV19.gp hgLoadGenePred -genePredExt hg19 wgEncodeGencodeBasicV19 wgEncodeGencodeBasicV19.gp ############################################################################## -# doseSensitivity (WORKING - 2020-02-06) +# doseSensitivity (WORKING - 2020-02-06, Ana, Hiram) mkdir /hive/data/genomes/hg19/bed/doseSensitivity cd /hive/data/genomes/hg19/bed/doseSensitivity - ftp original files from ClinGen + ftp original files from ClinGen: + + ftp://ftp.ncbi.nlm.nih.gov/pub/dbVar/clingen/ + + wget --timestamping \ +ftp://ftp.ncbi.nlm.nih.gov/pub/dbVar/clingen/ClinGen_haploinsufficiency_gene_GRCh37.bed + + wget --timestamping \ +ftp://ftp.ncbi.nlm.nih.gov/pub/dbVar/clingen/ClinGen_triplosensitivity_gene_GRCh37.bed + + -rw-rw-rw- 1 202784 Jan 28 16:21 ClinGen_gene_curation_list_GRCh37.tsv -rw-rw-rw- 1 45142 Jan 28 16:21 ClinGen_haploinsufficiency_gene_GRCh37.bed -rw-rw-rw- 1 48750 Jan 28 16:22 ClinGen_triplosensitivity_gene_GRCh37.bed # create perl script for processing: toUcsc.pl 
########################################################################### #!/usr/bin/env perl use strict; use warnings; my $argc = scalar(@ARGV); if ($argc != 1) { @@ -34092,30 +34102,48 @@ } printf "%s\t%d\t%d\t%s\t%d\t+\t%d\t%d\t%s\t%s\t%d - %s\n", $a[0], $a[1], $a[2], $a[3], $score, $a[1], $a[2], $color, $a[4], $score, $mouseOver[$selectMouseOver]; } close (FH); ########################################################################### # convert the original 'bed 5' data to bed 9+2 with perl script: ./toUcsc.pl ClinGen_haploinsufficiency_gene_GRCh37.bed \ | sort -k1,1 -k2,2n > haploInsufficiency.bed ./toUcsc.pl ClinGen_triplosensitivity_gene_GRCh37.bed \ | sort -k1,1 -k2,2n > triploSensitivity.bed # convert bed to bigBed: + # using the doseSensitivity.as file: + +table doseSensitivity +"ClinGen dosage sensitivity bed 9 plus original dosageScore" + ( + string chrom; "Chromosome (or contig, scaffold, etc.)" + uint chromStart; "Start position in chromosome" + uint chromEnd; "End position in chromosome" + string name; "Name of item" + uint score; "Score from 0-1000" + char[1] strand; "+ or -" + uint thickStart; "Start of where display should be thick (start codon)" + uint thickEnd; "End of where display should be thick (stop codon)" + uint itemRgb; "color indicates dosage score" + string dosageScore; "dosage score from ClinGen" + string mouseOver; "description for the score meaning" + ) + bedToBigBed -tab -type=bed9+2 -as=doseSensitivity.as \ haploInsufficiency.bed ../../chrom.sizes haploInsufficiency.bb bedToBigBed -tab -type=bed9+2 -as=doseSensitivity.as \ triploSensitivity.bed ../../chrom.sizes triploSensitivity.bb # measure data in bigBed files: bigBedInfo haploInsufficiency.bb | sed -e 's/^/# /;' # version: 4 # fieldCount: 11 # hasHeaderExtension: yes # isCompressed: yes # isSwapped: 0 # extraIndexCount: 0 # itemCount: 1,396