f8575fe7f3223c6279b66e2abc6b3472dad916ef hiram Thu Nov 21 15:48:59 2019 -0800 update ncbiRefSeq tables refs #20215 diff --git src/hg/makeDb/doc/hg19.txt src/hg/makeDb/doc/hg19.txt index c69f2a7..dd97abe 100644 --- src/hg/makeDb/doc/hg19.txt +++ src/hg/makeDb/doc/hg19.txt @@ -33640,15 +33640,77 @@ wget ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE63nnn/GSE63525/suppl/GSE63525_K562_combined.hic wget ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE63nnn/GSE63525/suppl/GSE63525_KBM7_combined.hic wget ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE63nnn/GSE63525/suppl/GSE63525_NHEK_combined.hic mkdir -p /gbdb/hg19/bbi/hic cd /gbdb/hg19/bbi/hic ln -s /hive/data/genomes/hg19/bed/hic/*.hic . ######################################################################### # Illumina GDA (DONE braney 2019-10-16) cd /cluster/data/hg19/bed/gda wget "http://webdata.illumina.com.s3-website-us-east-1.amazonaws.com/downloads/productfiles/global-diversity-array/infinium-global-diversity-array-8-d1-csv.zip" unzip *.zip awk 'BEGIN {FS=","; OFS="\t"} { print "chr" $10, $11-1, $11, $2,0, $21,$4}' *.csv | sort -k1,1 -k2,2n | bedClip stdin /cluster/data/hg19/chrom.sizes gda.bed hgLoadSqlTab hg19 snpArrayIlluminaGDA gda.sql gda.bed + +######################################################################### +# ncbiRefSeq.p13 update (DONE - 2019-11-21 - Hiram) + + mkdir /hive/data/genomes/hg19/bed/ncbiRefSeq.p13.2019-11-21 + cd /hive/data/genomes/hg19/bed/ncbiRefSeq.p13.2019-11-21 + + # running stepwise just to be careful + time (~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \ + -bigClusterHub=ku -dbHost=hgwdev \ + -stop=download -fileServer=hgwdev -smallClusterHub=ku -workhorse=hgwdev \ + refseq vertebrate_mammalian Homo_sapiens \ + GCF_000001405.25_GRCh37.p13 hg19) > download.log 2>&1 + # real 2m27.137s + + time (~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \ + -continue=process -bigClusterHub=ku -dbHost=hgwdev \ + -stop=process -fileServer=hgwdev -smallClusterHub=ku 
-workhorse=hgwdev \ + refseq vertebrate_mammalian Homo_sapiens \ + GCF_000001405.25_GRCh37.p13 hg19) > process.log 2>&1 + # real 2m9.450s + + ### BEFORE loading this updated table + cat fb.ncbiRefSeq.hg19.txt + # 85414465 bases of 2991694177 (2.855%) in intersection + + featureBits -enrichment hg19 refGene ncbiRefSeq + # refGene 3.002%, ncbiRefSeq 2.855%, both 2.690%, cover 89.59%, enrich 31.38x + + featureBits -enrichment hg19 ncbiRefSeq refGene + # ncbiRefSeq 2.855%, refGene 3.002%, both 2.690%, cover 94.21%, enrich 31.38x + + featureBits -enrichment hg19 ncbiRefSeqCurated refGene + # ncbiRefSeqCurated 2.855%, refGene 3.002%, both 2.690%, cover 94.21%, enrich 31.38x + + featureBits -enrichment hg19 refGene ncbiRefSeqCurated + # refGene 3.002%, ncbiRefSeqCurated 2.855%, both 2.690%, cover 89.59%, enrich 31.38x + + time (~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \ + -continue=load -bigClusterHub=ku -dbHost=hgwdev \ + -stop=load -fileServer=hgwdev -smallClusterHub=ku -workhorse=hgwdev \ + refseq vertebrate_mammalian Homo_sapiens \ + GCF_000001405.25_GRCh37.p13 hg19) > load.log 2>&1 + # real 0m21.982s + + ### AFTER loading this updated table + cat fb.ncbiRefSeq.hg19.txt + # 93708953 bases of 2991694177 (3.132%) in intersection + + featureBits -enrichment hg19 refGene ncbiRefSeq + # refGene 3.002%, ncbiRefSeq 3.132%, both 2.983%, cover 99.35%, enrich 31.72x + + featureBits -enrichment hg19 ncbiRefSeq refGene + # ncbiRefSeq 3.132%, refGene 3.002%, both 2.983%, cover 95.23%, enrich 31.72x + + featureBits -enrichment hg19 ncbiRefSeqCurated refGene + # ncbiRefSeqCurated 3.132%, refGene 3.002%, both 2.983%, cover 95.23%, enrich 31.72x + + featureBits -enrichment hg19 refGene ncbiRefSeqCurated + # refGene 3.002%, ncbiRefSeqCurated 3.132%, both 2.983%, cover 99.35%, enrich 31.72x + +#########################################################################