55673f448aba344d8174691c65813bf7994b72ac hiram Fri Dec 6 11:49:27 2019 -0800 updated ncbiRefSeq tables refs #24528 diff --git src/hg/makeDb/doc/hg38/ncbiRefSeq.txt src/hg/makeDb/doc/hg38/ncbiRefSeq.txt index 62ceb99..ed5fa2f 100644 --- src/hg/makeDb/doc/hg38/ncbiRefSeq.txt +++ src/hg/makeDb/doc/hg38/ncbiRefSeq.txt @@ -210,17 +210,102 @@ ########## early experiment, not used later # # and the bigPsl file: # mkdir -p /gbdb/hg38/bbi/ncbiRefSeq # ln -s `pwd`/${asmName}.hg38.bigPsl.bb /gbdb/hg38/bbi/ncbiRefSeqBigPsl.bb # hgBbiDbLink hg38 ncbiRefSeqBigPsl /gbdb/hg38/bbi/ncbiRefSeqBigPsl.bb ########## early experiment, not used later ############################################################################# # addition of HGMD-restricted subset, Max, Jan 29 2019 cd /hive/data/genomes/hg38/bed/ncbiRefSeq.p12.2018-08-10 cat /hive/data/outside/hgmd/2018.4-hgmd-public_hg38.tsv | cut -f7 | sort -u > hgmdTranscripts.txt zcat process/hg38.curated.gp.gz | fgrep -f hgmdTranscripts.txt - > hgmd.curated.gp hgLoadGenePred -genePredExt hg38 ncbiRefSeqHgmd hgmd.curated.gp ############################################################################# +# ncbiRefSeq.p13 update (DONE - 2019-12-06 - Hiram) +# current version information + cat /gbdb/hg38/ncbiRefSeq/ncbiRefSeqVersion.txt + # NCBI Homo sapiens Annotation Release 109 (2018-03-29) +# Version information from the file: + +# /hive/data/outside/ncbi/genomes/refseq/vertebrate_mammalian/Homo_sapiens/ +# all_assembly_versions/GCF_000001405.39_GRCh38.p13/ +# GCF_000001405.39_GRCh38.p13_genomic.gff.gz + +#!annotation-date 09/05/2019 +#!annotation-source NCBI Homo sapiens Updated Annotation Release 109.20190905 + + mkdir /hive/data/genomes/hg38/bed/ncbiRefSeq.p13.2019-12-06 + cd /hive/data/genomes/hg38/bed/ncbiRefSeq.p13.2019-12-06 + + ### BEFORE loading this updated table + + featureBits -countGaps hg38 ncbiRefSeq + # 134109466 bases of 3257347282 (4.117%) in intersection + + featureBits -enrichment hg38 refGene ncbiRefSeq + # refGene 
3.098%, ncbiRefSeq 4.332%, both 2.920%, cover 94.23%, enrich 21.75x + + featureBits -enrichment hg38 ncbiRefSeq refGene + # ncbiRefSeq 4.332%, refGene 3.098%, both 2.920%, cover 67.40%, enrich 21.75x + + featureBits -enrichment hg38 ncbiRefSeqCurated refGene + # ncbiRefSeqCurated 2.880%, refGene 3.098%, both 2.846%, cover 98.84%, enrich 31.90x + + featureBits -enrichment hg38 refGene ncbiRefSeqCurated + # refGene 3.098%, ncbiRefSeqCurated 2.880%, both 2.846%, cover 91.86%, enrich 31.90x + + # running stepwise just to be careful + time (~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \ + -bigClusterHub=ku -dbHost=hgwdev \ + -stop=download -fileServer=hgwdev -smallClusterHub=ku -workhorse=hgwdev \ + refseq vertebrate_mammalian Homo_sapiens \ + GCF_000001405.39_GRCh38.p13 hg38) > download.log 2>&1 + # real 3m23.090s + + time (~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \ + -continue=process -bigClusterHub=ku -dbHost=hgwdev \ + -stop=process -fileServer=hgwdev -smallClusterHub=ku -workhorse=hgwdev \ + refseq vertebrate_mammalian Homo_sapiens \ + GCF_000001405.39_GRCh38.p13 hg38) > process.log 2>&1 + # real 6m10.922s + + + time (~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \ + -continue=load -bigClusterHub=ku -dbHost=hgwdev \ + -stop=load -fileServer=hgwdev -smallClusterHub=ku -workhorse=hgwdev \ + refseq vertebrate_mammalian Homo_sapiens \ + GCF_000001405.39_GRCh38.p13 hg38) > load.log 2>&1 + # real 0m41.366s + + ### AFTER loading this updated table + # compare this result: + cat fb.ncbiRefSeq.hg38.txt + # 136778258 bases of 3095998939 (4.418%) in intersection + + # with the previous version of the table (from the featureBits -countGaps run above; that run counted gaps, hence the larger denominator - the comparable previous figure without gaps is the 4.332% shown in the -enrichment output): + # 134109466 bases of 3257347282 (4.117%) in intersection + + featureBits -enrichment hg38 refGene ncbiRefSeq + # refGene 3.098%, ncbiRefSeq 4.418%, both 3.073%, cover 99.19%, enrich 22.45x + # previous: + # refGene 3.098%, ncbiRefSeq 4.332%, both 2.920%, cover 94.23%, enrich 21.75x + + featureBits 
-enrichment hg38 ncbiRefSeq refGene + # ncbiRefSeq 4.418%, refGene 3.098%, both 3.073%, cover 69.56%, enrich 22.45x + # previous: + # ncbiRefSeq 4.332%, refGene 3.098%, both 2.920%, cover 67.40%, enrich 21.75x + + featureBits -enrichment hg38 ncbiRefSeqCurated refGene + # ncbiRefSeqCurated 3.073%, refGene 3.098%, both 3.067%, cover 99.81%, enrich 32.22x + # previous: + # ncbiRefSeqCurated 2.880%, refGene 3.098%, both 2.846%, cover 98.84%, enrich 31.90x + + featureBits -enrichment hg38 refGene ncbiRefSeqCurated + # refGene 3.098%, ncbiRefSeqCurated 3.073%, both 3.067%, cover 98.99%, enrich 32.22x + # previous: + # refGene 3.098%, ncbiRefSeqCurated 2.880%, both 2.846%, cover 91.86%, enrich 31.90x + +#########################################################################