55673f448aba344d8174691c65813bf7994b72ac
hiram
  Fri Dec 6 11:49:27 2019 -0800
updated ncbiRefSeq tables refs #24528

diff --git src/hg/makeDb/doc/hg38/ncbiRefSeq.txt src/hg/makeDb/doc/hg38/ncbiRefSeq.txt
index 62ceb99..ed5fa2f 100644
--- src/hg/makeDb/doc/hg38/ncbiRefSeq.txt
+++ src/hg/makeDb/doc/hg38/ncbiRefSeq.txt
@@ -210,17 +210,102 @@
 
 ########## early experiment, not used later
 # # and the bigPsl file:
 # mkdir -p /gbdb/hg38/bbi/ncbiRefSeq
 # ln -s `pwd`/${asmName}.hg38.bigPsl.bb /gbdb/hg38/bbi/ncbiRefSeqBigPsl.bb
 # hgBbiDbLink hg38 ncbiRefSeqBigPsl /gbdb/hg38/bbi/ncbiRefSeqBigPsl.bb
 ########## early experiment, not used later
 
 #############################################################################
 # addition of HGMD-restricted subset, Max, Jan 29 2019
 cd /hive/data/genomes/hg38/bed/ncbiRefSeq.p12.2018-08-10
 cat /hive/data/outside/hgmd/2018.4-hgmd-public_hg38.tsv | cut -f7 | sort -u > hgmdTranscripts.txt
 zcat process/hg38.curated.gp.gz | fgrep -f hgmdTranscripts.txt - > hgmd.curated.gp
 hgLoadGenePred -genePredExt hg38 ncbiRefSeqHgmd hgmd.curated.gp
 #############################################################################
+# ncbiRefSeq.p13 update (DONE - 2019-12-06 - Hiram)
 
+# current version information
+    cat /gbdb/hg38/ncbiRefSeq/ncbiRefSeqVersion.txt
+    # NCBI Homo sapiens Annotation Release 109 (2018-03-29)
 
+# Version information from the file:
+
+# /hive/data/outside/ncbi/genomes/refseq/vertebrate_mammalian/Homo_sapiens/
+#   all_assembly_versions/GCF_000001405.39_GRCh38.p13/
+#   GCF_000001405.39_GRCh38.p13_genomic.gff.gz
+
+#!annotation-date 09/05/2019
+#!annotation-source NCBI Homo sapiens Updated Annotation Release 109.20190905
+
+    mkdir /hive/data/genomes/hg38/bed/ncbiRefSeq.p13.2019-12-06
+    cd /hive/data/genomes/hg38/bed/ncbiRefSeq.p13.2019-12-06
+
+    ### BEFORE loading this updated table
+
+    featureBits -countGaps hg38 ncbiRefSeq
+ # 134109466 bases of 3257347282 (4.117%) in intersection
+
+    featureBits -enrichment hg38 refGene ncbiRefSeq
+ # refGene 3.098%, ncbiRefSeq 4.332%, both 2.920%, cover 94.23%, enrich 21.75x
+
+    featureBits -enrichment hg38 ncbiRefSeq refGene
+ # ncbiRefSeq 4.332%, refGene 3.098%, both 2.920%, cover 67.40%, enrich 21.75x
+
+    featureBits -enrichment hg38 ncbiRefSeqCurated refGene
+ # ncbiRefSeqCurated 2.880%, refGene 3.098%, both 2.846%, cover 98.84%, enrich 31.90x
+
+    featureBits -enrichment hg38 refGene ncbiRefSeqCurated
+ # refGene 3.098%, ncbiRefSeqCurated 2.880%, both 2.846%, cover 91.86%, enrich 31.90x
+
+    # running stepwise just to be careful
+    time (~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \
+      -bigClusterHub=ku -dbHost=hgwdev \
+      -stop=download -fileServer=hgwdev -smallClusterHub=ku -workhorse=hgwdev \
+      refseq vertebrate_mammalian Homo_sapiens \
+      GCF_000001405.39_GRCh38.p13 hg38) > download.log 2>&1
+    # real    3m23.090s
+
+    time (~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \
+      -continue=process -bigClusterHub=ku -dbHost=hgwdev \
+      -stop=process -fileServer=hgwdev -smallClusterHub=ku -workhorse=hgwdev \
+      refseq vertebrate_mammalian Homo_sapiens \
+      GCF_000001405.39_GRCh38.p13 hg38) > process.log 2>&1
+    # real    6m10.922s
+
+
+    time (~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \
+      -continue=load -bigClusterHub=ku -dbHost=hgwdev \
+      -stop=load -fileServer=hgwdev -smallClusterHub=ku -workhorse=hgwdev \
+      refseq vertebrate_mammalian Homo_sapiens \
+      GCF_000001405.39_GRCh38.p13 hg38) > load.log 2>&1
+    # real    0m41.366s
+
+    ### AFTER loading this updated table
+    # compare this result:
+    cat fb.ncbiRefSeq.hg38.txt
+    # 136778258 bases of 3095998939 (4.418%) in intersection
+
+    # with the previous table (featureBits -countGaps above; 4.332% ungapped):
+    # 134109466 bases of 3257347282 (4.117%) in intersection
+
+    featureBits -enrichment hg38 refGene ncbiRefSeq
+ # refGene 3.098%, ncbiRefSeq 4.418%, both 3.073%, cover 99.19%, enrich 22.45x
+ # previous:
+ # refGene 3.098%, ncbiRefSeq 4.332%, both 2.920%, cover 94.23%, enrich 21.75x
+
+    featureBits -enrichment hg38 ncbiRefSeq refGene
+ # ncbiRefSeq 4.418%, refGene 3.098%, both 3.073%, cover 69.56%, enrich 22.45x
+ # previous:
+ # ncbiRefSeq 4.332%, refGene 3.098%, both 2.920%, cover 67.40%, enrich 21.75x
+
+    featureBits -enrichment hg38 ncbiRefSeqCurated refGene
+ # ncbiRefSeqCurated 3.073%, refGene 3.098%, both 3.067%, cover 99.81%, enrich 32.22x
+ # previous:
+ # ncbiRefSeqCurated 2.880%, refGene 3.098%, both 2.846%, cover 98.84%, enrich 31.90x
+
+    featureBits -enrichment hg38 refGene ncbiRefSeqCurated
+ # refGene 3.098%, ncbiRefSeqCurated 3.073%, both 3.067%, cover 98.99%, enrich 32.22x
+ # previous:
+ # refGene 3.098%, ncbiRefSeqCurated 2.880%, both 2.846%, cover 91.86%, enrich 31.90x
+
+#########################################################################