d52cbc57618c1635024918518f864f9a5f79ca12 max Tue Dec 10 03:52:17 2019 -0800 updating HGMD to 2019 version, refs #24625 diff --git src/hg/makeDb/doc/hg38/ncbiRefSeq.txt src/hg/makeDb/doc/hg38/ncbiRefSeq.txt index ed5fa2f..d672c48 100644 --- src/hg/makeDb/doc/hg38/ncbiRefSeq.txt +++ src/hg/makeDb/doc/hg38/ncbiRefSeq.txt @@ -205,32 +205,35 @@ # checked: 88703 failed: 0 hgLoadGenePred -genePredExt hg38 ncbiRefSeqOther other.gp genePredCheck -db=hg38 ncbiRefSeqOther # checked: 5682 failed: 0 ########## early experiment, not used later # # and the bigPsl file: # mkdir -p /gbdb/hg38/bbi/ncbiRefSeq # ln -s `pwd`/${asmName}.hg38.bigPsl.bb /gbdb/hg38/bbi/ncbiRefSeqBigPsl.bb # hgBbiDbLink hg38 ncbiRefSeqBigPsl /gbdb/hg38/bbi/ncbiRefSeqBigPsl.bb ########## early experiment, not used later ############################################################################# # addition of HGMD-restricted subset, Max, Jan 29 2019 -cd /hive/data/genomes/hg38/bed/ncbiRefSeq.p12.2018-08-10 -cat /hive/data/outside/hgmd/2018.4-hgmd-public_hg38.tsv | cut -f7 | sort -u > hgmdTranscripts.txt +# updated Dec 2019 +cd /hive/data/genomes/hg38/bed/ncbiRefSeq.p13.2019-12-06/ +# change in 2019: ignore the version numbers, otherwise only 1815 transcripts left, big update by HGMD in 2019? +# adding "." so NM_123 doesn't match NM_123123 +cat /hive/data/outside/hgmd/$year.4-hgmd-public_hg38.tsv | cut -f7 | cut -d. -f1 | sort -u | awk '{print $1"."}' > hgmdTranscripts.txt zcat process/hg38.curated.gp.gz | fgrep -f hgmdTranscripts.txt - > hgmd.curated.gp hgLoadGenePred -genePredExt hg38 ncbiRefSeqHgmd hgmd.curated.gp ############################################################################# # ncbiRefSeq.p13 update (DONE - 2019-12-06 - Hiram) # current version information cat /gbdb/hg38/ncbiRefSeq/ncbiRefSeqVersion.txt # NCBI Homo sapiens Annotation Release 109 (2018-03-29) # Version information from the file: # /hive/data/outside/ncbi/genomes/refseq/vertebrate_mammalian/Homo_sapiens/ # all_assembly_versions/GCF_000001405.39_GRCh38.p13/ # GCF_000001405.39_GRCh38.p13_genomic.gff.gz