7c70450f4256398c6e502617f506d6c6585434b0
max
  Fri Aug 18 04:03:34 2023 -0700
hgmd 2021 update hg38, refs #14280

diff --git src/hg/makeDb/doc/hg38/ncbiRefSeq.txt src/hg/makeDb/doc/hg38/ncbiRefSeq.txt
index 25cc78d..828e094 100644
--- src/hg/makeDb/doc/hg38/ncbiRefSeq.txt
+++ src/hg/makeDb/doc/hg38/ncbiRefSeq.txt
@@ -207,36 +207,39 @@
 hgLoadGenePred -genePredExt hg38 ncbiRefSeqOther other.gp
 genePredCheck -db=hg38 ncbiRefSeqOther
 # checked: 5682 failed: 0
 
 ########## early experiment, not used later
 # # and the bigPsl file:
 # mkdir -p /gbdb/hg38/bbi/ncbiRefSeq
 # ln -s `pwd`/${asmName}.hg38.bigPsl.bb /gbdb/hg38/bbi/ncbiRefSeqBigPsl.bb
 # hgBbiDbLink hg38 ncbiRefSeqBigPsl /gbdb/hg38/bbi/ncbiRefSeqBigPsl.bb
 ########## early experiment, not used later
 
 #############################################################################
 # addition of HGMD-restricted subset, Max, Jan 29 2019
 # updated Dec 2019
 # updated Dec 2020
+# updated Aug 2023
 cd /hive/data/genomes/hg38/bed/ncbiRefSeq.p13.2020-10-27/
 # change in 2019: ignore the version numbers, otherwise only 1815 transcripts left, big update by HGMD in 2019?
 # adding "." so NM_123 doesn't match NM_123123                                                                             
 cat /hive/data/outside/hgmd/$year.4-hgmd-public_hg38.tsv | cut -f7 | cut -d. -f1 | sort -u | awk '{print $1"."}' > hgmdTranscripts.txt
 zcat process/hg38.curated.gp.gz | fgrep -f hgmdTranscripts.txt - > hgmd.curated.gp
 hgLoadGenePred -genePredExt hg38 ncbiRefSeqHgmd hgmd.curated.gp
+# wc -l says:
+# 10772 hgmd.curated.gp for the 2021 version
 #############################################################################
 # ncbiRefSeq.p13 update (DONE - 2019-12-06 - Hiram)
 
 # current version information
     cat /gbdb/hg38/ncbiRefSeq/ncbiRefSeqVersion.txt
     # NCBI Homo sapiens Annotation Release 109 (2018-03-29)
 
 # Version information from the file:
 
 # /hive/data/outside/ncbi/genomes/refseq/vertebrate_mammalian/Homo_sapiens/
 #   all_assembly_versions/GCF_000001405.39_GRCh38.p13/
 #   GCF_000001405.39_GRCh38.p13_genomic.gff.gz
 
 #!annotation-date 09/05/2019
 #!annotation-source NCBI Homo sapiens Updated Annotation Release 109.20190905