4ef2450efd0812268ae30457f8c5d2494821e5a3 max Thu Dec 5 04:56:51 2024 -0800 HGMD 2024 update, refs #34903 diff --git src/hg/makeDb/doc/hg19.txt src/hg/makeDb/doc/hg19.txt index 73a5eb3..10a8186 100644 --- src/hg/makeDb/doc/hg19.txt +++ src/hg/makeDb/doc/hg19.txt @@ -32185,59 +32185,63 @@ # refine generated trackDb.gtexEqtl.ra file and install in makeDb/trackDb/human/hg19 ######## # Load 44 per-tissue tracks: gtexEqtlTissue<tissueName> csh $bin/getxEqtlLoadTissues.csh UCSC_output >&! loadTissuesV2.log & #NOTE: V2 was a second release that followed immediately after first release (which was timed to coincide # with Nature paper pub. V2 revised schema (added ensembl gene ID, additional summary fields) # and color conventions. ########################################################################### # HGMD (updated 12/10/19 max) # HGMD (updated 01/25/18 max) # HGMD (updated 12/12/20 max) # HGMD (updated 08/18/23 max) -# HGMD (updated 05/28/24 max) -# got hgmd from Frank Schacherer Frank.Schacherer@qiagen.com and Rupert Yip Rupert.Yip@qiagen.com +# HGMD (updated 05/28/24 max) from Elias.Hage@qiagen.com +# HGMD (updated 12/05/24 max) from Elias.Hage@qiagen.com +# got hgmd from Frank Schacherer Frank.Schacherer@qiagen.com # see also the file hg38/hgmd.txt -# Got it from Georgios.Stamoulis@qiagen.com, Georgius left -# Got it from Elias.Hage@qiagen.com -year=2023 +year=2024 cd /hive/data/genomes/hg19/bed/hgmd cat /hive/data/outside/hgmd/$year.4-hgmd-public_hg19.tsv | grep -v \# | tawk '{if ($5=="I") {start=$4-1; end=$4+1; col="100,100,100"} else if ($5=="D") {start=$4-1; end=$4; col="170,170,170"} else {start=$4-1; end=$4; col="0,0,0"}; print "chr"$3,start,end,$2":"$1,0,".",start,end,col,$2,$1,$5}' | sed -e 's/M$/substitution/' | sed -e 's/I$/insertion (between the two basepairs, sequence not provided by HGMD)/' | sed -e 's/D$/deletion (endpoint not provided by HGMD)/' | sed -e 's/X$/insertion-deletion (endpoint not provided by HGMD)/' | sed -e 's/R$/regulatory variant/' | sed -e 's/S$/splicing variant/' | sort -k1,1 -k2,2n > hgmd.bed bedToBigBed hgmd.bed /hive/data/genomes/hg19/chrom.sizes hgmd.bb -type=bed9+ -as=hgmd.as -tab ln -s /hive/data/genomes/hg19/bed/hgmd/hgmd.bb /gbdb/hg19/bbi/hgmd.bb hgBbiDbLink hg19 hgmd /gbdb/hg19/bbi/hgmd.bb -# Forgot, finally done Oct 24: also updated hgBeacon +wc -l hgmd.bed +# 2024: 301348 lines in hgmd.bed +# update hgBeacon bigBedToBed /gbdb/hg19/bbi/hgmd.bb /tmp/temp.bed python2 /usr/local/apache/cgi-bin/hgBeacon -f hgmd /tmp/temp.bed hgmd # Forgot, finally done June 26: updated GBIB as qateam +ssh qateam scp /gbdb/hg19/bbi/hgmd.bb hgdownload:/usr/local/apache/htdocs/gbib/prot/ +exit # next restrict RefSeq down to HGMD subset -# addition of HGMD-restricted subset, Max, Jan 29 2019, updated Dec 10 2019, again Aug 2023, May 2024 -cd /hive/data/genomes/hg19/bed/ncbiRefSeq.p13.2022-03-16/ +# addition of HGMD-restricted subset, Max, Jan 29 2019, updated Dec 10 2019, again Aug 2023, May 2024, Dec 2024 +cd /hive/data/genomes/hg19/bed/ncbiRefSeq.p13.2024-09-18/ # change in 2019: ignore the version numbers, otherwise only 1815 transcripts left, big update by HGMD in 2019 # adding "." so NM_123 doesn't match NM_123123 cat /hive/data/outside/hgmd/$year.4-hgmd-public_hg38.tsv | cut -f7 | cut -d. -f1 | sort -u | awk '{print $1"."}' > hgmdTranscripts.txt zcat process/hg19.curated.gp.gz | fgrep -f hgmdTranscripts.txt - > hgmd.curated.gp hgLoadGenePred -genePredExt hg19 ncbiRefSeqHgmd hgmd.curated.gp $ wc -l hgmd.curated.gp 7965 hgmd.curated.gp in 2019 8971 hgmd.curated.gp in 2020 10451 hgmd.curated.gp in 2021 +14693 hgmd.curated.gp in 2024 # now continue the process at ../hg38/hgmd.txt # or ideally make a shell script for all this... ############################################################################# # LASTZ human/hg19 vs. pig/susScr11 - (DONE - 2018-04-02 - Hiram) mkdir /hive/data/genomes/hg19/bed/lastzSusScr11.2018-04-02 cd /hive/data/genomes/hg19/bed/lastzSusScr11.2018-04-02 printf '# human vs pig BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz BLASTZ_O=400 BLASTZ_E=30 BLASTZ_M=254 # default BLASTZ_Q score matrix: