dc6ddf665daba39f0d4aac9c83e2d0764da8cf3b markd Fri Mar 12 20:48:48 2021 -0800 import of gencodeV37lift37 diff --git src/hg/makeDb/doc/hg19.gencode.txt src/hg/makeDb/doc/hg19.gencode.txt index a4ddaa6..be819f7 100644 --- src/hg/makeDb/doc/hg19.gencode.txt +++ src/hg/makeDb/doc/hg19.gencode.txt @@ -1,794 +1,842 @@ ############################################################################ 2012-11-11: import and UCSC GENCODE group process of GENCODE V14 (markd) # Due to UCSC Genome Browser using the NC_001807 mitochondrial genome sequence # (chrM) and GENCODE annotating the NC_012920 mitochondrial sequence, the # GENCODE mitochondrial sequences are lifted to UCSC chrM. # download files mkdir -p /hive/data/genomes/hg19/bed/gencodeV14/release cd /hive/data/genomes/hg19/bed/gencodeV14/ # download gencode release wget -nv -r -np ftp://ftp.sanger.ac.uk/pub/gencode/release_14 mv ftp.sanger.ac.uk/pub/gencode/release_14 . rm -rf ftp.sanger.ac.uk/ # silly sanity check: cd release_14 for f in *.gz *.tgz ; do zcat $f >/dev/null ; done # untar main distribution tar -zxf gencode14_GRCh37.tgz cd /hive/data/genomes/hg19/bed/gencodeV14 # obtain transcription support level analysis from UCSC GENCODE group (markd/rachel) mkdir -p data cp /cluster/home/markd/compbio/ccds/branches/transSupV14.1/modules/gencodeTransSupport/exprs/classDev/runs/2012-11-11/results/gencode.v14.transcriptionSupportLevel.{tab,tsv} data/ # create Makefile from previous one. This time, we need to get # it from the ENCODE DCC area. cp /hive/groups/encode/dcc/data/gencodeV13/Makefile .
# edit to set version: ver = 14 # on code in the CCDS subversion tree: # svn+ssh://hgwdev.soe.ucsc.edu/projects/compbio/svnroot/hausslerlab/ccds/trunk # and markd's python library (it will be moved to the hausslerlab # repository soon) # may need to update ccds2/modules/gencode/src/lib/gencode/data/gencodeGenes.py # to add new biotypes, use this command to verify and update as needed # be sure to do a make in ccds2/modules/gencode make checkAttrs # build and load tables (time nice make -j 10) >&build.out& # compare tables from previous release to see if number changed made # sense. make cmpRelease ## Copy and update trackDb files from previous release. ## Change version and use lower priority so it sorts to top of ## super track page. ## Important to make sure filter attrs.transcriptType matches current set ## figured out with select distinct transcriptType from wgEncodeGencodeAttrsV14 order by transcriptType; cd kent/src/hg/makeDb/trackDb cp human/hg19/wgEncodeGencodeV13.ra human/hg19/wgEncodeGencodeV14.ra cp human/hg19/wgEncodeGencodeV13.html human/hg19/wgEncodeGencodeV14.html # edit these plus human/hg19/trackDb.wgEncode.ra ### IMPORTANT: make sure that hgTracks/gencodeTracks.c registers ### track handler for this version of gencode: registerTrackHandlerOnFamily("wgEncodeGencodeV14", gencodeGeneMethods); ############################################################################## 2013-04-08: import of UCSC GENCODE group processing of GENCODE V15 (markd) # Due to UCSC Genome Browser using the NC_001807 mitochondrial genome sequence # (chrM) and GENCODE annotating the NC_012920 mitochondrial sequence, the # GENCODE mitochondrial sequences are lifted to UCSC chrM. # download files mkdir -p /hive/data/genomes/hg19/bed/gencodeV15 cd /hive/data/genomes/hg19/bed/gencodeV15 # download gencode release wget -nv -r -np ftp://ftp.sanger.ac.uk/pub/gencode/release_15 mv ftp.sanger.ac.uk/pub/gencode/release_15 .
rm -rf ftp.sanger.ac.uk/ # silly sanity check: for f in release_15/*.gz release_15/*.tgz ; do zcat $f >/dev/null ; done # untar main distribution cd release_15 tar -zxf gencode15_GRCh37.tgz cd /hive/data/genomes/hg19/bed/gencodeV15 # obtain transcription support level analysis from UCSC GENCODE group (markd/rachel) mkdir -p data cp /cluster/home/markd/compbio/ccds/branches/transSupV15.1/modules/gencodeTransSupport/exprs/classDev/runs/2013-03-28/results/gencode.v15.transcriptionSupportLevel.{tab,tsv} data/ # create Makefile from previous one. cp ../gencodeV14/Makefile . # edit to set version: ver = 15 # on code in the CCDS subversion tree: # svn+ssh://hgwdev.soe.ucsc.edu/projects/compbio/svnroot/hausslerlab/ccds/trunk # and markd's python library may need to update ccds2/modules/gencode/src/lib/gencode/data/gencodeGenes.py # to add new biotypes, use this command to verify and update as needed # be sure to do a make in ccds2/modules/gencode make checkAttrs # build and load tables (time nice make -j 10) >&build.out& # compare tables from previous release to see if number changed made # sense. make cmpRelease ## Copy and update trackDb files from previous release. ## Change version and use lower priority so it sorts to top of ## super track page.
## Important to make sure filter attrs.transcriptType matches current set ## figured out with select distinct transcriptType from wgEncodeGencodeAttrsV15 order by transcriptType; cd kent/src/hg/makeDb/trackDb cp human/hg19/wgEncodeGencodeV14.ra human/hg19/wgEncodeGencodeV15.ra cp human/hg19/wgEncodeGencodeV14.html human/hg19/wgEncodeGencodeV15.html # edit these plus human/hg19/trackDb.wgEncode.ra ### IMPORTANT: make sure that hgTracks/gencodeTracks.c registers ### track handler for this version of gencode: registerTrackHandler("wgEncodeGencodeV15", gencodeGeneMethods); ############################################################################ 2013-06-02: import of UCSC GENCODE group processing of GENCODE V16 (markd) # Due to UCSC Genome Browser using the NC_001807 mitochondrial genome sequence # (chrM) and GENCODE annotating the NC_012920 mitochondrial sequence, the # GENCODE mitochondrial sequences are lifted to UCSC chrM. # This is the first release to include haplotype regions # download files mkdir -p /hive/data/genomes/hg19/bed/gencodeV16/data cd /hive/data/genomes/hg19/bed/gencodeV16 # download gencode release cd data wget -nv -r -np ftp://ftp.sanger.ac.uk/pub/gencode/release_16 mv ftp.sanger.ac.uk/pub/gencode/release_16 . rm -rf ftp.sanger.ac.uk/ cd /hive/data/genomes/hg19/bed/gencodeV16 # obtain transcription support level analysis from UCSC GENCODE group (markd/rachel) mkdir -p data cp /cluster/home/markd/compbio/ccds/branches/transSupV16.1/modules/gencodeTransSupport/exprs/classDev/runs/2013-06-16/results/gencode.v16.transcriptionSupportLevel.{tab,tsv} data/ # create Makefile from previous one. cp ../gencodeV15/Makefile .
# edit to set version: ver = 16 # on code in the CCDS subversion tree: # svn+ssh://hgwdev.soe.ucsc.edu/projects/compbio/svnroot/hausslerlab/ccds/trunk # and markd's python library may need to update ccds2/modules/gencode/src/lib/gencode/data/gencodeGenes.py # to add new biotypes, use this command to verify and update as needed # be sure to do a make in ccds2/modules/gencode make checkAttrs # build and load tables (time nice make -j 10) >&build.out& # compare tables from previous release to see if number changed made # sense. make cmpRelease ## Copy and update trackDb files from previous release. ## Change version and use lower priority so it sorts to top of ## super track page. ## Important to make sure filter attrs.transcriptType matches current set ## figured out with select distinct transcriptType from wgEncodeGencodeAttrsV16 order by transcriptType; cd kent/src/hg/makeDb/trackDb cp human/hg19/wgEncodeGencodeV15.ra human/hg19/wgEncodeGencodeV16.ra cp human/hg19/wgEncodeGencodeV15.html human/hg19/wgEncodeGencodeV16.html # edit these plus human/hg19/trackDb.wgEncode.ra # - set priorities in wgEncodeGencodeV16.ra in reverse order with previous # tracks so newest shows up first # priority - set to previous version priority minus 0.001 # searchPriority - set each to previous -0.001 # - make current track default to pack and hide previous [ONLY if it's going to be pushed] # superTrack wgEncodeGencodeSuper pack ### IMPORTANT: make sure that hgTracks/gencodeTracks.c registers ### track handler for this version of gencode: registerTrackHandler("wgEncodeGencodeV16", gencodeGeneMethods); # sanity check number of rows in each table make cmpRelease # update all.joiner and validate # look for the last section `begin Gencode V??'
in all.joiner # and copy and update version # repeat this until happy, editing minCheck as needed for tbl in $(hgsql -Ne 'show tables like "wgEncodeGencode%V16"' hg19) ; do runJoiner.csh hg19 $tbl ~/kent/src/hg/makeDb/schema/all.joiner noTimes ; done >&check/joiner.out ############################################################################## 2013-06-19: import of UCSC GENCODE group processing of GENCODE V17 (markd) # Due to UCSC Genome Browser using the NC_001807 mitochondrial genome sequence # (chrM) and GENCODE annotating the NC_012920 mitochondrial sequence, the # GENCODE mitochondrial sequences are lifted to UCSC chrM. # This is the first release to include haplotype regions # download files mkdir -p /hive/data/genomes/hg19/bed/gencodeV17/data cd /hive/data/genomes/hg19/bed/gencodeV17 # download gencode release cd data wget -nv -r -np ftp://ftp.sanger.ac.uk/pub/gencode/release_17 mv ftp.sanger.ac.uk/pub/gencode/release_17 . rm -rf ftp.sanger.ac.uk/ cd /hive/data/genomes/hg19/bed/gencodeV17 # obtain transcription support level analysis from UCSC GENCODE group (markd/rachel) mkdir -p data cp /cluster/home/markd/compbio/ccds/branches/transSupV17.1/modules/gencodeTransSupport/exprs/classDev/runs/2013-06-19/results/gencode.v17.transcriptionSupportLevel.{tab,tsv} data/ # create Makefile from previous one. cp ../gencodeV16/Makefile . # edit to set version: ver = 17 # on code in the CCDS subversion tree: # svn+ssh://hgwdev.soe.ucsc.edu/projects/compbio/svnroot/hausslerlab/ccds/trunk # and markd's python library may need to update ccds2/modules/gencode/src/lib/gencode/data/gencodeGenes.py # to add new biotypes, use this command to verify and update as needed # be sure to do a make in ccds2/modules/gencode make checkAttrs # build and load tables (time nice make -j 10) >&build.out& # compare tables from previous release to see if number changed made # sense. make cmpRelease ## Copy and update trackDb files from previous release.
## Change version and use lower priority so it sorts to top of ## super track page. ## Important to make sure filter attrs.transcriptType matches current set ## figured out with select distinct transcriptType from wgEncodeGencodeAttrsV17 order by transcriptType; cd kent/src/hg/makeDb/trackDb cp human/hg19/wgEncodeGencodeV16.ra human/hg19/wgEncodeGencodeV17.ra cp human/hg19/wgEncodeGencodeV16.html human/hg19/wgEncodeGencodeV17.html # edit these plus human/hg19/trackDb.wgEncode.ra # - set priorities in wgEncodeGencodeV17.ra in reverse order with previous # tracks so newest shows up first # priority - set to previous version priority minus 0.001 # searchPriority - set each to previous -0.001 # - make current track default to pack and hide previous [ONLY if it's going to be pushed] # superTrack wgEncodeGencodeSuper pack ### IMPORTANT: make sure that hgTracks/gencodeTracks.c registers ### track handler for this version of gencode: registerTrackHandler("wgEncodeGencodeV17", gencodeGeneMethods); # sanity check number of rows in each table make cmpRelease # update all.joiner and validate # look for the last section `begin Gencode V??' 
in all.joiner # and copy and update version # repeat this until happy, editing minCheck as needed for tbl in $(hgsql -Ne 'show tables like "wgEncodeGencode%V17"' hg19) ; do runJoiner.csh hg19 $tbl ~/kent/src/hg/makeDb/schema/all.joiner noTimes ; done >&check/joiner.out ######################################################################### 2016-03-02: import of UCSC GENCODE group processing of GENCODE V24lift37 (markd) # download files mkdir -p /hive/data/genomes/hg19/bed/V24lift37 cd /hive/data/genomes/hg19/bed/V24lift37 # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set # release and transcript support versions # download, build and load tables (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& # compare tables from previous release to see if number changed makes # sense (results in gencode-cmp.tsv) make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk cmpRelease ## Copy and update trackDb files from previous release. ## Change version and use lower priority so it sorts to top of ## super track page. Follow instructions in ra file to ensure ## filters are correct. cd kent/src/hg/makeDb/trackDb cp human/hg38/wgEncodeGencodeV24.ra human/hg19/wgEncodeGencodeV24lift37.ra cp human/hg38/wgEncodeGencodeV24.html human/hg19/wgEncodeGencodeV24lift37.html # edit these plus human/hg38/trackDb.wgEncode.ra # - set priorities in wgEncodeGencodeV24.ra tracks so newest shows up first # priority - set to previous version priority minus 0.001 # searchPriority - set each to previous minus 0.001 # - make current track default to pack and hide previous [ONLY if it's going to be pushed] # superTrack wgEncodeGencodeSuper pack # - Update wgEncodeGencodeSuper.html to describe new release and to # pick up other updates. # update all.joiner and validate # look for the last section `begin Gencode V??'
in all.joiner # and copy and update version # repeat this until happy, editing minCheck as needed # output in check/joiner.out cd /hive/data/genomes/hg19/bed/gencodeV24lift37 make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck ######################################################################### 2017-04-16: import of UCSC GENCODE group processing of GENCODE V26lift37 (markd) # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions # download, build and load tables mkdir -p /hive/data/genomes/hg19/bed/gencodeV26lift37 pushd /hive/data/genomes/hg19/bed/gencodeV26lift37 (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& # compare tables from previous release to see if number changed makes # sense. Results are in gencode-cmp.tsv # generate trackDb and joiner blurb pushd kent/src/hg/makeDb/trackDb ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 26lift37 88 'March 2017' # edit human/hg19/trackDb.wgEncode.ra to add new .ra file include make DBS=hg19 # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes' # to describe new release. [ONLY if it's going to be pushed] # edit all.joiner to add ~/tmp/gencodeV26lift37.joiner # verify with: pushd /hive/data/genomes/hg19/bed/gencodeV26lift37 make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck # commit all ############################################################################## 2017-09-14: import of UCSC GENCODE group processing of GENCODE V27lift37 (markd) # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions # download, build and load tables mkdir -p /hive/data/genomes/hg19/bed/gencodeV27lift37 pushd /hive/data/genomes/hg19/bed/gencodeV27lift37 (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& # compare tables from previous release to see if number changed makes # sense. 
Results are in gencode-cmp.tsv # generate trackDb and joiner blurb pushd kent/src/hg/makeDb/trackDb ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 27lift37 90 'August 2017' # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes' # to describe new release. [ONLY if it's going to be pushed] # edit human/hg19/trackDb.wgEncode.ra to add new .ra file include make DBS=hg19 # edit all.joiner to add ~/tmp/gencodeV27lift37.joiner # verify with: pushd /hive/data/genomes/hg19/bed/gencodeV27lift37 make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck # commit all ########################################################################### 2018-03-08: update UCSC GENCODE V19 to include protein id (for VAI) cd /hive/data/genomes/hg19/bed/gencodeV19/ # move tables that need to be rebuilt mkdir -p prev/pre-proteinId mv tables/wgEncodeGencodeAttrsV19.tab tables/wgEncodeGencodeTagV19.tab prev/pre-proteinId/ mv loaded/wgEncodeGencodeAttrsV19.tab.loaded loaded/wgEncodeGencodeTagV19.tab.loaded prev/pre-proteinId/ # V19 didn't have protein ids in GTF, need to get from ensembl database cd ~/hive/tmp ftp://ftp.ensembl.org/pub/release-74/mysql/homo_sapiens_core_74_37/homo_sapiens_core_74_37.sql.gz ftp://ftp.ensembl.org/pub/release-74/mysql/homo_sapiens_core_74_37/transcript.txt.gz ftp://ftp.ensembl.org/pub/release-74/mysql/homo_sapiens_core_74_37/translation.txt.gz # load small subset hgsql -e 'create database markd_ens' zcat homo_sapiens_core_74_37.sql.gz |hgsql markd_ens gunzip trans* hgsqlimport markd_ens $(pwd)/transcript.txt $(pwd)/translation.txt # add proteinId column to wgEncodeGencodeAttrsV19 hgsql CREATE table markd_ens.trans_prot SELECT concat(trans.stable_id, ".", CAST(trans.version AS CHAR)) transcriptId, concat(prot.stable_id, ".", CAST(prot.version AS CHAR)) proteinId FROM markd_ens.transcript trans, markd_ens.translation prot WHERE prot.transcript_id = trans.transcript_id; CREATE INDEX transcriptId ON 
markd_ens.trans_prot(transcriptId); UPDATE hg19.wgEncodeGencodeAttrsV19 as attr INNER JOIN markd_ens.trans_prot as tp ON attr.transcriptId = tp.transcriptId SET attr.proteinId = tp.proteinId; SELECT count(*) FROM wgEncodeGencodeAttrsV19 WHERE transcriptClass="coding" AND proteinId = ""; -> 0 !!! got them all DROP DATABASE markd_ens; # 2018-03-19: update search to include protein id edit kent/src/hg/makeDb/trackDb/human/hg19/wgEncodeGencodeV19.ra ######################################################################### 2018-04-17: import of UCSC GENCODE group processing of GENCODE V28lift37 (markd) # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions # download, build and load tables mkdir -p /hive/data/genomes/hg19/bed/gencodeV28lift37 pushd /hive/data/genomes/hg19/bed/gencodeV28lift37 (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& # compare tables from previous release to see if number changed makes # sense. Results are in gencode-cmp.tsv # generate trackDb and joiner blurb pushd ~/kent/src/hg/makeDb/trackDb ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 28lift37 92 'Apr 2018' # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes' # to describe new release. [ONLY if it's going to be pushed] # edit human/hg19/trackDb.wgEncode.ra to add new .ra file include make DBS=hg19 # edit all.joiner to add ~/tmp/gencodeV28lift37.joiner # verify with: pushd /hive/data/genomes/hg19/bed/gencodeV28lift37 make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck # commit all # 2018-05-01 markd # fix bug in wgEncodeGencodeEntrezGeneV28lift37 with change in gencodeLoad.mk. Reload the table. 
mkdir -p /hive/data/genomes/hg19/bed/gencodeV28lift37 rm tables/wgEncodeGencodeEntrezGeneV28lift37.tab (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.3.out& ############################################################################## 2018-11-09: import of UCSC GENCODE group processing of GENCODE V29lift37 (markd) # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions # download, build and load tables mkdir -p /hive/data/genomes/hg19/bed/gencodeV29lift37 pushd /hive/data/genomes/hg19/bed/gencodeV29lift37 (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& # Contained invalid biotype for one transcript (ENST00000649475.1_1), fix for now cd data/release_29lift37 mv gencode.v29lift37.annotation.gff3.gz gencode.v29lift37.annotation.orig.gff3.gz zcat gencode.v29lift37.annotation.orig.gff3.gz | sed -e 's/lincrna/lincRNA/g' | gzip -c> gencode.v29lift37.annotation.gff3.gz cd ../.. (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.2.out& # compare tables from previous release to see if number changed makes # sense. Results are in gencode-cmp.tsv # generate trackDb and joiner blurb pushd ~/kent/src/hg/makeDb/trackDb ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 29lift37 94 'Oct 2018' # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes' # to describe new release. 
[ONLY if it's going to be pushed] # edit human/hg19/trackDb.wgEncode.ra to add new .ra file include make DBS=hg19 # edit all.joiner to add ~/tmp/gencodeV29lift37.joiner # verify with: pushd /hive/data/genomes/hg19/bed/gencodeV29lift37 make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck # commit all ############################################################################# 2019-04-08: import of UCSC GENCODE group processing of GENCODE V30lift37 (markd) # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions # download, build and load tables mkdir -p /hive/data/genomes/hg19/bed/gencodeV30lift37 pushd /hive/data/genomes/hg19/bed/gencodeV30lift37 (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& # compare tables from previous release to see if number changed makes # sense. Results are in gencode-cmp.tsv # generate trackDb and joiner blurb pushd ~/kent/src/hg/makeDb/trackDb ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 30lift37 96 'Apr 2019' # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes' # to describe new release. 
[ONLY if it's going to be pushed] # edit human/hg19/trackDb.wgEncode.ra to add new .ra file include make DBS=hg19 # edit all.joiner to add ~/tmp/gencodeV30lift37.joiner # verify with: pushd /hive/data/genomes/hg19/bed/gencodeV30lift37 make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck # commit all ############################################################################# 2019-07-05: import of UCSC GENCODE group processing of GENCODE V31lift37 (markd) # Replaced import of pre-release # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions # download, build and load tables mkdir -p /hive/data/genomes/hg19/bed/gencodeV31lift37 pushd /hive/data/genomes/hg19/bed/gencodeV31lift37 (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& # compare tables from previous release to see if number changed makes # sense. Results are in gencode-cmp.tsv # generate trackDb and joiner blurb pushd ~/kent/src/hg/makeDb/trackDb ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 31lift37 97 'June 2019' # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes' # to describe new release. 
[ONLY if it's going to be pushed] # edit human/hg19/trackDb.wgEncode.ra to add new .ra file include make DBS=hg19 # edit all.joiner to add ~/tmp/gencodeV31lift37.joiner # verify with: pushd /hive/data/genomes/hg19/bed/gencodeV31lift37 make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck # commit all ############################################################################## 2019-09-03: import of UCSC GENCODE group processing of GENCODE V32lift37 (markd) # PRE-RELEASE # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions # download, build and load tables mkdir -p /hive/data/genomes/hg19/bed/gencodeV32lift37 pushd /hive/data/genomes/hg19/bed/gencodeV32lift37 (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& # compare tables from previous release to see if number changed makes # sense. Results are in gencode-cmp.tsv # generate trackDb and joiner blurb pushd ~/kent/src/hg/makeDb/trackDb ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 32lift37 98 'Sept 2019' # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes' # to describe new release. 
[ONLY if it's going to be pushed] # edit human/hg19/trackDb.wgEncode.ra to add new .ra file include make DBS=hg19 # edit all.joiner to add ~/tmp/gencodeV32lift37.joiner # verify with: pushd /hive/data/genomes/hg19/bed/gencodeV32lift37 make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck # commit all 2019-08-30: repeat above for final release mv /hive/data/genomes/hg19/bed/gencodeV32lift37 /hive/data/genomes/hg19/bed/gencodeV32lift37pre # redo the above imports ######################################################################### 2019-11-17: import of UCSC GENCODE group processing of GENCODE V33lift37 PRE-RELEASE (markd) # # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions # download, build and load tables mkdir -p /hive/data/genomes/hg19/bed/gencodeV33lift37 pushd /hive/data/genomes/hg19/bed/gencodeV33lift37 (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& # compare tables from previous release to see if number changed makes # sense. Results are in gencode-cmp.tsv # generate trackDb and joiner blurb pushd ~/kent/src/hg/makeDb/trackDb ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 33lift37 99 'Nov 2019' # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes' # to describe new release.
[ONLY if it's going to be pushed] # edit human/hg19/trackDb.wgEncode.ra to add new .ra file include make DBS=hg19 # edit all.joiner to add ~/tmp/gencodeV33lift37.joiner # verify with: pushd /hive/data/genomes/hg19/bed/gencodeV33lift37 make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck # commit all ############################################################################## 2020-01-16: import of UCSC GENCODE group processing of GENCODE V33lift37 (markd) # deprecate pre-release mv /hive/data/genomes/hg19/bed/gencodeV33lift37 /hive/data/genomes/hg19/bed/gencodeV33lift37Pre # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions # download, build and load tables mkdir -p /hive/data/genomes/hg19/bed/gencodeV33lift37 pushd /hive/data/genomes/hg19/bed/gencodeV33lift37 (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& # compare tables from previous release to see if number changed makes # sense. Results are in gencode-cmp.tsv # generate trackDb and joiner blurb pushd ~/kent/src/hg/makeDb/trackDb ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 33lift37 99 'Jan 2020' # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes' # to describe new release. 
[ONLY if it's going to be pushed] # edit human/hg19/trackDb.gencode.ra to add new .ra file include make DBS=hg19 # edit all.joiner to add ~/tmp/gencodeV33lift37.joiner # verify with: pushd /hive/data/genomes/hg19/bed/gencodeV33lift37 make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck # commit all ############################################################################## 2020-01-21: manually update GENCODE V19 to include chrMT (markd) cd /hive/data/genomes/hg19/bed/gencodeV19/chrMT # convert chrM annotation in gff3 to chrMT zcat ../data/release_19/gencode.v19.annotation.gff3.gz | tawk 'NR==1{print;next} $1=="chrM"{$1="chrMT"; print}' >chrMT.gff3 # comprehensive and basic are the same on chrM and there are no pseudogenes, so this is # easy cat ../tables/wgEncodeGencodeCompV19.gp chrMT.gp >wgEncodeGencodeCompV19.gp cat ../tables/wgEncodeGencodeBasicV19.gp chrMT.gp >wgEncodeGencodeBasicV19.gp hgLoadGenePred -genePredExt hg19 wgEncodeGencodeCompV19 wgEncodeGencodeCompV19.gp hgLoadGenePred -genePredExt hg19 wgEncodeGencodeBasicV19 wgEncodeGencodeBasicV19.gp ############################################################################## 2020-03-11: import of UCSC GENCODE group processing of GENCODE V34lift37 pre-release (markd) # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions # download, build and load tables mkdir -p /hive/data/genomes/hg19/bed/gencodeV34lift37Pre pushd /hive/data/genomes/hg19/bed/gencodeV34lift37Pre (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& # compare tables from previous release to see if number changed makes # sense. Results are in gencode-cmp.tsv # generate trackDb and joiner blurb pushd ~/kent/src/hg/makeDb/trackDb ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 34lift37 100 'March 2020' # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes' # to describe new release.
# edit human/hg19/trackDb.gencode.ra to add new .ra file include make DBS=hg19 # edit all.joiner to add ~/tmp/gencodeV34lift37.joiner # verify with: pushd /hive/data/genomes/hg19/bed/gencodeV34lift37Pre make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck # commit all ### 2020-03-11: final release (markd) mkdir -p /hive/data/genomes/hg19/bed/gencodeV34lift37 repeat above, only making release month April ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 34lift37 100 'April 2020' ############################################################################## 2020-10-14: import of UCSC GENCODE group processing of GENCODE V35lift37 pre-release (markd) # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions # download, build and load tables mkdir -p /hive/data/genomes/hg19/bed/gencodeV35lift37Pre pushd /hive/data/genomes/hg19/bed/gencodeV35lift37Pre (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& # compare tables from previous release to see if number changed makes # sense. Results are in gencode-cmp.tsv # generate trackDb and joiner blurb pushd ~/kent/src/hg/makeDb/trackDb ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 35lift37 101 'June 2020' # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes' # to describe new release. 
# edit human/hg19/trackDb.gencode.ra to add new .ra file include make DBS=hg19 # edit all.joiner to add ~/tmp/gencodeV35lift37.joiner # verify with: pushd /hive/data/genomes/hg19/bed/gencodeV35lift37Pre make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck # commit all ############################################################################## 2020-11-12: import of UCSC GENCODE group processing of GENCODE V35lift37 (markd) # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions # download, build and load tables mkdir -p /hive/data/genomes/hg19/bed/gencodeV35lift37 pushd /hive/data/genomes/hg19/bed/gencodeV35lift37 (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& # compare tables from previous release to see if number changed makes # sense. Results are in gencode-cmp.tsv # generate trackDb and joiner blurb pushd ~/kent/src/hg/makeDb/trackDb ../../makeDb/outside/gencode/bin/gencodeGenerateTrackDbsOldSchema hg19 35lift37 101 'June 2020' # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes' # to describe new release. 
# edit human/hg19/trackDb.gencode.ra to add new .ra file include make DBS=hg19 # edit all.joiner to add ~/tmp/gencodeV35lift37.joiner # verify with: pushd /hive/data/genomes/hg19/bed/gencodeV35lift37 make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck # commit all make alpha DBS=hg19 ############################################################################## 2020-12-21: import of UCSC GENCODE group processing of GENCODE V36lift37 (markd) # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions # download, build and load tables mkdir -p /hive/data/genomes/hg19/bed/gencodeV36lift37 pushd /hive/data/genomes/hg19/bed/gencodeV36lift37 (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& # compare tables from previous release to see if number changed makes # sense. Results are in gencode-cmp.tsv # generate trackDb and joiner blurb pushd ~/kent/src/hg/makeDb/trackDb ../../makeDb/outside/gencode/bin/gencodeGenerateTrackDbsOldSchema hg19 36lift37 102 'Nov 2020' # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes' # to describe new release. 
# edit human/hg19/trackDb.gencode.ra to add new .ra file include make DBS=hg19 # edit all.joiner to add ~/tmp/gencodeV36lift37.joiner # verify with: pushd /hive/data/genomes/hg19/bed/gencodeV36lift37 make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck problem: Error: 1 of 233706 elements (0.000%) of hg19.wgEncodeGencodeAttrsV36lift37.transcriptId are not in key wgEncodeGencodeTranscriptSourceV36lift37.transcriptId line 4061 of /cluster/home/markd/kent/src/hg/makeDb/schema/all.joiner Error: 1 of 233706 elements (0.000%) of hg19.wgEncodeGencodeAttrsV36lift37.geneId are not in key wgEncodeGencodeGeneSourceV36lift37.geneId line 4025 of /cluster/home/markd/kent/src/hg/makeDb/schema/all.joiner The gene/transcript pair is missing from metadata: ENSG00000168939.6 ENST00000302805.2 These are missing from data/release_36lift37/gencode.v36lift37.metadata.Transcript_source.gz data/release_36lift37/gencode.v36lift37.metadata.Gene_source.gz this is the weird case of SPRY3, which now has a transcript past the PAR ENSG00000168939.6 SPRY3 protein_coding ENST00000302805.2 SPRY3-001 protein_coding OTTHUMG00000022675.2 OTTHUMT00000058823.2 CCDS14769.4 2 coding ENSP00000302978.2 CCDS,PAR,appris_principal,basic ENSG00000168939.12_4 SPRY3 protein_coding ENST00000302805.7_1 SPRY3-201 protein_coding OTTHUMG00000022675.3_4 OTTHUMT00000058823.3_1 CCDS14769.4 2 coding ENSP00000302978.2 CCDS,appris_principal_1,basic Edit tables to work around it for now and work out with EBI. 
# commit all make alpha DBS=hg19 -wgEncodeGencodeTranscriptSourceV36lift37 +############################################################################## +2021-03-12: import of UCSC GENCODE group processing of GENCODE V37lift37 (markd) + # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions + + # download, build and load tables + mkdir -p /hive/data/genomes/hg19/bed/gencodeV37lift37 + pushd /hive/data/genomes/hg19/bed/gencodeV37lift37 + (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& + + # compare tables from previous release to see if number changed makes + # sense. Results are in gencode-cmp.tsv + # generate trackDb and joiner blurb + pushd ~/kent/src/hg/makeDb/trackDb + ../../makeDb/outside/gencode/bin/gencodeGenerateTrackDbsOldSchema hg19 37lift37 103 'Feb 2021' + + # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes' + # to describe new release. + + # edit human/hg19/trackDb.gencode.ra to add new .ra file include + jkmake DBS=hg19 + + # edit all.joiner to add ~/tmp/gencodeV37lift37.joiner + # verify with: + pushd /hive/data/genomes/hg19/bed/gencodeV37lift37 + make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck + + problem: + Error: 1 of 236066 elements (0.000%) of hg19.wgEncodeGencodeAttrsV37lift37.transcriptId are not in key wgEncodeGencodeTranscriptSourceV37lift37.transcriptId line 4248 of /cluster/home/markd/kent/src/hg/makeDb/schema/all.joiner + Error: 1 of 236066 elements (0.000%) of hg19.wgEncodeGencodeAttrsV37lift37.geneId are not in key wgEncodeGencodeGeneSourceV37lift37.geneId line 4212 of /cluster/home/markd/kent/src/hg/makeDb/schema/all.joiner + Error: 1 of 236066 elements (0.000%) of hg19.wgEncodeGencodeAttrsV37lift37.geneId are not in key wgEncodeGencodeGeneSourceV37lift37.geneId line 4212 of /cluster/home/markd/kent/src/hg/makeDb/schema/all.joiner + Error: 1 of 236066 elements (0.000%) of 
hg19.wgEncodeGencodeAttrsV37lift37.transcriptId are not in key wgEncodeGencodeTranscriptSourceV37lift37.transcriptId line 4248 of /cluster/home/markd/kent/src/hg/makeDb/schema/all.joiner + + The gene/transcript pair is missing from metadata: + ENSG00000168939.6 / ENST00000302805.2 + These are missing from + data/release_37lift37/gencode.v37lift37.metadata.Transcript_source.gz + data/release_37lift37/gencode.v37lift37.metadata.Gene_source.gz + + this is the weird case of SPRY3, which now has a transcript past the PAR + + echo -e 'ENST00000302805.2\tensembl_havana_transcript_homo_sapiens' >> tables/wgEncodeGencodeTranscriptSourceV37lift37.tab + echo -e 'ENSG00000168939.6\tensembl_havana_transcript_homo_sapiens' >> tables/wgEncodeGencodeGeneSourceV37lift37.tab + + (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.4.out& + + make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck + + # commit all + jkmake alpha DBS=hg19 ##############################################################################