00d631b305da84b695309ff2f85ae8d25cd01fc7 markd Mon Jun 17 02:13:06 2019 -0700 split gencode hg38 doc into a separate file diff --git src/hg/makeDb/doc/hg38/gencode.txt src/hg/makeDb/doc/hg38/gencode.txt new file mode 100644 index 0000000..d174ebf --- /dev/null +++ src/hg/makeDb/doc/hg38/gencode.txt @@ -0,0 +1,512 @@ +######################################################################### +2014-08-11: import of UCSC GENCODE group processing of GENCODE V20 (markd) + # download files + mkdir -p /hive/data/genomes/hg38/bed/gencodeV20/data + cd /hive/data/genomes/hg38/bed/gencodeV20 + + # download gencode release + cd data + wget -nv -r -np ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_human/release_20 + mv ftp.sanger.ac.uk/pub/gencode/Gencode_human/release_20 . + rm -rf ftp.sanger.ac.uk + + cd .. + # create Makefile from previous one. + cp /hive/data/genomes/hg19/bed/gencodeV19/Makefile . + + # build and load tables + (time nice make -j 10) >&build.1.out& + + # compare tables from previous release to see if number changed makes + # sense. + make cmpRelease >gencode-cmp.tsv + + ## Copy and update trackDb files from previous release. + ## Change version and use lower priority so it sorts to top of + ## super track page. 
+ ## Important to make sure filter attrs.transcriptType matches current set + ## figured out with + select distinct transcriptType from wgEncodeGencodeAttrsV20 order by transcriptType; + cd kent/src/hg/makeDb/trackDb + cp human/hg38/wgEncodeGencodeV18.ra human/hg38/wgEncodeGencodeV20.ra + cp human/hg38/wgEncodeGencodeV18.html human/hg38/wgEncodeGencodeV20.html + + # edit these plus human/hg38/trackDb.wgEncode.ra + # - set priorities in wgEncodeGencodeV20.ra in reverse order with previous + # tracks so newest shows up first + # priority - set to previous version priority minus 0.001 + # searchPriority - set each to previous minus 0.001 + # - make current track default to pack and hide previous [ONLY if it's going to be pushed] + # superTrack wgEncodeGencodeSuper pack + # - Update wgEncodeGencodeSuper.html to describe new release and to + # pick up other updates. + + ### IMPORTANT: make sure that hgTracks/gencodeTracks.c registers + ### track handler for this version of gencode: + registerTrackHandler("wgEncodeGencodeV20", gencodeGeneMethods); + + # update all.joiner and validate + # look for the last section `begin Gencode V??' in all.joiner + # and copy and update version + # repeat this until happy, editing minCheck as needed + cd /hive/data/genomes/hg38/bed/gencodeV20 + make joinerCheck # output in check/joiner.out + + +######################################################################### +2014-10-16: import of UCSC GENCODE group processing of GENCODE V21 (markd) + # download files + mkdir -p /hive/data/genomes/hg38/bed/gencodeV21/data + cd /hive/data/genomes/hg38/bed/gencodeV21 + + # create Makefile from previous one. + # WARNING: next build should start with hg/makeDb/outside/gencode/gencodeLoad.mk + cp /hive/data/genomes/hg38/bed/gencodeV20/Makefile . + + # download, build and load tables + (time nice make -j 10) >&build.1.out& + + # compare tables from previous release to see if number changed makes + # sense. 
+ make cmpRelease >gencode-cmp.tsv + + ## Copy and update trackDb files from previous release. + ## Change version and use lower priority so it sorts to top of + ## super track page. + ## Important to make sure filter attrs.transcriptType matches current set + ## figured out with + select distinct transcriptType from wgEncodeGencodeAttrsV21 order by transcriptType; + cd kent/src/hg/makeDb/trackDb + cp human/hg38/wgEncodeGencodeV18.ra human/hg38/wgEncodeGencodeV21.ra + cp human/hg38/wgEncodeGencodeV18.html human/hg38/wgEncodeGencodeV21.html + + # edit these plus human/hg38/trackDb.wgEncode.ra + # - set priorities in wgEncodeGencodeV21.ra in reverse order with previous + # tracks so newest shows up first + # priority - set to previous version priority minus 0.001 + # searchPriority - set each to previous minus 0.001 + # - make current track default to pack and hide previous [ONLY if it's going to be pushed] + # superTrack wgEncodeGencodeSuper pack + # - Update wgEncodeGencodeSuper.html to describe new release and to + # pick up other updates. + + ### IMPORTANT: make sure that hgTracks/gencodeTracks.c registers + ### track handler for this version of gencode: + registerTrackHandler("wgEncodeGencodeV21", gencodeGeneMethods); + + # update all.joiner and validate + # look for the last section `begin Gencode V??' 
in all.joiner + # and copy and update version + # repeat this until happy, editing minCheck as needed + cd /hive/data/genomes/hg38/bed/gencodeV21 + make joinerCheck # output in check/joiner.out + + +######################################################################### +2015-03-13: import of UCSC GENCODE group processing of GENCODE V22 (markd) + # download files + mkdir -p /hive/data/genomes/hg38/bed/gencodeV22 + cd /hive/data/genomes/hg38/bed/gencodeV22 + + # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set + # release and transcript support versions + + + # download, build and load tables + (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& + + # compare tables from previous release to see if number changed makes + # sense. + make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk cmpRelease >gencode-cmp.tsv + + ## Copy and update trackDb files from previous release. + ## Change version and use lower priority so it sorts to top of + ## super track page. + ## Important to make sure filter attrs.transcriptType matches current set + ## figured out with + select distinct transcriptType from wgEncodeGencodeAttrsV22 order by transcriptType; + cd kent/src/hg/makeDb/trackDb + cp human/hg38/wgEncodeGencodeV18.ra human/hg38/wgEncodeGencodeV22.ra + cp human/hg38/wgEncodeGencodeV18.html human/hg38/wgEncodeGencodeV22.html + + # edit these plus human/hg38/trackDb.wgEncode.ra + # - set priorities in wgEncodeGencodeV22.ra in reverse order with previous + # tracks so newest shows up first + # priority - set to previous version priority minus 0.001 + # searchPriority - set each to previous minus 0.001 + # - make current track default to pack and hide previous [ONLY if it's going to be pushed] + # superTrack wgEncodeGencodeSuper pack + # - Update wgEncodeGencodeSuper.html to describe new release and to + # pick up other updates. + + # update all.joiner and validate + # look for the last section `begin Gencode V??' 
in all.joiner + # and copy and update version + # repeat this until happy, editing minCheck as needed + # output in check/joiner.out + cd /hive/data/genomes/hg38/bed/gencodeV22 + make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck + +######################################################################### +2015-10-02: import of UCSC GENCODE group processing of GENCODE V23 (markd) + # download files + mkdir -p /hive/data/genomes/hg38/bed/gencodeV23 + cd /hive/data/genomes/hg38/bed/gencodeV23 + + # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set + # release and transcript support versions + + + # download, build and load tables + (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& + + # compare tables from previous release to see if number changed makes + # sense. + make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk cmpRelease >gencode-cmp.tsv + + ## Copy and update trackDb files from previous release. + ## Change version and use lower priority so it sorts to top of + ## super track page. + ## Important to make sure filter attrs.transcriptType matches current set + ## figured out with + select distinct transcriptType from wgEncodeGencodeAttrsV23 order by transcriptType; + cd kent/src/hg/makeDb/trackDb + cp human/hg38/wgEncodeGencodeV22.ra human/hg38/wgEncodeGencodeV23.ra + cp human/hg38/wgEncodeGencodeV22.html human/hg38/wgEncodeGencodeV23.html + + # edit these plus human/hg38/trackDb.wgEncode.ra + # - set priorities in wgEncodeGencodeV23.ra tracks so newest shows up first + # priority - set to previous version priority minus 0.001 + # searchPriority - set each to previous minus 0.001 + # - make current track default to pack and hide previous [ONLY if it's going to be pushed] + # superTrack wgEncodeGencodeSuper pack + # - Update wgEncodeGencodeSuper.html to describe new release and to + # pick up other updates. 
+ + # update all.joiner and validate + # look for the last section `begin Gencode V??' in all.joiner + # and copy and update version + # repeat this until happy, editing minCheck as needed + # output in check/joiner.out + cd /hive/data/genomes/hg38/bed/gencodeV23 + make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck + +######################################################################### +2015-12-08: import of UCSC GENCODE group processing of GENCODE V24 (markd) + # download files + mkdir -p /hive/data/genomes/hg38/bed/gencodeV24 + cd /hive/data/genomes/hg38/bed/gencodeV24 + + # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set + # release and transcript support versions + + + # download, build and load tables + (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& + + # compare tables from previous release to see if number changed makes + # sense (results in gencode-cmp.tsv) + make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk cmpRelease + + ## Copy and update trackDb files from previous release. + ## Change version and use lower priority so it sorts to top of + ## super track page. Follow instructions in ra file to ensure + ## filters are correct. + cd kent/src/hg/makeDb/trackDb + cp human/hg38/wgEncodeGencodeV23.ra human/hg38/wgEncodeGencodeV24.ra + cp human/hg38/wgEncodeGencodeV23.html human/hg38/wgEncodeGencodeV24.html + + + # edit these plus human/hg38/trackDb.wgEncode.ra + # - set priorities in wgEncodeGencodeV24.ra tracks so newest shows up first + # priority - set to previous version priority minus 0.001 + # searchPriority - set each to previous minus 0.001 + # - make current track default to pack and hide previous [ONLY if it's going to be pushed] + # superTrack wgEncodeGencodeSuper pack + # - Update wgEncodeGencodeSuper.html to describe new release and to + # pick up other updates. + + # update all.joiner and validate + # look for the last section `begin Gencode V??' 
in all.joiner + # and copy and update version + # repeat this until happy, editing minCheck as needed + # output in check/joiner.out + ### NOT DONE SINCE TRACK NOT BEING PUSHED + cd /hive/data/genomes/hg38/bed/gencodeV24 + make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck + +######################################################################### +2016-07-23: import of UCSC GENCODE group processing of GENCODE V25 (markd) + # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions + + # download, build and load tables + mkdir -p /hive/data/genomes/hg38/bed/gencodeV25 + pushd /hive/data/genomes/hg38/bed/gencodeV25 + (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& + + # compare tables from previous release to see if number changed makes + # sense. Results are in gencode-cmp.tsv + + ## Copy and update trackDb files from previous release. + ## Change version and use lower priority so it sorts to top of + ## super track page. Follow instructions in ra file to ensure + ## filters are correct. + cd kent/src/hg/makeDb/trackDb + cp mouse/hg38/wgEncodeGencodeVM9.ra mouse/hg38/wgEncodeGencodeV25.ra + cp mouse/hg38/wgEncodeGencodeVM9.html mouse/hg38/wgEncodeGencodeV25.html + + # edit these plus mouse/hg38/trackDb.wgEncode.ra + # - set priorities in wgEncodeGencodeV25.ra tracks so newest shows up first + # priority - set to previous version priority minus 0.001 + # searchPriority - set each to previous minus 0.001 + # - make current track default to pack and hide previous [ONLY if it's going to be pushed] + # superTrack wgEncodeGencodeSuper pack + # - Update wgEncodeGencodeSuper.html to describe new release and to + # pick up other updates. [ONLY if it's going to be pushed] + + # update all.joiner and validate + # look for the last section `begin Gencode V??' 
in all.joiner + # and copy and update version + # repeat this until happy, editing minCheck as needed + # output in check/joiner.out + cd /hive/data/genomes/hg38/bed/gencodeV25 + make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck + +######################################################################### +2017-04-16: import of UCSC GENCODE group processing of GENCODE V26 (markd) + # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions + + # download, build and load tables + mkdir -p /hive/data/genomes/hg38/bed/gencodeV26 + pushd /hive/data/genomes/hg38/bed/gencodeV26 + (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& + + # compare tables from previous release to see if number changed makes + # sense. Results are in gencode-cmp.tsv + + # generate trackDb and joiner blurb + pushd kent/src/hg/makeDb/trackDb + ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg38 26 88 'March 2017' + + # edit human/hg38/trackDb.wgEncode.ra to add new .ra file include + make DBS=hg38 + + # Update human/hg38/wgEncodeGencodeSuper.html and update 'Release Notes' + # to describe new release. [ONLY if it's going to be pushed] + + # edit all.joiner to add ~/tmp/gencodeV26.joiner + # verify with: + pushd /hive/data/genomes/hg38/bed/gencodeV26 + make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck + + # commit all +######################################################################### +2017-09-14: import of UCSC GENCODE group processing of GENCODE V27 (markd) + # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions + + # download, build and load tables + mkdir -p /hive/data/genomes/hg38/bed/gencodeV27 + pushd /hive/data/genomes/hg38/bed/gencodeV27 + (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& + + # compare tables from previous release to see if number changed makes + # sense. 
Results are in gencode-cmp.tsv + + # generate trackDb and joiner blurb + pushd kent/src/hg/makeDb/trackDb + ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg38 27 90 'Aug 2017' + + # Update human/hg38/wgEncodeGencodeSuper.html and update 'Release Notes' + # to describe new release. [ONLY if it's going to be pushed] + + # edit human/hg38/trackDb.wgEncode.ra to add new .ra file include + make DBS=hg38 + + # edit all.joiner to add ~/tmp/gencodeV27.joiner + # verify with: + pushd /hive/data/genomes/hg38/bed/gencodeV27 + make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck + + # commit all + +######################################################################### +2018-03-07: update UCSC GENCODE V27 to include protein id (for VAI) and fix PAR tag + +# save existing data, move aside +mkdir -p prev/pre-proteinId +mv data/gencode.tab prev/pre-proteinId/ +mv tables/wgEncodeGencodeAttrsV27.tab tables/wgEncodeGencodeTagV27.tab prev/pre-proteinId/ +mv loaded/wgEncodeGencodeAttrsV27.tab.loaded loaded/wgEncodeGencodeTagV27.tab.loaded prev/pre-proteinId/ + +# rebuild, getting protein ids. +make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk loadTables >&prot-ids.log + +# 2018-03-19: update search to include protein id +cd kent/src/hg/makeDb/trackDb +../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg38 27 90 'Aug 2017' + +######################################################################### +2018-04-11: import of UCSC GENCODE group processing of GENCODE V28 (markd) + # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions + + # download, build and load tables + mkdir -p /hive/data/genomes/hg38/bed/gencodeV28 + pushd /hive/data/genomes/hg38/bed/gencodeV28 + (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& + + # compare tables from previous release to see if number changed makes + # sense. 
Results are in gencode-cmp.tsv + + # generate trackDb and joiner blurb + pushd kent/src/hg/makeDb/trackDb + ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg38 28 92 'Apr 2018' + + # Update human/hg38/wgEncodeGencodeSuper.html and update 'Release Notes' + # to describe new release. [ONLY if it's going to be pushed] + + # edit human/hg38/trackDb.wgEncode.ra to add new .ra file include + make DBS=hg38 + + # edit all.joiner to add ~/tmp/gencodeV28.joiner + # verify with: + pushd /hive/data/genomes/hg38/bed/gencodeV28 + make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck + + # commit all + +######################################################################### +2018-09-05: import of UCSC GENCODE group processing of GENCODE V29 (markd) + 2018-11-09 this was the prerelease and the /hive/data/genomes/hg38/bed/gencodeV29 + was renamed to /hive/data/genomes/hg38/bed/gencodeV29.pre when the full release was + downloaded. + + # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions + + # This is a pre-release from + # ftp://ftp.ebi.ac.uk/pub/databases/havana/gencode_pre/Gencode_human/release_29/ + + # download, build and load tables + mkdir -p /hive/data/genomes/hg38/bed/gencodeV29 + pushd /hive/data/genomes/hg38/bed/gencodeV29 + + # pre-release contained invalid biotype, fix for now + mv gencode.v29.chr_patch_hapl_scaff.annotation.gtf.gz gencode.v29.chr_patch_hapl_scaff.annotation.orig.gtf.gz + zcat gencode.v29.chr_patch_hapl_scaff.annotation.orig.gtf.gz | sed -e 's/"lincrna"/"lincRNA"/g' | gzip -c> gencode.v29.chr_patch_hapl_scaff.annotation.gtf.gz + + (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& + + # compare tables from previous release to see if number changed makes + # sense. 
Results are in gencode-cmp.tsv + + # generate trackDb and joiner blurb + pushd ~/kent/src/hg/makeDb/trackDb + ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg38 29 94 'Sept 2018' + + # Edit human/hg38/wgEncodeGencodeSuper.html and update 'Release Notes' + # to describe new release. [ONLY if it's going to be pushed] + + # edit human/hg38/trackDb.wgEncode.ra to add new .ra file include + make DBS=hg38 + + # edit all.joiner to add ~/tmp/gencodeV29.joiner + # verify with: + pushd /hive/data/genomes/hg38/bed/gencodeV29 + make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck + + # commit all + +######################################################################### +2018-11-09: import of UCSC GENCODE group processing of GENCODE V29 (markd) + # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions + + # download, build and load tables + mkdir -p /hive/data/genomes/hg38/bed/gencodeV29 + pushd /hive/data/genomes/hg38/bed/gencodeV29 + + (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& + + # compare tables from previous release to see if number changed makes + # sense. Results are in gencode-cmp.tsv + + # generate trackDb and joiner blurb + pushd ~/kent/src/hg/makeDb/trackDb + ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg38 29 94 'Oct 2018' + + # Edit human/hg38/wgEncodeGencodeSuper.html and update 'Release Notes' + # to describe new release. 
[ONLY if it's going to be pushed] + + # edit human/hg38/trackDb.wgEncode.ra to add new .ra file include + make DBS=hg38 + + # edit all.joiner to add ~/tmp/gencodeV29.joiner + # verify with: + pushd /hive/data/genomes/hg38/bed/gencodeV29 + make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck + + # commit all + +######################################################################### +2019-04-08: import of UCSC GENCODE group processing of GENCODE V30 (markd) + # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions + + # download, build and load tables + mkdir -p /hive/data/genomes/hg38/bed/gencodeV30 + pushd /hive/data/genomes/hg38/bed/gencodeV30 + + (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& + + # compare tables from previous release to see if number changed makes + # sense. Results are in gencode-cmp.tsv + + # generate trackDb and joiner blurb + pushd ~/kent/src/hg/makeDb/trackDb + ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg38 30 96 'Apr 2019' + + # Edit human/hg38/wgEncodeGencodeSuper.html and update 'Release Notes' + # to describe new release. [ONLY if it's going to be pushed] + + # edit human/hg38/trackDb.gencode.ra to add new .ra file include + make DBS=hg38 + + ## only if being pushed to RR: + # edit all.joiner to add ~/tmp/gencodeV30.joiner + # verify with: + pushd /hive/data/genomes/hg38/bed/gencodeV30 + make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck + + # commit all + +######################################################################### +2019-05-17: import of UCSC GENCODE group processing of GENCODE V31 (markd) + # import of pre-release, it will be promoted if there are no changes in the final release. 
+ # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions + + # download, build and load tables + mkdir -p /hive/data/genomes/hg38/bed/gencodeV31-pre + pushd /hive/data/genomes/hg38/bed/gencodeV31-pre + + (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& + + # compare tables from previous release to see if number changed makes + # sense. Results are in gencode-cmp.tsv + + # generate trackDb and joiner blurb + pushd ~/kent/src/hg/makeDb/trackDb + ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg38 31 97 'June 2019' + + # Edit human/hg38/wgEncodeGencodeSuper.html and update 'Release Notes' + # to describe new release. [ONLY if it's going to be pushed] + + # edit human/hg38/trackDb.gencode.ra to add new .ra file include + make DBS=hg38 + + ## only if being pushed to RR: + # edit all.joiner to add ~/tmp/gencodeV31.joiner + # verify with: + pushd /hive/data/genomes/hg38/bed/gencodeV31 + make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck + + # commit all + # if pushing public, add ticket and MARK QA READY + +#########################################################################