703e07e1d02ee1e346d5dcf6c0610a3d4d5f4069 markd Tue Jun 29 17:12:38 2021 -0700 import of GENCODE V38lift37 diff --git src/hg/makeDb/doc/hg19.gencode.txt src/hg/makeDb/doc/hg19.gencode.txt index be819f7..a0d1f67 100644 --- src/hg/makeDb/doc/hg19.gencode.txt +++ src/hg/makeDb/doc/hg19.gencode.txt @@ -828,15 +828,65 @@ data/release_37lift37/gencode.v37lift37.metadata.Gene_source.gz this is the weird cases of SPRV3 which has now has a transcript past the PAR echo -e 'ENST00000302805.2\tensembl_havana_transcript_homo_sapiens' >> tables/wgEncodeGencodeTranscriptSourceV37lift37.tab echo -e 'ENSG00000168939.6\tensembl_havana_transcript_homo_sapiens' >> tables/wgEncodeGencodeGeneSourceV37lift37.tab (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.4.out& make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck # commit all jkmake alpha DBS=hg19 ############################################################################## +2021-06-29: import of UCSC GENCODE group processing of GENCODE V38lift37 (markd) + # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions + + # download, build and load tables + mkdir -p /hive/data/genomes/hg19/bed/gencodeV38lift37 + pushd /hive/data/genomes/hg19/bed/gencodeV38lift37 + (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& + + # compare tables from previous release to see if number changed makes + # sense. Results are in gencode-cmp.tsv + # generate trackDb and joiner blurb + pushd ~/kent/src/hg/makeDb/trackDb + ../../makeDb/outside/gencode/bin/gencodeGenerateTrackDbsOldSchema hg19 38lift37 104 'May 2021' + + # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes' + # to describe new release. 
+ + # edit human/hg19/trackDb.gencode.ra to add new .ra file include + jkmake DBS=hg19 + + # edit all.joiner to add ~/tmp/gencodeV38lift37.joiner + # verify with: + pushd /hive/data/genomes/hg19/bed/gencodeV38lift37 + make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck + + problem: + Error: 1 of 236066 elements (0.000%) of hg19.wgEncodeGencodeAttrsV38lift37.transcriptId are not in key wgEncodeGencodeTranscriptSourceV38lift37.transcriptId line 4248 of /cluster/home/markd/kent/src/hg/makeDb/schema/all.joiner + Error: 1 of 236066 elements (0.000%) of hg19.wgEncodeGencodeAttrsV38lift37.geneId are not in key wgEncodeGencodeGeneSourceV38lift37.geneId line 4212 of /cluster/home/markd/kent/src/hg/makeDb/schema/all.joiner + Error: 1 of 236066 elements (0.000%) of hg19.wgEncodeGencodeAttrsV38lift37.geneId are not in key wgEncodeGencodeGeneSourceV38lift37.geneId line 4212 of /cluster/home/markd/kent/src/hg/makeDb/schema/all.joiner + Error: 1 of 236066 elements (0.000%) of hg19.wgEncodeGencodeAttrsV38lift37.transcriptId are not in key wgEncodeGencodeTranscriptSourceV38lift37.transcriptId line 4248 of /cluster/home/markd/kent/src/hg/makeDb/schema/all.joiner + + The gene/transcript pair is missing from metadata: + ENSG00000168939.6 / ENST00000302805.2 + These are missing from + data/release_38lift37/gencode.v38lift37.metadata.Transcript_source.gz + data/release_38lift37/gencode.v38lift37.metadata.Gene_source.gz + + this is the weird case of SPRY3, which now has a transcript past the PAR + + echo -e 'ENST00000302805.2\tensembl_havana_transcript_homo_sapiens' >> tables/wgEncodeGencodeTranscriptSourceV38lift37.tab + echo -e 'ENSG00000168939.6\tensembl_havana_transcript_homo_sapiens' >> tables/wgEncodeGencodeGeneSourceV38lift37.tab + + (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.4.out& + + make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck + + # commit all + jkmake alpha DBS=hg19 + 
+##############################################################################