dc6ddf665daba39f0d4aac9c83e2d0764da8cf3b markd Fri Mar 12 20:48:48 2021 -0800 import of gencodeV37lift37 diff --git src/hg/makeDb/doc/hg19.gencode.txt src/hg/makeDb/doc/hg19.gencode.txt index a4ddaa6..be819f7 100644 --- src/hg/makeDb/doc/hg19.gencode.txt +++ src/hg/makeDb/doc/hg19.gencode.txt @@ -777,18 +777,66 @@ The gene/transcript pair is missing from metadata: ENSG00000168939.6 ENST00000302805.2 These are missing from data/release_36lift37/gencode.v36lift37.metadata.Transcript_source.gz data/release_36lift37/gencode.v36lift37.metadata.Gene_source.gz this is the weird cases of SPRV3 which has now has a transcript past the PAR ENSG00000168939.6 SPRY3 protein_coding ENST00000302805.2 SPRY3-001 protein_coding OTTHUMG00000022675.2 OTTHUMT00000058823.2 CCDS14769.4 2 coding ENSP00000302978.2 CCDS,PAR,appris_principal,basic ENSG00000168939.12_4 SPRY3 protein_coding ENST00000302805.7_1 SPRY3-201 protein_coding OTTHUMG00000022675.3_4 OTTHUMT00000058823.3_1 CCDS14769.4 2 coding ENSP00000302978.2 CCDS,appris_principal_1,basic Edit tables to work around it for now and work out with EBI. # commit all make alpha DBS=hg19 -wgEncodeGencodeTranscriptSourceV36lift37 +############################################################################## +2021-03-12: import of UCSC GENCODE group processing of GENCODE V37lift37 (markd) + # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions + + # download, build and load tables + mkdir -p /hive/data/genomes/hg19/bed/gencodeV37lift37 + pushd /hive/data/genomes/hg19/bed/gencodeV37lift37 + (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& + + # compare tables from previous release to see if number changed makes + # sense. 
Results are in gencode-cmp.tsv + # generate trackDb and joiner blurb + pushd ~/kent/src/hg/makeDb/trackDb + ../../makeDb/outside/gencode/bin/gencodeGenerateTrackDbsOldSchema hg19 37lift37 103 'Feb 2021' + + # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes' + # to describe new release. + + # edit human/hg19/trackDb.gencode.ra to add new .ra file include + jkmake DBS=hg19 + + # edit all.joiner to add ~/tmp/gencodeV37lift37.joiner + # verify with: + pushd /hive/data/genomes/hg19/bed/gencodeV37lift37 + make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck + + problem: + Error: 1 of 236066 elements (0.000%) of hg19.wgEncodeGencodeAttrsV37lift37.transcriptId are not in key wgEncodeGencodeTranscriptSourceV37lift37.transcriptId line 4248 of /cluster/home/markd/kent/src/hg/makeDb/schema/all.joiner + Error: 1 of 236066 elements (0.000%) of hg19.wgEncodeGencodeAttrsV37lift37.geneId are not in key wgEncodeGencodeGeneSourceV37lift37.geneId line 4212 of /cluster/home/markd/kent/src/hg/makeDb/schema/all.joiner + Error: 1 of 236066 elements (0.000%) of hg19.wgEncodeGencodeAttrsV37lift37.geneId are not in key wgEncodeGencodeGeneSourceV37lift37.geneId line 4212 of /cluster/home/markd/kent/src/hg/makeDb/schema/all.joiner + Error: 1 of 236066 elements (0.000%) of hg19.wgEncodeGencodeAttrsV37lift37.transcriptId are not in key wgEncodeGencodeTranscriptSourceV37lift37.transcriptId line 4248 of /cluster/home/markd/kent/src/hg/makeDb/schema/all.joiner + + The gene/transcript pair is missing from metadata: + ENSG00000168939.6 / ENST00000302805.2 + These are missing from + data/release_37lift37/gencode.v37lift37.metadata.Transcript_source.gz + data/release_37lift37/gencode.v37lift37.metadata.Gene_source.gz + + this is the weird case of SPRY3, which now has a transcript past the PAR + + echo -e 'ENST00000302805.2\tensembl_havana_transcript_homo_sapiens' >> tables/wgEncodeGencodeTranscriptSourceV37lift37.tab + echo -e 
'ENSG00000168939.6\tensembl_havana_transcript_homo_sapiens' >> tables/wgEncodeGencodeGeneSourceV37lift37.tab + + (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.4.out& + + make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck + + # commit all + jkmake alpha DBS=hg19 ##############################################################################