70fde199fda0e697d5c0e5e01470ff433fa00dc1 markd Tue May 14 00:06:40 2024 -0700 Import of GENCODE V46lift37, including adding an option to the import code to drop specific transcripts. This deals with an incorrectly included PAR transcript. diff --git src/hg/makeDb/doc/hg19.gencode.txt src/hg/makeDb/doc/hg19.gencode.txt index 8a497f5..bc6de51 100644 --- src/hg/makeDb/doc/hg19.gencode.txt +++ src/hg/makeDb/doc/hg19.gencode.txt @@ -1263,15 +1263,61 @@ # edit all.joiner to add ~/tmp/gencodeV45lift37.joiner # verify with: pushd /hive/data/genomes/hg19/bed/gencodeV45lift37/hgcImport${pre} make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck These two complaint are cause by the new PAR id scheme causing a chrY V19 copy to be brought in. This needs to fixed in some way Error: 1 duplicates in hg19.wgEncodeGencodeTranscriptSourceV45lift37.transcriptId including 'ENST00000302805.2' Error: 1 duplicates in hg19.for this release. # commit all jkmake alpha DBS=hg19 ############################################################################## +2024-05-13: import of UCSC GENCODE group processing of GENCODE V46lift37 final release (markd) + # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions + # set shell variable: + pre="" + + # download, build and load tables + mkdir -p /hive/data/genomes/hg19/bed/gencodeV46lift37/hgcImport${pre} + pushd /hive/data/genomes/hg19/bed/gencodeV46lift37/hgcImport${pre} + + (time nice make -O -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& + + # Incorrect gene symbol lines in metadata.HGNC where there are only two columns. + # Reported and removed locally with + cd data/release_46lift37/ + mv gencode.v46lift37.metadata.HGNC.gz gencode.v46lift37.metadata.HGNC.badrows.gz + zcat gencode.v46lift37.metadata.HGNC.badrows.gz | tawk 'NF==3' | pigz -c >gencode.v46lift37.metadata.HGNC.gz + + # compare tables from previous release to see if number changed makes + # sense. 
Results are in gencode-cmp.tsv + # generate trackDb and joiner blurb + pushd ~/kent/src/hg/makeDb/trackDb + ../../makeDb/outside/gencode/bin/gencodeGenerateTrackDbs hg19 46lift37 112 'May 2024' + + # edit human/hg19/trackDb.gencode.ra to add new .ra file include + # edit all.joiner to add ~/tmp/gencodeV46lift37.joiner + + # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes' + # to describe new release. + + jkmake DBS=hg19 + + # edit all.joiner to add ~/tmp/gencodeV46lift37.joiner + # verify with: + pushd /hive/data/genomes/hg19/bed/gencodeV46lift37/hgcImport${pre} + make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck + + These two complaints are caused by the new PAR id scheme causing a chrY V19 copy to be brought in. + This needs to be fixed in some way + Error: 1 duplicates in hg19.wgEncodeGencodeTranscriptSourceV46lift37.transcriptId including 'ENST00000302805.2' + Error: 1 duplicates in hg19.for this release. + + + # commit all + jkmake alpha DBS=hg19 + +##############################################################################