ca39d76b0c4deafcb785e8bea631d316b3bb59ed markd Fri Oct 28 21:28:51 2022 -0700 import of GENCODE V42 final release with addition of transcript rankings diff --git src/hg/makeDb/outside/gencode/gencodeLoad.mk src/hg/makeDb/outside/gencode/gencodeLoad.mk index 0b44272..8d8fa96 100644 --- src/hg/makeDb/outside/gencode/gencodeLoad.mk +++ src/hg/makeDb/outside/gencode/gencodeLoad.mk @@ -20,32 +20,32 @@ host=$(shell hostname) ppid=$(shell echo $$PPID) tmpExt = ${host}.${ppid}.tmp SHELL = bash -e export SHELLOPTS=pipefail ## # programs, etc ## mach = $(shell uname -m) ## # Release info and files from Sanger. # BEGIN EDIT THESE EACH RELEASE ## -#preRelease = no -preRelease = yes +preRelease = no +#preRelease = yes db = hg38 #db = hg19 #db = mm39 #db = mm10 ifeq (${db},mm10) grcRefAssembly = GRCm38 verBase = M25 prevVer = M24 backmapTargetVer = M25 ver = ${verBase}lift37 gencodeOrg = Gencode_mouse ftpReleaseSubdir = release_${verBase}/GRCm38_mapping annGffTypeName = chr_patch_hapl_scaff.annotation isBackmap = yes else ifeq (${db},mm39) @@ -81,30 +81,31 @@ ifeq (${preRelease},yes) # pre-release baseUrl = rsync://ftp.ebi.ac.uk/pub/databases/havana/gencode_pre else # official release baseUrl = rsync://ftp.ebi.ac.uk/pub/databases/gencode endif rel = V${ver} releaseUrl = ${baseUrl}/${gencodeOrg}/${ftpReleaseSubdir} dataDir = data relDir = ${dataDir}/release_${ver} annotationGff = ${relDir}/gencode.v${ver}.${annGffTypeName}.gff3.gz pseudo2WayGff = ${relDir}/gencode.v${ver}.2wayconspseudos.gff3.gz polyAGff = ${relDir}/gencode.v${ver}.polyAs.gff3.gz +transcriptRanks = ${relDir}/gencode.v${ver}.transcript_rankings.txt.gz gencodeBinDir = ${HOME}/kent/src/hg/makeDb/outside/gencode/bin gencodeMakeTracks = ${gencodeBinDir}/gencodeMakeTracks gencodeMakeAttrs = ${gencodeBinDir}/gencodeMakeAttrs gencodeExonSupportToTable = ${gencodeBinDir}/gencodeExonSupportToTable gencodeGxfToGenePred = ${gencodeBinDir}/gencodeGxfToGenePred gencodePolyaGxfToGenePred = ${gencodeBinDir}/gencodePolyaGxfToGenePred gencodeGxfToAttrs = ${gencodeBinDir}/gencodeGxfToAttrs buildGencodeToUcscLift = ${HOME}/kent/src/hg/makeDb/outside/gencode/bin/buildGencodeToUcscLift gencodeBackMapMetadataIds = ${gencodeBinDir}/gencodeBackMapMetadataIds encodeAutoSqlDir = ${HOME}/kent/src/hg/lib/encode ## # intermediate data not loaded into tracks ## @@ -352,42 +353,42 @@ @mkdir -p $(dir $@) zcat $< | tawk '$$1!~/^ENSTR/' | sort -k 1,1 | ${metaFilterCmd} /dev/stdin > $@.${tmpExt} mv -f $@.${tmpExt} $@ ## # intermediate data for ensembl/havana, not loaded into databases ## ${gencodeGp}: ${annotationGff} ${gencodeToUcscChain} @mkdir -p $(dir $@) ${gencodeGxfToGenePred} ${db} ${annotationGff} ${gencodeToUcscChain} $@.${tmpExt} mv -f $@.${tmpExt} $@ touch $@ ${gencodeTsv}: ${annotationGff} @mkdir -p $(dir $@) - ${gencodeGxfToAttrs} ${annotationGff} $@.${tmpExt} + ${gencodeGxfToAttrs} --transcriptRanks=${transcriptRanks} ${annotationGff} $@.${tmpExt} mv -f $@.${tmpExt} $@ ${targetGencodeTsv}: @mkdir -p $(dir $@) hgsql ${db} -e 'select * from wgEncodeGencodeAttrsV${backmapTargetVer}' > $@.${tmpExt} mv -f $@.${tmpExt} $@ # check attributes so code can be updated to handle new biotypes checkAttrs: ${annotationGff} - ${gencodeGxfToAttrs} ${annotationGff} /dev/null + ${gencodeGxfToAttrs} --transcriptRanks=${transcriptRanks} ${annotationGff} /dev/null ## # load tables # browser commands use static tmp file name, so use lock file to serialize ## loadLock = flock load.lock loadTables: ${genePredExtTables:%=${loadedDir}/%.genePredExt.loaded} \ ${genePredTables:%=${loadedDir}/%.genePred.loaded} \ ${tabTables:%=${loadedDir}/%.tab.loaded} ${loadedDir}/%.genePredExt.loaded: ${tableDir}/%.gp @mkdir -p $(dir $@) ${loadLock} hgLoadGenePred -genePredExt ${db} $* $< touch $@