1fe89284953dd069f7155892bdcb8306adea13ef markd Thu Nov 3 21:31:54 2022 -0700 import of v42lift37 diff --git src/hg/makeDb/outside/gencode/gencodeLoad.mk src/hg/makeDb/outside/gencode/gencodeLoad.mk index 8d1456d..b0b77d8 100644 --- src/hg/makeDb/outside/gencode/gencodeLoad.mk +++ src/hg/makeDb/outside/gencode/gencodeLoad.mk @@ -23,89 +23,92 @@ SHELL = bash -e export SHELLOPTS=pipefail ## # programs, etc ## mach = $(shell uname -m) ## # Release info and files from Sanger. # BEGIN EDIT THESE EACH RELEASE ## preRelease = no #preRelease = yes #db = hg38 -#db = hg19 -db = mm39 +db = hg19 +#db = mm39 #db = mm10 ifeq (${db},mm10) grcRefAssembly = GRCm38 verBase = M25 prevVer = M24 backmapTargetVer = M25 ver = ${verBase}lift37 gencodeOrg = Gencode_mouse ftpReleaseSubdir = release_${verBase}/GRCm38_mapping annGffTypeName = chr_patch_hapl_scaff.annotation isBackmap = yes else ifeq (${db},mm39) grcRefAssembly = GRCm39 ver = M31 prevVer = M30 gencodeOrg = Gencode_mouse ftpReleaseSubdir = release_${ver} annGffTypeName = chr_patch_hapl_scaff.annotation else ifeq (${db},hg38) grcRefAssembly = GRCh38 ver = 42 prevVer = 41 gencodeOrg = Gencode_human ftpReleaseSubdir = release_${ver} annGffTypeName = chr_patch_hapl_scaff.annotation else ifeq (${db},hg19) grcRefAssembly = GRCh37 - verBase = 41 + verBase = 42 + prevVer = 41lift37 ver = ${verBase}lift37 - prevVer = 38lift37 backmapTargetVer = 19 ftpReleaseSubdir = release_${verBase}/GRCh37_mapping gencodeOrg = Gencode_human annGffTypeName = annotation isBackmap = yes else $(error unimplement genome database: ${db}) endif # END EDIT THESE EACH RELEASE ifeq (${preRelease},yes) # pre-release baseUrl = rsync://ftp.ebi.ac.uk/pub/databases/havana/gencode_pre else # official release baseUrl = rsync://ftp.ebi.ac.uk/pub/databases/gencode endif rel = V${ver} releaseUrl = ${baseUrl}/${gencodeOrg}/${ftpReleaseSubdir} dataDir = data relDir = ${dataDir}/release_${ver} annotationGff = ${relDir}/gencode.v${ver}.${annGffTypeName}.gff3.gz pseudo2WayGff = ${relDir}/gencode.v${ver}.2wayconspseudos.gff3.gz polyAGff = ${relDir}/gencode.v${ver}.polyAs.gff3.gz +ifneq (${isBackmap},yes) transcriptRanks = ${relDir}/gencode.v${ver}.transcript_rankings.txt.gz + transcriptRanksOpt = --transcriptRanks=${transcriptRanks} +endif gencodeBinDir = ${HOME}/kent/src/hg/makeDb/outside/gencode/bin gencodeMakeTracks = ${gencodeBinDir}/gencodeMakeTracks gencodeMakeAttrs = ${gencodeBinDir}/gencodeMakeAttrs gencodeExonSupportToTable = ${gencodeBinDir}/gencodeExonSupportToTable gencodeGxfToGenePred = ${gencodeBinDir}/gencodeGxfToGenePred gencodePolyaGxfToGenePred = ${gencodeBinDir}/gencodePolyaGxfToGenePred gencodeGxfToAttrs = ${gencodeBinDir}/gencodeGxfToAttrs buildGencodeToUcscLift = ${HOME}/kent/src/hg/makeDb/outside/gencode/bin/buildGencodeToUcscLift gencodeBackMapMetadataIds = ${gencodeBinDir}/gencodeBackMapMetadataIds encodeAutoSqlDir = ${HOME}/kent/src/hg/lib/encode ## # intermediate data not loaded into tracks ## @@ -354,42 +357,42 @@ @mkdir -p $(dir $@) zcat $< | tawk '$$1!~/^ENSTR/' | sort -k 1,1 | ${metaFilterCmd} /dev/stdin > $@.${tmpExt} mv -f $@.${tmpExt} $@ ## # intermediate data for ensembl/havana, not loaded into databases ## ${gencodeGp}: ${annotationGff} ${gencodeToUcscChain} @mkdir -p $(dir $@) ${gencodeGxfToGenePred} ${db} ${annotationGff} ${gencodeToUcscChain} $@.${tmpExt} mv -f $@.${tmpExt} $@ touch $@ ${gencodeTsv}: ${annotationGff} @mkdir -p $(dir $@) - ${gencodeGxfToAttrs} --transcriptRanks=${transcriptRanks} ${annotationGff} $@.${tmpExt} + ${gencodeGxfToAttrs} ${transcriptRanksOpt} ${annotationGff} $@.${tmpExt} mv -f $@.${tmpExt} $@ ${targetGencodeTsv}: @mkdir -p $(dir $@) hgsql ${db} -e 'select * from wgEncodeGencodeAttrsV${backmapTargetVer}' > $@.${tmpExt} mv -f $@.${tmpExt} $@ # check attributes so code can be updated to handle new biotypes checkAttrs: ${annotationGff} - ${gencodeGxfToAttrs} --transcriptRanks=${transcriptRanks} ${annotationGff} /dev/null + ${gencodeGxfToAttrs} ${transcriptRanksOpt} ${annotationGff} /dev/null ## # load tables # browser commands use static tmp file name, so use lock file to serialize ## loadLock = flock load.lock loadTables: ${genePredExtTables:%=${loadedDir}/%.genePredExt.loaded} \ ${genePredTables:%=${loadedDir}/%.genePred.loaded} \ ${tabTables:%=${loadedDir}/%.tab.loaded} ${loadedDir}/%.genePredExt.loaded: ${tableDir}/%.gp @mkdir -p $(dir $@) ${loadLock} hgLoadGenePred -genePredExt ${db} $* $< touch $@