dc6ddf665daba39f0d4aac9c83e2d0764da8cf3b markd Fri Mar 12 20:48:48 2021 -0800 import of gencodeV37lift37 diff --git src/hg/makeDb/outside/gencode/gencodeLoad.mk src/hg/makeDb/outside/gencode/gencodeLoad.mk index 64785c6..b814e3e 100644 --- src/hg/makeDb/outside/gencode/gencodeLoad.mk +++ src/hg/makeDb/outside/gencode/gencodeLoad.mk @@ -19,115 +19,100 @@ .SECONDARY: host=$(shell hostname) ppid=$(shell echo $$PPID) tmpExt = ${host}.${ppid}.tmp SHELL = bash -e export SHELLOPTS=pipefail ## # programs, etc ## mach = $(shell uname -m) ## # Release info and files from Sanger. # BEGIN EDIT THESE EACH RELEASE -# -# - ensemblPrevVersion is use to get chrom name mappings for pre-release, -# as this doesn't change between release. ## preRelease = no #preRelease = yes -db = hg38 -#db = hg19 -#db = mm10 +#db = hg38 +db = hg19 #db = mm39 +#db = mm10 ifeq (${db},mm10) grcRefAssembly = GRCm38 verBase = M25 prevVer = M24 ver = ${verBase}lift37 gencodeOrg = Gencode_mouse ftpReleaseSubdir = release_${verBase}/GRCm38_mapping annGffTypeName = chr_patch_hapl_scaff.annotation - ensemblVer = 101_37 # only used to get genome chromsome name mappings, don't change - ensemblPrevVer = ${ensemblVer} # doesn't change - ensemblCDnaDb = mus_musculus_cdna_${ensemblPrevVer} isBackmap = yes else ifeq (${db},mm39) grcRefAssembly = GRCm39 ver = M26 prevVer = gencodeOrg = Gencode_mouse ftpReleaseSubdir = release_${ver} annGffTypeName = chr_patch_hapl_scaff.annotation - ensemblVer = 102_38 - ensemblPrevVer = - ensemblCDnaDb = mus_musculus_cdna_${ensemblPrevVer} else ifeq (${db},hg38) grcRefAssembly = GRCh38 ver = 37 prevVer = 36 gencodeOrg = Gencode_human ftpReleaseSubdir = release_${ver} annGffTypeName = chr_patch_hapl_scaff.annotation - ensemblVer = 103_38 - ensemblPrevVer = 102_38 - ensemblCDnaDb = homo_sapiens_cdna_${ensemblPrevVer} else ifeq (${db},hg19) grcRefAssembly = GRCh37 - verBase = 36 + verBase = 37 ver = ${verBase}lift37 + prevVer = 36lift37 backmapTargetVer = 19 ftpReleaseSubdir = release_${verBase}/GRCh37_mapping - prevVer = 35lift37 gencodeOrg = Gencode_human annGffTypeName = annotation - ensemblVer = 74_37 # only used to get genome chromsome name mappings, don't change - ensemblPrevVer = ${ensemblVer} # doesn't change - ensemblCDnaDb = homo_sapiens_cdna_${ensemblPrevVer} isBackmap = yes else $(error unimplement genome database: ${db}) endif # END EDIT THESE EACH RELEASE ifeq (${preRelease},yes) # pre-release baseUrl = ftp://ftp.ebi.ac.uk/pub/databases/havana/gencode_pre else # official release baseUrl = ftp://ftp.ebi.ac.uk/pub/databases/gencode endif rel = V${ver} releaseUrl = ${baseUrl}/${gencodeOrg}/${ftpReleaseSubdir} dataDir = data relDir = ${dataDir}/release_${ver} annotationGff = ${relDir}/gencode.v${ver}.${annGffTypeName}.gff3.gz pseudo2WayGff = ${relDir}/gencode.v${ver}.2wayconspseudos.gff3.gz polyAGff = ${relDir}/gencode.v${ver}.polyAs.gff3.gz ccdsBinDir = ~markd/compbio/ccds/ccds2/output/bin/$(mach)/opt gencodeMakeTracks = ${ccdsBinDir}/gencodeMakeTracks gencodeMakeAttrs = ${ccdsBinDir}/gencodeMakeAttrs gencodeExonSupportToTable = ${ccdsBinDir}/gencodeExonSupportToTable gencodeGxfToGenePred = ${ccdsBinDir}/gencodeGxfToGenePred gencodePolyaGxfToGenePred = ${ccdsBinDir}/gencodePolyaGxfToGenePred gencodeGxfToAttrs = ${ccdsBinDir}/gencodeGxfToAttrs -ensToUcscChromMap = ${ccdsBinDir}/ensToUcscChromMap +ensToUcscMkLift = ${HOME}/kent/src/hg/makeDb/outside/gencode/bin/ensToUcscMkLift gencodeBackMapMetadataIds = ${ccdsBinDir}/gencodeBackMapMetadataIds encodeAutoSqlDir = ${HOME}/kent/src/hg/lib/encode ## # intermediate data not loaded into tracks ## gencodeGp = ${dataDir}/gencode.gp gencodeTsv = ${dataDir}/gencode.tsv ensemblToUcscChain = ${dataDir}/ensemblToUcsc.chain # flag indicating fetch was done fetchDone = ${relDir}/done ## # track and table data @@ -296,31 +281,31 @@ gff3ToGenePred -allowMinimalGenes $< $@.${tmpExt} mv -f $@.${tmpExt} $@ ${tablePolyAGp}: ${polyAGff} ${ensemblToUcscChain} @mkdir -p $(dir $@) ${gencodePolyaGxfToGenePred} $< ${ensemblToUcscChain} $@.${tmpExt} mv -f $@.${tmpExt} $@ ${tableUniProtTab}: ${tableSwissProtMeta} ${tableTrEMBLMeta} ${gencodeTsv} @mkdir -p $(dir $@) ((${metaFilterCmdGz} ${tableSwissProtMeta} | tawk '{print $$0,"SwissProt"}') && (${metaFilterCmdGz} ${tableTrEMBLMeta} | tawk '{print $$0,"TrEMBL"}')) | sort -k 1,1 > $@.${tmpExt} mv -f $@.${tmpExt} $@ ${ensemblToUcscChain}: @mkdir -p $(dir $@) - ${ensToUcscChromMap} ${ensemblCDnaDb} ${grcRefAssembly} ${db} /dev/stdout | pslSwap stdin stdout | pslToChain stdin $@.${tmpExt} + ${ensToUcscMkLift} ${db} $@.${tmpExt} mv -f $@.${tmpExt} $@ # other tab files, just copy to name following convention to make load rules # work ifeq (${isBackmap}, yes) metaFilterCmd = ${gencodeBackMapMetadataIds} ${gencodeTsv} ${targetGencodeTsv} metaFilterCmdGz = ${metaFilterCmd} metaFilterDepend = ${gencodeTsv} ${targetGencodeTsv} else metaFilterCmd = cat metaFilterCmdGz = zcat metaFilterDepend = ${gencodeTsv} endif define copyMetadataTabGz mkdir -p $(dir $@)