a705056fed4ed3269b4dcbca18194b4f166a36c0 markd Fri Oct 15 17:11:33 2021 -0700 import of gencode V39 pre-release diff --git src/hg/makeDb/outside/gencode/gencodeLoad.mk src/hg/makeDb/outside/gencode/gencodeLoad.mk index a4580e1..0b2ba79 100644 --- src/hg/makeDb/outside/gencode/gencodeLoad.mk +++ src/hg/makeDb/outside/gencode/gencodeLoad.mk @@ -20,114 +20,114 @@ host=$(shell hostname) ppid=$(shell echo $$PPID) tmpExt = ${host}.${ppid}.tmp SHELL = bash -e export SHELLOPTS=pipefail ## # programs, etc ## mach = $(shell uname -m) ## # Release info and files from Sanger. # BEGIN EDIT THESE EACH RELEASE ## -preRelease = no -#preRelease = yes +#preRelease = no +preRelease = yes db = hg38 #db = hg19 #db = mm39 #db = mm10 ifeq (${db},mm10) grcRefAssembly = GRCm38 verBase = M25 prevVer = M24 backmapTargetVer = M25 ver = ${verBase}lift37 gencodeOrg = Gencode_mouse ftpReleaseSubdir = release_${verBase}/GRCm38_mapping annGffTypeName = chr_patch_hapl_scaff.annotation isBackmap = yes asmReptUrl = https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/635/GCF_000001635.26_GRCm38.p6/GCF_000001635.26_GRCm38.p6_assembly_report.txt else ifeq (${db},mm39) grcRefAssembly = GRCm39 ver = M27 prevVer = M26 gencodeOrg = Gencode_mouse ftpReleaseSubdir = release_${ver} annGffTypeName = chr_patch_hapl_scaff.annotation asmReptUrl = https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/635/GCF_000001635.27_GRCm39/GCF_000001635.27_GRCm39_assembly_report.txt else ifeq (${db},hg38) grcRefAssembly = GRCh38 - ver = 38 - prevVer = 37 + ver = 39 + prevVer = 38 gencodeOrg = Gencode_human ftpReleaseSubdir = release_${ver} annGffTypeName = chr_patch_hapl_scaff.annotation asmReptUrl = https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_assembly_report.txt else ifeq (${db},hg19) grcRefAssembly = GRCh37 verBase = 38 ver = ${verBase}lift37 prevVer = 37lift37 backmapTargetVer = 19 ftpReleaseSubdir = release_${verBase}/GRCh37_mapping gencodeOrg = Gencode_human annGffTypeName = annotation isBackmap = yes asmReptUrl = https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_assembly_report.txt else $(error unimplement genome database: ${db}) endif # END EDIT THESE EACH RELEASE ifeq (${preRelease},yes) # pre-release - baseUrl = ftp://ftp.ebi.ac.uk/pub/databases/havana/gencode_pre + baseUrl = rsync://ftp.ebi.ac.uk/pub/databases/havana/gencode_pre else # official release - baseUrl = ftp://ftp.ebi.ac.uk/pub/databases/gencode + baseUrl = rsync://ftp.ebi.ac.uk/pub/databases/gencode endif rel = V${ver} releaseUrl = ${baseUrl}/${gencodeOrg}/${ftpReleaseSubdir} dataDir = data relDir = ${dataDir}/release_${ver} annotationGff = ${relDir}/gencode.v${ver}.${annGffTypeName}.gff3.gz pseudo2WayGff = ${relDir}/gencode.v${ver}.2wayconspseudos.gff3.gz polyAGff = ${relDir}/gencode.v${ver}.polyAs.gff3.gz ccdsBinDir = ~markd/compbio/ccds/ccds2/output/bin/$(mach)/opt gencodeMakeTracks = ${ccdsBinDir}/gencodeMakeTracks gencodeMakeAttrs = ${ccdsBinDir}/gencodeMakeAttrs gencodeExonSupportToTable = ${ccdsBinDir}/gencodeExonSupportToTable gencodeGxfToGenePred = ${ccdsBinDir}/gencodeGxfToGenePred gencodePolyaGxfToGenePred = ${ccdsBinDir}/gencodePolyaGxfToGenePred gencodeGxfToAttrs = ${ccdsBinDir}/gencodeGxfToAttrs buildGencodeToUcscLift = ${HOME}/kent/src/hg/makeDb/outside/gencode/bin/buildGencodeToUcscLift gencodeBackMapMetadataIds = ${ccdsBinDir}/gencodeBackMapMetadataIds encodeAutoSqlDir = ${HOME}/kent/src/hg/lib/encode ## # intermediate data not loaded into tracks ## gencodeGp = ${dataDir}/gencode.gp gencodeTsv = ${dataDir}/gencode.tsv gencodeToUcscChain = ${dataDir}/gencodeToUcsc.chain -asmRept = ${dataDir}/$(notdir asmReptUrl) +asmRept = ${dataDir}/$(notdir ${asmReptUrl}) # flag indicating fetch was done fetchDone = ${relDir}/done ## # track and table data ## tableDir = tables tablePre = wgEncodeGencode # subset track and pattern for generate genePred and track names for each subset # obtained from gencode.v*.annotation.level_1_2.gtf, gencode.v*.annotation.level_3.gtf tableBasic = ${tablePre}Basic${rel} tableBasicGp = ${tableDir}/${tableBasic}.gp @@ -227,33 +227,31 @@ # directory for flags indicating tables were loaded loadedDir = loaded # directory for output and flags for sanity checks checkDir = check all: fetch mkTables loadTables checkSanity cmpRelease listTables ## # fetch release, this doesn't get subdirectories so as not to copy the lift releases ## fetch: ${fetchDone} ${fetchDone}: - @mkdir -p $(dir $@) ${dataDir} - wget -nv --cut-dirs=4 --directory-prefix=${relDir} -np "${releaseUrl}/*" - chmod a-w ${relDir}/* + rsync -a --include='gencode.*' --exclude='*' '${releaseUrl}/' ${relDir} touch $@ ## # dependencies for files from release ## ${annotationGff}: ${fetchDone} ${pseudo2WayGff}: ${fetchDone} ${polyAGff}: ${fetchDone} ${tableGeneSourceMeta}: ${fetchDone} ${tableTranscriptSourceMeta}: ${fetchDone} ${tableTranscriptSupportMeta}: ${fetchDone} ${tableExonSupportMeta}: ${fetchDone} ${tableGeneSymbolMeta}: ${fetchDone} ${tablePdbMeta}: ${fetchDone} ${tablePubMedMeta}: ${fetchDone} @@ -292,31 +290,31 @@ ${gencodePolyaGxfToGenePred} $< ${gencodeToUcscChain} $@.${tmpExt} mv -f $@.${tmpExt} $@ ${tableUniProtTab}: ${tableSwissProtMeta} ${tableTrEMBLMeta} ${gencodeTsv} @mkdir -p $(dir $@) ((${metaFilterCmdGz} ${tableSwissProtMeta} | tawk '{print $$0,"SwissProt"}') && (${metaFilterCmdGz} ${tableTrEMBLMeta} | tawk '{print $$0,"TrEMBL"}')) | sort -k 1,1 > $@.${tmpExt} mv -f $@.${tmpExt} $@ ${gencodeToUcscChain}: ${asmRept} @mkdir -p $(dir $@) ${buildGencodeToUcscLift} ${db} ${asmRept} $@.${tmpExt} mv -f $@.${tmpExt} $@ ${asmRept}: @mkdir -p $(dir $@) - wget -nv -O $@.${tmpExt} ${asmReptUrl} + wget -nv -o /dev/stderr -O $@.${tmpExt} ${asmReptUrl} mv -f $@.${tmpExt} $@ # other tab files, just copy to name following convention to make load rules # work ifeq (${isBackmap}, yes) metaFilterCmd = ${gencodeBackMapMetadataIds} ${gencodeTsv} ${targetGencodeTsv} metaFilterCmdGz = ${metaFilterCmd} metaFilterDepend = ${gencodeTsv} ${targetGencodeTsv} else metaFilterCmd = cat metaFilterCmdGz = zcat metaFilterDepend = ${gencodeTsv} endif define copyMetadataTabGz mkdir -p $(dir $@)