bd0945ac583598387ea83c8b3c20593bd934b06a
markd
Sat Sep 25 16:29:23 2021 -0700
reload V38 with new changes
diff --git src/hg/makeDb/outside/gencode/gencodeLoad.mk src/hg/makeDb/outside/gencode/gencodeLoad.mk
index 1bbcb21..a4580e1 100644
--- src/hg/makeDb/outside/gencode/gencodeLoad.mk
+++ src/hg/makeDb/outside/gencode/gencodeLoad.mk
@@ -36,54 +36,59 @@
 #preRelease = yes
 db = hg38
 #db = hg19
 #db = mm39
 #db = mm10
 ifeq (${db},mm10)
     grcRefAssembly = GRCm38
     verBase = M25
     prevVer = M24
     backmapTargetVer = M25
     ver = ${verBase}lift37
     gencodeOrg = Gencode_mouse
     ftpReleaseSubdir = release_${verBase}/GRCm38_mapping
     annGffTypeName = chr_patch_hapl_scaff.annotation
     isBackmap = yes
+    asmReptUrl = https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/635/GCF_000001635.26_GRCm38.p6/GCF_000001635.26_GRCm38.p6_assembly_report.txt
 else ifeq (${db},mm39)
     grcRefAssembly = GRCm39
     ver = M27
     prevVer = M26
     gencodeOrg = Gencode_mouse
     ftpReleaseSubdir = release_${ver}
     annGffTypeName = chr_patch_hapl_scaff.annotation
+    asmReptUrl = https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/635/GCF_000001635.27_GRCm39/GCF_000001635.27_GRCm39_assembly_report.txt
 else ifeq (${db},hg38)
     grcRefAssembly = GRCh38
     ver = 38
     prevVer = 37
     gencodeOrg = Gencode_human
     ftpReleaseSubdir = release_${ver}
     annGffTypeName = chr_patch_hapl_scaff.annotation
+    asmReptUrl = https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_assembly_report.txt
 else ifeq (${db},hg19)
     grcRefAssembly = GRCh37
     verBase = 38
     ver = ${verBase}lift37
     prevVer = 37lift37
     backmapTargetVer = 19
     ftpReleaseSubdir = release_${verBase}/GRCh37_mapping
     gencodeOrg = Gencode_human
     annGffTypeName = annotation
     isBackmap = yes
+    # NOTE(review): same GRCh38.p13 report URL as the hg38 branch although grcRefAssembly here is GRCh37 -- confirm this is intended
+    asmReptUrl = https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_assembly_report.txt
 else
     $(error unimplement genome database: ${db})
 endif
 # END EDIT THESE EACH RELEASE
 ifeq (${preRelease},yes)
     # pre-release
     baseUrl = ftp://ftp.ebi.ac.uk/pub/databases/havana/gencode_pre
 else
     # official release
     baseUrl = ftp://ftp.ebi.ac.uk/pub/databases/gencode
 endif
 rel = V${ver}
@@ -99,30 +104,32 @@
 gencodeMakeAttrs = ${ccdsBinDir}/gencodeMakeAttrs
 gencodeExonSupportToTable = ${ccdsBinDir}/gencodeExonSupportToTable
 gencodeGxfToGenePred = ${ccdsBinDir}/gencodeGxfToGenePred
 gencodePolyaGxfToGenePred = ${ccdsBinDir}/gencodePolyaGxfToGenePred
 gencodeGxfToAttrs = ${ccdsBinDir}/gencodeGxfToAttrs
 buildGencodeToUcscLift = ${HOME}/kent/src/hg/makeDb/outside/gencode/bin/buildGencodeToUcscLift
 gencodeBackMapMetadataIds = ${ccdsBinDir}/gencodeBackMapMetadataIds
 encodeAutoSqlDir = ${HOME}/kent/src/hg/lib/encode
 ##
 # intermediate data not loaded into tracks
 ##
 gencodeGp = ${dataDir}/gencode.gp
 gencodeTsv = ${dataDir}/gencode.tsv
 gencodeToUcscChain = ${dataDir}/gencodeToUcsc.chain
+# local copy of the assembly report fetched from asmReptUrl, stored under the URL's basename
+asmRept = ${dataDir}/$(notdir ${asmReptUrl})
 # flag indicating fetch was done
 fetchDone = ${relDir}/done
 ##
 # track and table data
 ##
 tableDir = tables
 tablePre = wgEncodeGencode
 # subset track and pattern for generate genePred and track names for each subset
 # obtained from gencode.v*.annotation.level_1_2.gtf, gencode.v*.annotation.level_3.gtf
 tableBasic = ${tablePre}Basic${rel}
 tableBasicGp = ${tableDir}/${tableBasic}.gp
@@ -280,33 +287,39 @@
 ${table2WayConsPseudoGp}: ${pseudo2WayGff}
 	@mkdir -p $(dir $@)
 	gff3ToGenePred -allowMinimalGenes $< $@.${tmpExt}
 	mv -f $@.${tmpExt} $@
 ${tablePolyAGp}: ${polyAGff} ${gencodeToUcscChain}
 	@mkdir -p $(dir $@)
 	${gencodePolyaGxfToGenePred} $< ${gencodeToUcscChain} $@.${tmpExt}
 	mv -f $@.${tmpExt} $@
 ${tableUniProtTab}: ${tableSwissProtMeta} ${tableTrEMBLMeta} ${gencodeTsv}
 	@mkdir -p $(dir $@)
 	((${metaFilterCmdGz} ${tableSwissProtMeta} | tawk '{print $$0,"SwissProt"}') && (${metaFilterCmdGz} ${tableTrEMBLMeta} | tawk '{print $$0,"TrEMBL"}')) | sort -k 1,1 > $@.${tmpExt}
 	mv -f $@.${tmpExt} $@
-${gencodeToUcscChain}:
+${gencodeToUcscChain}: ${asmRept}
 	@mkdir -p $(dir $@)
-	${buildGencodeToUcscLift} ${db} $@.${tmpExt}
+	${buildGencodeToUcscLift} ${db} ${asmRept} $@.${tmpExt}
+	mv -f $@.${tmpExt} $@
+
+# fetch the assembly report; wget writes to a temporary name that is renamed afterwards, so a failed download never leaves a partial file under the final name
+${asmRept}:
+	@mkdir -p $(dir $@)
+	wget -nv -O $@.${tmpExt} ${asmReptUrl}
 	mv -f $@.${tmpExt} $@
 # other tab files, just copy to name following convention to make load rules
 # work
 ifeq (${isBackmap}, yes)
     metaFilterCmd = ${gencodeBackMapMetadataIds} ${gencodeTsv} ${targetGencodeTsv}
     metaFilterCmdGz = ${metaFilterCmd}
     metaFilterDepend = ${gencodeTsv} ${targetGencodeTsv}
 else
     metaFilterCmd = cat
     metaFilterCmdGz = zcat
     metaFilterDepend = ${gencodeTsv}
 endif
 define copyMetadataTabGz
 mkdir -p $(dir $@)