f8e605d52fb7a05d5c5b19d3d6a14e7e554c0a0a markd Sat Sep 25 13:59:34 2021 -0700 change ensembl to ucsc names (programs, files, variables) to be gencode to ucsc, as gencode does not actually use ensembl names; it uses ENCODE names, which use UCSC names for the primary assembly and GenBank accessions for other sequences diff --git src/hg/makeDb/outside/gencode/gencodeHgGeneLoad.mk src/hg/makeDb/outside/gencode/gencodeHgGeneLoad.mk index 5a7711d..873d4c0 100644 --- src/hg/makeDb/outside/gencode/gencodeHgGeneLoad.mk +++ src/hg/makeDb/outside/gencode/gencodeHgGeneLoad.mk @@ -90,38 +90,38 @@ endif rel = V${ver} releaseUrl = ${baseUrl}/${gencodeOrg}/${ftpReleaseSubdir} dataDir = data relRootDir = release relDir = ${relRootDir}/release_${ver} annotationGff = ${relDir}/gencode.v${ver}.${annGffTypeName}.gff3.gz kentDir = ${HOME}/kent/src gencodeBinDir = ${kentDir}/hg/makeDb/outside/gencode/bin autoSqlDir = ${kentDir}/hg/lib gencodeExonSupportToTable = ${gencodeBinDir}/gencodeExonSupportToTable gencodeGxfToGenePred = ${gencodeBinDir}/gencodeGxfToGenePred gencodeGxfToAttrs = ${gencodeBinDir}/gencodeGxfToAttrs -ensToUcscMkLift = ${gencodeBinDir}/ensToUcscMkLift +buildGencodeToUcscLift = ${gencodeBinDir}/buildGencodeToUcscLift gencodeBackMapMetadataIds = ${gencodeBinDir}/gencodeBackMapMetadataIds ## # intermediate data not loaded into tracks ## gencodeAttrsTsv = ${dataDir}/gencodeAttrs.tsv -ensemblToUcscChain = ${dataDir}/ensemblToUcsc.chain +gencodeToUcscChain = ${dataDir}/gencodeToUcsc.chain # flag indicating fetch was done fetchDone = ${relDir}/done ## # track and table data ## tableDir = tables tablePre = gencode tableAnnot = ${tablePre}Annot${rel} tableAnnotGp = ${tableDir}/${tableAnnot}.gp tableAttrs = ${tablePre}Attrs${rel} tableAttrsTab = ${tableDir}/${tableAttrs}.tab @@ -225,76 +225,76 @@ ${tableToGeneSymbolMeta}: ${fetchDone} ${tableToPdbMeta}: ${fetchDone} ${tableToPubMedMeta}: ${fetchDone} ${tableToRefSeqMeta}: ${fetchDone} ${tableSwissProtMeta}: ${fetchDone} ${tableTrEMBLMeta}: 
${fetchDone} ${tableAnnotationRemarkMeta}: ${fetchDone} ${tableToEntrezGeneMeta}: ${fetchDone} ## # primary table files ## mkTables: ${genePredExtTables:%=${tableDir}/%.gp} \ ${tabTables:%=${tableDir}/%.tab} -${tableAnnotGp}: ${annotationGff} ${ensemblToUcscChain} +${tableAnnotGp}: ${annotationGff} ${gencodeToUcscChain} @mkdir -p $(dir $@) - ${gencodeGxfToGenePred} ${db} ${annotationGff} ${ensemblToUcscChain} $@.${tmpExt} + ${gencodeGxfToGenePred} ${db} ${annotationGff} ${gencodeToUcscChain} $@.${tmpExt} mv -f $@.${tmpExt} $@ ${tableToUniProtTab}: ${tableSwissProtMeta} ${tableTrEMBLMeta} ${gencodeAttrsTsv} @mkdir -p $(dir $@) ((${metaFilterCmdGz} ${tableSwissProtMeta} | tawk '{print $$0,"SwissProt"}') && (${metaFilterCmdGz} ${tableTrEMBLMeta} | tawk '{print $$0,"TrEMBL"}')) | sort -k 1,1 > $@.${tmpExt} mv -f $@.${tmpExt} $@ -${ensemblToUcscChain}: +${gencodeToUcscChain}: @mkdir -p $(dir $@) - ${ensToUcscMkLift} ${db} $@.${tmpExt} + ${buildGencodeToUcscLift} ${db} $@.${tmpExt} mv -f $@.${tmpExt} $@ # other tab files, just copy to name following convention to make load rules # work ifeq (${isBackmap}, yes) metaFilterCmd = ${gencodeBackMapMetadataIds} ${gencodeAttrsTsv} ${targetGencodeTsv} metaFilterCmdGz = ${metaFilterCmd} metaFilterDepend = ${gencodeAttrsTsv} ${targetGencodeTsv} else metaFilterCmd = cat metaFilterCmdGz = zcat metaFilterDepend = endif define copyMetadataTabGz mkdir -p $(dir $@) ${metaFilterCmdGz} $< > $@.${tmpExt} mv -f $@.${tmpExt} $@ endef define copyMetadataTab mkdir -p $(dir $@) ${metaFilterCmd} $< > $@.${tmpExt} mv -f $@.${tmpExt} $@ endef ${tableGeneSourceTab}: ${tableGeneSourceMeta} ${metaFilterDepend} ${copyMetadataTabGz} ${tableTranscriptSourceTab}: ${tableTranscriptSourceMeta} ${metaFilterDepend} ${copyMetadataTabGz} ${tableTranscriptSupportTab}: ${tableTranscriptSupportMeta} ${metaFilterDepend} ${copyMetadataTabGz} -${tableExonSupportTab}: ${tableExonSupportMeta} ${ensemblToUcscChain} ${metaFilterDepend} +${tableExonSupportTab}: 
${tableExonSupportMeta} ${gencodeToUcscChain} ${metaFilterDepend} @mkdir -p $(dir $@) - ${gencodeExonSupportToTable} ${tableExonSupportMeta} ${ensemblToUcscChain} $@.${tmpExt} + ${gencodeExonSupportToTable} ${tableExonSupportMeta} ${gencodeToUcscChain} $@.${tmpExt} mv -f $@.${tmpExt} $@ ${tableToGeneSymbolTab}: ${tableToGeneSymbolMeta} ${metaFilterDepend} ${copyMetadataTabGz} ${tableToPdbTab}: ${tableToPdbMeta} ${metaFilterDepend} ${copyMetadataTabGz} ${tableToPubMedTab}: ${tableToPubMedMeta} ${metaFilterDepend} ${copyMetadataTabGz} ${tableToRefSeqTab}: ${tableToRefSeqMeta} ${metaFilterDepend} ${copyMetadataTabGz} # convert to zero-based, 1/2 open ${tableAnnotationRemarkTab}: ${tableAnnotationRemarkMeta} ${metaFilterDepend} @mkdir -p $(dir $@) ${metaFilterCmdGz} $< | tawk '{print $$1,gensub("\\\\n|\\\\","","g",$$2)}' | sort -k 1,1 > $@.${tmpExt} mv -f $@.${tmpExt} $@