f8e605d52fb7a05d5c5b19d3d6a14e7e554c0a0a markd Sat Sep 25 13:59:34 2021 -0700 change ensembl to ucsc names (programs, files, variables) to be gencode to ucsc, as gencode does not actually use ensembl names, it uses ENCODE, which uses UCSC for primary assembly and GenBank accessions for other sequences diff --git src/hg/makeDb/outside/gencode/gencodeLoad.mk src/hg/makeDb/outside/gencode/gencodeLoad.mk index 54b3387..1bbcb21 100644 --- src/hg/makeDb/outside/gencode/gencodeLoad.mk +++ src/hg/makeDb/outside/gencode/gencodeLoad.mk @@ -89,40 +89,40 @@ rel = V${ver} releaseUrl = ${baseUrl}/${gencodeOrg}/${ftpReleaseSubdir} dataDir = data relDir = ${dataDir}/release_${ver} annotationGff = ${relDir}/gencode.v${ver}.${annGffTypeName}.gff3.gz pseudo2WayGff = ${relDir}/gencode.v${ver}.2wayconspseudos.gff3.gz polyAGff = ${relDir}/gencode.v${ver}.polyAs.gff3.gz ccdsBinDir = ~markd/compbio/ccds/ccds2/output/bin/$(mach)/opt gencodeMakeTracks = ${ccdsBinDir}/gencodeMakeTracks gencodeMakeAttrs = ${ccdsBinDir}/gencodeMakeAttrs gencodeExonSupportToTable = ${ccdsBinDir}/gencodeExonSupportToTable gencodeGxfToGenePred = ${ccdsBinDir}/gencodeGxfToGenePred gencodePolyaGxfToGenePred = ${ccdsBinDir}/gencodePolyaGxfToGenePred gencodeGxfToAttrs = ${ccdsBinDir}/gencodeGxfToAttrs -ensToUcscMkLift = ${HOME}/kent/src/hg/makeDb/outside/gencode/bin/ensToUcscMkLift +buildGencodeToUcscLift = ${HOME}/kent/src/hg/makeDb/outside/gencode/bin/buildGencodeToUcscLift gencodeBackMapMetadataIds = ${ccdsBinDir}/gencodeBackMapMetadataIds encodeAutoSqlDir = ${HOME}/kent/src/hg/lib/encode ## # intermediate data not loaded into tracks ## gencodeGp = ${dataDir}/gencode.gp gencodeTsv = ${dataDir}/gencode.tsv -ensemblToUcscChain = ${dataDir}/ensemblToUcsc.chain +gencodeToUcscChain = ${dataDir}/gencodeToUcsc.chain # flag indicating fetch was done fetchDone = ${relDir}/done ## # track and table data ## tableDir = tables tablePre = wgEncodeGencode # subset track and pattern for generate genePred and track names for each subset 
# obtained from gencode.v*.annotation.level_1_2.gtf, gencode.v*.annotation.level_3.gtf tableBasic = ${tablePre}Basic${rel} tableBasicGp = ${tableDir}/${tableBasic}.gp @@ -270,76 +270,76 @@ @mkdir -p $(dir $@) ${gencodeMakeTracks} $$(echo $* | tr A-Z a-z) ${gencodeGp} ${gencodeTsv} $@.${tmpExt} mv -f $@.${tmpExt} $@ ${tableTagTab}: ${tableAttrsTab} ${tableAttrsTab}: ${gencodeGp} ${gencodeTsv} @mkdir -p $(dir $@) ${gencodeMakeAttrs} ${gencodeGp} ${gencodeTsv} $@.${tmpExt} ${tableTagTab} mv -f $@.${tmpExt} $@ ${table2WayConsPseudoGp}: ${pseudo2WayGff} @mkdir -p $(dir $@) gff3ToGenePred -allowMinimalGenes $< $@.${tmpExt} mv -f $@.${tmpExt} $@ -${tablePolyAGp}: ${polyAGff} ${ensemblToUcscChain} +${tablePolyAGp}: ${polyAGff} ${gencodeToUcscChain} @mkdir -p $(dir $@) - ${gencodePolyaGxfToGenePred} $< ${ensemblToUcscChain} $@.${tmpExt} + ${gencodePolyaGxfToGenePred} $< ${gencodeToUcscChain} $@.${tmpExt} mv -f $@.${tmpExt} $@ ${tableUniProtTab}: ${tableSwissProtMeta} ${tableTrEMBLMeta} ${gencodeTsv} @mkdir -p $(dir $@) ((${metaFilterCmdGz} ${tableSwissProtMeta} | tawk '{print $$0,"SwissProt"}') && (${metaFilterCmdGz} ${tableTrEMBLMeta} | tawk '{print $$0,"TrEMBL"}')) | sort -k 1,1 > $@.${tmpExt} mv -f $@.${tmpExt} $@ -${ensemblToUcscChain}: +${gencodeToUcscChain}: @mkdir -p $(dir $@) - ${ensToUcscMkLift} ${db} $@.${tmpExt} + ${buildGencodeToUcscLift} ${db} $@.${tmpExt} mv -f $@.${tmpExt} $@ # other tab files, just copy to name following convention to make load rules # work ifeq (${isBackmap}, yes) metaFilterCmd = ${gencodeBackMapMetadataIds} ${gencodeTsv} ${targetGencodeTsv} metaFilterCmdGz = ${metaFilterCmd} metaFilterDepend = ${gencodeTsv} ${targetGencodeTsv} else metaFilterCmd = cat metaFilterCmdGz = zcat metaFilterDepend = ${gencodeTsv} endif define copyMetadataTabGz mkdir -p $(dir $@) ${metaFilterCmdGz} $< > $@.${tmpExt} mv -f $@.${tmpExt} $@ endef define copyMetadataTab mkdir -p $(dir $@) ${metaFilterCmd} $< > $@.${tmpExt} mv -f $@.${tmpExt} $@ endef 
${tableGeneSourceTab}: ${tableGeneSourceMeta} ${metaFilterDepend} ${copyMetadataTabGz} ${tableTranscriptSourceTab}: ${tableTranscriptSourceMeta} ${metaFilterDepend} ${copyMetadataTabGz} ${tableTranscriptSupportTab}: ${tableTranscriptSupportMeta} ${metaFilterDepend} ${copyMetadataTabGz} -${tableExonSupportTab}: ${tableExonSupportMeta} ${ensemblToUcscChain} ${metaFilterDepend} +${tableExonSupportTab}: ${tableExonSupportMeta} ${gencodeToUcscChain} ${metaFilterDepend} @mkdir -p $(dir $@) - ${gencodeExonSupportToTable} ${tableExonSupportMeta} ${ensemblToUcscChain} $@.${tmpExt} + ${gencodeExonSupportToTable} ${tableExonSupportMeta} ${gencodeToUcscChain} $@.${tmpExt} mv -f $@.${tmpExt} $@ ${tableGeneSymbolTab}: ${tableGeneSymbolMeta} ${metaFilterDepend} ${copyMetadataTabGz} ${tablePdbTab}: ${tablePdbMeta} ${metaFilterDepend} ${copyMetadataTabGz} ${tablePubMedTab}: ${tablePubMedMeta} ${metaFilterDepend} ${copyMetadataTabGz} ${tableRefSeqTab}: ${tableRefSeqMeta} ${metaFilterDepend} ${copyMetadataTabGz} ${tableTranscriptionSupportLevelTab}: ${tableTranscriptionSupportLevelData} mkdir -p $(dir $@) cp $< $@.${tmpExt} mv -f $@.${tmpExt} $@ @@ -349,33 +349,33 @@ zcat $< | tawk '{print $$1,$$2-1,$$3,$$4,$$5-1,$$6,$$7,$$8}' | sort -k 4,4 -k 5,5n > $@.${tmpExt} mv -f $@.${tmpExt} $@ ${tableAnnotationRemarkTab}: ${tableAnnotationRemarkMeta} ${metaFilterDepend} @mkdir -p $(dir $@) ${metaFilterCmdGz} $< | tawk '{print $$1,gensub("\\\\n|\\\\","","g",$$2)}' | sort -k 1,1 > $@.${tmpExt} mv -f $@.${tmpExt} $@ # drop ENSTR entries that are a hack to support PAR sequences in GTF ${tableEntrezGeneTab}: ${tableEntrezGeneMeta} ${metaFilterDepend} @mkdir -p $(dir $@) zcat $< | tawk '$$1!~/^ENSTR/' | sort -k 1,1 | ${metaFilterCmd} /dev/stdin > $@.${tmpExt} mv -f $@.${tmpExt} $@ ## # intermediate data for ensembl/havana, not loaded into databases ## -${gencodeGp}: ${annotationGff} ${ensemblToUcscChain} +${gencodeGp}: ${annotationGff} ${gencodeToUcscChain} @mkdir -p $(dir $@) - 
${gencodeGxfToGenePred} ${db} ${annotationGff} ${ensemblToUcscChain} $@.${tmpExt} + ${gencodeGxfToGenePred} ${db} ${annotationGff} ${gencodeToUcscChain} $@.${tmpExt} mv -f $@.${tmpExt} $@ ${tableTranscriptionSupportLevelData}: ${metaFilterDepend} touch $@ ${gencodeTsv}: ${annotationGff} @mkdir -p $(dir $@) ${gencodeGxfToAttrs} --keepGoing ${annotationGff} $@.${tmpExt} --tslTabOut=${tableTranscriptionSupportLevelData}.${tmpExt} mv -f ${tableTranscriptionSupportLevelData}.${tmpExt} ${tableTranscriptionSupportLevelData} mv -f $@.${tmpExt} $@ ${targetGencodeTsv}: @mkdir -p $(dir $@) hgsql ${db} -e 'select * from wgEncodeGencodeAttrsV${backmapTargetVer}' > $@.${tmpExt} mv -f $@.${tmpExt} $@