e863249eae18325e041c2948722b98cb5a5643e4 markd Fri Nov 4 14:47:29 2022 -0700 Dropped GENCODE 2-way pseudogenes from latest releases. These should have never been included diff --git src/hg/makeDb/outside/gencode/gencodeLoad.mk src/hg/makeDb/outside/gencode/gencodeLoad.mk index b0b77d8..49241d6 100644 --- src/hg/makeDb/outside/gencode/gencodeLoad.mk +++ src/hg/makeDb/outside/gencode/gencodeLoad.mk @@ -22,32 +22,32 @@ tmpExt = ${host}.${ppid}.tmp SHELL = bash -e export SHELLOPTS=pipefail ## # programs, etc ## mach = $(shell uname -m) ## # Release info and files from Sanger. # BEGIN EDIT THESE EACH RELEASE ## preRelease = no #preRelease = yes -#db = hg38 -db = hg19 +db = hg38 +#db = hg19 #db = mm39 #db = mm10 ifeq (${db},mm10) grcRefAssembly = GRCm38 verBase = M25 prevVer = M24 backmapTargetVer = M25 ver = ${verBase}lift37 gencodeOrg = Gencode_mouse ftpReleaseSubdir = release_${verBase}/GRCm38_mapping annGffTypeName = chr_patch_hapl_scaff.annotation isBackmap = yes else ifeq (${db},mm39) grcRefAssembly = GRCm39 ver = M31 @@ -79,31 +79,30 @@ ifeq (${preRelease},yes) # pre-release baseUrl = rsync://ftp.ebi.ac.uk/pub/databases/havana/gencode_pre else # official release baseUrl = rsync://ftp.ebi.ac.uk/pub/databases/gencode endif rel = V${ver} releaseUrl = ${baseUrl}/${gencodeOrg}/${ftpReleaseSubdir} dataDir = data relDir = ${dataDir}/release_${ver} annotationGff = ${relDir}/gencode.v${ver}.${annGffTypeName}.gff3.gz -pseudo2WayGff = ${relDir}/gencode.v${ver}.2wayconspseudos.gff3.gz polyAGff = ${relDir}/gencode.v${ver}.polyAs.gff3.gz ifneq (${isBackmap},yes) transcriptRanks = ${relDir}/gencode.v${ver}.transcript_rankings.txt.gz transcriptRanksOpt = --transcriptRanks=${transcriptRanks} endif gencodeBinDir = ${HOME}/kent/src/hg/makeDb/outside/gencode/bin gencodeMakeTracks = ${gencodeBinDir}/gencodeMakeTracks gencodeMakeAttrs = ${gencodeBinDir}/gencodeMakeAttrs gencodeExonSupportToTable = ${gencodeBinDir}/gencodeExonSupportToTable gencodeGxfToGenePred = ${gencodeBinDir}/gencodeGxfToGenePred gencodePolyaGxfToGenePred = ${gencodeBinDir}/gencodePolyaGxfToGenePred gencodeGxfToAttrs = ${gencodeBinDir}/gencodeGxfToAttrs buildGencodeToUcscLift = ${HOME}/kent/src/hg/makeDb/outside/gencode/bin/buildGencodeToUcscLift gencodeBackMapMetadataIds = ${gencodeBinDir}/gencodeBackMapMetadataIds @@ -130,33 +129,30 @@ tableBasic = ${tablePre}Basic${rel} tableBasicGp = ${tableDir}/${tableBasic}.gp tableComp = ${tablePre}Comp${rel} tableCompGp = ${tableDir}/${tableComp}.gp tablePseudo = ${tablePre}PseudoGene${rel} tablePseudoGp = ${tableDir}/${tablePseudo}.gp tableAttrs = ${tablePre}Attrs${rel} tableAttrsTab = ${tableDir}/${tableAttrs}.tab tableTag = ${tablePre}Tag${rel} tableTagTab = ${tableDir}/${tableTag}.tab -# obtained from gencode.v*.2wayconspseudos.GRCh37.gtf -table2WayConsPseudo = ${tablePre}2wayConsPseudo${rel} -table2WayConsPseudoGp = ${tableDir}/${table2WayConsPseudo}.gp # obtained from gencode.v*.polyAs.gtf tablePolyA = ${tablePre}Polya${rel} tablePolyAGp = ${tableDir}/${tablePolyA}.gp # other metadata tableGeneSourceMeta = ${relDir}/gencode.v${ver}.metadata.Gene_source.gz tableGeneSource = ${tablePre}GeneSource${rel} tableGeneSourceTab = ${tableDir}/${tableGeneSource}.tab tableTranscriptSourceMeta = ${relDir}/gencode.v${ver}.metadata.Transcript_source.gz tableTranscriptSource = ${tablePre}TranscriptSource${rel} tableTranscriptSourceTab = ${tableDir}/${tableTranscriptSource}.tab tableTranscriptSupportMeta = ${relDir}/gencode.v${ver}.metadata.Transcript_supporting_feature.gz @@ -205,59 +201,57 @@ tableEntrezGeneTab = ${tableDir}/${tableEntrezGene}.tab tableTranscriptionSupportLevel = ${tablePre}TranscriptionSupportLevel${rel} tableTranscriptionSupportLevelTab = ${tableDir}/${tableTranscriptionSupportLevel}.tab genePredExtTables = ${tableBasic} ${tableComp} ${tablePseudo} genePredTables = tabTables = ${tableAttrs} ${tableTag} ${tableGeneSource} \ ${tableTranscriptSource} ${tableTranscriptSupport} \ ${tableGeneSymbol} ${tablePdb} ${tablePubMed} ${tableRefSeq} ${tableUniProt} \ ${tableAnnotationRemark} ${tableEntrezGene} ${tableTranscriptionSupportLevel} ifeq (${isBackmap}, yes) targetGencodeTsv = ${dataDir}/target-gencode.tsv else # these are not included in backmap releases - genePredTables = ${table2WayConsPseudo} genePredExtTables += ${tablePolyA} tabTables += ${tableExonSupport} endif allTables = ${genePredExtTables} ${genePredTables} ${tabTables} # directory for flags indicating tables were loaded loadedDir = loaded # directory for output and flags for sanity checks checkDir = check all: fetch mkTables loadTables checkSanity cmpRelease listTables ## # fetch release, this doesn't get subdirectories so as not to copy the lift releases ## fetch: ${fetchDone} ${fetchDone}: @mkdir -p $(dir $@) rsync -a --include='gencode.*' --exclude='*' '${releaseUrl}/' ${relDir} touch $@ ## # dependencies for files from release ## ${annotationGff}: ${fetchDone} -${pseudo2WayGff}: ${fetchDone} ${polyAGff}: ${fetchDone} ${tableGeneSourceMeta}: ${fetchDone} ${tableTranscriptSourceMeta}: ${fetchDone} ${tableTranscriptSupportMeta}: ${fetchDone} ${tableExonSupportMeta}: ${fetchDone} ${tableGeneSymbolMeta}: ${fetchDone} ${tablePdbMeta}: ${fetchDone} ${tablePubMedMeta}: ${fetchDone} ${tableRefSeqMeta}: ${fetchDone} ${tableSwissProtMeta}: ${fetchDone} ${tableTrEMBLMeta}: ${fetchDone} ${tablePolyAFeatureMeta}: ${fetchDone} ${tableAnnotationRemarkMeta}: ${fetchDone} ${tableEntrezGeneMeta}: ${fetchDone} @@ -270,35 +264,30 @@ # grab subset name from file pattern (this is what tr command below does) ${tableDir}/${tablePre}%${rel}.gp: ${gencodeGp} ${gencodeTsv} @mkdir -p $(dir $@) ${gencodeMakeTracks} $$(echo $* | tr A-Z a-z) ${gencodeGp} ${gencodeTsv} $@.${tmpExt} mv -f $@.${tmpExt} $@ ${tableTagTab}: ${tableAttrsTab} ${tableTranscriptionSupportLevelTab}: ${tableAttrsTab} ${tableAttrsTab}: ${gencodeGp} ${gencodeTsv} @mkdir -p $(dir $@) ${gencodeMakeAttrs} ${gencodeGp} ${gencodeTsv} $@.${tmpExt} ${tableTagTab}.${tmpExt} ${tableTranscriptionSupportLevelTab}.${tmpExt} mv -f ${tableTranscriptionSupportLevelTab}.${tmpExt} ${tableTranscriptionSupportLevelTab} mv -f ${tableTagTab}.${tmpExt} ${tableTagTab} mv -f $@.${tmpExt} $@ -${table2WayConsPseudoGp}: ${pseudo2WayGff} - @mkdir -p $(dir $@) - gff3ToGenePred -allowMinimalGenes $< $@.${tmpExt} - mv -f $@.${tmpExt} $@ - ${tablePolyAGp}: ${polyAGff} ${gencodeToUcscChain} @mkdir -p $(dir $@) ${gencodePolyaGxfToGenePred} $< ${gencodeToUcscChain} $@.${tmpExt} mv -f $@.${tmpExt} $@ ${tableUniProtTab}: ${tableSwissProtMeta} ${tableTrEMBLMeta} ${gencodeTsv} @mkdir -p $(dir $@) ((${metaFilterCmdGz} ${tableSwissProtMeta} | tawk '{print $$0,"SwissProt"}') && (${metaFilterCmdGz} ${tableTrEMBLMeta} | tawk '{print $$0,"TrEMBL"}')) | sort -k 1,1 > $@.${tmpExt} mv -f $@.${tmpExt} $@ ${gencodeToUcscChain}: @mkdir -p $(dir $@) ${buildGencodeToUcscLift} ${db} $@.${tmpExt} mv -f $@.${tmpExt} $@