b8b9d9c2a21c34a52d876df57e215c53722e53e6 markd Wed Jul 10 20:02:51 2019 -0700 fixed problem loading gencode 2way consensus pseudogenes from GFF3 diff --git src/hg/makeDb/outside/gencode/gencodeLoad.mk src/hg/makeDb/outside/gencode/gencodeLoad.mk index c7875bc..b7ee42a 100644 --- src/hg/makeDb/outside/gencode/gencodeLoad.mk +++ src/hg/makeDb/outside/gencode/gencodeLoad.mk @@ -24,32 +24,32 @@ export SHELLOPTS=pipefail ## # programs, etc ## mach = $(shell uname -m) ## # Release info and files from Sanger. # BEGIN EDIT THESE EACH RELEASE # # - ensemblPrevVersion is use to get chrom name mappings for pre-release, # as this doesn't change between release. ## #db = hg38 -db = hg19 -#db = mm10 +#db = hg19 +db = mm10 preRelease = no #preRelease = yes ifeq (${db},mm10) grcRefAssembly = GRCm38 ver = M22 prevVer = M21 gencodeOrg = Gencode_mouse ftpReleaseSubdir = release_${ver} annGffTypeName = chr_patch_hapl_scaff.annotation ensemblVer = 97_38 ensemblPrevVer = 96_38 ensemblCDnaDb = mus_musculus_cdna_${ensemblPrevVer} else ifeq (${db},hg38) grcRefAssembly = GRCh38 ver = 31 @@ -268,31 +268,31 @@ # grab subset name from file pattern (this is what tr command below does) ${tableDir}/${tablePre}%${rel}.gp: ${gencodeGp} ${gencodeTsv} @mkdir -p $(dir $@) ${gencodeMakeTracks} $$(echo $* | tr A-Z a-z) ${gencodeGp} ${gencodeTsv} $@.${tmpExt} mv -f $@.${tmpExt} $@ ${tableTagTab}: ${tableAttrsTab} ${tableAttrsTab}: ${gencodeGp} ${gencodeTsv} @mkdir -p $(dir $@) ${gencodeMakeAttrs} ${gencodeGp} ${gencodeTsv} $@.${tmpExt} ${tableTagTab} mv -f $@.${tmpExt} $@ ${table2WayConsPseudoGp}: ${pseudo2WayGff} @mkdir -p $(dir $@) - zcat $< | tawk '$$3=="transcript"{$$3 = "exon"} {print $$0}' | gff3ToGenePred stdin $@.${tmpExt} + gff3ToGenePred -allowMinimalGenes $< $@.${tmpExt} mv -f $@.${tmpExt} $@ ${tablePolyAGp}: ${polyAGff} ${ensemblToUcscChain} @mkdir -p $(dir $@) ${gencodePolyaGxfToGenePred} $< ${ensemblToUcscChain} $@.${tmpExt} mv -f $@.${tmpExt} $@ ${tableUniProtTab}: ${tableSwissProtMeta} ${tableTrEMBLMeta} ${gencodeTsv} @mkdir -p $(dir $@) ((${metaFilterCmdGz} ${tableSwissProtMeta} | tawk '{print $$0,"SwissProt"}') && (${metaFilterCmdGz} ${tableTrEMBLMeta} | tawk '{print $$0,"TrEMBL"}')) | sort -k 1,1 > $@.${tmpExt} mv -f $@.${tmpExt} $@ ${ensemblToUcscChain}: @mkdir -p $(dir $@) ${ensToUcscChromMap} ${ensemblCDnaDb} ${grcRefAssembly} ${db} /dev/stdout | pslSwap stdin stdout | pslToChain stdin $@.${tmpExt}