e863249eae18325e041c2948722b98cb5a5643e4
markd
  Fri Nov 4 14:47:29 2022 -0700
Dropped GENCODE 2-way pseudogenes from latest releases.  These should have never been included

diff --git src/hg/makeDb/outside/gencode/gencodeLoad.mk src/hg/makeDb/outside/gencode/gencodeLoad.mk
index b0b77d8..49241d6 100644
--- src/hg/makeDb/outside/gencode/gencodeLoad.mk
+++ src/hg/makeDb/outside/gencode/gencodeLoad.mk
@@ -22,32 +22,32 @@
 tmpExt = ${host}.${ppid}.tmp
 SHELL = bash -e
 export SHELLOPTS=pipefail
 
 ##
 # programs, etc
 ##
 mach = $(shell uname -m)
 
 ##
 # Release info and files from Sanger.
 # BEGIN EDIT THESE EACH RELEASE
 ##
 preRelease = no
 #preRelease = yes
-#db = hg38
-db = hg19
+db = hg38
+#db = hg19
 #db = mm39
 #db = mm10
 ifeq (${db},mm10)
     grcRefAssembly = GRCm38
     verBase = M25
     prevVer = M24
     backmapTargetVer = M25
     ver = ${verBase}lift37
     gencodeOrg = Gencode_mouse
     ftpReleaseSubdir = release_${verBase}/GRCm38_mapping
     annGffTypeName = chr_patch_hapl_scaff.annotation
     isBackmap = yes
 else ifeq (${db},mm39)
     grcRefAssembly = GRCm39
     ver = M31
@@ -79,31 +79,30 @@
 
 
 ifeq (${preRelease},yes)
     # pre-release
     baseUrl = rsync://ftp.ebi.ac.uk/pub/databases/havana/gencode_pre
 else
     # official release
     baseUrl = rsync://ftp.ebi.ac.uk/pub/databases/gencode
 endif
 
 rel = V${ver}
 releaseUrl = ${baseUrl}/${gencodeOrg}/${ftpReleaseSubdir}
 dataDir = data
 relDir = ${dataDir}/release_${ver}
 annotationGff = ${relDir}/gencode.v${ver}.${annGffTypeName}.gff3.gz
-pseudo2WayGff = ${relDir}/gencode.v${ver}.2wayconspseudos.gff3.gz
 polyAGff = ${relDir}/gencode.v${ver}.polyAs.gff3.gz
 ifneq (${isBackmap},yes)
    transcriptRanks = ${relDir}/gencode.v${ver}.transcript_rankings.txt.gz
    transcriptRanksOpt = --transcriptRanks=${transcriptRanks}
 endif
 
 gencodeBinDir = ${HOME}/kent/src/hg/makeDb/outside/gencode/bin
 gencodeMakeTracks = ${gencodeBinDir}/gencodeMakeTracks
 gencodeMakeAttrs = ${gencodeBinDir}/gencodeMakeAttrs
 gencodeExonSupportToTable = ${gencodeBinDir}/gencodeExonSupportToTable
 gencodeGxfToGenePred = ${gencodeBinDir}/gencodeGxfToGenePred
 gencodePolyaGxfToGenePred = ${gencodeBinDir}/gencodePolyaGxfToGenePred
 gencodeGxfToAttrs = ${gencodeBinDir}/gencodeGxfToAttrs
 buildGencodeToUcscLift = ${HOME}/kent/src/hg/makeDb/outside/gencode/bin/buildGencodeToUcscLift
 gencodeBackMapMetadataIds = ${gencodeBinDir}/gencodeBackMapMetadataIds
@@ -130,33 +129,30 @@
 tableBasic = ${tablePre}Basic${rel}
 tableBasicGp = ${tableDir}/${tableBasic}.gp
 
 tableComp = ${tablePre}Comp${rel}
 tableCompGp = ${tableDir}/${tableComp}.gp
 
 tablePseudo = ${tablePre}PseudoGene${rel}
 tablePseudoGp = ${tableDir}/${tablePseudo}.gp
 
 tableAttrs = ${tablePre}Attrs${rel}
 tableAttrsTab = ${tableDir}/${tableAttrs}.tab
 
 tableTag = ${tablePre}Tag${rel}
 tableTagTab = ${tableDir}/${tableTag}.tab
 
-# obtained from gencode.v*.2wayconspseudos.GRCh37.gtf
-table2WayConsPseudo = ${tablePre}2wayConsPseudo${rel}
-table2WayConsPseudoGp = ${tableDir}/${table2WayConsPseudo}.gp
 
 # obtained from gencode.v*.polyAs.gtf
 tablePolyA = ${tablePre}Polya${rel}
 tablePolyAGp = ${tableDir}/${tablePolyA}.gp
 
 # other metadata
 tableGeneSourceMeta = ${relDir}/gencode.v${ver}.metadata.Gene_source.gz
 tableGeneSource = ${tablePre}GeneSource${rel}
 tableGeneSourceTab = ${tableDir}/${tableGeneSource}.tab
 
 tableTranscriptSourceMeta = ${relDir}/gencode.v${ver}.metadata.Transcript_source.gz
 tableTranscriptSource = ${tablePre}TranscriptSource${rel}
 tableTranscriptSourceTab = ${tableDir}/${tableTranscriptSource}.tab
 
 tableTranscriptSupportMeta = ${relDir}/gencode.v${ver}.metadata.Transcript_supporting_feature.gz
@@ -205,59 +201,57 @@
 tableEntrezGeneTab = ${tableDir}/${tableEntrezGene}.tab
 
 tableTranscriptionSupportLevel  = ${tablePre}TranscriptionSupportLevel${rel}
 tableTranscriptionSupportLevelTab = ${tableDir}/${tableTranscriptionSupportLevel}.tab
 
 genePredExtTables = ${tableBasic} ${tableComp} ${tablePseudo}
 genePredTables =
 tabTables = ${tableAttrs} ${tableTag} ${tableGeneSource} \
 	    ${tableTranscriptSource} ${tableTranscriptSupport} \
 	    ${tableGeneSymbol} ${tablePdb} ${tablePubMed} ${tableRefSeq} ${tableUniProt} \
 	    ${tableAnnotationRemark} ${tableEntrezGene}  ${tableTranscriptionSupportLevel}
 ifeq (${isBackmap}, yes)
     targetGencodeTsv = ${dataDir}/target-gencode.tsv
 else
     # these are not included in backmap releases
-    genePredTables = ${table2WayConsPseudo}
     genePredExtTables += ${tablePolyA}
     tabTables += ${tableExonSupport}
 endif
 allTables = ${genePredExtTables} ${genePredTables} ${tabTables}
 
 # directory for flags indicating tables were loaded
 loadedDir = loaded
 
 # directory for output and flags for sanity checks
 checkDir = check
 
 all: fetch mkTables loadTables checkSanity cmpRelease listTables
 
 
 ##
 # fetch release, this doesn't get subdirectories so as not to copy the lift releases
 ##
 fetch: ${fetchDone}
 ${fetchDone}:
 	@mkdir -p $(dir $@)
 	rsync -a --include='gencode.*' --exclude='*' '${releaseUrl}/' ${relDir}
 	touch $@
 
 ##
 # dependencies for files from release
 ##
 ${annotationGff}: ${fetchDone}
-${pseudo2WayGff}: ${fetchDone}
 ${polyAGff}: ${fetchDone}
 ${tableGeneSourceMeta}: ${fetchDone}
 ${tableTranscriptSourceMeta}: ${fetchDone}
 ${tableTranscriptSupportMeta}: ${fetchDone}
 ${tableExonSupportMeta}: ${fetchDone}
 ${tableGeneSymbolMeta}: ${fetchDone}
 ${tablePdbMeta}: ${fetchDone}
 ${tablePubMedMeta}: ${fetchDone}
 ${tableRefSeqMeta}: ${fetchDone}
 ${tableSwissProtMeta}: ${fetchDone}
 ${tableTrEMBLMeta}: ${fetchDone}
 ${tablePolyAFeatureMeta}: ${fetchDone}
 ${tableAnnotationRemarkMeta}: ${fetchDone}
 ${tableEntrezGeneMeta}: ${fetchDone}
 
@@ -270,35 +264,30 @@
 # grab subset name from file pattern (this is what tr command below does)
 ${tableDir}/${tablePre}%${rel}.gp: ${gencodeGp} ${gencodeTsv}
 	@mkdir -p $(dir $@)
 	${gencodeMakeTracks}  $$(echo $* | tr A-Z a-z) ${gencodeGp} ${gencodeTsv} $@.${tmpExt}
 	mv -f $@.${tmpExt} $@
 
 ${tableTagTab}: ${tableAttrsTab}
 ${tableTranscriptionSupportLevelTab}: ${tableAttrsTab}
 ${tableAttrsTab}: ${gencodeGp} ${gencodeTsv}
 	@mkdir -p $(dir $@)
 	${gencodeMakeAttrs} ${gencodeGp} ${gencodeTsv} $@.${tmpExt} ${tableTagTab}.${tmpExt} ${tableTranscriptionSupportLevelTab}.${tmpExt}
 	mv -f ${tableTranscriptionSupportLevelTab}.${tmpExt} ${tableTranscriptionSupportLevelTab}
 	mv -f ${tableTagTab}.${tmpExt} ${tableTagTab}
 	mv -f $@.${tmpExt} $@
 
-${table2WayConsPseudoGp}: ${pseudo2WayGff}
-	@mkdir -p $(dir $@)
-	gff3ToGenePred -allowMinimalGenes $< $@.${tmpExt}
-	mv -f $@.${tmpExt} $@
-
 ${tablePolyAGp}: ${polyAGff} ${gencodeToUcscChain}
 	@mkdir -p $(dir $@)
 	${gencodePolyaGxfToGenePred} $< ${gencodeToUcscChain} $@.${tmpExt}
 	mv -f $@.${tmpExt} $@
 
 ${tableUniProtTab}: ${tableSwissProtMeta} ${tableTrEMBLMeta} ${gencodeTsv}
 	@mkdir -p $(dir $@)
 	((${metaFilterCmdGz} ${tableSwissProtMeta} | tawk '{print $$0,"SwissProt"}') && (${metaFilterCmdGz}  ${tableTrEMBLMeta} | tawk '{print $$0,"TrEMBL"}')) | sort -k 1,1 > $@.${tmpExt}
 	mv -f $@.${tmpExt} $@
 
 ${gencodeToUcscChain}:
 	@mkdir -p $(dir $@)
 	${buildGencodeToUcscLift} ${db} $@.${tmpExt}
 	mv -f $@.${tmpExt} $@