dc6ddf665daba39f0d4aac9c83e2d0764da8cf3b
markd
  Fri Mar 12 20:48:48 2021 -0800
import of gencodeV37lift37

diff --git src/hg/makeDb/outside/gencode/gencodeLoad.mk src/hg/makeDb/outside/gencode/gencodeLoad.mk
index 64785c6..b814e3e 100644
--- src/hg/makeDb/outside/gencode/gencodeLoad.mk
+++ src/hg/makeDb/outside/gencode/gencodeLoad.mk
@@ -19,115 +19,100 @@
 .SECONDARY:
 host=$(shell hostname)
 ppid=$(shell echo $$PPID)
 tmpExt = ${host}.${ppid}.tmp
 SHELL = bash -e
 export SHELLOPTS=pipefail
 
 ##
 # programs, etc
 ##
 mach = $(shell uname -m)
 
 ##
 # Release info and files from Sanger.
 # BEGIN EDIT THESE EACH RELEASE
-#
-# - ensemblPrevVersion is use to get chrom name mappings for pre-release,
-#   as this doesn't change between release.
 ##
 preRelease = no
 #preRelease = yes
-db = hg38
-#db = hg19
-#db = mm10
+#db = hg38
+db = hg19
 #db = mm39
+#db = mm10
 ifeq (${db},mm10)
     grcRefAssembly = GRCm38
     verBase = M25
     prevVer = M24
     ver = ${verBase}lift37
     gencodeOrg = Gencode_mouse
     ftpReleaseSubdir = release_${verBase}/GRCm38_mapping
     annGffTypeName = chr_patch_hapl_scaff.annotation
-    ensemblVer = 101_37      # only used to get genome chromsome name mappings, don't change
-    ensemblPrevVer = ${ensemblVer}  # doesn't change
-    ensemblCDnaDb = mus_musculus_cdna_${ensemblPrevVer}
     isBackmap = yes
 else ifeq (${db},mm39)
     grcRefAssembly = GRCm39
     ver = M26
     prevVer = 
     gencodeOrg = Gencode_mouse
     ftpReleaseSubdir = release_${ver}
     annGffTypeName = chr_patch_hapl_scaff.annotation
-    ensemblVer = 102_38
-    ensemblPrevVer =
-    ensemblCDnaDb = mus_musculus_cdna_${ensemblPrevVer}
 else ifeq (${db},hg38)
     grcRefAssembly = GRCh38
     ver = 37
     prevVer = 36
     gencodeOrg = Gencode_human
     ftpReleaseSubdir = release_${ver}
     annGffTypeName = chr_patch_hapl_scaff.annotation
-    ensemblVer = 103_38
-    ensemblPrevVer = 102_38
-    ensemblCDnaDb = homo_sapiens_cdna_${ensemblPrevVer}
 else ifeq (${db},hg19)
     grcRefAssembly = GRCh37
-    verBase = 36
+    verBase = 37
     ver = ${verBase}lift37
+    prevVer = 36lift37
     backmapTargetVer = 19
     ftpReleaseSubdir = release_${verBase}/GRCh37_mapping
-    prevVer = 35lift37
     gencodeOrg = Gencode_human
     annGffTypeName = annotation
-    ensemblVer = 74_37      # only used to get genome chromsome name mappings, don't change
-    ensemblPrevVer = ${ensemblVer}  # doesn't change
-    ensemblCDnaDb = homo_sapiens_cdna_${ensemblPrevVer}
     isBackmap = yes
 else
     $(error unimplement genome database: ${db})
 endif
 # END EDIT THESE EACH RELEASE
 
 
 ifeq (${preRelease},yes)
     # pre-release
     baseUrl = ftp://ftp.ebi.ac.uk/pub/databases/havana/gencode_pre
 else
     # official release
     baseUrl = ftp://ftp.ebi.ac.uk/pub/databases/gencode
 endif
 
 rel = V${ver}
 releaseUrl = ${baseUrl}/${gencodeOrg}/${ftpReleaseSubdir}
 dataDir = data
 relDir = ${dataDir}/release_${ver}
 annotationGff = ${relDir}/gencode.v${ver}.${annGffTypeName}.gff3.gz
 pseudo2WayGff = ${relDir}/gencode.v${ver}.2wayconspseudos.gff3.gz
 polyAGff = ${relDir}/gencode.v${ver}.polyAs.gff3.gz
 
 ccdsBinDir = ~markd/compbio/ccds/ccds2/output/bin/$(mach)/opt
 gencodeMakeTracks = ${ccdsBinDir}/gencodeMakeTracks
 gencodeMakeAttrs = ${ccdsBinDir}/gencodeMakeAttrs
 gencodeExonSupportToTable = ${ccdsBinDir}/gencodeExonSupportToTable
 gencodeGxfToGenePred = ${ccdsBinDir}/gencodeGxfToGenePred
 gencodePolyaGxfToGenePred = ${ccdsBinDir}/gencodePolyaGxfToGenePred
 gencodeGxfToAttrs = ${ccdsBinDir}/gencodeGxfToAttrs
-ensToUcscChromMap = ${ccdsBinDir}/ensToUcscChromMap
+ensToUcscMkLift = ${HOME}/kent/src/hg/makeDb/outside/gencode/bin/ensToUcscMkLift
 gencodeBackMapMetadataIds = ${ccdsBinDir}/gencodeBackMapMetadataIds
 encodeAutoSqlDir = ${HOME}/kent/src/hg/lib/encode
 
 ##
 # intermediate data not loaded into tracks
 ##
 gencodeGp = ${dataDir}/gencode.gp
 gencodeTsv = ${dataDir}/gencode.tsv
 ensemblToUcscChain = ${dataDir}/ensemblToUcsc.chain
 
 # flag indicating fetch was done
 fetchDone = ${relDir}/done
 
 ##
 # track and table data
@@ -296,31 +281,31 @@
 	gff3ToGenePred -allowMinimalGenes $< $@.${tmpExt}
 	mv -f $@.${tmpExt} $@
 
 ${tablePolyAGp}: ${polyAGff} ${ensemblToUcscChain}
 	@mkdir -p $(dir $@)
 	${gencodePolyaGxfToGenePred} $< ${ensemblToUcscChain} $@.${tmpExt}
 	mv -f $@.${tmpExt} $@
 
 ${tableUniProtTab}: ${tableSwissProtMeta} ${tableTrEMBLMeta} ${gencodeTsv}
 	@mkdir -p $(dir $@)
 	((${metaFilterCmdGz} ${tableSwissProtMeta} | tawk '{print $$0,"SwissProt"}') && (${metaFilterCmdGz}  ${tableTrEMBLMeta} | tawk '{print $$0,"TrEMBL"}')) | sort -k 1,1 > $@.${tmpExt}
 	mv -f $@.${tmpExt} $@
 
 ${ensemblToUcscChain}:
 	@mkdir -p $(dir $@)
-	${ensToUcscChromMap} ${ensemblCDnaDb} ${grcRefAssembly} ${db} /dev/stdout | pslSwap stdin stdout | pslToChain stdin $@.${tmpExt}
+	${ensToUcscMkLift} ${db} $@.${tmpExt}
 	mv -f $@.${tmpExt} $@
 
 # other tab files, just copy to name following convention to make load rules
 # work
 ifeq (${isBackmap}, yes)
    metaFilterCmd = ${gencodeBackMapMetadataIds} ${gencodeTsv} ${targetGencodeTsv}
    metaFilterCmdGz = ${metaFilterCmd}
    metaFilterDepend = ${gencodeTsv} ${targetGencodeTsv}
 else
    metaFilterCmd = cat
    metaFilterCmdGz = zcat
    metaFilterDepend = ${gencodeTsv}
 endif
 define copyMetadataTabGz
 mkdir -p $(dir $@)