0a6421e764888bb6e8e1b2058501f7ce4188d814
markd
  Mon Jun 17 02:08:53 2019 -0700
import of prerelease of gencode V31lift37

diff --git src/hg/makeDb/outside/gencode/gencodeLoad.mk src/hg/makeDb/outside/gencode/gencodeLoad.mk
index 159f4bc..45edbc5 100644
--- src/hg/makeDb/outside/gencode/gencodeLoad.mk
+++ src/hg/makeDb/outside/gencode/gencodeLoad.mk
@@ -52,30 +52,31 @@
     ensemblCDnaDb = mus_musculus_cdna_${ensemblPrevVer}
 else ifeq (${db},hg38)
     grcRefAssembly = GRCh38
     ver = 31
     prevVer = 30
     gencodeOrg = Gencode_human
     ftpReleaseSubdir = release_${ver}
     annGffTypeName = chr_patch_hapl_scaff.annotation
     ensemblVer = 97_38
     ensemblPrevVer = 96_38
     ensemblCDnaDb = homo_sapiens_cdna_${ensemblPrevVer}
 else ifeq (${db},hg19)
     grcRefAssembly = GRCh37
     verBase = 31
     ver = ${verBase}lift37
+    backmapTargetVer = 19
     ftpReleaseSubdir = release_${verBase}/GRCh37_mapping
     prevVer = 29lift37
     gencodeOrg = Gencode_human
     annGffTypeName = annotation
     ensemblVer = 74_37      # only used to get genome chromsome name mappings
     ensemblPrevVer = ${ensemblVer}  # doesn't change
     ensemblCDnaDb = homo_sapiens_cdna_${ensemblPrevVer}
     isBackmap = yes
 else
     $(error unimplement genome database: ${db})
 endif
 # END EDIT THESE EACH RELEASE
 
 
 ifeq (${preRelease},yes)
@@ -198,31 +199,33 @@
 
 tableEntrezGeneMeta = ${relDir}/gencode.v${ver}.metadata.EntrezGene.gz
 tableEntrezGene = ${tablePre}EntrezGene${rel}
 tableEntrezGeneTab = ${tableDir}/${tableEntrezGene}.tab
 
 tableTranscriptionSupportLevelData = ${dataDir}/gencode.v${ver}.transcriptionSupportLevel.tab
 tableTranscriptionSupportLevel  = ${tablePre}TranscriptionSupportLevel${rel}
 tableTranscriptionSupportLevelTab = ${tableDir}/${tableTranscriptionSupportLevel}.tab
 
 genePredExtTables = ${tableBasic} ${tableComp} ${tablePseudo}
 genePredTables =
 tabTables = ${tableAttrs} ${tableTag} ${tableGeneSource} \
 	    ${tableTranscriptSource} ${tableTranscriptSupport} \
 	    ${tableGeneSymbol} ${tablePdb} ${tablePubMed} ${tableRefSeq} ${tableUniProt} \
 	    ${tableAnnotationRemark} ${tableEntrezGene}  ${tableTranscriptionSupportLevel}
-ifneq (${isBackmap}, yes)
+ifeq (${isBackmap}, yes)
+    targetGencodeTsv = ${dataDir}/target-gencode.tsv
+else
     # these are not included in backmap releases
     genePredTables = ${table2WayConsPseudo}
     genePredExtTables += ${tablePolyA}
     tabTables += ${tableExonSupport}
 endif
 allTables = ${genePredExtTables} ${genePredTables} ${tabTables}
 
 # directory for flags indicating tables were loaded
 loadedDir = loaded
 
 # directory for output and flags for sanity checks
 checkDir = check
 
 all: fetch mkTables loadTables checkSanity cmpRelease listTables
 
@@ -286,102 +289,110 @@
 	mv -f $@.${tmpExt} $@
 
 ${tableUniProtTab}: ${tableSwissProtMeta} ${tableTrEMBLMeta} ${gencodeTsv}
 	@mkdir -p $(dir $@)
 	((${metaFilterCmdGz} ${tableSwissProtMeta} | tawk '{print $$0,"SwissProt"}') && (${metaFilterCmdGz}  ${tableTrEMBLMeta} | tawk '{print $$0,"TrEMBL"}')) | sort -k 1,1 > $@.${tmpExt}
 	mv -f $@.${tmpExt} $@
 
 ${ensemblToUcscChain}:
 	@mkdir -p $(dir $@)
 	${ensToUcscChromMap} ${ensemblCDnaDb} ${grcRefAssembly} ${db} /dev/stdout | pslSwap stdin stdout | pslToChain stdin $@.${tmpExt}
 	mv -f $@.${tmpExt} $@
 
 # other tab files, just copy to name following convention to make load rules
 # work
 ifeq (${isBackmap}, yes)
-   metaFilterCmd = ${gencodeBackMapMetadataIds} ${gencodeTsv}
+   metaFilterCmd = ${gencodeBackMapMetadataIds} ${gencodeTsv} ${targetGencodeTsv}
    metaFilterCmdGz = ${metaFilterCmd}
+   metaFilterDepend = ${gencodeTsv} ${targetGencodeTsv}
 else
    metaFilterCmd = cat
    metaFilterCmdGz = zcat
+   metaFilterDepend = ${gencodeTsv}
 endif
 define copyMetadataTabGz
 mkdir -p $(dir $@)
 ${metaFilterCmdGz} $< > $@.${tmpExt}
 mv -f $@.${tmpExt} $@
 endef
 define copyMetadataTab
 mkdir -p $(dir $@)
 ${metaFilterCmd} $< > $@.${tmpExt}
 mv -f $@.${tmpExt} $@
 endef
 
-${tableGeneSourceTab}: ${tableGeneSourceMeta} ${gencodeTsv}
+${tableGeneSourceTab}: ${tableGeneSourceMeta} ${metaFilterDepend}
 	${copyMetadataTabGz}
-${tableTranscriptSourceTab}: ${tableTranscriptSourceMeta} ${gencodeTsv}
+${tableTranscriptSourceTab}: ${tableTranscriptSourceMeta} ${metaFilterDepend}
 	${copyMetadataTabGz}
-${tableTranscriptSupportTab}: ${tableTranscriptSupportMeta} ${gencodeTsv}
+${tableTranscriptSupportTab}: ${tableTranscriptSupportMeta} ${metaFilterDepend}
 	${copyMetadataTabGz}
-${tableExonSupportTab}: ${tableExonSupportMeta} ${ensemblToUcscChain} ${gencodeTsv}
+${tableExonSupportTab}: ${tableExonSupportMeta} ${ensemblToUcscChain} ${metaFilterDepend}
 	@mkdir -p $(dir $@)
 	${gencodeExonSupportToTable} ${tableExonSupportMeta} ${ensemblToUcscChain} $@.${tmpExt}
 	mv -f $@.${tmpExt} $@
-${tableGeneSymbolTab}: ${tableGeneSymbolMeta} ${gencodeTsv}
+${tableGeneSymbolTab}: ${tableGeneSymbolMeta} ${metaFilterDepend}
 	${copyMetadataTabGz}
-${tablePdbTab}: ${tablePdbMeta} ${gencodeTsv}
+${tablePdbTab}: ${tablePdbMeta} ${metaFilterDepend}
 	${copyMetadataTabGz}
-${tablePubMedTab}: ${tablePubMedMeta} ${gencodeTsv}
+${tablePubMedTab}: ${tablePubMedMeta} ${metaFilterDepend}
 	${copyMetadataTabGz}
-${tableRefSeqTab}: ${tableRefSeqMeta} ${gencodeTsv}
+${tableRefSeqTab}: ${tableRefSeqMeta} ${metaFilterDepend}
 	${copyMetadataTabGz}
 
 ${tableTranscriptionSupportLevelTab}: ${tableTranscriptionSupportLevelData}
 	mkdir -p $(dir $@)
 	cp $< $@.${tmpExt}
 	mv -f $@.${tmpExt} $@
 
 # convert to zero-based, 1/2 open
-${tablePolyAFeatureTab}: ${tablePolyAFeatureMeta} ${gencodeTsv}
+${tablePolyAFeatureTab}: ${tablePolyAFeatureMeta} ${metaFilterDepend}
 	@mkdir -p $(dir $@)
 	zcat $< | tawk '{print $$1,$$2-1,$$3,$$4,$$5-1,$$6,$$7,$$8}' | sort -k 4,4 -k 5,5n > $@.${tmpExt}
 	mv -f $@.${tmpExt} $@
-${tableAnnotationRemarkTab}: ${tableAnnotationRemarkMeta} ${gencodeTsv}
+${tableAnnotationRemarkTab}: ${tableAnnotationRemarkMeta} ${metaFilterDepend}
 	@mkdir -p $(dir $@)
 	${metaFilterCmdGz} $<  | tawk '{print $$1,gensub("\\\\n|\\\\","","g",$$2)}' | sort -k 1,1 > $@.${tmpExt}
 	mv -f $@.${tmpExt} $@
 # drop ENSTR entries that are a hack to support PAR sequences in GTF
-${tableEntrezGeneTab}: ${tableEntrezGeneMeta} ${gencodeTsv}
+${tableEntrezGeneTab}: ${tableEntrezGeneMeta} ${metaFilterDepend}
 	@mkdir -p $(dir $@)
 	zcat $< | tawk '$$1!~/^ENSTR/' | sort -k 1,1 | ${metaFilterCmd} /dev/stdin > $@.${tmpExt}
 	mv -f $@.${tmpExt} $@
 
 ##
 # intermediate data for ensembl/havana, not loaded into databases
 ##
 ${gencodeGp}: ${annotationGff} ${ensemblToUcscChain}
 	@mkdir -p $(dir $@)
 	${gencodeGxfToGenePred} ${annotationGff} ${ensemblToUcscChain} $@.${tmpExt}
 	mv -f $@.${tmpExt} $@
 
-${tableTranscriptionSupportLevelData}: ${gencodeTsv}
+${tableTranscriptionSupportLevelData}: ${metaFilterDepend}
 	touch $@
 ${gencodeTsv}: ${annotationGff}
 	@mkdir -p $(dir $@)
 	${gencodeGxfToAttrs} --keepGoing ${annotationGff} $@.${tmpExt} --tslTabOut=${tableTranscriptionSupportLevelData}.${tmpExt}
 	mv -f ${tableTranscriptionSupportLevelData}.${tmpExt} ${tableTranscriptionSupportLevelData}
 	mv -f $@.${tmpExt} $@
 
+${targetGencodeTsv}:
+	@mkdir -p $(dir $@)
+	hgsql ${db}  -e 'select * from wgEncodeGencodeAttrsV${backmapTargetVer}' > $@.${tmpExt}
+	mv -f $@.${tmpExt} $@
+
+
 # check attributes so code can be updated to handle new biotypes
 checkAttrs: ${annotationGff}
 	${gencodeGxfToAttrs} ${annotationGff} /dev/null
 
 ##
 # load tables
 #  browser commands use static tmp file name, so use lock file to serialize
 ##
 loadLock = flock load.lock
 
 loadTables: ${genePredExtTables:%=${loadedDir}/%.genePredExt.loaded} \
 	    ${genePredTables:%=${loadedDir}/%.genePred.loaded} \
 	    ${tabTables:%=${loadedDir}/%.tab.loaded}
 
 ${loadedDir}/%.genePredExt.loaded: ${tableDir}/%.gp