fcdf5c401c80840d28c346409c4fbb527544fac7
markd
  Tue Jun 9 13:59:33 2020 -0700
make gencode hgc robust against metadata that is sometimes not mapped in the backmap releases

diff --git src/hg/makeDb/outside/gencode/gencodeLoad.mk src/hg/makeDb/outside/gencode/gencodeLoad.mk
index cb37e47..2c30cf0 100644
--- src/hg/makeDb/outside/gencode/gencodeLoad.mk
+++ src/hg/makeDb/outside/gencode/gencodeLoad.mk
@@ -23,32 +23,32 @@
 SHELL = bash -e
 export SHELLOPTS=pipefail
 
 ##
 # programs, etc
 ##
 mach = $(shell uname -m)
 
 ##
 # Release info and files from Sanger.
 # BEGIN EDIT THESE EACH RELEASE
 #
 # - ensemblPrevVer is used to get chrom name mappings for pre-release,
 #   as this doesn't change between releases.
 ##
-db = hg38
-#db = hg19
+#db = hg38
+db = hg19
 #db = mm10
 #preRelease = no
 preRelease = yes
 ifeq (${db},mm10)
     grcRefAssembly = GRCm38
     ver = M25
     prevVer = M24
     gencodeOrg = Gencode_mouse
     ftpReleaseSubdir = release_${ver}
     annGffTypeName = chr_patch_hapl_scaff.annotation
     ensemblVer = 100_38
     ensemblPrevVer = 99_38
     ensemblCDnaDb = mus_musculus_cdna_${ensemblPrevVer}
 else ifeq (${db},hg38)
     grcRefAssembly = GRCh38
@@ -407,34 +407,38 @@
 
 # generic tables: run hgLoadSqlTab on ${tableDir}/<table>.tab for table <table> in ${db}, using the .sql file named for the table with ${rel} removed, then touch the .loaded flag file
 ${loadedDir}/%.tab.loaded: ${tableDir}/%.tab
 	@mkdir -p $(dir $@)
 	${loadLock} hgLoadSqlTab ${db} $* ${encodeAutoSqlDir}/$(subst ${rel},,$*).sql $<
 	touch $@
 
 ##
 # sanity checks
 ##
 # check that the .incorrect file for the current target is empty; if it has any lines, print a message naming the file to stderr and exit 1
 define checkForIncorrect
 awk 'END{if (NR != 0) {print "Incorrect data, see " FILENAME>"/dev/stderr"; exit 1}}' $(basename $@).incorrect
 endef
 
-checkSanity: ${checkDir}/${tableGeneSource}.checked ${checkDir}/${tableTranscriptSource}.checked \
-	${checkDir}/${tableBasic}.checked ${checkDir}/${tableBasic}.pseudo.checked \
+checkSanity:: ${checkDir}/${tableBasic}.checked ${checkDir}/${tableBasic}.pseudo.checked \
 	${checkDir}/${tableComp}.pseudo.checked
 
+# backmap does not have all gene/transcript source entries, so skip those checks.
+ifneq (${isBackmap},yes)
+checkSanity:: ${checkDir}/${tableGeneSource}.checked   ${checkDir}/${tableTranscriptSource}.checked
+endif
+
 # check that every geneId in ${tableAttrs} is also in ${tableGeneSource}; ids that are missing are written to the .incorrect file
 ${checkDir}/${tableGeneSource}.checked: ${loadedDir}/${tableGeneSource}.tab.loaded ${loadedDir}/${tableAttrs}.tab.loaded
 	@mkdir -p $(dir $@)
 	hgsql -Ne 'select geneId from ${tableAttrs} where geneId not in (select geneId from ${tableGeneSource})' ${db} | sort -u >$(basename $@).incorrect
 	@$(checkForIncorrect)
 	touch $@
 
 # check that every transcriptId in ${tableAttrs} is also in ${tableTranscriptSource}; ids that are missing are written to the .incorrect file
 ${checkDir}/${tableTranscriptSource}.checked: ${loadedDir}/${tableTranscriptSource}.tab.loaded  ${loadedDir}/${tableAttrs}.tab.loaded
 	@mkdir -p $(dir $@)
 	hgsql -Ne 'select transcriptId from ${tableAttrs} where transcriptId not in (select transcriptId from ${tableTranscriptSource})' ${db} | sort -u >$(basename $@).incorrect
 	@$(checkForIncorrect)
 	touch $@
 
 # make sure all basic are in comprehensive