703e07e1d02ee1e346d5dcf6c0610a3d4d5f4069
markd
  Tue Jun 29 17:12:38 2021 -0700
import of GENCODE V38lift37

diff --git src/hg/makeDb/doc/hg19.gencode.txt src/hg/makeDb/doc/hg19.gencode.txt
index be819f7..a0d1f67 100644
--- src/hg/makeDb/doc/hg19.gencode.txt
+++ src/hg/makeDb/doc/hg19.gencode.txt
@@ -828,15 +828,65 @@
          data/release_37lift37/gencode.v37lift37.metadata.Gene_source.gz
 
       this is the weird cases of SPRV3 which has now has a transcript past the PAR
 
       echo -e 'ENST00000302805.2\tensembl_havana_transcript_homo_sapiens' >> tables/wgEncodeGencodeTranscriptSourceV37lift37.tab 
       echo -e 'ENSG00000168939.6\tensembl_havana_transcript_homo_sapiens' >> tables/wgEncodeGencodeGeneSourceV37lift37.tab 
 
       (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.4.out&
 
       make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck
       
     # commit all
     jkmake alpha DBS=hg19
 
 ##############################################################################
+2021-06-29: import of UCSC GENCODE group processing of GENCODE V38lift37 (markd)
+    # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions
+
+    # download, build and load tables
+    mkdir -p /hive/data/genomes/hg19/bed/gencodeV38lift37
+    pushd /hive/data/genomes/hg19/bed/gencodeV38lift37
+    (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
+
+    # compare tables from the previous release to see if the number of changes
+    # makes sense.  Results are in gencode-cmp.tsv
+    # generate trackDb and joiner blurb
+    pushd ~/kent/src/hg/makeDb/trackDb
+    ../../makeDb/outside/gencode/bin/gencodeGenerateTrackDbsOldSchema hg19 38lift37 104 'May 2021'
+
+    # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes'
+    # to describe new release.
+
+    # edit human/hg19/trackDb.gencode.ra to add new .ra file include
+    jkmake DBS=hg19
+
+    # edit all.joiner to add ~/tmp/gencodeV38lift37.joiner
+    # verify with:
+    pushd /hive/data/genomes/hg19/bed/gencodeV38lift37
+    make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck
+
+    problem:
+    Error: 1 of 236066 elements (0.000%) of hg19.wgEncodeGencodeAttrsV38lift37.transcriptId are not in key wgEncodeGencodeTranscriptSourceV38lift37.transcriptId line 4248 of /cluster/home/markd/kent/src/hg/makeDb/schema/all.joiner
+    Error: 1 of 236066 elements (0.000%) of hg19.wgEncodeGencodeAttrsV38lift37.geneId are not in key wgEncodeGencodeGeneSourceV38lift37.geneId line 4212 of /cluster/home/markd/kent/src/hg/makeDb/schema/all.joiner
+    Error: 1 of 236066 elements (0.000%) of hg19.wgEncodeGencodeAttrsV38lift37.geneId are not in key wgEncodeGencodeGeneSourceV38lift37.geneId line 4212 of /cluster/home/markd/kent/src/hg/makeDb/schema/all.joiner
+    Error: 1 of 236066 elements (0.000%) of hg19.wgEncodeGencodeAttrsV38lift37.transcriptId are not in key wgEncodeGencodeTranscriptSourceV38lift37.transcriptId line 4248 of /cluster/home/markd/kent/src/hg/makeDb/schema/all.joiner
+
+    The gene/transcript pair is missing from metadata:
+       ENSG00000168939.6 / ENST00000302805.2
+    These are missing from
+         data/release_38lift37/gencode.v38lift37.metadata.Transcript_source.gz
+         data/release_38lift37/gencode.v38lift37.metadata.Gene_source.gz
+
+      this is the weird case of SPRY3, which now has a transcript past the PAR
+
+      echo -e 'ENST00000302805.2\tensembl_havana_transcript_homo_sapiens' >> tables/wgEncodeGencodeTranscriptSourceV38lift37.tab 
+      echo -e 'ENSG00000168939.6\tensembl_havana_transcript_homo_sapiens' >> tables/wgEncodeGencodeGeneSourceV38lift37.tab 
+
+      (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.4.out&
+
+      make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck
+      
+    # commit all
+    jkmake alpha DBS=hg19
+
+##############################################################################