f4aa15bef8c781b695f09979a28a9beb68c90963
markd
  Wed May 15 00:09:39 2024 -0700
change workaround for gencode gene symbol file with short rows

diff --git src/hg/makeDb/doc/hg19.gencode.txt src/hg/makeDb/doc/hg19.gencode.txt
index bc6de51..fbc70a2 100644
--- src/hg/makeDb/doc/hg19.gencode.txt
+++ src/hg/makeDb/doc/hg19.gencode.txt
@@ -1270,54 +1270,52 @@
       Error: 1 duplicates in hg19.wgEncodeGencodeTranscriptSourceV45lift37.transcriptId including 'ENST00000302805.2'
       Error: 1 duplicates in hg19.for this release.
 
 
     # commit all
     jkmake alpha DBS=hg19
 
 ##############################################################################
 2024-05-13: import of UCSC GENCODE group processing of GENCODE V46lift37 final release (markd)
     # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions
     # set shell variable:
     pre=""
 
     # download, build and load tables
     mkdir -p /hive/data/genomes/hg19/bed/gencodeV46lift37/hgcImport${pre}
-    pushd /hive/data/genomes/hg19/bed/gencodeV46lift37/hgcImport{pre}
+    pushd /hive/data/genomes/hg19/bed/gencodeV46lift37/hgcImport${pre}
 
     (time nice make -O -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
 
     # Incorrect gene symbol lines in metadata.HGNC where there are only two columns.
-    # Reported and remove locally with
+    # These are old symbols that have not been mapped to HGNC ids.  Change table
+    # so they have a blank column
     cd data/release_46lift37/
     mv gencode.v46lift37.metadata.HGNC.gz gencode.v46lift37.metadata.HGNC.badrows.gz
-    zcat gencode.v46lift37.metadata.HGNC.badrows.gz | tawk 'NF==3' | pigz -c >gencode.v46lift37.metadata.HGNC.gz
+    zcat gencode.v46lift37.metadata.HGNC.badrows.gz | tawk 'NF==2{$3=""}{print}' | pigz -c > gencode.v46lift37.metadata.HGNC.gz
 
     # compare tables from previous release to see if the number of changes
     # makes sense.  Results are in gencode-cmp.tsv
     # generate trackDb and joiner blurb
     pushd ~/kent/src/hg/makeDb/trackDb
     ../../makeDb/outside/gencode/bin/gencodeGenerateTrackDbs hg19 46lift37 112 'May 2024'
 
     # edit human/hg19/trackDb.gencode.ra to add new .ra file include
     # edit all.joiner to add ~/tmp/gencodeV46lift37.joiner
 
     # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes'
     # to describe new release.
 
     jkmake DBS=hg19
 
     # edit  all.joiner to add ~/tmp/gencodeV46lift37.joiner
     # verify with:
     pushd /hive/data/genomes/hg19/bed/gencodeV46lift37/hgcImport${pre}
     make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck
 
-      These two complaint are cause by the new PAR id scheme causing a chrY V19 copy to be brought in.
-      This needs to fixed in some way
-      Error: 1 duplicates in hg19.wgEncodeGencodeTranscriptSourceV46lift37.transcriptId including 'ENST00000302805.2'
-      Error: 1 duplicates in hg19.for this release.
-
+      The two complaints that were caused by the new PAR id scheme causing a
+      chrY V19 copy to be brought in are now fixed by filtering at input time.
 
     # commit all
     jkmake alpha DBS=hg19
 
 ##############################################################################