f5183290e802b44254b3b4d381f369cee8390fca
markd
  Wed Apr 29 11:09:57 2020 -0700
moved the GENCODE track docs for mm10 and hg19 to their own files, as they were overwhelming the main files

diff --git src/hg/makeDb/doc/hg19.txt src/hg/makeDb/doc/hg19.txt
index 152ba42..4d9b4e5 100644
--- src/hg/makeDb/doc/hg19.txt
+++ src/hg/makeDb/doc/hg19.txt
@@ -20219,96 +20219,30 @@
     cd /hive/data/genomes/hg19/bed/blat.hg17.2012-11-08
     # check it with -debug first to see if it is going to work:
     time doSameSpeciesLiftOver.pl -buildDir=`pwd` -bigClusterHub=swarm \
 	-ooc=/hive/data/genomes/hg19/11.ooc \
 	-debug -dbHost=hgwdev -workhorse=hgwdev hg19 hg17 > do.log 2>&1
     # if that is OK, then run it:
     time doSameSpeciesLiftOver.pl -buildDir=`pwd` -bigClusterHub=swarm \
 	-ooc=/hive/data/genomes/hg19/11.ooc \
 	-dbHost=hgwdev -workhorse=hgwdev hg19 hg17 > do.log 2>&1
     #	real    333m16.756s
 
     # verify this file exists:
     #	/gbdb/hg19/liftOver/hg19ToHg17.over.chain.gz
     # and try out the conversion on genome-test from hg19 to hg17
 
-############################################################################
-2012-11-11: import and UCSC GENCODE group process of GENCODE V14 (markd)
-    # Due to UCSC Genome Browser using the NC_001807 mitochondrial genome sequence
-    # (chrM) and GENCODE annotating the NC_012920 mitochondrial sequence, the
-    # GENCODE mitochondrial sequences are lifted to UCSC chrM.
-
-    # download files
-    mkdir -p /hive/data/genomes/hg19/bed/gencodeV14/release
-    cd /hive/data/genomes/hg19/bed/gencodeV14/
-
-    # download gencode release
-    wget -nv -r -np ftp://ftp.sanger.ac.uk/pub/gencode/release_14
-    mv ftp.sanger.ac.uk/pub/gencode/release_14 .
-    rm -rf ftp.sanger.ac.uk/
-
-    # silly sanity check:
-    cd release_14
-    for f in *.gz *.tgz ; do zcat $f >/dev/null ; done
-
-    # untar main distribution
-    tar -zxf gencode14_GRCh37.tgz
-
-    cd /hive/data/genomes/hg19/bed/gencodeV14
-
-    # obtain transcription support level analysis from UCSC GENCODE group (markd/rachel)
-    mkdir -p data
-    cp /cluster/home/markd/compbio/ccds/branches/transSupV14.1/modules/gencodeTransSupport/exprs/classDev/runs/2012-11-11/results/gencode.v14.transcriptionSupportLevel.{tab,tsv} data/
-
-    # create Makefile from previous one.  This time, we need to get
-    # it from the ENCODE DCC area.
-    cp /hive/groups/encode/dcc/data/gencodeV13/Makefile .
-    # edit to set version:
-    ver = 14
-
-    # on code in the CCDS subversion tree:
-    #   svn+ssh://hgwdev.soe.ucsc.edu/projects/compbio/svnroot/hausslerlab/ccds/trunk
-    # and markd's python library (it will be moved to the hausslerlab
-    # repository soon)
-    # may need to update
-        ccds2/modules/gencode/src/lib/gencode/data/gencodeGenes.py
-    # to add new biotypes, use this command to verify and update as needed
-    # be sure to do a make in ccds2/modules/gencode
-    make checkAttrs
-
-    # build and load tables
-    (time nice make -j 10) >&build.out&
-
-    # compare tables from previous release to see if number changed makes
-    # sense.
-        make cmpRelease
-
-    ## Copy and update trackDb files from previous release.
-    ## Change version and use lower priority so it sorts to top of
-    ## super track page.
-    ## Important to make sure filter attrs.transcriptType matches current set
-    ## figured out with
-    select distinct transcriptType from wgEncodeGencodeAttrsV14 order by transcriptType;
-    cd kent/src/hg/makeDb/trackDb
-    cp human/hg19/wgEncodeGencodeV13.ra human/hg19/wgEncodeGencodeV14.ra
-    cp human/hg19/wgEncodeGencodeV13.html human/hg19/wgEncodeGencodeV14.html
-    # edit these plus human/hg19/trackDb.wgEncode.ra
-
-    ### IMPORTANT: make sure that hgTracks/gencodeTracks.c registers
-    ### track handler for this version of gencode:
-    registerTrackHandlerOnFamily("wgEncodeGencodeV14", gencodeGeneMethods);
-
 #########################################################################
 # QPCR PRIMERS (DONE - 2012-12-10 - Chin)
 # The track name is changed to "qPCR Primers"
 # Reload table with new track_mouse.BED (2013-01-28)
     # Download
     mkdir /hive/data/outside/Weizmann/qPcrPrimers
     cd /hive/data/outside/Weizmann/qPcrPrimers
     wget http://www.weizmann.ac.il/complex/compphys/software/Amit/primers/human/track_human.BED
     mkdir -p /hive/data/genomes/hg19/bed/qPcrPrimers
     cat track_human.BED | grep -v track \
        > /hive/data/genomes/hg19/bed/qPcrPrimers/qPcrPrimers_hg19.bed
     cd /hive/data/genomes/hg19/bed/qPcrPrimers
     hgLoadBed -bedDetail -tab -renameSqlTable \
       -sqlTable=$HOME/kent/src/hg/lib/bedDetail.sql \
       hg19 qPcrPrimers qPcrPrimers_hg19.bed
@@ -20668,93 +20602,30 @@
     # *** NOTE FOR NEXT TIME: gadPos now also looks in a couple Gencode V14
     #     tables if they exist.  If they don't anymore, you should look for
     #     suitable replacements if there are a lot of unfound IDs. ***)
     gadPos hg19 stdout | sort -k1,1 -k2n,2n -k4,4 -u > gad.tab
 #Found in ensCanonical: 7758
 #Found in refGene: 10
 #Found in kgAlias: 193
 #Found in Gencode: 178
 #Not found: 94
 
     # use -nobin option to ensure display order is according to genomic position
     # -- table is very small so performance is fine
     hgLoadBed -nobin hg19 gad gad.tab
 #Read 9156 elements of size 4 from gad.tab
 
-##############################################################################
-2013-04-08: import of UCSC GENCODE group processing of GENCODE V15 (markd)
-    # Due to UCSC Genome Browser using the NC_001807 mitochondrial genome sequence
-    # (chrM) and GENCODE annotating the NC_012920 mitochondrial sequence, the
-    # GENCODE mitochondrial sequences are lifted to UCSC chrM.
-
-    # download files
-    mkdir -p /hive/data/genomes/hg19/bed/gencodeV15
-    cd /hive/data/genomes/hg19/bed/gencodeV15
-
-    # download gencode release
-    wget -nv -r -np ftp://ftp.sanger.ac.uk/pub/gencode/release_15
-    mv ftp.sanger.ac.uk/pub/gencode/release_15 .
-    rm -rf ftp.sanger.ac.uk/
-
-    # silly sanity check:
-    for f in release_15/*.gz release_15/*.tgz ; do zcat $f >/dev/null ; done
-
-    # untar main distribution
-    cd release_15
-    tar -zxf gencode15_GRCh37.tgz
-
-    cd /hive/data/genomes/hg19/bed/gencodeV15
-
-    # obtain transcription support level analysis from UCSC GENCODE group (markd/rachel)
-    mkdir -p data
-    cp /cluster/home/markd/compbio/ccds/branches/transSupV15.1/modules/gencodeTransSupport/exprs/classDev/runs/2013-03-28/results/gencode.v15.transcriptionSupportLevel.{tab,tsv} data/
-
-    # create Makefile from previous one.
-    cp ../gencodeV14/Makefile .
-    # edit to set version:
-    ver = 15
-
-    # on code in the CCDS subversion tree:
-    #   svn+ssh://hgwdev.soe.ucsc.edu/projects/compbio/svnroot/hausslerlab/ccds/trunk
-    # and markd's python library may need to update
-        ccds2/modules/gencode/src/lib/gencode/data/gencodeGenes.py
-    # to add new biotypes, use this command to verify and update as needed
-    # be sure to do a make in ccds2/modules/gencode
-    make checkAttrs
-
-    # build and load tables
-    (time nice make -j 10) >&build.out&
-
-    # compare tables from previous release to see if number changed makes
-    # sense.
-        make cmpRelease
-
-    ## Copy and update trackDb files from previous release.
-    ## Change version and use lower priority so it sorts to top of
-    ## super track page.
-    ## Important to make sure filter attrs.transcriptType matches current set
-    ## figured out with
-    select distinct transcriptType from wgEncodeGencodeAttrsV15 order by transcriptType;
-    cd kent/src/hg/makeDb/trackDb
-    cp human/hg19/wgEncodeGencodeV14.ra human/hg19/wgEncodeGencodeV15.ra
-    cp human/hg19/wgEncodeGencodeV14.html human/hg19/wgEncodeGencodeV15.html
-    # edit these plus human/hg19/trackDb.wgEncode.ra
-
-    ### IMPORTANT: make sure that hgTracks/gencodeTracks.c registers
-    ### track handler for this version of gencode:
-    registerTrackHandler("wgEncodeGencodeV15", gencodeGeneMethods);
-
 #########################################################################
 # UPDATE COSMIC TRACK - v64 (DONE - 2013-04-17 - Hiram)
     # take a look at:
     # ftp://ftp.sanger.ac.uk/pub/CGP/cosmic/data_export/Request_based_exports/
     # to see what the new version file name is, then:
 
     cd /hive/data/genomes/hg19/bed/cosmic
 
     time ~/kent/src/hg/utils/automation/loadCosmic.pl -dryRun hg19 \
 ftp://ftp.sanger.ac.uk/pub/CGP/cosmic/data_export/Request_based_exports/UCSCMutExp_v64_260313.csv.gz
 #       New length: 677957
 #       Old length: 616299
 #       Percent bed overlap with previous version: 100.00%
 #       Number of deleted IDs: 1
 #       Number of added IDs: 61659
@@ -21086,104 +20957,30 @@
 cd /hive/groups/gencode/pseudogenes/retroFinder/hg19.20130228/retro/hg19.5
 retroFinder/trunk/src/pipeline/filterMrna.sh DEF
 retroFinder/trunk/src/pipeline/filterEst.sh DEF
 retroFinder/trunk/src/pipeline/analyseExpress.sh DEF
 cd /hive/groups/gencode/pseudogenes/retroFinder/hg19.20130228/mrnaBlastz
 retroFinder/trunk/src/pipeline/ucscStep6.sh DEF
 #added ucscRetroAli to trackDb.ra
 # copied
 # /hive/groups/gencode/pseudogenes/retroFinder/hg19.20130228/retro/hg19.5/trackDb.retro
 # entry to kent/src/hg/makeDb/trackDb/human/hg19/trackDb.ra and edited it
 # to add the version number and date.
 # Scripts copied ucscRetroAli5.psl, ucscRetroInfo5.bed and ucscRetroCds5.tab
 # to /hive/data/genomes/hg19/bed/retro/
 
 ############################################################################
-2013-06-02: import of UCSC GENCODE group processing of GENCODE V16 (markd)
-    # Due to UCSC Genome Browser using the NC_001807 mitochondrial genome sequence
-    # (chrM) and GENCODE annotating the NC_012920 mitochondrial sequence, the
-    # GENCODE mitochondrial sequences are lifted to UCSC chrM.
-    # This is the first release to include haplotype regions
-
-    # download files
-    mkdir -p /hive/data/genomes/hg19/bed/gencodeV16/data
-    cd /hive/data/genomes/hg19/bed/gencodeV16
-
-    # download gencode release
-    cd data
-    wget -nv -r -np ftp://ftp.sanger.ac.uk/pub/gencode/release_16
-    mv ftp.sanger.ac.uk/pub/gencode/release_16 .
-    rm -rf ftp.sanger.ac.uk/
-
-    cd /hive/data/genomes/hg19/bed/gencodeV16
-
-    # obtain transcription support level analysis from UCSC GENCODE group (markd/rachel)
-    mkdir -p data
-    cp /cluster/home/markd/compbio/ccds/branches/transSupV16.1/modules/gencodeTransSupport/exprs/classDev/runs/2013-06-16/results/gencode.v16.transcriptionSupportLevel.{tab,tsv} data/
-
-    # create Makefile from previous one.
-    cp ../gencodeV15/Makefile .
-    # edit to set version:
-    ver = 16
-
-    # on code in the CCDS subversion tree:
-    #   svn+ssh://hgwdev.soe.ucsc.edu/projects/compbio/svnroot/hausslerlab/ccds/trunk
-    # and markd's python library may need to update
-        ccds2/modules/gencode/src/lib/gencode/data/gencodeGenes.py
-    # to add new biotypes, use this command to verify and update as needed
-    # be sure to do a make in ccds2/modules/gencode
-    make checkAttrs
-
-    # build and load tables
-    (time nice make -j 10) >&build.out&
-
-    # compare tables from previous release to see if number changed makes
-    # sense.
-        make cmpRelease
-
-    ## Copy and update trackDb files from previous release.
-    ## Change version and use lower priority so it sorts to top of
-    ## super track page.
-    ## Important to make sure filter attrs.transcriptType matches current set
-    ## figured out with
-    select distinct transcriptType from wgEncodeGencodeAttrsV16 order by transcriptType;
-    cd kent/src/hg/makeDb/trackDb
-    cp human/hg19/wgEncodeGencodeV15.ra human/hg19/wgEncodeGencodeV16.ra
-    cp human/hg19/wgEncodeGencodeV15.html human/hg19/wgEncodeGencodeV16.html
-
-    # edit these plus human/hg19/trackDb.wgEncode.ra
-    # - set priorities in wgEncodeGencodeV16.ra in reverse order with previous
-    #   tracks so newest shows up first
-    #     priority - set to previous version priority minus 0.001
-    #     searchPriority - set each to previous -0.001
-    # - make current track default to pack and hide previous [ONLY if it's going to be pushed]
-    #     superTrack wgEncodeGencodeSuper pack
-
-    ### IMPORTANT: make sure that hgTracks/gencodeTracks.c registers
-    ### track handler for this version of gencode:
-    registerTrackHandler("wgEncodeGencodeV16", gencodeGeneMethods);
-
-    # sanity check number of rows in each table
-    make cmpRelease
-
-    # update all.joiner and validate
-    # look for the last section `begin Gencode V??' in all.joiner
-    # and copy and update version
-    # repeat this until happy, editing minCheck as needed
-    for tbl in $(hgsql -Ne 'show tables like "wgEncodeGencode%V16"' hg19) ; do runJoiner.csh hg19 $tbl ~/kent/src/hg/makeDb/schema/all.joiner noTimes ; done >&check/joiner.out
-
-############################################################################
 # LASTZ Tenrec EchTel2 (DONE - 2013-06-12 - Hiram)
     screen -S hg19EchTel2  # use screen to manage the long running job
     mkdir /hive/data/genomes/hg19/bed/lastzEchTel2.2013-06-12
     cd /hive/data/genomes/hg19/bed/lastzEchTel2.2013-06-12
 
     cat << '_EOF_' > DEF
 # Human vs. Tenrec
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 
 # TARGET: Human Hg19
 SEQ1_DIR=/scratch/data/hg19/nib
 SEQ1_LEN=/scratch/data/hg19/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
@@ -21582,104 +21379,30 @@
     cd /hive/data/genomes/vicPac2/bed/blastz.hg19.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/hg19/bed/lastzVicPac2.2013-06-17/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    206m52.132s
     cat fb.vicPac2.chainHg19Link.txt
     #	1428125689 bases of 2078582856 (68.707%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/vicPac2/bed
     ln -s blastz.hg19.swap lastz.hg19
 
 ##############################################################################
-2013-06-19: import of UCSC GENCODE group processing of GENCODE V17 (markd)
-    # Due to UCSC Genome Browser using the NC_001807 mitochondrial genome sequence
-    # (chrM) and GENCODE annotating the NC_012920 mitochondrial sequence, the
-    # GENCODE mitochondrial sequences are lifted to UCSC chrM.
-    # This is the first release to include haplotype regions
-
-    # download files
-    mkdir -p /hive/data/genomes/hg19/bed/gencodeV17/data
-    cd /hive/data/genomes/hg19/bed/gencodeV17
-
-    # download gencode release
-    cd data
-    wget -nv -r -np ftp://ftp.sanger.ac.uk/pub/gencode/release_17
-    mv ftp.sanger.ac.uk/pub/gencode/release_17 .
-    rm -rf ftp.sanger.ac.uk/
-
-    cd /hive/data/genomes/hg19/bed/gencodeV17
-
-    # obtain transcription support level analysis from UCSC GENCODE group (markd/rachel)
-    mkdir -p data
-    cp /cluster/home/markd/compbio/ccds/branches/transSupV17.1/modules/gencodeTransSupport/exprs/classDev/runs/2013-06-19/results/gencode.v17.transcriptionSupportLevel.{tab,tsv} data/
-
-    # create Makefile from previous one.
-    cp ../gencodeV16/Makefile .
-    # edit to set version:
-    ver = 17
-
-    # on code in the CCDS subversion tree:
-    #   svn+ssh://hgwdev.soe.ucsc.edu/projects/compbio/svnroot/hausslerlab/ccds/trunk
-    # and markd's python library may need to update
-        ccds2/modules/gencode/src/lib/gencode/data/gencodeGenes.py
-    # to add new biotypes, use this command to verify and update as needed
-    # be sure to do a make in ccds2/modules/gencode
-    make checkAttrs
-
-    # build and load tables
-    (time nice make -j 10) >&build.out&
-
-    # compare tables from previous release to see if number changed makes
-    # sense.
-        make cmpRelease
-
-    ## Copy and update trackDb files from previous release.
-    ## Change version and use lower priority so it sorts to top of
-    ## super track page.
-    ## Important to make sure filter attrs.transcriptType matches current set
-    ## figured out with
-    select distinct transcriptType from wgEncodeGencodeAttrsV17 order by transcriptType;
-    cd kent/src/hg/makeDb/trackDb
-    cp human/hg19/wgEncodeGencodeV16.ra human/hg19/wgEncodeGencodeV17.ra
-    cp human/hg19/wgEncodeGencodeV16.html human/hg19/wgEncodeGencodeV17.html
-
-    # edit these plus human/hg19/trackDb.wgEncode.ra
-    # - set priorities in wgEncodeGencodeV17.ra in reverse order with previous
-    #   tracks so newest shows up first
-    #     priority - set to previous version priority minus 0.001
-    #     searchPriority - set each to previous -0.001
-    # - make current track default to pack and hide previous [ONLY if it's going to be pushed]
-    #     superTrack wgEncodeGencodeSuper pack
-
-    ### IMPORTANT: make sure that hgTracks/gencodeTracks.c registers
-    ### track handler for this version of gencode:
-    registerTrackHandler("wgEncodeGencodeV17", gencodeGeneMethods);
-
-    # sanity check number of rows in each table
-    make cmpRelease
-
-    # update all.joiner and validate
-    # look for the last section `begin Gencode V??' in all.joiner
-    # and copy and update version
-    # repeat this until happy, editing minCheck as needed
-    for tbl in $(hgsql -Ne 'show tables like "wgEncodeGencode%V17"' hg19) ; do runJoiner.csh hg19 $tbl ~/kent/src/hg/makeDb/schema/all.joiner noTimes ; done >&check/joiner.out
-
-##############################################################################
 # LASTZ White-throated sparrow ZonAlb1 (DONE - 2013-06-26 - Hiram)
     mkdir /hive/data/genomes/hg19/bed/lastzZonAlb1.2013-06-26
     cd /hive/data/genomes/hg19/bed/lastzZonAlb1.2013-06-26
 
     cat << '_EOF_' > DEF
 # human vs white-throated sparrow
 # distant settings for human-aves alignment
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=10000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Human hg19
 SEQ1_DIR=/scratch/data/hg19/nib
@@ -29208,84 +28931,30 @@
     # was modified to use the unmasked hg19/hg38 sequences
     doSameSpeciesLiftOver.pl -debug -stop=net -buildDir=`pwd` \
       -bigClusterHub=ku \
        -dbHost=hgwdev -workhorse=hgwdev \
           -ooc=/hive/data/genomes/hg19/11.ooc hg19 hg38
     # Turns out the chain step procedure will not construct the proper
     # set of files in debug mode (pslParts.lst) because it can not.
     # the chain step has to be run for real:
     doSameSpeciesLiftOver.pl -continue=chain -buildDir=`pwd` \
       -bigClusterHub=ku \
        -dbHost=hgwdev -workhorse=hgwdev \
           -ooc=/hive/data/genomes/hg19/11.ooc hg19 hg38 > chain.log 2>&1
 
     # verify the convert link on the browser is now active from hg19 to hg38
 
-#############################################################################
-2013-12-13: import of UCSC GENCODE group processing of GENCODE V19 (markd)
-    # download files
-    mkdir -p /hive/data/genomes/hg19/bed/gencodeV19/data
-    cd /hive/data/genomes/hg19/bed/gencodeV19
-
-    # download gencode release
-    cd data
-    wget -nv -r -np ftp://ftp.sanger.ac.uk/pub/gencode/release_19
-    mv ftp.sanger.ac.uk/pub/gencode/release_19 .
-    rm -rf ftp.sanger.ac.uk/
-
-    cd /hive/data/genomes/hg19/bed/gencodeV19
-    # create Makefile from previous one.
-    cp ../gencodeV18/Makefile .
-
-    # build and load tables
-    (time nice make -j 10) >&build.1.out&
-
-    # compare tables from previous release to see if number changed makes
-    # sense.
-        make cmpRelease >gencode-cmp.tsv
-
-    ## Copy and update trackDb files from previous release.
-    ## Change version and use lower priority so it sorts to top of
-    ## super track page.
-    ## Important to make sure filter attrs.transcriptType matches current set
-    ## figured out with
-    select distinct transcriptType from wgEncodeGencodeAttrsV19 order by transcriptType;
-    cd kent/src/hg/makeDb/trackDb
-    cp human/hg19/wgEncodeGencodeV18.ra human/hg19/wgEncodeGencodeV19.ra
-    cp human/hg19/wgEncodeGencodeV18.html human/hg19/wgEncodeGencodeV19.html
-
-    # edit these plus human/hg19/trackDb.wgEncode.ra
-    # - set priorities in wgEncodeGencodeV19.ra in reverse order with previous
-    #   tracks so newest shows up first
-    #     priority - set to previous version priority minus 0.001
-    #     searchPriority - set each to previous -0.001
-    # - make current track default to pack and hide previous [ONLY if it's going to be pushed]
-    #     superTrack wgEncodeGencodeSuper pack
-    # - Update wgEncodeGencodeSuper.html to describe new release and to
-    #   pick up other updates.
-
-    ### IMPORTANT: make sure that hgTracks/gencodeTracks.c registers
-    ### track handler for this version of gencode:
-    registerTrackHandler("wgEncodeGencodeV19", gencodeGeneMethods);
-
-    # update all.joiner and validate
-    # look for the last section `begin Gencode V??' in all.joiner
-    # and copy and update version
-    # repeat this until happy, editing minCheck as needed
-    cd /hive/data/genomes/hg19/bed/gencodeV19
-    for tbl in $(hgsql -Ne 'show tables like "wgEncodeGencode%V19"' hg19) ; do runJoiner.csh hg19 $tbl ~/kent/src/hg/makeDb/schema/all.joiner noTimes ; done >&check/joiner.out
-
 ##############################################################################
 # hg19 <-> hg38 difference tracks (DONE - 2013-12-28 - Hiram)
     # note: the procedure for this is in the hg38.txt file under
     # this same heading.  The end result is the loading of the table:
 
     cd /hive/data/genomes/hg19/bed/liftOverHg38
     hgLoadBed hg19 hg38ContigDiff hg19.itemRgb.bed
 
 
 ##########################################################################
 # NEANDERTAL AND DENISOVA METHYLATION (DONE 8/27/14 angie)
     # RM #13439
     mkdir /hive/data/genomes/hg19/bed/neandertalMethylation
     cd /hive/data/genomes/hg19/bed/neandertalMethylation
     wget http://carmelab.huji.ac.il/data/Reconstructed_Methylation_Neandertal.zip
@@ -31428,72 +31097,30 @@
 # The -S10G parameter is only supported in newer sort versions
 # if it complains, just remove it. It will just take longer.
 time sort -k4,4 -S10G --parallel=20 hg19.bed > hg19.s4.bed
 
 # convert the hg19 bed to bigBed
 cd /hive/data/genomes/hg19/bed/patents/hg19
 join -t $'\t' -1 4 -2 1 ../data/hg19.s4.bed ../data/seqAndPatentSummary.tab -o '1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 1.10 1.11 1.12 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 2.10 2.11 2.12' | patSeqFilterBulkAndAnnotate ../data/htPatents.txt patBulk.bed patNonBulk.bed -c ../data/seqCounts.tab
 bedSort patNonBulk.bed patNonBulk.bed
 bedSort patBulk.bed patBulk.bed
 bedToBigBed patNonBulk.bed /cluster/data/genomes/hg19/chrom.sizes patNonBulk.bb -tab -as=../patSummary.as -type=bed12+
 bedToBigBed patBulk.bed /cluster/data/genomes/hg19/chrom.sizes patBulk.bb -tab -as=../patSummary.as -type=bed12+
 hgBbiDbLink hg19 patBulk /gbdb/hg19/bbi/patBulk.bb
 hgBbiDbLink hg19 patNonBulk /gbdb/hg19/bbi/patNonBulk.bb
 
 #########################################################################
-2016-03-02: import of UCSC GENCODE group processing of GENCODE V24lift37 (markd)
-    # download files
-    mkdir -p /hive/data/genomes/hg19/bed/V24lift37
-    cd /hive/data/genomes/hg19/bed/V24lift37
-
-    # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set
-    # release and transcript support versions
-
-
-    # download, build and load tables
-    (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
-
-    # compare tables from previous release to see if number changed makes
-    # sense (results in gencode-cmp.tsv)
-        make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk cmpRelease
-
-    ## Copy and update trackDb files from previous release.
-    ## Change version and use lower priority so it sorts to top of
-    ## super track page.  Follow instructions in ra file to ensure
-    ## filters are correct.
-    cd kent/src/hg/makeDb/trackDb
-    cp human/hg38/wgEncodeGencodeV24.ra human/hg19/wgEncodeGencodeV24lift37.ra
-    cp human/hg38/wgEncodeGencodeV24.html human/hg19/wgEncodeGencodeV24lift37.html
-
-    # edit these plus human/hg38/trackDb.wgEncode.ra
-    # - set priorities in wgEncodeGencodeV24.ra tracks so newest shows up first
-    #     priority - set to previous version priority minus 0.001
-    #     searchPriority - set each to previous minus 0.001
-    # - make current track default to pack and hide previous [ONLY if it's going to be pushed]
-    #     superTrack wgEncodeGencodeSuper pack
-    # - Update wgEncodeGencodeSuper.html to describe new release and to
-    #   pick up other updates.
-
-    # update all.joiner and validate
-    # look for the last section `begin Gencode V??' in all.joiner
-    # and copy and update version
-    # repeat this until happy, editing minCheck as needed
-    # output in check/joiner.out
-    cd /hive/data/genomes/hg19/bed/gencodeV24lift37
-    make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck
-
-#########################################################################
 # killer whale/orcOrc1 Lastz run  (WORKING - 2016-06-03 - Hiram)
     # note: incorrect date on this directory name, should be 2016-06-03
 
     mkdir /hive/data/genomes/hg19/bed/lastzOrcOrc1.2016-07-03
     cd  /hive/data/genomes/hg19/bed/lastzOrcOrc1.2016-07-03
     printf '# human vs killer whale
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 BLASTZ_M=254
 
 # TARGET: Human Hg19
 SEQ1_DIR=/scratch/data/hg19/hg19.2bit
 SEQ1_LEN=/scratch/data/hg19/chrom.sizes
 SEQ1_CHUNK=10000000
 SEQ1_LAP=10000
 
@@ -31875,58 +31502,30 @@
 # *** All done!
 
 
 ##############################################################################
 # SNPMASKED SEQUENCE FOR SNP149 (DONE 3/24/17 angie)
     # Redmine #18330
     screen -S mask -t mask
     ~/kent/src/hg/utils/automation/doDbSnpMaskSequence.pl hg19 149 -debug
 # *** Steps were performed in /hive/data/genomes/hg19/snp149Mask.2017-03-24
     cd /hive/data/genomes/hg19/snp149Mask.2017-03-24
     ~/kent/src/hg/utils/automation/doDbSnpMaskSequence.pl hg19 149 \
       >>& do.log & tail -f do.log
 # *** All done!
 
 #########################################################################
-2017-04-16: import of UCSC GENCODE group processing of GENCODE V26lift37 (markd)
-    # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions
-
-    # download, build and load tables
-    mkdir -p /hive/data/genomes/hg19/bed/gencodeV26lift37
-    pushd /hive/data/genomes/hg19/bed/gencodeV26lift37
-    (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
-
-    # compare tables from previous release to see if number changed makes
-    # sense.  Results are in gencode-cmp.tsv
-
-    # generate trackDb and joiner blurb
-    pushd kent/src/hg/makeDb/trackDb
-    ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 26lift37 88 'March 2017'
-
-    # edit human/hg19/trackDb.wgEncode.ra to add new .ra file include
-    make DBS=hg19
-
-    # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes'
-    # to describe new release. [ONLY if it's going to be pushed]
-
-    # edit  all.joiner to add ~/tmp/gencodeV26lift37.joiner
-    # verify with:
-    pushd /hive/data/genomes/hg19/bed/gencodeV26lift37
-    make  -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck
-
-    # commit all
-#########################################################################
 # COSMIC v81 DONE Chris Eisenhart 2017-05-11
 # Make a new COSMIC track for hg19
 mkdir /hive/data/outside/cosmic/hg19/v81
 cd /hive/data/outside/cosmic/hg19/v81
 
 # Get the new data
 sftp ceisenha@ucsc.edu@sftp-cancer.sanger.ac.uk
 # Login to SFTP server then run these commands
 get /files/grch37/cosmic/v81/CosmicMutantExport.tsv.gz
 
 # Get the schema from V80
 cp ~/kent/src/hg/lib/cosmicNew.as .
 
 # Remove the 'NS' fields, search for the \t after to exclude the E'NS'ST transcripts.
 zcat CosmicMutantExport.tsv.gz | sed 's/NS\t/\t/g' > cosMut.tsv
@@ -32315,58 +31914,30 @@
 
 # Use a script to convert to bed format.
 cosmicToBed cosMut.tsv cosMut.bed
 # This many lines were skipped, 128966 for not having genomic coordinate
 
 # Sort and convert to big bed using the .as file.
 sort -k1,1 -k2,2n cosMut.bed > sCosMut.bed
 bedToBigBed -type=bed8+31 -as=cosmicNew.as sCosMut.bed /hive/data/genomes/hg19/chrom.sizes cosMutHg19V82.bb -tab -extraIndex=name,cosmLabel
 
 # Link it up so the outside world can see it.
 cd /gbdb/hg19/cosmic/
 ln -s /hive/data/outside/cosmic/hg19/v82/cosMutHg19V82.bb .
 ##############################################################################
 # snpedia (DONE - 2017-09-06 - Max)
 # see ../hg38/snpedia.txt
-##############################################################################
-2017-09-14: import of UCSC GENCODE group processing of GENCODE V27lift37 (markd)
-    # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions
-
-    # download, build and load tables
-    mkdir -p /hive/data/genomes/hg19/bed/gencodeV27lift37
-    pushd /hive/data/genomes/hg19/bed/gencodeV27lift37
-    (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
-
-    # compare tables from previous release to see if number changed makes
-    # sense.  Results are in gencode-cmp.tsv
-
-    # generate trackDb and joiner blurb
-    pushd kent/src/hg/makeDb/trackDb
-    ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 27lift37 90 'August 2017'
-
-    # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes'
-    # to describe new release. [ONLY if it's going to be pushed]
-
-    # edit human/hg19/trackDb.wgEncode.ra to add new .ra file include
-    make DBS=hg19
-
-    # edit  all.joiner to add ~/tmp/gencodeV27lift37.joiner
-    # verify with:
-    pushd /hive/data/genomes/hg19/bed/gencodeV27lift37
-    make  -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck
-
-    # commit all
 #########################################################################
 # LASTZ human/hg19 Gorilla/gorGor5 - (DONE - 2017-11-08 - Hiram)
     mkdir /hive/data/genomes/hg19/bed/lastzGorGor5.2017-11-08
     cd /hive/data/genomes/hg19/bed/lastzGorGor5.2017-11-08
 
     printf '# human vs gorilla
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 BLASTZ_T=2
 BLASTZ_O=600
 BLASTZ_E=150
 BLASTZ_M=254
 BLASTZ_K=4500
 BLASTZ_Y=15000
 BLASTZ_Q=/scratch/data/blastz/human_chimp.v2.q
 #       A     C     G     T
@@ -32581,69 +32152,30 @@
 # HGMD (updated 01/25/18 max)
 # got hgmd 2017 from Frank Schacherer Frank.Schacherer@qiagen.com and Rupert Yip Rupert.Yip@qiagen.com
 # see also the file hg38/hgmd.txt
 year=2019
 cd /hive/data/genomes/hg19/bed/hgmd
 cat /hive/data/outside/hgmd/$year.4-hgmd-public_hg19.tsv | grep -v \# | tawk '{if ($5=="I") {start=$4-1; end=$4+1; col="100,100,100"} else if ($5=="D") {start=$4-1; end=$4; col="170,170,170"} else {start=$4-1; end=$4; col="0,0,0"}; print "chr"$3,start,end,$2":"$1,0,".",start,end,col,$2,$1,$5}' | sed -e 's/M$/substitution/' | sed -e 's/I$/insertion (between the two basepairs, sequence not provided by HGMD)/' | sed -e 's/D$/deletion (endpoint not provided by HGMD)/' | sed -e 's/X$/insertion-deletion (endpoint not provided by HGMD)/' | sed -e 's/R$/regulatory variant/' | sed -e 's/S$/splicing variant/' | sort -k1,1 -k2,2n > hgmd.bed
 bedToBigBed hgmd.bed /hive/data/genomes/hg19/chrom.sizes hgmd.bb -type=bed9+ -as=hgmd.as -tab
 ln -s /hive/data/genomes/hg19/bed/hgmd/hgmd.bb /gbdb/hg19/bbi/hgmd.bb
 hgBbiDbLink hg19 hgmd /gbdb/hg19/bbi/hgmd.bb
 # Forgot, finally done Oct 24: also updated hgBeacon
 bigBedToBed /gbdb/hg19/bbi/hgmd.bb /tmp/temp.bed
 /usr/local/apache/cgi-bin/hgBeacon -f hgmd temp.bed hgmd
 # Forgot, finally done June 26: updated GBIB as qateam
 scp /gbdb/hg19/bbi/hgmd.bb hgdownload:/usr/local/apache/gbib/prot/
 
-###########################################################################
-2018-03-08: update UCSC GENCODE V19 to include protein id (for VAI)
-
-cd /hive/data/genomes/hg19/bed/gencodeV19/
-# move tables that need to be rebuilt
-mkdir -p prev/pre-proteinId
-mv tables/wgEncodeGencodeAttrsV19.tab tables/wgEncodeGencodeTagV19.tab prev/pre-proteinId/
-mv loaded/wgEncodeGencodeAttrsV19.tab.loaded loaded/wgEncodeGencodeTagV19.tab.loaded prev/pre-proteinId/
-
-
-# V19 didn't have protein ids in GTF, need to get from ensembl database
- cd ~/hive/tmp
- ftp://ftp.ensembl.org/pub/release-74/mysql/homo_sapiens_core_74_37/homo_sapiens_core_74_37.sql.gz
- ftp://ftp.ensembl.org/pub/release-74/mysql/homo_sapiens_core_74_37/transcript.txt.gz
- ftp://ftp.ensembl.org/pub/release-74/mysql/homo_sapiens_core_74_37/translation.txt.gz
-
- # load small subset
- hgsql -e 'create database markd_ens'
- zcat homo_sapiens_core_74_37.sql.gz |hgsql markd_ens
- gunzip trans*
- hgsqlimport markd_ens $(pwd)/transcript.txt $(pwd)/translation.txt
- # add proteinId column to wgEncodeGencodeAttrsV19
- hgsql
- CREATE table markd_ens.trans_prot
- SELECT concat(trans.stable_id, ".", CAST(trans.version AS CHAR)) transcriptId,
-        concat(prot.stable_id,  ".", CAST(prot.version AS CHAR)) proteinId
-     FROM markd_ens.transcript trans, markd_ens.translation prot
-     WHERE prot.transcript_id = trans.transcript_id;
-  CREATE INDEX transcriptId ON markd_ens.trans_prot(transcriptId);
-  UPDATE hg19.wgEncodeGencodeAttrsV19 as attr
-  INNER JOIN markd_ens.trans_prot as tp  ON attr.transcriptId = tp.transcriptId
-  SET attr.proteinId = tp.proteinId;
-  SELECT count(*) FROM wgEncodeGencodeAttrsV19 WHERE transcriptClass="coding" AND proteinId = "";
-  -> 0 !!! got them all
-  DROP DATABASE markd_ens;
-
-# 2018-03-19: update search to include protein id
-edit kent/src/hg/makeDb/trackDb/human/hg19/wgEncodeGencodeV19.ra
-
 #############################################################################
 # LASTZ human/hg19 vs. pig/susScr11 - (DONE - 2018-04-02 - Hiram)
     mkdir /hive/data/genomes/hg19/bed/lastzSusScr11.2018-04-02
     cd /hive/data/genomes/hg19/bed/lastzSusScr11.2018-04-02
 
     printf '# human vs pig
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 BLASTZ_O=400
 BLASTZ_E=30
 BLASTZ_M=254
 # default BLASTZ_Q score matrix:
 #       A     C     G     T
 # A    91  -114   -31  -123
 # C  -114   100  -125   -31
 # G   -31  -125   100  -114
@@ -32701,66 +32233,30 @@
     cat fb.susScr11.chainHg19Link.txt
     # 1386496715 bases of 2472073034 (56.086%) in intersection
     cat fb.susScr11.chainSynHg19Link.txt
     # 1353158526 bases of 2472073034 (54.738%) in intersection
 
     # testing -trackHub option
     time ($HOME/kent/src/hg/utils/automation/doRecipBest.pl \
  -load -trackHub -workhorse=hgwdev -buildDir=`pwd` susScr11 hg19) \
 	> rbest.log 2>&1
     # real    610m45.624s
 
     cat fb.susScr11.chainRBest.Hg19.txt
     # 1342604720 bases of 2472073034 (54.311%) in intersection
 
 #########################################################################
-2018-04-17: import of UCSC GENCODE group processing of GENCODE V28lift37 (markd)
-    # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions
-
-    # download, build and load tables
-    mkdir -p /hive/data/genomes/hg19/bed/gencodeV28lift37
-    pushd /hive/data/genomes/hg19/bed/gencodeV28lift37
-    (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
-
-    # compare tables with the previous release to see if the number of changes
-    # makes sense.  Results are in gencode-cmp.tsv
-
-    # generate trackDb and joiner blurb
-    pushd ~/kent/src/hg/makeDb/trackDb
-    ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 28lift37 92 'Apr 2018'
-
-    # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes'
-    # to describe new release. [ONLY if it's going to be pushed]
-
-    # edit human/hg19/trackDb.wgEncode.ra to add new .ra file include
-    make DBS=hg19
-
-    # edit  all.joiner to add ~/tmp/gencodeV28lift37.joiner
-    # verify with:
-    pushd /hive/data/genomes/hg19/bed/gencodeV28lift37
-    make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck
-
-    # commit all
-
-    # 2018-05-01 markd
-    # fix bug in wgEncodeGencodeEntrezGeneV28lift37 with change in gencodeLoad.mk.  Reload the table.
-    mkdir -p /hive/data/genomes/hg19/bed/gencodeV28lift37
-    rm tables/wgEncodeGencodeEntrezGeneV28lift37.tab
-    (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.3.out&
-
-
-#########################################################################
 # DBSNP B151 / SNP151 (DONE 4/16/18 angie)
     # Redmine #21010
     mkdir -p /hive/data/outside/dbSNP/151/human_hg19
     cd /hive/data/outside/dbSNP/151/human_hg19
     # Look at the directory listing of ftp://ftp.ncbi.nih.gov/snp/organisms/
     # to find the subdir name to use as orgDir below (human_9606_b151_GRCh37p13 in this case).
     # Go to that subdirectory, then to database/organism_data/ and look for files
     # whose names start with b151_* and may or may not end with a suffix that identifies
     # the build assembly version or some annotation version.  If there is a suffix shared
     # by all b151_* files, add that to config.ra as the "buildAssembly".
     # Since this build is on GRCh37.p13 like b144 above, use the liftUp.lft file
     # and ignoreDbSnpContigsFile constructed for b144.
     cat > config.ra <<EOF
 db hg19
 orgDir human_9606_b151_GRCh37p13
@@ -33115,67 +32611,30 @@
     bedToBigBed -tab -type=bed5 -as=pLI.as pLI.bed.sorted $chromSizes pLI.bb
     bedToBigBed -tab -type=bed5 -as=misZ.as misZ.bed.sorted $chromSizes misZ.bb
     mkdir /gbdb/hg19/gnomAD/pLI/
     ln -s `pwd`/*.bb /gbdb/hg19/gnomAD/pLI/
 
     # TODO: Ask Anne for feedback and changes, especially on color
 
 ##############################################################################
 # crispr 10K shoulders (DONE - 2018-11-09 - Hiram)
     time (~/kent/src/hg/utils/automation/doCrispr.pl \
      -stop=load -buildDir=`pwd` -smallClusterHub=hgwdev-101 hg19 ncbiRefSeq) \
 	> do.log 2>&1
     # real    5896m44.643s
 
 ##############################################################################
-2018-11-09: import of UCSC GENCODE group processing of GENCODE V29lift37 (markd)
-    # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions
-
-    # download, build and load tables
-    mkdir -p /hive/data/genomes/hg19/bed/gencodeV29lift37
-    pushd /hive/data/genomes/hg19/bed/gencodeV29lift37
-    (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
-
-    # Contained invalid biotype for one transcript (ENST00000649475.1_1), fix for now
-    cd data/release_29lift37
-    mv gencode.v29lift37.annotation.gff3.gz  gencode.v29lift37.annotation.orig.gff3.gz
-    zcat gencode.v29lift37.annotation.orig.gff3.gz | sed -e 's/lincrna/lincRNA/g' | gzip -c>  gencode.v29lift37.annotation.gff3.gz
-    cd ../..
-    (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.2.out&
-
-
-    # compare tables with the previous release to see if the number of changes
-    # makes sense.  Results are in gencode-cmp.tsv
-
-    # generate trackDb and joiner blurb
-    pushd ~/kent/src/hg/makeDb/trackDb
-    ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 29lift37 94 'Oct 2018'
-
-    # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes'
-    # to describe new release. [ONLY if it's going to be pushed]
-
-    # edit human/hg19/trackDb.wgEncode.ra to add new .ra file include
-    make DBS=hg19
-
-    # edit  all.joiner to add ~/tmp/gencodeV29lift37.joiner
-    # verify with:
-    pushd /hive/data/genomes/hg19/bed/gencodeV29lift37
-    make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck
-
-    # commit all
-
-##############################################################################
 # LASTZ human/hg19 vs. chimp/panTro6 - (DONE - 2018-12-14 - Hiram)
     mkdir /hive/data/genomes/hg19/bed/lastzPanTro6.2018-12-14
     cd /hive/data/genomes/hg19/bed/lastzPanTro6.2018-12-14
 
     printf '# human vs chimp
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
 # maximum M allowed with lastz is only 254
 BLASTZ_M=254
 BLASTZ_O=600
 BLASTZ_E=150
 BLASTZ_K=4500
 BLASTZ_Y=15000
 BLASTZ_L=4500
 BLASTZ_T=2
 BLASTZ_Q=/hive/data/staging/data/blastz/human_chimp.v2.q
@@ -33340,59 +32799,30 @@
 cat process/hg19.curated.gp | fgrep -f hgmdTranscripts.txt - > hgmd.curated.gp
 hgLoadGenePred -genePredExt hg19 ncbiRefSeqHgmd hgmd.curated.gp
 $ wc -l hgmd.curated.gp 
 7965 hgmd.curated.gp
 #############################################################################
 # genomenon mastermind track, Max, Feb 2019
 cd /hive/data/genomes/hg19/bed/mastermind/
 wget 'https://mastermind.genomenon.com/cvr/download?format=csv' -O - > mastermind.2018.11.26.csv.gz
 unzip mastermind.2018.11.26.csv.zip
 mv mastermind_cited_variants_reference-2018.11.26-csv/ 2018-11-26
 hgsql hg19 -NB -e 'select alias, chrom from chromAlias where source = "refseq";' > chromAlias.tab
 python ~/kent/src/hg/makeDb/mastermind/mastermindToBed.py 2018-11-26/mastermind_cited_variants_reference-2018.11.26.csv
 bedSort mastermind.bed mastermind.bed
 bedToBigBed -type=bed9+ -as=~/kent/src/hg/makeDb/mastermind/mastermind.as -tab mastermind.bed /hive/data/genomes/hg19/chrom.sizes  mastermind.bb
 ln -s `pwd`/mastermind.bb /gbdb/hg19/bbi/mastermind.bb
-#############################################################################
-2019-04-08: import of UCSC GENCODE group processing of GENCODE V30lift37 (markd)
-    # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions
-
-    # download, build and load tables
-    mkdir -p /hive/data/genomes/hg19/bed/gencodeV30lift37
-    pushd /hive/data/genomes/hg19/bed/gencodeV30lift37
-    (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
-
-    # compare tables with the previous release to see if the number of changes
-    # makes sense.  Results are in gencode-cmp.tsv
-
-    # generate trackDb and joiner blurb
-    pushd ~/kent/src/hg/makeDb/trackDb
-    ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 30lift37 96 'Apr 2019'
-
-    # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes'
-    # to describe new release. [ONLY if it's going to be pushed]
-
-    # edit human/hg19/trackDb.wgEncode.ra to add new .ra file include
-    make DBS=hg19
-
-    # edit  all.joiner to add ~/tmp/gencodeV30lift37.joiner
-    # verify with:
-    pushd /hive/data/genomes/hg19/bed/gencodeV30lift37
-    make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck
-
-    # commit all
-
 ##############################################################################
 # DGV GOLD (DATABASE OF GENOMIC VARIANTS GOLD STANDARD) (DONE 5/06/19 ChrisL)
 # Redmine #23371
 ##############################################################################
     TODAY=`date +%y%m%d`
     mkdir -p /hive/data/genomes/hg19/bed/dgv/$TODAY
     cd /hive/data/genomes/hg19/bed/dgv/$TODAY
     wget http://dgv.tcag.ca/dgv/docs/DGV.GS.March2016.50percent.GainLossSep.Final.hg19.gff3
 
     # GFF3 with the 9th field full of extra info that we need to recreate the blocks
     # as seen at the DGV website. See note-6 in the redmine (23371) for an example
     # of the different cnv representations (1, 2, or 3 blocks).
 
     # what sub-fields are in the 9th field:
     head -1 DGV.GS.March2016.50percent.GainLossSep.Final.hg19.gff3 | cut -f9 | tr ';' '\n' | cut -d'=' -f1
@@ -33478,94 +32908,30 @@
     # itemCount: 38,185
     # primaryDataSize: 30,841,362
     # primaryIndexSize: 6,892
     # zoomLevels: 8
     # chromCount: 24
     # basesCovered: 580,564,080
     # meanDepth (of bases covered): 3.668451
     # minDepth: 1.000000
     # maxDepth: 81.000000
     # std of depth: 5.825349
 
     # link into gbdb
     mkdir -p /gbdb/hg19/dgv
     ln -s `pwd`/dgvGold.bb /gbdb/hg19/dgv/
 
-#############################################################################
-2019-07-05: import of UCSC GENCODE group processing of GENCODE V31lift37 (markd)
-    # Replaced import of pre-release
-    # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions
-
-    # download, build and load tables
-    mkdir -p /hive/data/genomes/hg19/bed/gencodeV31lift37
-    pushd /hive/data/genomes/hg19/bed/gencodeV31lift37
-    (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
-
-    # compare tables with the previous release to see if the number of changes
-    # makes sense.  Results are in gencode-cmp.tsv
-
-    # generate trackDb and joiner blurb
-    pushd ~/kent/src/hg/makeDb/trackDb
-    ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 31lift37 97 'June 2019'
-
-    # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes'
-    # to describe new release. [ONLY if it's going to be pushed]
-
-    # edit human/hg19/trackDb.wgEncode.ra to add new .ra file include
-    make DBS=hg19
-
-    # edit  all.joiner to add ~/tmp/gencodeV31lift37.joiner
-    # verify with:
-    pushd /hive/data/genomes/hg19/bed/gencodeV31lift37
-    make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck
-
-    # commit all
-
-##############################################################################
-2019-09-03: import of UCSC GENCODE group processing of GENCODE V32lift37 (markd)
-    # PRE-RELEASE
-    # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions
-
-    # download, build and load tables
-    mkdir -p /hive/data/genomes/hg19/bed/gencodeV32lift37
-    pushd /hive/data/genomes/hg19/bed/gencodeV32lift37
-    (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
-
-    # compare tables with the previous release to see if the number of changes
-    # makes sense.  Results are in gencode-cmp.tsv
-
-    # generate trackDb and joiner blurb
-    pushd ~/kent/src/hg/makeDb/trackDb
-    ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 32lift37 98 'Sept 2019'
-
-    # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes'
-    # to describe new release. [ONLY if it's going to be pushed]
-
-    # edit human/hg19/trackDb.wgEncode.ra to add new .ra file include
-    make DBS=hg19
-
-    # edit  all.joiner to add ~/tmp/gencodeV32lift37.joiner
-    # verify with:
-    pushd /hive/data/genomes/hg19/bed/gencodeV32lift37
-    make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck
-
-    # commit all
-
-2019-08-30: repeat above for final release
-    mv /hive/data/genomes/hg19/bed/gencodeV32lift37 /hive/data/genomes/hg19/bed/gencodeV32lift37pre
-    # redo the above imports
-
 ##############################################################################
 # LASTZ human/hg19 vs. rhesus/rheMac10 - (DONE - 2019-07-09 - Hiram)
     mkdir /hive/data/genomes/hg19/bed/lastzRheMac10.2019-07-09
     cd /hive/data/genomes/hg19/bed/lastzRheMac10.2019-07-09
 
     printf '# human vs macaca mulatta
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
 # maximum M allowed with lastz is only 254
 BLASTZ_M=254
 BLASTZ_Q=/hive/data/staging/data/blastz/human_chimp.v2.q
 BLASTZ_O=600
 BLASTZ_E=150
 # other parameters from panTro2 vs hg18 lastz on advice from Webb
 BLASTZ_K=4500
 BLASTZ_Y=15000
@@ -33804,59 +33170,30 @@
 #      int[blockCount] blockSizes;  "Size of each exon"
 #      int[blockCount] blockStarts; "0-based start position of each exon"
 #      string _mouseOver;  "Mouseover label"
 #      string geneName;   "Associated Gene symbol"
 #      string synonymous; "Synonymous metrics"
 #      string missense;   "Missense metrics"
 #      string pLoF;       "Predicted Loss of Function metrics"
 #      )
 
 sizes=/hive/data/genomes/hg19/chrom.sizes
 bedToBigBed -type=bed12+5 -as=pliMetrics.as -tab -extraIndex=name,geneName pliByGene.bed $sizes pliByGene.bb
 bedToBigBed -type=bed12+5 -as=pliMetrics.as -tab -extraIndex=name,geneName pliByTranscript.bed $sizes pliByTranscript.bb
 cd /gbdb/hg19/gnomAD/pLI/
 ln -s /hive/data/outside/gnomAD.2/constraint/pliByGene.bb
 ln -s /hive/data/outside/gnomAD.2/constraint/pliByTranscript.bb
-#########################################################################
-2019-11-17: import of UCSC GENCODE group processing of GENCODE V33lift37 PRE-RELEASE (markd)
-    # 
-    # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions
-
-    # download, build and load tables
-    mkdir -p /hive/data/genomes/hg19/bed/gencodeV33lift37
-    pushd /hive/data/genomes/hg19/bed/gencodeV33lift37
-    (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
-
-    # compare tables with the previous release to see if the number of changes
-    # makes sense.  Results are in gencode-cmp.tsv
-
-    # generate trackDb and joiner blurb
-    pushd ~/kent/src/hg/makeDb/trackDb
-    ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 33lift37 99 'Nov 2019'
-
-    # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes'
-    # to describe new release. [ONLY if it's going to be pushed]
-
-    # edit human/hg19/trackDb.wgEncode.ra to add new .ra file include
-    make DBS=hg19
-
-    # edit  all.joiner to add ~/tmp/gencodeV33lift37.joiner
-    # verify with:
-    pushd /hive/data/genomes/hg19/bed/gencodeV33lift37
-    make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck
-
-    # commit all
 ##############################################################################
 2020-01-13: Add size filter to dgvMerged and dgvSupporting track (ChrisL)
 cd /hive/data/genomes/hg19/bed/dgv/160810
 zcat dgvMerged.bed.gz | tawk '{print $0, $3-$2}' > dgvMergedWithSize.bed
 zcat dgvSupporting.bed.gz | tawk '{print $0, $3-$2}' > dgvSupportingWithSize.bed
 cat dgvPlusSize.as
 # table dgvPlus
 # "Database of Genomic Variants incorporating dbVar, July 2013 and later"
 #     (
 #     string chrom;       "Reference sequence chromosome or scaffold"
 #     uint   chromStart;  "Start position in chromosome"
 #     uint   chromEnd;    "End position in chromosome"
 #     string name;        "ID of merged variant or supporting variant"
 #     uint   score;       "Score from 0-1000 (placeholder for BED 9+ format)"
 #     char[1] strand;     "+ or - (placeholder for BED 9+ format)"
@@ -33964,74 +33301,30 @@
     # pass2 - checking and writing primary data (6507 records, 17 fields): 134 millis
     # only a few genes:
     cut -f13 missenseConstrained.bed | sort | uniq | wc -l
     # 2700
     ln -s /gbdb/hg19/gnomAD/missenseConstrained.bb missenseConstrained.bb
 
 ##############################################################################
 # adding RefSeq Select to NCBIRefSeq, Max, Feb 17 2020
 cd /hive/data/genomes/hg19/bed/ncbiRefSeq.p13.2019-11-21
 zcat download/*_genomic.gff.gz | egrep 'tag=(RefSeq|MANE) Select'  | cut -f9- | tr ';' '\n' | grep Name= | grep -v NP_ | cut -d= -f2 | sort -u > refseqSelectTranscripts.txt
 cat process/hg19.curated.gp | fgrep -f refseqSelectTranscripts.txt - > refseqSelect.curated.gp
 hgLoadGenePred -genePredExt hg19 ncbiRefSeqSelect refseqSelect.curated.gp
 wc -l refseqSelect.curated.gp
 21436 refseqSelect.curated.gp
 ##############################################################################
-2020-01-16: import of UCSC GENCODE group processing of GENCODE V33lift37 (markd)
-    # deprecate pre-release
-    mv /hive/data/genomes/hg19/bed/gencodeV33lift37 /hive/data/genomes/hg19/bed/gencodeV33lift37Pre
-    # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions
-
-    # download, build and load tables
-    mkdir -p /hive/data/genomes/hg19/bed/gencodeV33lift37
-    pushd /hive/data/genomes/hg19/bed/gencodeV33lift37
-    (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
-
-    # compare tables with the previous release to see if the number of changes
-    # makes sense.  Results are in gencode-cmp.tsv
-
-    # generate trackDb and joiner blurb
-    pushd ~/kent/src/hg/makeDb/trackDb
-    ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 33lift37 99 'Jan 2020'
-
-    # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes'
-    # to describe new release. [ONLY if it's going to be pushed]
-
-    # edit human/hg19/trackDb.gencode.ra to add new .ra file include
-    make DBS=hg19
-
-    # edit  all.joiner to add ~/tmp/gencodeV33lift37.joiner
-    # verify with:
-    pushd /hive/data/genomes/hg19/bed/gencodeV33lift37
-    make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck
-
-    # commit all
-##############################################################################
-2020-01-21: manually update GENCODE V19 to include chrMT (markd)
-
-  cd /hive/data/genomes/hg19/bed/gencodeV19/chrMT
-  # convert chrM annotation in gff3 to chrMT
-  zcat ../data/release_19/gencode.v19.annotation.gff3.gz | tawk 'NR==1{print;next} $1=="chrM"{$1="chrMT"; print}' >chrMT.gff3
-  # comprehensive and basic are the same on chrM and there are no pseudogenes, so this is
-  # easy
-  cat ../tables/wgEncodeGencodeCompV19.gp chrMT.gp >wgEncodeGencodeCompV19.gp
-  cat ../tables/wgEncodeGencodeBasicV19.gp chrMT.gp >wgEncodeGencodeBasicV19.gp
-  
-  hgLoadGenePred -genePredExt hg19  wgEncodeGencodeCompV19 wgEncodeGencodeCompV19.gp
-  hgLoadGenePred -genePredExt hg19  wgEncodeGencodeBasicV19 wgEncodeGencodeBasicV19.gp
-
-##############################################################################
 # doseSensitivity (WORKING - 2020-02-06, Ana, Hiram)
 
   mkdir /hive/data/genomes/hg19/bed/doseSensitivity
   cd /hive/data/genomes/hg19/bed/doseSensitivity
 
   ftp original files from ClinGen:
 
   ftp://ftp.ncbi.nlm.nih.gov/pub/dbVar/clingen/
 
   wget --timestamping \
 ftp://ftp.ncbi.nlm.nih.gov/pub/dbVar/clingen/ClinGen_haploinsufficiency_gene_GRCh37.bed
 
   wget --timestamping \
 ftp://ftp.ncbi.nlm.nih.gov/pub/dbVar/clingen/ClinGen_triplosensitivity_gene_GRCh37.bed
 
@@ -34246,58 +33539,30 @@
      109025 INS x 
         748 INV x 
        1108 MCNV x
           1 SVTYPE
 
     # add colors based on gnomad website and get in to proper bed9+
     for f in *.bed4Plus; do out=${f/.bed4Plus/}; bedClip -truncate $f $chromSizes stdout | ./gnomadSvToUcsc.awk | sort -k1,1 -k2,2n > $out.bed9Plus; done
 
     chromSizes=/hive/data/genomes/hg19/chrom.sizes
     for f in *.bed9Plus; do out=${f/.bed9Plus/}; bedToBigBed -tab -type=bed9+19 -as=gnomadSv.as -extraIndex=name $f $chromSizes $out.bb; done
     mkdir -p /gbdb/hg19/gnomAD/structuralVariants/ 
     cd /gbdb/hg19/gnomAD/structuralVariants/
     cp -s /hive/data/outside/gnomAD.2/structuralVariants/*.bb .
 
 ##############################################################################
-2020-03-11: import of UCSC GENCODE group processing of GENCODE V34lift37 pre-release (markd)
-    # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions
-
-    # download, build and load tables
-    mkdir -p /hive/data/genomes/hg19/bed/gencodeV34lift37Pre
-    pushd /hive/data/genomes/hg19/bed/gencodeV34lift37Pre
-    (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
-
-    # compare tables with the previous release to see if the number of changes
-    # makes sense.  Results are in gencode-cmp.tsv
-
-    # generate trackDb and joiner blurb
-    pushd ~/kent/src/hg/makeDb/trackDb
-    ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 34lift37 100 'March 2020'
-
-    # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes'
-    # to describe new release. [ONLY if it's going to be pushed]
-
-    # edit human/hg19/trackDb.gencode.ra to add new .ra file include
-    make DBS=hg19
-
-    # edit  all.joiner to add ~/tmp/gencodeV34lift37.joiner
-    # verify with:
-    pushd /hive/data/genomes/hg19/bed/gencodeV34lift37Pre
-    make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck
-
-    # commit all
-##############################################################################
 # NCBI regions that are problematic for sequencing, Mon Nov 18 05:06:17 PST 2019, Max
 
 mkdir /hive/data/genomes/hg19/bed/specialRegions/orig
 cd /hive/data/genomes/hg19/bed/specialRegions/orig
 # download and convert to Excel
 wget ftp://ftp.ncbi.nlm.nih.gov/variation/get-rm/highly_homologous_genes/Table_S1_List1_NGS_Dead_Zone_exon_level.xlsx
 wget ftp://ftp.ncbi.nlm.nih.gov/variation/get-rm/highly_homologous_genes/Table_S2_List2_NGS_Problem_List_High_Stringency_exon_level.xlsx
 wget ftp://ftp.ncbi.nlm.nih.gov/variation/get-rm/highly_homologous_genes/Table_S3_List3_NGS_Problem_List_Low_Stringency_exon_level.xlsx
 wget ftp://ftp.ncbi.nlm.nih.gov/variation/get-rm/highly_homologous_genes/Table_S4_List4_Sanger_Dead_Zone_exon_level.xlsx
 in2csv Table_S1_List1_NGS_Dead_Zone_exon_level.xlsx | csvformat -T > Table_S1_List1_NGS_Dead_Zone_exon_level.tsv
 in2csv  Table_S2_List2_NGS_Problem_List_High_Stringency_exon_level.xlsx | csvformat -T > Table_S2_List2_NGS_Problem_List_High_Stringency_exon_level.tsv
 in2csv Table_S3_List3_NGS_Problem_List_Low_Stringency_exon_level.xlsx | csvformat -T > Table_S3_List3_NGS_Problem_List_Low_Stringency_exon_level.tsv
 in2csv Table_S4_List4_Sanger_Dead_Zone_exon_level.xlsx | csvformat -T > Table_S4_List4_Sanger_Dead_Zone_exon_level.tsv
 
 cd ..