f5183290e802b44254b3b4d381f369cee8390fca markd Wed Apr 29 11:09:57 2020 -0700 moved gencode track doc to their own files for mm10 and hg19 as it is overwhelming the main file diff --git src/hg/makeDb/doc/hg19.txt src/hg/makeDb/doc/hg19.txt index 152ba42..4d9b4e5 100644 --- src/hg/makeDb/doc/hg19.txt +++ src/hg/makeDb/doc/hg19.txt @@ -20219,96 +20219,30 @@ cd /hive/data/genomes/hg19/bed/blat.hg17.2012-11-08 # check it with -debug first to see if it is going to work: time doSameSpeciesLiftOver.pl -buildDir=`pwd` -bigClusterHub=swarm \ -ooc=/hive/data/genomes/hg19/11.ooc \ -debug -dbHost=hgwdev -workhorse=hgwdev hg19 hg17 > do.log 2>&1 # if that is OK, then run it: time doSameSpeciesLiftOver.pl -buildDir=`pwd` -bigClusterHub=swarm \ -ooc=/hive/data/genomes/hg19/11.ooc \ -dbHost=hgwdev -workhorse=hgwdev hg19 hg17 > do.log 2>&1 # real 333m16.756s # verify this file exists: # /gbdb/hg19/liftOver/hg19ToHg17.over.chain.gz # and try out the conversion on genome-test from hg19 to hg17 -############################################################################ -2012-11-11: import and UCSC GENCODE group process of GENCODE V14 (markd) - # Due to UCSC Genome Browser using the NC_001807 mitochondrial genome sequence - # (chrM) and GENCODE annotating the NC_012920 mitochondrial sequence, the - # GENCODE mitochondrial sequences are lifted to UCSC chrM. - - # download files - mkdir -p /hive/data/genomes/hg19/bed/gencodeV14/release - cd /hive/data/genomes/hg19/bed/gencodeV14/ - - # download gencode release - wget -nv -r -np ftp://ftp.sanger.ac.uk/pub/gencode/release_14 - mv ftp.sanger.ac.uk/pub/gencode/release_14 . 
- rm -rf ftp.sanger.ac.uk/ - - # silly sanity check: - cd release_14 - for f in *.gz *.tgz ; do zcat $f >/dev/null ; done - - # untar main distribution - tar -zxf gencode14_GRCh37.tgz - - cd /hive/data/genomes/hg19/bed/gencodeV14 - - # obtain transcription support level analysis from UCSC GENCODE group (markd/rachel) - mkdir -p data - cp /cluster/home/markd/compbio/ccds/branches/transSupV14.1/modules/gencodeTransSupport/exprs/classDev/runs/2012-11-11/results/gencode.v14.transcriptionSupportLevel.{tab,tsv} data/ - - # create Makefile from previous one. This time, we need to get - # if from the ENCODE DCC area. - cp /hive/groups/encode/dcc/data/gencodeV13/Makefile . - # edit to set version: - ver = 14 - - # on code in the CCDS subversion tree: - # svn+ssh://hgwdev.soe.ucsc.edu/projects/compbio/svnroot/hausslerlab/ccds/trunk - # and markd's python library (it will be moved to the hausslerlab - # repository soon) - # may need to update - ccds2/modules/gencode/src/lib/gencode/data/gencodeGenes.py - # to add new biotypes, use this command to verify and update as needed - # be sure to do a make in ccds2/modules/gencode - make checkAttrs - - # build and load tables - (time nice make -j 10) >&build.out& - - # compare tables from previous release to see if number chnaged made - # sense. - make cmpRelease - - ## Copy and update trackDb files from previous release. - ## Change version and use lower priority so it sorts to top of - ## super track page. 
- ## Important to make sure filter attrs.transcriptType matches current set - ## figured out with - select distinct transcriptType from wgEncodeGencodeAttrsV14 order by transcriptType; - cd kent/src/hg/makeDb/trackDb - cp human/hg19/wgEncodeGencodeV13.ra human/hg19/wgEncodeGencodeV14.ra - cp human/hg19/wgEncodeGencodeV13.html human/hg19/wgEncodeGencodeV14.html - # edit these plus human/hg19/trackDb.wgEncode.ra - - ### IMPORTANT: make sure that hgTracks/gencodeTracks.c registers - ### track handler for this version of gencode: - registerTrackHandlerOnFamily("wgEncodeGencodeV14", gencodeGeneMethods); - ######################################################################### # QPCR PRIMERS (DONE - 2012-12-10 - Chin) # The track name is changed to "qPCR Primers" # Reload table with new track_mouse.BED (2013-01-28) # Download mkdir /hive/data/outside/Weizmann/qPcrPrimers cd /hive/data/outside/Weizmann/qPcrPrimers wget http://www.weizmann.ac.il/complex/compphys/software/Amit/primers/human/track_human.BED mkdir -p /hive/data/genomes/hg19/bed/qPcrPrimers cat track_human.BED | grep -v track \ > /hive/data/genomes/hg19/bed/qPcrPrimers/qPcrPrimers_hg19.bed cd /hive/data/genomes/hg19/bed/qPcrPrimers hgLoadBed -bedDetail -tab -renameSqlTable \ -sqlTable=$HOME/kent/src/hg/lib/bedDetail.sql \ hg19 qPcrPrimers qPcrPrimers_hg19.bed @@ -20668,93 +20602,30 @@ # *** NOTE FOR NEXT TIME: gadPos now also looks in a couple Gencode V14 # tables if they exist. If they don't anymore, you should look for # suitable replacements if there are a lot of unfound IDs. 
***) gadPos hg19 stdout | sort -k1,1 -k2n,2n -k4,4 -u > gad.tab #Found in ensCanonical: 7758 #Found in refGene: 10 #Found in kgAlias: 193 #Found in Gencode: 178 #Not found: 94 # use -nobin option to ensure display order is according to genomic position # -- table is very small so performance is fine hgLoadBed -nobin hg19 gad gad.tab #Read 9156 elements of size 4 from gad.tab -############################################################################## -2013-04-08: import of UCSC GENCODE group processing of GENCODE V15 (markd) - # Due to UCSC Genome Browser using the NC_001807 mitochondrial genome sequence - # (chrM) and GENCODE annotating the NC_012920 mitochondrial sequence, the - # GENCODE mitochondrial sequences are lifted to UCSC chrM. - - # download files - mkdir -p /hive/data/genomes/hg19/bed/gencodeV15 - cd /hive/data/genomes/hg19/bed/gencodeV15 - - # download gencode release - wget -nv -r -np ftp://ftp.sanger.ac.uk/pub/gencode/release_15 - mv ftp.sanger.ac.uk/pub/gencode/release_15 . - rm -rf ftp.sanger.ac.uk/ - - # silly sanity check: - for f in release_15/*.gz release_15/*.tgz ; do zcat $f >/dev/null ; done - - # untar main distribution - cd release_15 - tar -zxf gencode15_GRCh37.tgz - - cd /hive/data/genomes/hg19/bed/gencodeV15 - - # obtain transcription support level analysis from UCSC GENCODE group (markd/rachel) - mkdir -p data - cp /cluster/home/markd/compbio/ccds/branches/transSupV15.1/modules/gencodeTransSupport/exprs/classDev/runs/2013-03-28/results/gencode.v15.transcriptionSupportLevel.{tab,tsv} data/ - - # create Makefile from previous one. - cp ../gencodeV14/Makefile . 
- # edit to set version: - ver = 15 - - # on code in the CCDS subversion tree: - # svn+ssh://hgwdev.soe.ucsc.edu/projects/compbio/svnroot/hausslerlab/ccds/trunk - # and markd's python library may need to update - ccds2/modules/gencode/src/lib/gencode/data/gencodeGenes.py - # to add new biotypes, use this command to verify and update as needed - # be sure to do a make in ccds2/modules/gencode - make checkAttrs - - # build and load tables - (time nice make -j 10) >&build.out& - - # compare tables from previous release to see if number chnaged made - # sense. - make cmpRelease - - ## Copy and update trackDb files from previous release. - ## Change version and use lower priority so it sorts to top of - ## super track page. - ## Important to make sure filter attrs.transcriptType matches current set - ## figured out with - select distinct transcriptType from wgEncodeGencodeAttrsV15 order by transcriptType; - cd kent/src/hg/makeDb/trackDb - cp human/hg19/wgEncodeGencodeV14.ra human/hg19/wgEncodeGencodeV15.ra - cp human/hg19/wgEncodeGencodeV14.html human/hg19/wgEncodeGencodeV15.html - # edit these plus human/hg19/trackDb.wgEncode.ra - - ### IMPORTANT: make sure that hgTracks/gencodeTracks.c registers - ### track handler for this version of gencode: - registerTrackHandler("wgEncodeGencodeV15", gencodeGeneMethods); - ######################################################################### # UPDATE COSMIC TRACK - v64 (DONE - 2013-04-17 - Hiram) # take a look at: # ftp://ftp.sanger.ac.uk/pub/CGP/cosmic/data_export/Request_based_exports/ # to see what the new version file name is, then: cd /hive/data/genomes/hg19/bed/cosmic time ~/kent/src/hg/utils/automation/loadCosmic.pl -dryRun hg19 \ ftp://ftp.sanger.ac.uk/pub/CGP/cosmic/data_export/Request_based_exports/UCSCMutExp_v64_260313.csv.gz # New length: 677957 # Old length: 616299 # Percent bed overlap with previous version: 100.00% # Number of deleted IDs: 1 # Number of added IDs: 61659 @@ -21086,104 +20957,30 @@ cd 
/hive/groups/gencode/pseudogenes/retroFinder/hg19.20130228/retro/hg19.5 retroFinder/trunk/src/pipeline/filterMrna.sh DEF retroFinder/trunk/src/pipeline/filterEst.sh DEF retroFinder/trunk/src/pipeline/analyseExpress.sh DEF cd /hive/groups/gencode/pseudogenes/retroFinder/hg19.20130228/mrnaBlastz retroFinder/trunk/src/pipeline/ucscStep6.sh DEF #added ucscRetroAli to trackDb.ra # copied # /hive/groups/gencode/pseudogenes/retroFinder/hg19.20130228/retro/hg19.5/trackDb.retro # entry to kent/src/hg/makeDb/trackDb/human/hg19/trackDb.ra and edited it # to add the version number and date. # Scripts copied ucscRetroAli5.psl, ucscRetroInfo5.bed and ucscRetroCds5.tab # to /hive/data/genomes/hg19/bed/retro/ ############################################################################ -2013-06-02: import of UCSC GENCODE group processing of GENCODE V16 (markd) - # Due to UCSC Genome Browser using the NC_001807 mitochondrial genome sequence - # (chrM) and GENCODE annotating the NC_012920 mitochondrial sequence, the - # GENCODE mitochondrial sequences are lifted to UCSC chrM. - # Thus is the first release to include haplotype regions - - # download files - mkdir -p /hive/data/genomes/hg19/bed/gencodeV16/data - cd /hive/data/genomes/hg19/bed/gencodeV16 - - # download gencode release - cd data - wget -nv -r -np ftp://ftp.sanger.ac.uk/pub/gencode/release_16 - mv ftp.sanger.ac.uk/pub/gencode/release_16 . - rm -rf ftp.sanger.ac.uk/ - - cd /hive/data/genomes/hg19/bed/gencodeV16 - - # obtain transcription support level analysis from UCSC GENCODE group (markd/rachel) - mkdir -p data - cp /cluster/home/markd/compbio/ccds/branches/transSupV16.1/modules/gencodeTransSupport/exprs/classDev/runs/2013-06-16/results/gencode.v16.transcriptionSupportLevel.{tab,tsv} data/ - - # create Makefile from previous one. - cp ../gencodeV15/Makefile . 
- # edit to set version: - ver = 16 - - # on code in the CCDS subversion tree: - # svn+ssh://hgwdev.soe.ucsc.edu/projects/compbio/svnroot/hausslerlab/ccds/trunk - # and markd's python library may need to update - ccds2/modules/gencode/src/lib/gencode/data/gencodeGenes.py - # to add new biotypes, use this command to verify and update as needed - # be sure to do a make in ccds2/modules/gencode - make checkAttrs - - # build and load tables - (time nice make -j 10) >&build.out& - - # compare tables from previous release to see if number chnaged made - # sense. - make cmpRelease - - ## Copy and update trackDb files from previous release. - ## Change version and use lower priority so it sorts to top of - ## super track page. - ## Important to make sure filter attrs.transcriptType matches current set - ## figured out with - select distinct transcriptType from wgEncodeGencodeAttrsV16 order by transcriptType; - cd kent/src/hg/makeDb/trackDb - cp human/hg19/wgEncodeGencodeV15.ra human/hg19/wgEncodeGencodeV16.ra - cp human/hg19/wgEncodeGencodeV15.html human/hg19/wgEncodeGencodeV16.html - - # edit these plus human/hg19/trackDb.wgEncode.ra - # - set priorities in wgEncodeGencodeV16.ra in reverse order with previous - # tracks so newest shows up first - # priority - set to previous version priority minus 0.001 - # searchPriority - set each to previous -0.001 - # - make current track default to pack and hide previous [ONLY if it's going to be pushed] - # superTrack wgEncodeGencodeSuper pack - - ### IMPORTANT: make sure that hgTracks/gencodeTracks.c registers - ### track handler for this version of gencode: - registerTrackHandler("wgEncodeGencodeV16", gencodeGeneMethods); - - # sanity check number of rows in each table - make cmpRelease - - # update all.joiner and validate - # look for the last section `begin Gencode V??' 
in all.joiner - # and copy and update version - # repeat this until happy, editing minCheck as needed - for tbl in $(hgsql -Ne 'show tables like "wgEncodeGencode%V16"' hg19) ; do runJoiner.csh hg19 $tbl ~/kent/src/hg/makeDb/schema/all.joiner noTimes ; done >&check/joiner.out - -############################################################################ # LASTZ Tenrec EchTel2 (DONE - 2013-06-12 - Hiram) screen -S hg19EchTel2 # use screen to manage the long running job mkdir /hive/data/genomes/hg19/bed/lastzEchTel2.2013-06-12 cd /hive/data/genomes/hg19/bed/lastzEchTel2.2013-06-12 cat << '_EOF_' > DEF # Human vs. Tenrec BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz # TARGET: Human Hg19 SEQ1_DIR=/scratch/data/hg19/nib SEQ1_LEN=/scratch/data/hg19/chrom.sizes SEQ1_CHUNK=20000000 SEQ1_LAP=10000 @@ -21582,104 +21379,30 @@ cd /hive/data/genomes/vicPac2/bed/blastz.hg19.swap time nice -n +19 doBlastzChainNet.pl -verbose=2 \ /hive/data/genomes/hg19/bed/lastzVicPac2.2013-06-17/DEF \ -swap -syntenicNet \ -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \ -chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 & # real 206m52.132s cat fb.vicPac2.chainHg19Link.txt # 1428125689 bases of 2078582856 (68.707%) in intersection # set sym link to indicate this is the lastz for this genome: cd /hive/data/genomes/vicPac2/bed ln -s blastz.hg19.swap lastz.hg19 ############################################################################## -2013-06-19: import of UCSC GENCODE group processing of GENCODE V17 (markd) - # Due to UCSC Genome Browser using the NC_001807 mitochondrial genome sequence - # (chrM) and GENCODE annotating the NC_012920 mitochondrial sequence, the - # GENCODE mitochondrial sequences are lifted to UCSC chrM. 
 - # This is the first release to include haplotype regions - - # download files - mkdir -p /hive/data/genomes/hg19/bed/gencodeV17/data - cd /hive/data/genomes/hg19/bed/gencodeV17 - - # download gencode release - cd data - wget -nv -r -np ftp://ftp.sanger.ac.uk/pub/gencode/release_17 - mv ftp.sanger.ac.uk/pub/gencode/release_17 . - rm -rf ftp.sanger.ac.uk/ - - cd /hive/data/genomes/hg19/bed/gencodeV17 - - # obtain transcription support level analysis from UCSC GENCODE group (markd/rachel) - mkdir -p data - cp /cluster/home/markd/compbio/ccds/branches/transSupV17.1/modules/gencodeTransSupport/exprs/classDev/runs/2013-06-19/results/gencode.v17.transcriptionSupportLevel.{tab,tsv} data/ - - # create Makefile from previous one. - cp ../gencodeV16/Makefile . - # edit to set version: - ver = 17 - - # on code in the CCDS subversion tree: - # svn+ssh://hgwdev.soe.ucsc.edu/projects/compbio/svnroot/hausslerlab/ccds/trunk - # and markd's python library may need to update - ccds2/modules/gencode/src/lib/gencode/data/gencodeGenes.py - # to add new biotypes, use this command to verify and update as needed - # be sure to do a make in ccds2/modules/gencode - make checkAttrs - - # build and load tables - (time nice make -j 10) >&build.out& - - # compare tables from previous release to see if number changed makes - # sense. - make cmpRelease - - ## Copy and update trackDb files from previous release. - ## Change version and use lower priority so it sorts to top of - ## super track page. 
- ## Important to make sure filter attrs.transcriptType matches current set - ## figured out with - select distinct transcriptType from wgEncodeGencodeAttrsV17 order by transcriptType; - cd kent/src/hg/makeDb/trackDb - cp human/hg19/wgEncodeGencodeV16.ra human/hg19/wgEncodeGencodeV17.ra - cp human/hg19/wgEncodeGencodeV16.html human/hg19/wgEncodeGencodeV17.html - - # edit these plus human/hg19/trackDb.wgEncode.ra - # - set priorities in wgEncodeGencodeV17.ra in reverse order with previous - # tracks so newest shows up first - # priority - set to previous version priority minus 0.001 - # searchPriority - set each to previous -0.001 - # - make current track default to pack and hide previous [ONLY if it's going to be pushed] - # superTrack wgEncodeGencodeSuper pack - - ### IMPORTANT: make sure that hgTracks/gencodeTracks.c registers - ### track handler for this version of gencode: - registerTrackHandler("wgEncodeGencodeV17", gencodeGeneMethods); - - # sanity check number of rows in each table - make cmpRelease - - # update all.joiner and validate - # look for the last section `begin Gencode V??' 
in all.joiner - # and copy and update version - # repeat this until happy, editing minCheck as needed - for tbl in $(hgsql -Ne 'show tables like "wgEncodeGencode%V17"' hg19) ; do runJoiner.csh hg19 $tbl ~/kent/src/hg/makeDb/schema/all.joiner noTimes ; done >&check/joiner.out - -############################################################################## # LASTZ White-throated sparrow ZonAlb1 (DONE - 2013-06-26 - Hiram) mkdir /hive/data/genomes/hg19/bed/lastzZonAlb1.2013-06-26 cd /hive/data/genomes/hg19/bed/lastzZonAlb1.2013-06-26 cat << '_EOF_' > DEF # human vs white-throated sparrow # distant settings for human-aves alignment BLASTZ_H=2000 BLASTZ_Y=3400 BLASTZ_L=10000 BLASTZ_K=2200 BLASTZ_Q=/scratch/data/blastz/HoxD55.q # TARGET: Human hg19 SEQ1_DIR=/scratch/data/hg19/nib @@ -29208,84 +28931,30 @@ # was modified to use the unmasked hg19/hg38 sequences doSameSpeciesLiftOver.pl -debug -stop=net -buildDir=`pwd` \ -bigClusterHub=ku \ -dbHost=hgwdev -workhorse=hgwdev \ -ooc=/hive/data/genomes/hg19/11.ooc hg19 hg38 # Turns out the chain step procedure will not construct the proper # set of files in debug mode (pslParts.lst) because it can not. # the chain step has to be run for real: doSameSpeciesLiftOver.pl -continue=chain -buildDir=`pwd` \ -bigClusterHub=ku \ -dbHost=hgwdev -workhorse=hgwdev \ -ooc=/hive/data/genomes/hg19/11.ooc hg19 hg38 > chain.log 2>&1 # verify the convert link on the browser is now active from hg19 to hg38 -############################################################################# -2013-12-13: import of UCSC GENCODE group processing of GENCODE V19 (markd) - # download files - mkdir -p /hive/data/genomes/hg19/bed/gencodeV19/data - cd /hive/data/genomes/hg19/bed/gencodeV19 - - # download gencode release - cd data - wget -nv -r -np ftp://ftp.sanger.ac.uk/pub/gencode/release_19 - mv ftp.sanger.ac.uk/pub/gencode/release_19 . - rm -rf ftp.sanger.ac.uk/ - - cd /hive/data/genomes/hg19/bed/gencodeV19 - # create Makefile from previous one. 
- cp ../gencodeV18/Makefile . - - # build and load tables - (time nice make -j 10) >&build.1.out& - - # compare tables from previous release to see if number changed makes - # sense. - make cmpRelease >gencode-cmp.tsv - - ## Copy and update trackDb files from previous release. - ## Change version and use lower priority so it sorts to top of - ## super track page. - ## Important to make sure filter attrs.transcriptType matches current set - ## figured out with - select distinct transcriptType from wgEncodeGencodeAttrsV19 order by transcriptType; - cd kent/src/hg/makeDb/trackDb - cp human/hg19/wgEncodeGencodeV18.ra human/hg19/wgEncodeGencodeV19.ra - cp human/hg19/wgEncodeGencodeV18.html human/hg19/wgEncodeGencodeV19.html - - # edit these plus human/hg19/trackDb.wgEncode.ra - # - set priorities in wgEncodeGencodeV19.ra in reverse order with previous - # tracks so newest shows up first - # priority - set to previous version priority minus 0.001 - # searchPriority - set each to previous -0.001 - # - make current track default to pack and hide previous [ONLY if it's going to be pushed] - # superTrack wgEncodeGencodeSuper pack - # - Update wgEncodeGencodeSuper.html to describe new release and to - # pick up other updates. - - ### IMPORTANT: make sure that hgTracks/gencodeTracks.c registers - ### track handler for this version of gencode: - registerTrackHandler("wgEncodeGencodeV19", gencodeGeneMethods); - - # update all.joiner and validate - # look for the last section `begin Gencode V??' 
in all.joiner - # and copy and update version - # repeat this until happy, editing minCheck as needed - cd /hive/data/genomes/hg19/bed/gencodeV19 - for tbl in $(hgsql -Ne 'show tables like "wgEncodeGencode%V19"' hg19) ; do runJoiner.csh hg19 $tbl ~/kent/src/hg/makeDb/schema/all.joiner noTimes ; done >&check/joiner.out - ############################################################################## # hg19 <-> hg38 difference tracks (DONE - 2013-12-28 - Hiram) # note: the procedure for this is in the hg38.txt file under # this same heading. The end result is the loading of the table: cd /hive/data/genomes/hg19/bed/liftOverHg38 hgLoadBed hg19 hg38ContigDiff hg19.itemRgb.bed ########################################################################## # NEANDERTAL AND DENISOVA METHYLATION (DONE 8/27/14 angie) # RM #13439 mkdir /hive/data/genomes/hg19/bed/neandertalMethylation cd /hive/data/genomes/hg19/bed/neandertalMethylation wget http://carmelab.huji.ac.il/data/Reconstructed_Methylation_Neandertal.zip @@ -31428,72 +31097,30 @@ # The -S10G parameter is only supported in newer sort versions # if it complains, just remove it. It will just take longer. 
time sort -k4,4 -S10G --parallel=20 hg19.bed > hg19.s4.bed # convert the hg19 bed to bigBed cd /hive/data/genomes/hg19/bed/patents/hg19 join -t $'\t' -1 4 -2 1 ../data/hg19.s4.bed ../data/seqAndPatentSummary.tab -o '1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 1.10 1.11 1.12 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 2.10 2.11 2.12' | patSeqFilterBulkAndAnnotate ../data/htPatents.txt patBulk.bed patNonBulk.bed -c ../data/seqCounts.tab bedSort patNonBulk.bed patNonBulk.bed bedSort patBulk.bed patBulk.bed bedToBigBed patNonBulk.bed /cluster/data/genomes/hg19/chrom.sizes patNonBulk.bb -tab -as=../patSummary.as -type=bed12+ bedToBigBed patBulk.bed /cluster/data/genomes/hg19/chrom.sizes patBulk.bb -tab -as=../patSummary.as -type=bed12+ hgBbiDbLink hg19 patBulk /gbdb/hg19/bbi/patBulk.bb hgBbiDbLink hg19 patNonBulk /gbdb/hg19/bbi/patNonBulk.bb ######################################################################### -2016-03-02: import of UCSC GENCODE group processing of GENCODE V24lift37 (markd) - # download files - mkdir -p /hive/data/genomes/hg19/bed/V24lift37 - cd /hive/data/genomes/hg19/bed/V24lift37 - - # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set - # release and transcript support versions - - - # download, build and load tables - (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& - - # compare tables from previous release to see if number changed makes - # sense (results in gencode-cmp.tsv) - make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk cmpRelease - - ## Copy and update trackDb files from previous release. - ## Change version and use lower priority so it sorts to top of - ## super track page. Follow instructiuons in ra file to ensure - ## filters are correct. 
- cd kent/src/hg/makeDb/trackDb - cp human/hg38/wgEncodeGencodeV24.ra human/hg19/wgEncodeGencodeV24lift37.ra - cp human/hg38/wgEncodeGencodeV24.html human/hg19/wgEncodeGencodeV24lift37.html - - # edit these plus human/hg38/trackDb.wgEncode.ra - # - set priorities in wgEncodeGencodeV24.ra tracks so newest shows up first - # priority - set to previous version priority minus 0.001 - # searchPriority - set each to previous minus 0.001 - # - make current track default to pack and hide previous [ONLY if it's going to be pushed] - # superTrack wgEncodeGencodeSuper pack - # - Update wgEncodeGencodeSuper.html to describe new release and to - # pick up other updates. - - # update all.joiner and validate - # look for the last section `begin Gencode V??' in all.joiner - # and copy and update version - # repeat this until happy, editing minCheck as needed - # output in check/joiner.out - cd /hive/data/genomes/hg19/bed/gencodeV24lift37 - make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck - -######################################################################### # killer whale/orcOrc1 Lastz run (WORKING - 2016-06-03 - Hiram) # note: incorrect date on this directory name, should be 2016-06-03 mkdir /hive/data/genomes/hg19/bed/lastzOrcOrc1.2016-07-03 cd /hive/data/genomes/hg19/bed/lastzOrcOrc1.2016-07-03 printf '# human vs killer whale BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz BLASTZ_M=254 # TARGET: Human Hg19 SEQ1_DIR=/scratch/data/hg19/hg19.2bit SEQ1_LEN=/scratch/data/hg19/chrom.sizes SEQ1_CHUNK=10000000 SEQ1_LAP=10000 @@ -31875,58 +31502,30 @@ # *** All done! 
############################################################################## # SNPMASKED SEQUENCE FOR SNP149 (DONE 3/24/17 angie) # Redmine #18330 screen -S mask -t mask ~/kent/src/hg/utils/automation/doDbSnpMaskSequence.pl hg19 149 -debug # *** Steps were performed in /hive/data/genomes/hg19/snp149Mask.2017-03-24 cd /hive/data/genomes/hg19/snp149Mask.2017-03-24 ~/kent/src/hg/utils/automation/doDbSnpMaskSequence.pl hg19 149 \ >>& do.log & tail -f do.log # *** All done! ######################################################################### -2017-04-16: import of UCSC GENCODE group processing of GENCODE V26lift37 (markd) - # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions - - # download, build and load tables - mkdir -p /hive/data/genomes/hg19/bed/gencodeV26lift37 - pushd /hive/data/genomes/hg19/bed/gencodeV26lift37 - (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& - - # compare tables from previous release to see if number changed makes - # sense. Results are in gencode-cmp.tsv - - # generate trackDb and joiner blurb - pushd kent/src/hg/makeDb/trackDb - ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 26lift37 88 'March 2017' - - # edit human/hg19/trackDb.wgEncode.ra to add new .ra file include - make DBS=hg19 - - # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes' - # to describe new release. 
 [ONLY if it's going to be pushed] - - # edit all.joiner to add ~/tmp/gencodeV26lift37.joiner - # verify with: - pushd /hive/data/genomes/hg19/bed/gencodeV26lift37 - make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck - - # commit all -######################################################################### # COSMIC v81 DONE Chris Eisenhart 2017-05-11 # Make a new COSMIC track for hg19 mkdir /hive/data/outside/cosmic/hg19/v81 cd /hive/data/outside/cosmic/hg19/v81 # Get the new data sftp ceisenha@ucsc.edu@sftp-cancer.sanger.ac.uk # Login to SFTP server then run these commands get /files/grch37/cosmic/v81/CosmicMutantExport.tsv.gz # Get the schema from V80 cp ~/kent/src/hg/lib/cosmicNew.as . # Remove the 'NS' fields, search for the \t after to exclude the E'NS'ST transcripts. zcat CosmicMutantExport.tsv.gz | sed 's/NS\t/\t/g' > cosMut.tsv @@ -32315,58 +31914,30 @@ # Use a script to convert to bed format. cosmicToBed cosMut.tsv cosMut.bed # This many lines were skipped, 128966 for not having genomic coordinate # Sort and convert to big bed using the .as file. sort -k1,1 -k2,2n cosMut.bed > sCosMut.bed bedToBigBed -type=bed8+31 -as=cosmicNew.as sCosMut.bed /hive/data/genomes/hg19/chrom.sizes cosMutHg19V82.bb -tab -extraIndex=name,cosmLabel # Link it up so the outside world can see it. cd /gbdb/hg19/cosmic/ ln -s /hive/data/outside/cosmic/hg19/v82/cosMutHg19V82.bb . 
############################################################################## # snpedia (DONE - 2017-09-06 - Max) # see ../hg38/snpedia.txt -############################################################################## -2017-09-14: import of UCSC GENCODE group processing of GENCODE V27lift37 (markd) - # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions - - # download, build and load tables - mkdir -p /hive/data/genomes/hg19/bed/gencodeV27lift37 - pushd /hive/data/genomes/hg19/bed/gencodeV27lift37 - (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& - - # compare tables from previous release to see if number changed makes - # sense. Results are in gencode-cmp.tsv - - # generate trackDb and joiner blurb - pushd kent/src/hg/makeDb/trackDb - ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 27lift37 90 'August 2017' - - # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes' - # to describe new release. 
[ONLY if it's going to be pushed] - - # edit human/hg19/trackDb.wgEncode.ra to add new .ra file include - make DBS=hg19 - - # edit all.joiner to add ~/tmp/gencodeV27lift37.joiner - # verify with: - pushd /hive/data/genomes/hg19/bed/gencodeV27lift37 - make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck - - # commit all ######################################################################### # LASTZ human/hg19 Gorilla/gorGor5 - (DONE - 2017-11-08 - Hiram) mkdir /hive/data/genomes/hg19/bed/lastzGorGor5.2017-11-08 cd /hive/data/genomes/hg19/bed/lastzGorGor5.2017-11-08 printf '# human vs gorilla BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz BLASTZ_T=2 BLASTZ_O=600 BLASTZ_E=150 BLASTZ_M=254 BLASTZ_K=4500 BLASTZ_Y=15000 BLASTZ_Q=/scratch/data/blastz/human_chimp.v2.q # A C G T @@ -32581,69 +32152,30 @@ # HGMD (updated 01/25/18 max) # got hgmd 2017 from Frank Schacherer Frank.Schacherer@qiagen.com and Rupert Yip Rupert.Yip@qiagen.com # see also the file hg38/hgmd.txt year=2019 cd /hive/data/genomes/hg19/bed/hgmd cat /hive/data/outside/hgmd/$year.4-hgmd-public_hg19.tsv | grep -v \# | tawk '{if ($5=="I") {start=$4-1; end=$4+1; col="100,100,100"} else if ($5=="D") {start=$4-1; end=$4; col="170,170,170"} else {start=$4-1; end=$4; col="0,0,0"}; print "chr"$3,start,end,$2":"$1,0,".",start,end,col,$2,$1,$5}' | sed -e 's/M$/substitution/' | sed -e 's/I$/insertion (between the two basepairs, sequence not provided by HGMD)/' | sed -e 's/D$/deletion (endpoint not provided by HGMD)/' | sed -e 's/X$/insertion-deletion (endpoint not provided by HGMD)/' | sed -e 's/R$/regulatory variant/' | sed -e 's/S$/splicing variant/' | sort -k1,1 -k2,2n > hgmd.bed bedToBigBed hgmd.bed /hive/data/genomes/hg19/chrom.sizes hgmd.bb -type=bed9+ -as=hgmd.as -tab ln -s /hive/data/genomes/hg19/bed/hgmd/hgmd.bb /gbdb/hg19/bbi/hgmd.bb hgBbiDbLink hg19 hgmd /gbdb/hg19/bbi/hgmd.bb # Forgot, finally done Oct 24: also updated hgBeacon bigBedToBed /gbdb/hg19/bbi/hgmd.bb /tmp/temp.bed 
/usr/local/apache/cgi-bin/hgBeacon -f hgmd temp.bed hgmd # Forgot, finally done June 26: updated GBIB as qateam scp /gbdb/hg19/bbi/hgmd.bb hgdownload:/usr/local/apache/gbib/prot/ -########################################################################### -2018-03-08: update UCSC GENCODE V19 to include protein id (for VAI) - -cd /hive/data/genomes/hg19/bed/gencodeV19/ -# move tables that need to be rebuilt -mkdir -p prev/pre-proteinId -mv tables/wgEncodeGencodeAttrsV19.tab tables/wgEncodeGencodeTagV19.tab prev/pre-proteinId/ -mv loaded/wgEncodeGencodeAttrsV19.tab.loaded loaded/wgEncodeGencodeTagV19.tab.loaded prev/pre-proteinId/ - - -# V19 didn't have protein ids in GTF, need to get from ensembl database - cd ~/hive/tmp - ftp://ftp.ensembl.org/pub/release-74/mysql/homo_sapiens_core_74_37/homo_sapiens_core_74_37.sql.gz - ftp://ftp.ensembl.org/pub/release-74/mysql/homo_sapiens_core_74_37/transcript.txt.gz - ftp://ftp.ensembl.org/pub/release-74/mysql/homo_sapiens_core_74_37/translation.txt.gz - - # load small subset - hgsql -e 'create database markd_ens' - zcat homo_sapiens_core_74_37.sql.gz |hgsql markd_ens - gunzip trans* - hgsqlimport markd_ens $(pwd)/transcript.txt $(pwd)/translation.txt - # add proteinId column to wgEncodeGencodeAttrsV19 - hgsql - CREATE table markd_ens.trans_prot - SELECT concat(trans.stable_id, ".", CAST(trans.version AS CHAR)) transcriptId, - concat(prot.stable_id, ".", CAST(prot.version AS CHAR)) proteinId - FROM markd_ens.transcript trans, markd_ens.translation prot - WHERE prot.transcript_id = trans.transcript_id; - CREATE INDEX transcriptId ON markd_ens.trans_prot(transcriptId); - UPDATE hg19.wgEncodeGencodeAttrsV19 as attr - INNER JOIN markd_ens.trans_prot as tp ON attr.transcriptId = tp.transcriptId - SET attr.proteinId = tp.proteinId; - SELECT count(*) FROM wgEncodeGencodeAttrsV19 WHERE transcriptClass="coding" AND proteinId = ""; - -> 0 !!! 
got them all - DROP DATABASE markd_ens; - -# 2018-03-19: update search to include protein id -edit kent/src/hg/makeDb/trackDb/human/hg19/wgEncodeGencodeV19.ra - ############################################################################# # LASTZ human/hg19 vs. pig/susScr11 - (DONE - 2018-04-02 - Hiram) mkdir /hive/data/genomes/hg19/bed/lastzSusScr11.2018-04-02 cd /hive/data/genomes/hg19/bed/lastzSusScr11.2018-04-02 printf '# human vs pig BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz BLASTZ_O=400 BLASTZ_E=30 BLASTZ_M=254 # default BLASTZ_Q score matrix: # A C G T # A 91 -114 -31 -123 # C -114 100 -125 -31 # G -31 -125 100 -114 @@ -32701,66 +32233,30 @@ cat fb.susScr11.chainHg19Link.txt # 1386496715 bases of 2472073034 (56.086%) in intersection cat fb.susScr11.chainSynHg19Link.txt # 1353158526 bases of 2472073034 (54.738%) in intersection # testing -trackHub option time ($HOME/kent/src/hg/utils/automation/doRecipBest.pl \ -load -trackHub -workhorse=hgwdev -buildDir=`pwd` susScr11 hg19) \ > rbest.log 2>&1 # real 610m45.624s cat fb.susScr11.chainRBest.Hg19.txt # 1342604720 bases of 2472073034 (54.311%) in intersection ######################################################################### -2018-04-17: import of UCSC GENCODE group processing of GENCODE V28lift37 (markd) - # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions - - # download, build and load tables - mkdir -p /hive/data/genomes/hg19/bed/gencodeV28lift37 - pushd /hive/data/genomes/hg19/bed/gencodeV28lift37 - (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& - - # compare tables from previous release to see if number changed makes - # sense. 
Results are in gencode-cmp.tsv - - # generate trackDb and joiner blurb - pushd ~/kent/src/hg/makeDb/trackDb - ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 28lift37 92 'Apr 2018' - - # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes' - # to describe new release. [ONLY if it's going to be pushed] - - # edit human/hg19/trackDb.wgEncode.ra to add new .ra file include - make DBS=hg19 - - # edit all.joiner to add ~/tmp/gencodeV28lift37.joiner - # verify with: - pushd /hive/data/genomes/hg19/bed/gencodeV28lift37 - make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck - - # commit all - - # 2018-05-01 markd - # fix bug in wgEncodeGencodeEntrezGeneV28lift37 with change in gencodeLoad.mk. Reload the table. - mkdir -p /hive/data/genomes/hg19/bed/gencodeV28lift37 - rm tables/wgEncodeGencodeEntrezGeneV28lift37.tab - (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.3.out& - - -######################################################################### # DBSNP B151 / SNP151 (DONE 4/16/18 angie) # Redmine #21010 mkdir -p /hive/data/outside/dbSNP/151/human_hg19 cd /hive/data/outside/dbSNP/151/human_hg19 # Look at the directory listing of ftp://ftp.ncbi.nih.gov/snp/organisms/ # to find the subdir name to use as orgDir below (human_9606_b151_GRCh37p13 in this case). # Go to that subdirectory, then to database/organism_data/ and look for files # whose names start with b151_* and may or may not end with a suffix that identifies # the build assembly version or some annotation version. If there is a suffix shared # by all b151_* files, add that to config.ra as the "buildAssembly". # Since this build is on GRCh37.p13 like b144 above, use the liftUp.lft file # and ignoreDbSnpContigsFile constructed for b144. 
cat > config.ra < do.log 2>&1 # real 5896m44.643s ############################################################################## -2018-11-09: import of UCSC GENCODE group processing of GENCODE V29lift37 (markd) - # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions - - # download, build and load tables - mkdir -p /hive/data/genomes/hg19/bed/gencodeV29lift37 - pushd /hive/data/genomes/hg19/bed/gencodeV29lift37 - (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& - - # Contained invalid biotype for one transcript (ENST00000649475.1_1), fix for now - cd data/release_29lift37 - mv gencode.v29lift37.annotation.gff3.gz gencode.v29lift37.annotation.orig.gff3.gz - zcat gencode.v29lift37.annotation.orig.gff3.gz | sed -e 's/lincrna/lincRNA/g' | gzip -c> gencode.v29lift37.annotation.gff3.gz - cd ../.. - (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.2.out& - - - # compare tables from previous release to see if number changed makes - # sense. Results are in gencode-cmp.tsv - - # generate trackDb and joiner blurb - pushd ~/kent/src/hg/makeDb/trackDb - ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 29lift37 94 'Oct 2018' - - # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes' - # to describe new release. [ONLY if it's going to be pushed] - - # edit human/hg19/trackDb.wgEncode.ra to add new .ra file include - make DBS=hg19 - - # edit all.joiner to add ~/tmp/gencodeV29lift37.joiner - # verify with: - pushd /hive/data/genomes/hg19/bed/gencodeV29lift37 - make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck - - # commit all - -############################################################################## # LASTZ human/hg19 vs. 
chimp/panTro6 - (DONE - 2018-12-14 - Hiram) mkdir /hive/data/genomes/hg19/bed/lastzPanTro6.2018-12-14 cd /hive/data/genomes/hg19/bed/lastzPanTro6.2018-12-14 printf '# human vs chimp BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz # maximum M allowed with lastz is only 254 BLASTZ_M=254 BLASTZ_O=600 BLASTZ_E=150 BLASTZ_K=4500 BLASTZ_Y=15000 BLASTZ_L=4500 BLASTZ_T=2 BLASTZ_Q=/hive/data/staging/data/blastz/human_chimp.v2.q @@ -33340,59 +32799,30 @@ cat process/hg19.curated.gp | fgrep -f hgmdTranscripts.txt - > hgmd.curated.gp hgLoadGenePred -genePredExt hg19 ncbiRefSeqHgmd hgmd.curated.gp $ wc -l hgmd.curated.gp 7965 hgmd.curated.gp ############################################################################# # genomenon mastermind track, Max, Feb 2019 cd /hive/data/genomes/hg19/bed/mastermind/ wget 'https://mastermind.genomenon.com/cvr/download?format=csv' -O - > mastermind.2018.11.26.csv.gz unzip mastermind.2018.11.26.csv.zip mv mastermind_cited_variants_reference-2018.11.26-csv/ 2018-11-26 hgsql hg19 -NB -e 'select alias, chrom from chromAlias where source = "refseq";' > chromAlias.tab python ~/kent/src/hg/makeDb/mastermind/mastermindToBed.py 2018-11-26/mastermind_cited_variants_reference-2018.11.26.csv bedSort mastermind.bed mastermind.bed bedToBigBed -type=bed9+ -as=~/kent/src/hg/makeDb/mastermind/mastermind.as -tab mastermind.bed /hive/data/genomes/hg19/chrom.sizes mastermind.bb ln -s `pwd`/mastermind.bb /gbdb/hg19/bbi/mastermind.bb -############################################################################# -2019-04-08: import of UCSC GENCODE group processing of GENCODE V30lift37 (markd) - # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions - - # download, build and load tables - mkdir -p /hive/data/genomes/hg19/bed/gencodeV30lift37 - pushd /hive/data/genomes/hg19/bed/gencodeV30lift37 - (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& - - # compare tables from previous release 
to see if number changed makes - # sense. Results are in gencode-cmp.tsv - - # generate trackDb and joiner blurb - pushd ~/kent/src/hg/makeDb/trackDb - ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 30lift37 96 'Apr 2019' - - # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes' - # to describe new release. [ONLY if it's going to be pushed] - - # edit human/hg19/trackDb.wgEncode.ra to add new .ra file include - make DBS=hg19 - - # edit all.joiner to add ~/tmp/gencodeV30lift37.joiner - # verify with: - pushd /hive/data/genomes/hg19/bed/gencodeV30lift37 - make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck - - # commit all - ############################################################################## # DGV GOLD (DATABASE OF GENOMIC VARIANTS GOLD STANDARD) (DONE 5/06/19 ChrisL) # Redmine #23371 ############################################################################## TODAY=`date +%y%m%d` mkdir -p /hive/data/genomes/hg19/bed/dgv/$TODAY cd /hive/data/genomes/hg19/bed/dgv/$TODAY wget http://dgv.tcag.ca/dgv/docs/DGV.GS.March2016.50percent.GainLossSep.Final.hg19.gff3 # GFF3 with the 9th field full of extra info that we need to recreate the blocks # as seen at the DGV website. See note-6 in the redmine (23371) for an example # of the different cnv representations (1, 2, or 3 blocks). 
# what sub-fields are in the 9th field: head -1 DGV.GS.March2016.50percent.GainLossSep.Final.hg19.gff3 | cut -f9 | tr ';' '\n' | cut -d'=' -f1 @@ -33478,94 +32908,30 @@ # itemCount: 38,185 # primaryDataSize: 30,841,362 # primaryIndexSize: 6,892 # zoomLevels: 8 # chromCount: 24 # basesCovered: 580,564,080 # meanDepth (of bases covered): 3.668451 # minDepth: 1.000000 # maxDepth: 81.000000 # std of depth: 5.825349 # link into gbdb mkdir -p /gbdb/hg19/dgv ln -s `pwd`/dgvGold.bb /gbdb/hg19/dgv/ -############################################################################# -2019-07-05: import of UCSC GENCODE group processing of GENCODE V31lift37 (markd) - # Replaced import of pre-release - # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions - - # download, build and load tables - mkdir -p /hive/data/genomes/hg19/bed/gencodeV31lift37 - pushd /hive/data/genomes/hg19/bed/gencodeV31lift37 - (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& - - # compare tables from previous release to see if number changed makes - # sense. Results are in gencode-cmp.tsv - - # generate trackDb and joiner blurb - pushd ~/kent/src/hg/makeDb/trackDb - ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 31lift37 97 'June 2019' - - # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes' - # to describe new release. 
[ONLY if it's going to be pushed] - - # edit human/hg19/trackDb.wgEncode.ra to add new .ra file include - make DBS=hg19 - - # edit all.joiner to add ~/tmp/gencodeV31lift37.joiner - # verify with: - pushd /hive/data/genomes/hg19/bed/gencodeV31lift37 - make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck - - # commit all - -############################################################################## -2019-09-03: import of UCSC GENCODE group processing of GENCODE V32lift37 (markd) - # PRE-RELEASE - # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions - - # download, build and load tables - mkdir -p /hive/data/genomes/hg19/bed/gencodeV32lift37 - pushd /hive/data/genomes/hg19/bed/gencodeV32lift37 - (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& - - # compare tables from previous release to see if number changed makes - # sense. Results are in gencode-cmp.tsv - - # generate trackDb and joiner blurb - pushd ~/kent/src/hg/makeDb/trackDb - ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 32lift37 98 'Sept 2019' - - # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes' - # to describe new release. 
[ONLY if it's going to be pushed] - - # edit human/hg19/trackDb.wgEncode.ra to add new .ra file include - make DBS=hg19 - - # edit all.joiner to add ~/tmp/gencodeV32lift37.joiner - # verify with: - pushd /hive/data/genomes/hg19/bed/gencodeV32lift37 - make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck - - # commit all - -2019-08-30: repeat above for final release - mv /hive/data/genomes/hg19/bed/gencodeV32lift37 /hive/data/genomes/hg19/bed/gencodeV32lift37pre - # redo do above imports - ############################################################################## # LASTZ human/hg19 Tree chimp/rheMac10 - (DONE - 2019-07-09 - Hiram) mkdir /hive/data/genomes/hg19/bed/lastzRheMac10.2019-07-09 cd /hive/data/genomes/hg19/bed/lastzRheMac10.2019-07-09 printf '# human vs macaca mulatta BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz # maximum M allowed with lastz is only 254 BLASTZ_M=254 BLASTZ_Q=/hive/data/staging/data/blastz/human_chimp.v2.q BLASTZ_O=600 BLASTZ_E=150 # other parameters from panTro2 vs hg18 lastz on advice from Webb BLASTZ_K=4500 BLASTZ_Y=15000 @@ -33804,59 +33170,30 @@ # int[blockCount] blockSizes; "Size of each exon" # int[blockCount] blockStarts; "0-based start position of each exon" # string _mouseOver; "Mouseover label" # string geneName; "Associated Gene symbol" # string synonymous; "Synonymous metrics" # string missense; "Missense metrics" # string pLoF; "Predicted Loss of Function metrics # ) sizes=/hive/data/genomes/hg19/chrom.sizes bedToBigBed -type=bed12+5 -as=pliMetrics.as -tab -extraIndex=name,geneName pliByGene.bed $sizes pliByGene.bb bedToBigBed -type=bed12+5 -as=pliMetrics.as -tab -extraIndex=name,geneName pliByTranscript.bed $sizes pliByTranscript.bb cd /gbdb/hg19/gnomAD/pLI/ ln -s /hive/data/outside/gnomAD.2/constraint/pliByGene.bb ln -s /hive/data/outside/gnomAD.2/constraint/pliByTranscript.bb -######################################################################### -2019-11-17: import of UCSC GENCODE group 
processing of GENCODE V33lift37 PRE-RELEASE (markd) - # - # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions - - # download, build and load tables - mkdir -p /hive/data/genomes/hg19/bed/gencodeV33lift37 - pushd /hive/data/genomes/hg19/bed/gencodeV33lift37 - (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& - - # compare tables from previous release to see if number changed makes - # sense. Results are in gencode-cmp.tsv - - # generate trackDb and joiner blurb - pushd ~/kent/src/hg/makeDb/trackDb - ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 33lift37 99 'Nov 2019' - - # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes' - # to describe new release. [ONLY if it's going to be pushed] - - # edit human/hg19/trackDb.wgEncode.ra to add new .ra file include - make DBS=hg19 - - # edit all.joiner to add ~/tmp/gencodeV33lift37.joiner - # verify with: - pushd /hive/data/genomes/hg19/bed/gencodeV33lift37 - make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck - - # commit all ############################################################################## 2020-01-13: Add size filter to dgvMerged and dgvSupporting track (ChrisL) cd /hive/data/genomes/hg19/bed/dgv/160810 zcat dgvMerged.bed.gz | tawk '{print $0, $3-$2}' > dgvMergedWithSize.bed zcat dgvSupporting.bed.gz | tawk '{print $0, $3-$2}' > dgvSupportingWithSize.bed cat dgvPlusSize.as # table dgvPlus # "Database of Genomic Variants incorporating dbVar, July 2013 and later" # ( # string chrom; "Reference sequence chromosome or scaffold" # uint chromStart; "Start position in chromosome" # uint chromEnd; "End position in chromosome" # string name; "ID of merged variant or supporting variant" # uint score; "Score from 0-1000 (placeholder for BED 9+ format)" # char[1] strand; "+ or - (placeholder for BED 9+ format)" @@ -33964,74 +33301,30 @@ # pass2 - checking and writing primary data (6507 records, 17 
fields): 134 millis # only a few genes: cut -f13 missenseConstrained.bed | sort | uniq | wc -l # 2700 ln -s /gbdb/hg19/gnomAD/missenseConstrained.bb missenseConstrained.bb ############################################################################## # adding RefSeq Select to NCBIRefSeq, Max, Feb 17 2020 cd /hive/data/genomes/hg19/bed/ncbiRefSeq.p13.2019-11-21 zcat download/*_genomic.gff.gz | egrep 'tag=(RefSeq|MANE) Select' | cut -f9- | tr ';' '\n' | grep Name= | grep -v NP_ | cut -d= -f2 | sort -u > refseqSelectTranscripts.txt cat process/hg19.curated.gp | fgrep -f refseqSelectTranscripts.txt - > refseqSelect.curated.gp hgLoadGenePred -genePredExt hg19 ncbiRefSeqSelect refseqSelect.curated.gp wc -l refseqSelect.curated.gp 21436 refseqSelect.curated.gp ############################################################################## -2020-01-16: import of UCSC GENCODE group processing of GENCODE V33lift37 (markd) - # deprecate pre-release - mv /hive/data/genomes/hg19/bed/gencodeV33lift37 /hive/data/genomes/hg19/bed/gencodeV33lift37Pre - # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions - - # download, build and load tables - mkdir -p /hive/data/genomes/hg19/bed/gencodeV33lift37 - pushd /hive/data/genomes/hg19/bed/gencodeV33lift37 - (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& - - # compare tables from previous release to see if number changed makes - # sense. Results are in gencode-cmp.tsv - - # generate trackDb and joiner blurb - pushd ~/kent/src/hg/makeDb/trackDb - ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 33lift37 99 'Jan 2020' - - # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes' - # to describe new release. 
[ONLY if it's going to be pushed] - - # edit human/hg19/trackDb.gencode.ra to add new .ra file include - make DBS=hg19 - - # edit all.joiner to add ~/tmp/gencodeV33lift37.joiner - # verify with: - pushd /hive/data/genomes/hg19/bed/gencodeV33lift37 - make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck - - # commit all -############################################################################## -2020-01-21: manually update GENCODE V19 to include chrMT (markd) - - cd /hive/data/genomes/hg19/bed/gencodeV19/chrMT - # convert chrM annotation in gff3 to chrMT - zcat ../data/release_19/gencode.v19.annotation.gff3.gz | tawk 'NR==1{print;next} $1=="chrM"{$1="chrMT"; print}' >chrMT.gff3 - # comprehensive and basic are the same on chrM and there are no pseudogenes, so this is - # easy - cat ../tables/wgEncodeGencodeCompV19.gp chrMT.gp >wgEncodeGencodeCompV19.gp - cat ../tables/wgEncodeGencodeBasicV19.gp chrMT.gp >wgEncodeGencodeBasicV19.gp - - hgLoadGenePred -genePredExt hg19 wgEncodeGencodeCompV19 wgEncodeGencodeCompV19.gp - hgLoadGenePred -genePredExt hg19 wgEncodeGencodeBasicV19 wgEncodeGencodeBasicV19.gp - -############################################################################## # doseSensitivity (WORKING - 2020-02-06, Ana, Hiram) mkdir /hive/data/genomes/hg19/bed/doseSensitivity cd /hive/data/genomes/hg19/bed/doseSensitivity ftp original files from ClinGen: ftp://ftp.ncbi.nlm.nih.gov/pub/dbVar/clingen/ wget --timestamping \ ftp://ftp.ncbi.nlm.nih.gov/pub/dbVar/clingen/ClinGen_haploinsufficiency_gene_GRCh37.bed wget --timestamping \ ftp://ftp.ncbi.nlm.nih.gov/pub/dbVar/clingen/ClinGen_triplosensitivity_gene_GRCh37.bed @@ -34246,58 +33539,30 @@ 109025 INS x 748 INV x 1108 MCNV x 1 SVTYPE # add colors based on gnomad website and get in to proper bed9+ for f in *.bed4Plus; do out=${f/.bed4Plus/}; bedClip -truncate $f $chromSizes stdout | ./gnomadSvToUcsc.awk | sort -k1,1 -k2,2n > $out.bed9Plus; done chromSizes=/hive/data/genomes/hg19/chrom.sizes for 
f in *.bed9Plus; do out=${f/.bed9Plus/}; bedToBigBed -tab -type=bed9+19 -as=gnomadSv.as -extraIndex=name $f $chromSizes $out.bb; done mkdir -p /gbdb/hg19/gnomAD/structuralVariants/ cd /gbdb/hg19/gnomAD/structuralVariants/ cp -s /hive/data/outside/gnomAD.2/structuralVariants/*.bb . ############################################################################## -2020-03-11: import of UCSC GENCODE group processing of GENCODE V34lift37 pre-release (markd) - # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions - - # download, build and load tables - mkdir -p /hive/data/genomes/hg19/bed/gencodeV34lift37Pre - pushd /hive/data/genomes/hg19/bed/gencodeV34lift37Pre - (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out& - - # compare tables from previous release to see if number changed makes - # sense. Results are in gencode-cmp.tsv - - # generate trackDb and joiner blurb - pushd ~/kent/src/hg/makeDb/trackDb - ../../makeDb/outside/gencode/gencodeGenerateTrackDbs hg19 34lift37 100 'March 2020' - - # Update human/hg19/wgEncodeGencodeSuper.html and update 'Release Notes' - # to describe new release. 
[ONLY if it's going to be pushed] - - # edit human/hg19/trackDb.gencode.ra to add new .ra file include - make DBS=hg19 - - # edit all.joiner to add ~/tmp/gencodeV34lift37.joiner - # verify with: - pushd /hive/data/genomes/hg19/bed/gencodeV34lift37Pre - make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck - - # commit all -############################################################################## # NCBI regions that are problematic for sequencing, Mon Nov 18 05:06:17 PST 2019, Max mkdir /hive/data/genomes/hg19/bed/specialRegions/orig cd /hive/data/genomes/hg19/bed/specialRegions/orig # download and convert to Excel wget ftp://ftp.ncbi.nlm.nih.gov/variation/get-rm/highly_homologous_genes/Table_S1_List1_NGS_Dead_Zone_exon_level.xlsx wget ftp://ftp.ncbi.nlm.nih.gov/variation/get-rm/highly_homologous_genes/Table_S2_List2_NGS_Problem_List_High_Stringency_exon_level.xlsx wget ftp://ftp.ncbi.nlm.nih.gov/variation/get-rm/highly_homologous_genes/Table_S3_List3_NGS_Problem_List_Low_Stringency_exon_level.xlsx wget ftp://ftp.ncbi.nlm.nih.gov/variation/get-rm/highly_homologous_genes/Table_S4_List4_Sanger_Dead_Zone_exon_level.xlsx in2csv Table_S1_List1_NGS_Dead_Zone_exon_level.xlsx | csvformat -T > Table_S1_List1_NGS_Dead_Zone_exon_level.tsv in2csv Table_S2_List2_NGS_Problem_List_High_Stringency_exon_level.xlsx | csvformat -T > Table_S2_List2_NGS_Problem_List_High_Stringency_exon_level.tsv in2csv Table_S3_List3_NGS_Problem_List_Low_Stringency_exon_level.xlsx | csvformat -T > Table_S3_List3_NGS_Problem_List_Low_Stringency_exon_level.tsv in2csv Table_S4_List4_Sanger_Dead_Zone_exon_level.xlsx | csvformat -T > Table_S4_List4_Sanger_Dead_Zone_exon_level.tsv cd ..