src/hg/makeDb/doc/hg19.txt be4311c07e14feb728abc6425ee606ffaa611a58

be4311c07e14feb728abc6425ee606ffaa611a58
markd
  Fri Jan 22 06:46:58 2021 -0800
merge with master

diff --git src/hg/makeDb/doc/hg19.txt src/hg/makeDb/doc/hg19.txt
index 36fa16a..4b9bc09 100644
--- src/hg/makeDb/doc/hg19.txt
+++ src/hg/makeDb/doc/hg19.txt
@@ -32138,43 +32138,59 @@
 chr21   45650008        45650008        rs145424134     8       ENSG00000160223.12      ICOSLG  -10841  -0.070  -       0       1       esophagusMuscular,      -0.070, 5.106,  0.008,
 
 # refine generated trackDb.gtexEqtl.ra file and install in makeDb/trackDb/human/hg19
 
 ########
 # Load 44 per-tissue tracks: gtexEqtlTissue<tissueName>
 csh $bin/getxEqtlLoadTissues.csh UCSC_output >&! loadTissuesV2.log &
 
 #NOTE: V2 was a second release that followed immediately after first release (which was timed to coincide
 #  with Nature paper pub.  V2 revised schema (added ensembl gene ID, additional summary fields)
 # and color conventions.
 
 ###########################################################################
 # HGMD (updated 12/10/19 max)
 # HGMD (updated 01/25/18 max)
-# got hgmd 2017 from Frank Schacherer Frank.Schacherer@qiagen.com and Rupert Yip Rupert.Yip@qiagen.com
+# HGMD (updated 12/12/20 max)
+# got hgmd from Frank Schacherer Frank.Schacherer@qiagen.com and Rupert Yip Rupert.Yip@qiagen.com
 # see also the file hg38/hgmd.txt
-year=2019
+year=2020
 cd /hive/data/genomes/hg19/bed/hgmd
 cat /hive/data/outside/hgmd/$year.4-hgmd-public_hg19.tsv | grep -v \# | tawk '{if ($5=="I") {start=$4-1; end=$4+1; col="100,100,100"} else if ($5=="D") {start=$4-1; end=$4; col="170,170,170"} else {start=$4-1; end=$4; col="0,0,0"}; print "chr"$3,start,end,$2":"$1,0,".",start,end,col,$2,$1,$5}' | sed -e 's/M$/substitution/' | sed -e 's/I$/insertion (between the two basepairs, sequence not provided by HGMD)/' | sed -e 's/D$/deletion (endpoint not provided by HGMD)/' | sed -e 's/X$/insertion-deletion (endpoint not provided by HGMD)/' | sed -e 's/R$/regulatory variant/' | sed -e 's/S$/splicing variant/' | sort -k1,1 -k2,2n > hgmd.bed
 bedToBigBed hgmd.bed /hive/data/genomes/hg19/chrom.sizes hgmd.bb -type=bed9+ -as=hgmd.as -tab
 ln -s /hive/data/genomes/hg19/bed/hgmd/hgmd.bb /gbdb/hg19/bbi/hgmd.bb
 hgBbiDbLink hg19 hgmd /gbdb/hg19/bbi/hgmd.bb
 # Forgot, finally done Oct 24: also updated hgBeacon
 bigBedToBed /gbdb/hg19/bbi/hgmd.bb /tmp/temp.bed
-/usr/local/apache/cgi-bin/hgBeacon -f hgmd temp.bed hgmd
+python2 /usr/local/apache/cgi-bin/hgBeacon -f hgmd /tmp/temp.bed hgmd
 # Forgot, finally done June 26: updated GBIB as qateam
 scp /gbdb/hg19/bbi/hgmd.bb hgdownload:/usr/local/apache/gbib/prot/
+# next restrict RefSeq down to HGMD subset 
+
+# addition of HGMD-restricted subset, Max, Jan 29 2019, updated Dec 10 2019
+cd /hive/data/genomes/hg19/bed/ncbiRefSeq.p13.2020-10-27/
+year=2019
+# change in 2019: ignore the version numbers, otherwise only 1815 transcripts left, big update by HGMD in 2019
+# adding "." so NM_123 doesn't match NM_123123
+cat /hive/data/outside/hgmd/$year.4-hgmd-public_hg38.tsv | cut -f7 | cut -d. -f1 | sort -u | awk '{print $1"."}' > hgmdTranscripts.txt
+cat process/hg19.curated.gp.gz | fgrep -f hgmdTranscripts.txt - > hgmd.curated.gp
+hgLoadGenePred -genePredExt hg19 ncbiRefSeqHgmd hgmd.curated.gp
+$ wc -l hgmd.curated.gp 
+7965 hgmd.curated.gp in 2019
+8971 hgmd.curated.gp in 2020
+
+# now continue the process at ../hg38/hgmd.txt
 
 #############################################################################
 # LASTZ human/hg19 vs. pig/susScr11 - (DONE - 2018-04-02 - Hiram)
     mkdir /hive/data/genomes/hg19/bed/lastzSusScr11.2018-04-02
     cd /hive/data/genomes/hg19/bed/lastzSusScr11.2018-04-02
 
     printf '# human vs pig
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 BLASTZ_O=400
 BLASTZ_E=30
 BLASTZ_M=254
 # default BLASTZ_Q score matrix:
 #       A     C     G     T
 # A    91  -114   -31  -123
 # C  -114   100  -125   -31
@@ -32776,42 +32792,30 @@
     #  real    62m32.858s
 
     cat fb.ponAbe3.chainHg19Link.txt
     # 2690870339 bases of 3043444524 (88.415%) in intersection
 
     cat fb.ponAbe3.chainSynHg19Link.txt
     # 2675805099 bases of 3043444524 (87.920%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev \
 	-buildDir=`pwd` ponAbe3 hg19) > rbest.log 2>&1
     # real    76m24.498s
 
     cat fb.ponAbe3.chainRBest.Hg19.txt
     # 2641865423 bases of 3043444524 (86.805%) in intersection
 
-##############################################################################
-# addition of HGMD-restricted subset, Max, Jan 29 2019, updated Dec 10 2019
-cd /hive/data/genomes/hg19/bed/ncbiRefSeq.p13.2019-11-21/
-year=2019
-#cat /hive/data/outside/hgmd/$year.4-hgmd-public_hg38.tsv | cut -f7 | sort -u > hgmdTranscripts.txt
-# change in 2019: ignore the version numbers, otherwise only 1815 transcripts left, big update by HGMD in 2019
-# adding "." so NM_123 doesn't match NM_123123
-cat /hive/data/outside/hgmd/$year.4-hgmd-public_hg38.tsv | cut -f7 | cut -d. -f1 | sort -u | awk '{print $1"."}' > hgmdTranscripts.txt
-cat process/hg19.curated.gp | fgrep -f hgmdTranscripts.txt - > hgmd.curated.gp
-hgLoadGenePred -genePredExt hg19 ncbiRefSeqHgmd hgmd.curated.gp
-$ wc -l hgmd.curated.gp 
-7965 hgmd.curated.gp
 #############################################################################
 # genomenom mastermind track, Max, Feb 2019
 cd /hive/data/genomes/hg19/bed/mastermind/
 wget 'https://mastermind.genomenon.com/cvr/download?format=csv' -O - > mastermind.2018.11.26.csv.gz
 unzip mastermind.2018.11.26.csv.zip
 mv mastermind_cited_variants_reference-2018.11.26-csv/ 2018-11-26
 hgsql hg19 -NB -e 'select alias, chrom from chromAlias where source = "refseq";' > chromAlias.tab
 python ~/kent/src/hg/makeDb/mastermind/mastermindToBed.py 2018-11-26/mastermind_cited_variants_reference-2018.11.26.csv
 bedSort mastermind.bed mastermind.bed
 bedToBigBed -type=bed9+ -as=~/kent/src/hg/makeDb/mastermind/mastermind.as -tab mastermind.bed /hive/data/genomes/hg19/chrom.sizes  mastermind.bb
 ln -s `pwd`/mastermind.bb /gbdb/hg19/bbi/mastermind.bb
 ##############################################################################
 # DGV GOLD (DATABASE OF GENOMIC VARIANTS GOLD STANDARD) (DONE 5/06/19 ChrisL)
 # Redmine #23371
 ##############################################################################
@@ -33697,38 +33701,40 @@
             $buildDir/$db.rna.fa \
             $pre)
 
 # pslMismatchGapToBed: NM_001365372.1 gapIx 9 shifted right 74 bases, but next block size is only 38; report to NCBI
 # pslMismatchGapToBed: NM_001288811.1 gapIx 1 shifted left 6 bases, but previous block size is only 5; report to NCBI
 
 #  real    0m21.265s
 
  bedToBigBed -type=bed9+ -tab -as=$HOME/kent/src/hg/lib/txAliDiff.as $pre.bed \
         /hive/data/genomes/$db/chrom.sizes $pre.bb
 # pass1 - making usageList (180 chroms): 77 millis
 # pass2 - checking and writing primary data (27362 records, 20 fields): 234 millis
     ln -sf `pwd`/$pre.bb /gbdb/hg19/ncbiRefSeq/$pre.bb
 
 #############################################################################
-# clinvarSubLolly track  IN PROGRESS BRANEY 10/17/2020
+# clinvarSubLolly track  DONE BRANEY 12/14/2020
 mkdir /cluster/data/hg19/bed/clinvarSubLolly
 cd /cluster/data/hg19/bed/clinvarSubLolly
 bigBedToBed /gbdb/hg19/bbi/clinvar/clinvarMain.bb stdout | tawk '{print $40, $1,$2,$2+1,$4}' | sort  -S 40g > sort.main.bed
 hgsql hg19 -Ne "select varId,clinSign,scv from clinvarSub" | sort  -S 40g  > clinvarSubSub.txt
 
 join -t $'\t' sort.main.bed clinvarSubSub.txt | tawk '{print $2,$3,$4,$5,$6,$1, $7}' | sort -S 40g -k1,1 -k2,2n -k5,5 | tawk -f makeFranklin   | tawk -f assignColors > tmp1
-tawk '{print $1":"$2 + 1"-"$3"←Variants (submissions):"$11}' tmp1 > tmp2
+# add the line break after v409
+#tawk '{print $1":"$2 + 1"-"$3" <br>Variants (submissions):"$11}' tmp1 > tmp2
+tawk '{print $1":"$2 + 1"-"$3" Variants (submissions):"$11}' tmp1 > tmp2
 paste tmp1 tmp2 > bigBedInput.bed
 
 bedToBigBed -as=$HOME/kent/src/hg/lib/clinvarSubLolly.as -type=bed9+5 -tab bigBedInput.bed /cluster/data/hg19/chrom.sizes clinvarSubLolly.bb
 mkdir -p /gbdb/hg19/clinvarSubLolly
 ln -s `pwd`/clinvarSubLolly.bb /gbdb/hg19/clinvarSubLolly/clinvarSubLolly.bb
 
 bigBedToBed /gbdb/hg19/bbi/clinvar/clinvarMain.bb stdout | tawk '{print $40, $1,$2,$2+1,$4,$13,$15,$18,$19}' | sort  -S 40g > sort.main.bed
 hgsql hg19 -Ne "select * from clinvarSub" | sort  -S 40g  > clinvarSubSub.txt
 join -t $'\t' sort.main.bed clinvarSubSub.txt | tawk '{print $2,$3,$4,$5,0,"+",0,0,"0,0,0",$6,$20,$8, $9,$1,$10,$7,$11,$12,$13,$14,$15,$16,$17,$18,$19,$21}' | sort -S 40g -k1,1 -k2,2n | tawk -f assignScore > bigBedInput.bed
 
 bedToBigBed -as=clinvarSubBB.as -type=bed9+11 -tab bigBedInput.bed /cluster/data/hg19/chrom.sizes clinvarSub.bb
 ln -s `pwd`/clinvarSub.bb /gbdb/hg19/clinvarSubLolly/clinvarSub.bb
 
 
 #############################################################################
@@ -33842,15 +33848,499 @@
 # sys 12m0.858s
 cd ..
 time cat hg19/genomes/*.bed | ./gnomadVcfBedToBigBed stdin stdout | sort -k1,1 -k2,2n > gnomad.v2.1.1.genomes.bed
 # real    199m48.619s
 # user    186m49.769s
 # sys 29m12.841s
 
 # now South Asian variants in the genomes file, change type:
 time bedToBigBed -type=bed9+47 -tab -as=genomes.as gnomad.v2.1.1.genomes.bed /hive/data/genomes/hg19/chrom.sizes genomes.bb
 # pass1 - making usageList (23 chroms): 165336 millis
 # pass2 - checking and writing primary data (253556152 records, 55 fields): 4909106 millis
 #
 # real    89m3.165s
 # user    86m41.554s
 # sys 2m15.722s
+
+#############################################################################
+# LASTZ Cow bosTau9 (DONE - 2020-12-07 - Hiram)
+    mkdir /hive/data/genomes/hg19/bed/lastzBosTau9.2020-12-07
+    cd /hive/data/genomes/hg19/bed/lastzBosTau9.2020-12-07
+
+    printf '# human vs Cow
+BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz
+BLASTZ_T=2
+BLASTZ_O=400
+BLASTZ_E=30
+BLASTZ_M=254
+# default BLASTZ_Q score matrix:
+#       A     C     G     T
+# A    91  -114   -31  -123
+# C  -114   100  -125   -31
+# G   -31  -125   100  -114
+# T  -123   -31  -114    91
+
+# TARGET: human hg19
+SEQ1_DIR=/hive/data/genomes/hg19/hg19.2bit
+SEQ1_LEN=/hive/data/genomes/hg19/chrom.sizes
+SEQ1_CHUNK=20000000
+SEQ1_LAP=10000
+
+# QUERY: Cow bosTau9
+SEQ2_DIR=/hive/data/genomes/bosTau9/bosTau9.2bit
+SEQ2_LEN=/hive/data/genomes/bosTau9/chrom.sizes
+SEQ2_CHUNK=20000000
+SEQ2_LIMIT=10
+SEQ2_LAP=0
+
+BASE=/hive/data/genomes/hg19/bed/lastzBosTau9.2020-12-07
+TMPDIR=/dev/shm
+' > DEF
+
+    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
+        -chainMinScore=3000 -chainLinearGap=medium \
+          -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+            -syntenicNet) > do.log 2>&1
+    # real    239m35.175s
+
+    cat fb.hg19.chainBosTau9Link.txt
+    # 1407432462 bases of 2991710746 (47.044%) in intersection
+
+    cat fb.hg19.chainSynBosTau9Link.txt
+    # 1354159575 bases of 2991710746 (45.264%) in intersection
+
+    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` hg19 bosTau9) > rbest.log 2>&1 &
+    #	real    274m55.811s
+
+    cat fb.hg19.chainRBest.BosTau9.txt
+    #	1290531802 bases of 2991710746 (43.137%) in intersection
+
+    #   running the swap
+    mkdir /hive/data/genomes/bosTau9/bed/blastz.hg19.swap
+    cd /hive/data/genomes/bosTau9/bed/blastz.hg19.swap
+    time (doBlastzChainNet.pl -verbose=2 \
+        /hive/data/genomes/hg19/bed/lastzBosTau9.2020-12-07/DEF \
+        -swap  -syntenicNet -workhorse=hgwdev \
+	-smallClusterHub=hgwdev -bigClusterHub=ku \
+        -chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
+    #   real    72m28.826s
+
+    cat fb.bosTau9.chainHg19Link.txt
+    #   1342159887 bases of 2715853792 (49.419%) in intersection
+    cat fb.bosTau9.chainSynHg19Link.txt
+    #	1305558878 bases of 2715853792 (48.072%) in intersection
+
+    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` bosTau9 hg19) > rbest.log 2>&1 &
+XXX - running - Tue Dec  8 09:13:34 PST 2020
+    # real    272m15.176s
+
+    cat fb.bosTau9.chainRBest.Hg19.txt
+    # 1290810412 bases of 2715853792 (47.529%) in intersection
+
+#############################################################################
+# Exome Probesets composite track
+# Tue Jan  5 02:25:06 PST 2021 Made by Ana, Tiana, Pranav, Beagan, reviewed and committed by Max
+# Download data for hg19:
+cd /hive/data/genomes/hg19/bed/exomeProbesets
+We made tracks for the main Exome Kit Vendors: IDT, Twist Biosciences, MGI, Agilent, Roche, and Illumina.
+
+Note: IDT, Agilent and Roche have bed files for the Probes and for the Target Regions. Twist, MGI, and Illumina have bed files for the Target Regions (but not for Probes).
+
+Data were downloaded to my Windows desktop and copied to hgwdev:
+scp <file.bed> ana@hgwdev.gi.ucsc.edu://hive/data/genomes/hg19/bed/exonArrays/raw/idt 
+
+# IDT Datasets:
+
+Track: IDT - xGen Exome Research Panel Probes 
+Download: https://sfvideo.blob.core.windows.net/sitefinity/docs/default-source/supplementary-product-info/xgen-exome-research-panel-probesbe255a1532796e2eaa53ff00001c1b3c.bed?sfvrsn=425c3407_7&download=true
+File name: xgen-exome-research-panel-probes-hg19.bed
+
+Track: IDT - xGen Exome Research Panel Target Regions
+Download: https://sfvideo.blob.core.windows.net/sitefinity/docs/default-source/supplementary-product-info/xgen-exome-research-panel-targetsae255a1532796e2eaa53ff00001c1b3c.bed?sfvrsn=435c3407_7&download=true
+File name: xgen-exome-research-panel-targets-hg19.bed
+
+Track: IDT - xGen Exome Research Panel V2 Probes
+Download: https://sfvideo.blob.core.windows.net/sitefinity/docs/default-source/supplementary-product-info/xgen-exome-research-panel-v2-probes-hg1952a5791532796e2eaa53ff00001c1b3c.bed?sfvrsn=1dd1707_6&download=true
+File name: xgen-exome-research-panel-v2-probes-hg19.bed
+
+Track: IDT - xGen Exome Research Panel V2 Target Regions
+Download: https://sfvideo.blob.core.windows.net/sitefinity/docs/default-source/supplementary-product-info/xgen-exome-research-panel-v2-targets-hg1902a5791532796e2eaa53ff00001c1b3c.bed?sfvrsn=6dd1707_10&download=true
+File name: xgen-exome-research-panel-v2-targets-hg19.bed
+
+# Twist Biosciences Datasets:
+
+Track: Twist - RefSeq Exome Panel Target Regions
+Download: https://www.twistbioscience.com/sites/default/files/resources/2019-09/Twist_Exome_RefSeq_targets_hg19_0.bed
+File name: Twist_Exome_RefSeq_targets_hg19_0.bed
+
+Track: Twist - Core Exome Panel Target Regions
+Download: https://www.twistbioscience.com/sites/default/files/resources/2018-09/Twist_Exome_Target_hg19.bed
+File name: Twist_Exome_Target_hg19.bed
+
+Track: Twist - Comprehensive Exome Panel Target Regions
+Download: https://www.twistbioscience.com/sites/default/files/resources/2020-09/Twist_ComprehensiveExome_targets_hg19.bed
+File name: Twist_ComprehensiveExome_targets_hg19.bed
+
+# MGI Datasets:
+
+Track: MGI - Easy Exome Capture V4 Target Regions 
+Download: https://en.mgitech.cn/Uploads/Temp/file/20191225/5e03126e808a0.zip
+File name: MGI_Exome_Capture_V4.bed
+
+Track: MGI - Easy Exome Capture V5 Target Regions 
+Download: https://en.mgitech.cn/Uploads/Temp/file/20191225/5e0312a7be43e.zip
+File name: MGI_Exome_Capture_V5.bed 
+
+# Agilent Datasets:
+Download for all Agilent files: https://earray.chem.agilent.com/suredesign/ - Password needed (from Ana)
+
+Track: Agilent - SureSelect Clinical Research Exome Covered by Probes
+File name: S06588914_Covered.bed
+
+Track: Agilent - SureSelect Clinical Research Exome Target Regions
+File name: S06588914_Regions.bed
+
+Track: Agilent - SureSelect Clinical Research Exome V2 Covered by Probes
+File name: S30409818_Covered.bed
+
+Track: Agilent - SureSelect Clinical Research Exome V2 Target Regions 
+File name: S30409818_Regions.bed
+
+Track: Agilent - SureSelect Focused Exome Covered by Probes
+File name: S07084713_Covered.bed
+
+Track: Agilent - SureSelect Focused Exome Target Regions
+File name: S07084713_Regions.bed
+
+Track: Agilent - SureSelect All Exon V4 Covered by Probes
+File name: S03723314_Covered.bed
+
+Track: Agilent - SureSelect All Exon V4 Target Regions
+File name: S03723314_Regions.bed
+
+Track: Agilent - SureSelect All Exon V4 + UTRs Covered by Probes
+File name: S03723424_Covered.bed
+
+Track: Agilent - SureSelect All Exon V4 + UTRs Target Regions
+File name: S03723424_Regions.bed
+
+Track: Agilent - SureSelect All Exon V5 Covered by Probes
+File name: S04380110_Covered.bed
+
+Track: Agilent - SureSelect All Exon V5 Target Regions
+File name: S04380110_Regions.bed
+
+Track: Agilent - SureSelect All Exon V5 + UTRs Covered by Probes
+File name: S04380219_Covered.bed
+
+Track: Agilent - SureSelect All Exon V5 + UTRs Target Regions
+File name: S04380219_Regions.bed
+
+Track: Agilent - SureSelect All Exon V6 r2 Covered by Probes
+File name: S07604514_Covered.bed
+
+Track: Agilent - SureSelect All Exon V6 r2 Target Regions
+File name: S07604514_Regions.bed
+
+Track: Agilent - SureSelect All Exon V6 + COSMIC r2 Covered by Probes
+File name: S07604715_Covered.bed
+
+Track: Agilent - SureSelect All Exon V6 + COSMIC r2 Target Regions
+File name: S07604715_Regions.bed
+
+Track: Agilent - SureSelect All Exon V6 + UTR r2 Covered by Probes
+File name: S07604624_Covered.bed
+
+Track: Agilent - SureSelect All Exon V6 + UTR r2 Target Regions
+File name: S07604624_Regions.bed
+
+Track: Agilent - SureSelect All Exon V7 Covered by Probes
+File name: S31285117_Covered.bed
+
+Track: Agilent - SureSelect All Exon V7 Target Regions
+File name: S31285117_Regions.bed
+
+# Roche Datasets:
+
+Track: Roche - KAPA HyperExome Capture Probe Footprint
+Download: https://sequencing.roche.com/content/dam/rochesequence/worldwide/design-files/KAPA%20HyperExome%20Design%20files%20hg19.zip
+File name: KAPA_HyperExome_hg19_capture_targets.bed
+
+Track: Roche - KAPA HyperExome Primary Target Regions
+Download:
+https://sequencing.roche.com/content/dam/rochesequence/worldwide/design-files/KAPA%20HyperExome%20Design%20files%20hg19.zip
+File name: KAPA_HyperExome_hg19_primary_targets.bed
+
+Track: Roche - SeqCap EZ Exome V3 Capture Probe Footprint
+Download: https://sequencing.roche.com/content/dam/rochesequence/worldwide/shared-designs/SeqCapEZ_Exome_v3.0_Design_Annotation_files.zip
+File name: SeqCap_EZ_Exome_v3_hg19_capture_targets.bed
+
+Track: Roche - SeqCap EZ Exome V3 Primary Target Regions
+Download: https://sequencing.roche.com/content/dam/rochesequence/worldwide/shared-designs/SeqCapEZ_Exome_v3.0_Design_Annotation_files.zip
+File name: SeqCap_EZ_Exome_v3_hg19_primary_targets.bed
+
+Track: Roche - SeqCap EZ Exome V3 + UTR Capture Probe Footprint
+Download: https://sequencing.roche.com/content/dam/rochesequence/worldwide/shared-designs/Exome_UTR_Design_Annotation_Files.zip
+File name: SeqCap_EZ_ExomeV3_Plus_UTR_hg19_capture_annotated.bed
+
+Track: Roche - SeqCap EZ Exome V3 + UTR Primary Target Regions
+Download: https://sequencing.roche.com/content/dam/rochesequence/worldwide/shared-designs/Exome_UTR_Design_Annotation_Files.zip
+File name: SeqCap_EZ_ExomeV3_Plus_UTR_hg19_primary_annotated.bed
+
+Track: Roche - SeqCap EZ MedExome Capture Probe Footprint
+Download: https://sequencing.roche.com/content/dam/rochesequence/worldwide/shared-designs/MedExome_design_files.zip
+File name: SeqCap_EZ_MedExome_hg19_capture_targets.bed
+
+Track: Roche - SeqCap EZ MedExome Empirical Target Regions
+Download: https://sequencing.roche.com/content/dam/rochesequence/worldwide/shared-designs/MedExome_design_files.zip
+File name: SeqCap_EZ_MedExome_hg19_empirical_targets.bed
+
+Track: Roche - SeqCap EZ MedExome + Mito Capture Probe Footprint
+Download: https://sequencing.roche.com/content/dam/rochesequence/worldwide/shared-designs/MedExomePlusMito_design_files.zip
+File name: SeqCap_EZ_MedExomePlusMito_hg19_capture_targets.bed
+
+Track: Roche - SeqCap EZ MedExome + Mito Empirical Target Regions
+Download: https://sequencing.roche.com/content/dam/rochesequence/worldwide/shared-designs/MedExomePlusMito_design_files.zip
+File name: SeqCap_EZ_MedExomePlusMito_hg19_empirical_targets.bed
+
+# Illumina Datasets:
+
+Track: Illumina - Nextera DNA Exome V1.2 Target Regions
+Download: https://support.illumina.com/content/dam/illumina-support/documents/downloads/productfiles/nextera-dna-exome/nextera-dna-exome-targeted-regions-manifest-bed.zip
+File name: nextera-dna-exome-targeted-regions-manifest-v1-2.bed
+
+Track: Illumina - Nextera Rapid Capture Exome Target Regions
+Download: https://support.illumina.com/softwaredownload.html?assetId=d2c2bc7e-75e5-4f20-bfb7-780839390565&assetDetails=nexterarapidcapture_exome_targetedregions.bed - Password needed (from Ana)
+File name: nexterarapidcapture_exome_targetedregions.bed
+
+Track: Illumina - Nextera Rapid Capture Exome V1.2 Target Regions
+Download: https://support.illumina.com/softwaredownload.html?assetId=197e4b2b-161d-4576-a52f-1204833567c5&assetDetails=nexterarapidcapture_exome_targetedregions_v1.2.bed - Password needed (from Ana)
+File name: nexterarapidcapture_exome_targetedregions_v1.2.bed
+
+Track: Illumina - Nextera Rapid Capture Expanded Exome Target Regions
+Download: https://support.illumina.com/softwaredownload.html?assetId=f020d708-dad9-44e4-8c7c-439add28536c&assetDetails=nexterarapidcapture_expandedexome_targetedregions.bed - Password needed (from Ana)
+File name: nexterarapidcapture_expandedexome_targetedregions.bed
+
+Track: Illumina - TruSeq DNA Exome V1.2 Target Regions
+Download: https://support.illumina.com/content/dam/illumina-support/documents/downloads/productfiles/truseq/truseq-dna-exome/truseq-dna-exome-targeted-regions-manifest-v1-2-bed.zip
+File name: truseq-dna-exome-targeted-regions-manifest-v1-2.bed
+
+Track: Illumina - TruSeq Rapid Exome V1.2 Target Regions
+Download: https://support.illumina.com/content/dam/illumina-support/documents/downloads/productfiles/truseq/truseq-rapid-exome-targeted-regions-manifest-v1-2-bed.zip
+File name: truseq-rapid-exome-targeted-regions-manifest-v1-2.bed
+
+Track: Illumina - TruSight ONE V1.1 Target Regions
+Download: https://support.illumina.com/content/dam/illumina-support/documents/downloads/productfiles/trusight/trusight-one-file-for-ucsc-browser-v1-1.zip
+File name: TruSight_One_v1.1.bed
+
+Track: Illumina - TruSight ONE Expanded V2.0 Target Regions
+Download: https://support.illumina.com/content/dam/illumina-support/documents/downloads/productfiles/nextera/nextera-flex-for-enrichment/trusight-one-expanded-targeted-regions-v2-0.zip
+File name: TSOne_Expanded_Final_TargetedRegions_v2
+
+Track: Illumina - TruSight Exome Target Regions
+Download: https://support.illumina.com/content/dam/illumina-support/documents/documentation/chemistry_documentation/trusight/trusight_exome_manifest_a.bed
+File name: trusight_exome_manifest_a.bed
+
+Track: Illumina - AmpliSeq Exome Panel Target Regions
+Download: https://support.illumina.com/content/dam/illumina-support/documents/downloads/productfiles/ampliseq-for-illumina/ampliseq-for-illumina-exome-panel-manifest-file-bed.zip
+File name: Exome.dna_manifest.20180509.bed
+
+# Converting bed files for hg19:
+
+All files were converted from bed to bigBed using the Genome Browser documentation. All of the files underwent the following steps, with the exception of a few files that are described below. (NOTE: the documentation includes a step to remove any header lines -- only a couple files had headers, and those were simply removed within vi/vim.)
+
+1. Sort all bed files
+sort -k1,1 -k2,2n unsorted.bed > input.bed
+
+2. fetchChromSizes (run once)
+fetchChromSizes hg19 > hg19.chrom.sizes
+
+Note: this only needs to be run once, since one hg19.chrom.sizes file can be used for all bedToBigBed runs.
+
+3. bedToBigBed for all files
+bedToBigBed input.bed hg19.chrom.sizes myBigBed.bb
+
+Here's an example using the MGI Exome Capture V4 file:
+
+sort -k1,1 -k2,2n MGI_Exome_Capture_V4.bed > sorted_MGI_Exome_Capture_V4.bed
+
+fetchChromSizes hg19 > hg19.chrom.sizes
+
+bedToBigBed sorted_MGI_Exome_Capture_V4.bed hg19.chrom.sizes MGI_Exome_Capture_V4.bb
+
+--
+
+The following files from Roche had long entries in col4, causing these files to have rows that were too long for bedToBigBed. Therefore, all the input bed files had col4 cut. (Note: these were just the ensembl and ccds ids, which did not provide any other substantial information.)
+
+We ran the command
+
+> cut -f1,2,3 
+
+for all such files. Here's an example for the Roche - KAPA HyperExome Capture Probe:
+
+Footprint file:
+
+cut -f1,2,3 sorted-KAPA_HyperExome_hg19_capture_targets.bed > sorted-cut-KAPA_HyperExome_hg19_capture_targets.bed
+#############################################################################
+
+#############################################################################
+# skinSoleBoldo JimK 01-14-2020
+# This describes how we got the skinSoleBoldo data set into the 
+# Genome Browser from the Cell Browser.
+#############################################################################
+
+# Create working directory and go there
+mkdir /hive/data/genomes/hg19/bed/singleCell/skinSoleBoldo
+cd /hive/data/genomes/hg19/bed/singleCell/skinSoleBoldo
+
+# Create output dir for binaries
+mkdir bbi 
+
+# Download files from the UCSC Cell Browser like so
+wget https://cells.ucsc.edu/aging-human-skin/meta.tsv
+wget https://cells.ucsc.edu/aging-human-skin/exprMatrix.tsv.gz
+
+# Get the first line (fields) out of meta.tsv and also make stats on it
+head -1 meta.tsv > meta.fields
+tabInfo meta.tsv -vals=20 > meta.20
+
+
+# Make a bunch of smaller matrices by clustering columns.  Mostly we'll use the cluster one
+# but some of the others are good to look at sometimes too.  This is the time consuming step.
+mkdir clust
+matrixClusterColumns -makeIndex=clust/exprMatrix.ix  exprMatrix.tsv.gz meta.tsv \
+    Celltype clust/cell_type.matrix bbi/cell_type.stats \
+    subj clust/donor.matrix bbi/donor.stats \
+    age	clust/age.matrix bbi/age.stats \
+    Celltype_and_Age clust/age_cell_type.matrix bbi/age_cell_type.stats 
+
+# Get the first column (the genes) out of expression matrix.  
+cut -f 1 clust/cell_type.matrix > gene.lst
+
+# Figure out the geneset they used and generate mapping file
+gencodeVersionForGenes gene.lst /hive/data/inside/geneSymVerTx.tsv -bed=mapping.bed
+# best is gencodeV19 as sym on hg19 with 21217 of 21353 (99.3631%) hits
+
+# Turn some into barChart, and then bigBarChart
+foreach s (cell_type donor age age_cell_type)
+    matrixToBarChartBed clust/$s.matrix mapping.bed clust/$s.bed -stats=bbi/$s.stats -trackDb=clust/$s.ra
+    bedSort clust/$s.bed clust/$s.bed
+    bedToBigBed clust/$s.bed /hive/data/genomes/hg19/chrom.sizes bbi/$s.bb -type=bed6+3 -as=/cluster/home/kent/src/hg/lib/simpleBarChartBed.as
+end
+
+# Make up special colors for cell_type.  First manually create two column 
+# file that relates at least some of sample labels to cell types we have colors for.
+# Call this file clust/cell_type.labels.  
+matrixClusterColumns clust/cell_type.matrix clust/cell_type.labels cluster clust/cell_type.unnormed clust/cell_type.restats
+matrixNormalize column sum clust/cell_type.unnormed clust/cell_type.ref
+
+# Use same colors for sample
+foreach s (cell_type donor age age_cell_type)
+    hcaColorCells clust/cell_type.ref ../typeColors.tsv clust/$s.matrix clust/$s.refStats -trackDb=clust/$s.colors -stats=bbi/$s.stats
+end
+
+# Link files needed by browser at runtime to the /gbdb dir
+mkdir /gbdb/hg19/bbi/skinSoleBoldo
+foreach s (cell_type donor age age_cell_type)
+    ln -s /hive/data/genomes/hg19/bed/singleCell/skinSoleBoldo/bbi/$s.bb /gbdb/hg19/bbi/skinSoleBoldo/
+    ln -s /hive/data/genomes/hg19/bed/singleCell/skinSoleBoldo/bbi/$s.stats /gbdb/hg19/bbi/skinSoleBoldo/
+end
+
+# Add the bits from clust/*.ra and clust/*.colors to hg19/trackDb.ra and you should be good.
+rm -f tracks.ra
+foreach s (cell_type donor age age_cell_type)
+    grep -v barChartColors clust/$s.ra >>tracks.ra
+    cat clust/$s.colors >> tracks.ra
+    echo transformFunc NONE >> tracks.ra
+    echo barChartLimit 2 >> tracks.ra
+    echo "" >> tracks.ra
+end
+
+
+#############################################################################
+# fetalGeneAtlas JimK 01-19-2021
+############################################################################
+# This is the RNA-seq part of the data set described in 
+# "A human cell atlas of fetal gene expression" by Cao, Day et al
+# Science 13 Nov 2020.   This was imported from Cell Browser
+
+# Create directory for work.
+
+mkdir -p /hive/data/genomes/hg19/bed/singleCell/fetalGeneAtlas
+cd /hive/data/genomes/hg19/bed/singleCell/fetalGeneAtlas
+
+# Create output dir for binaries
+mkdir bbi 
+
+# link in files from cell browser
+ln -s /hive/data/inside/cells/datasets/fetal-gene-atlas/genes/all/meta.tsv .
+ln -s /hive/data/inside/cells/datasets/fetal-gene-atlas/genes/all/exprMatrix.tsv.gz .
+
+# Get the first line (fields) out of meta.tsv and also make stats on it
+head -1 meta.tsv > meta.fields
+tabInfo meta.tsv -vals=20 > meta.20
+
+
+# Make a bunch of smaller matrices by clustering columns.  Mostly we'll use the cluster one
+# but some of the others are good to look at sometimes too.  This is the time consuming step.
+mkdir clust
+matrixClusterColumns -makeIndex=clust/exprMatrix.ix  exprMatrix.tsv.gz meta.tsv \
+    Main_cluster_name clust/cell_type.matrix bbi/cell_type.stats \
+    Assay clust/Assay.matrix bbi/Assay.stats \
+    Experiment_batch clust/Experiment_batch.matrix bbi/Experiment_batch.stats \
+    Fetus_id clust/donor.matrix bbi/donor.stats \
+    Organ clust/Organ.matrix bbi/Organ.stats \
+    Organ_cell_lineage clust/Organ_cell_lineage.matrix bbi/Organ_cell_lineage.stats \
+    RT_group clust/RT_group.matrix bbi/RT_group.stats \
+    sex clust/sex.matrix bbi/sex.stats 
+
+# Get the first column (the genes) out of expression matrix.  
+cut -f 1 clust/cell_type.matrix > gene.lst
+
+
+# Figure out the geneset they used and generate mapping file
+gencodeVersionForGenes gene.lst /hive/data/inside/geneSymVerTx.tsv -bed=mapping.bed
+# best is gencodeV19 as id on hg19 with 60284 of 63562 (94.8428%) hits
+
+
+# Turn some into barChart, and then bigBarChart
+foreach s (cell_type Assay Experiment_batch donor Organ Organ_cell_lineage RT_group sex)
+    matrixToBarChartBed clust/$s.matrix mapping.bed clust/$s.bed -stats=bbi/$s.stats -trackDb=clust/$s.ra
+    bedSort clust/$s.bed clust/$s.bed
+    bedToBigBed clust/$s.bed /hive/data/genomes/hg19/chrom.sizes bbi/$s.bb -type=bed6+3 -as=/cluster/home/kent/src/hg/lib/simpleBarChartBed.as
+end
+
+# Make up special colors for cell_type.  First manually create two column 
+# file that relates at least some of sample labels to cell types we have colors for.
+# Call this file cell_type.labels.  
+matrixClusterColumns clust/cell_type.matrix cell_type.labels cluster clust/cell_type.unnormed clust/cell_type.restats
+matrixNormalize column sum clust/cell_type.unnormed clust/cell_type.ref
+#hcaColorCells clust/cell_type.ref ../typeColors.tsv clust/cell_type.matrix clust/cell_type.refStats -trackDb=clust/cell_type.colors -stats=bbi/cell_type.stats
+
+# Use same colors for some others
+foreach s (cell_type Assay Experiment_batch donor Organ Organ_cell_lineage RT_group sex)
+    hcaColorCells clust/cell_type.ref ../typeColors.tsv clust/$s.matrix clust/$s.refStats -trackDb=clust/$s.colors -stats=bbi/$s.stats
+end
+
+# Link files needed by browser at runtime to the /gbdb dir
+mkdir /gbdb/hg19/bbi/fetalGeneAtlas
+foreach s (cell_type Assay Experiment_batch donor Organ Organ_cell_lineage RT_group sex)
+    ln -s /hive/data/genomes/hg19/bed/singleCell/fetalGeneAtlas/bbi/$s.bb /gbdb/hg19/bbi/fetalGeneAtlas/
+    ln -s /hive/data/genomes/hg19/bed/singleCell/fetalGeneAtlas/bbi/$s.stats /gbdb/hg19/bbi/fetalGeneAtlas/
+end
+
+
+hgBbiDbLink hg19 fetalGeneAtlasCellTypes /gbdb/hg19/bbi/fetalGeneAtlas/cell_type.bb
+hgBbiDbLink hg19 fetalGeneAtlasDonor /gbdb/hg19/bbi/fetalGeneAtlas/donor.bb
+hgBbiDbLink hg19 fetalGeneAtlasAssay /gbdb/hg19/bbi/fetalGeneAtlas/Assay.bb
+hgBbiDbLink hg19 fetalGeneAtlasExperiment /gbdb/hg19/bbi/fetalGeneAtlas/Experiment_batch.bb
+hgBbiDbLink hg19 fetalGeneAtlasOrgan /gbdb/hg19/bbi/fetalGeneAtlas/Organ.bb
+hgBbiDbLink hg19 fetalGeneAtlasOrganCellLineage /gbdb/hg19/bbi/fetalGeneAtlas/Organ_cell_lineage.bb
+hgBbiDbLink hg19 fetalGeneAtlasRtGroup /gbdb/hg19/bbi/fetalGeneAtlas/RT_group.bb
+hgBbiDbLink hg19 fetalGeneAtlasSex /gbdb/hg19/bbi/fetalGeneAtlas/sex.bb
+
+# Add the bits from clust/*.ra and clust/*.colors to hg19/trackDb.ra and you should be good.
+foreach s (cell_type Assay Experiment_batch donor Organ Organ_cell_lineage RT_group sex)
+    echo >> clust/$s.ra
+end
+cat clust/*.ra > tracks.ra
+