9e87a928af4260e37c2054af239b6b411058fd7e
kate
  Thu Jun 4 11:24:29 2020 -0700
Adding make docs for ENCODE CCRE tracks. refs #24668

diff --git src/hg/makeDb/doc/hg38/reg.txt src/hg/makeDb/doc/hg38/reg.txt
index c6d987f..697ad22 100644
--- src/hg/makeDb/doc/hg38/reg.txt
+++ src/hg/makeDb/doc/hg38/reg.txt
@@ -616,15 +616,68 @@
 # Greater number of elements must be due to mappings on new alt chroms
 # (96 chroms in new track, 38 in old)
 
 
 ##############################################################################
 # wgEncodeReg ENCODE Regulatory tracks (Done Chris Eisenhart)
 # Transcription, Layered H3K4Me1, Layered H3K4Me3, Layered H3K27Ac
     mkdir /hive/data/genomes/hg38/bed/hg19MassiveLift/wgEncodeReg
     mkdir /hive/data/genomes/hg38/bed/hg19MassiveLift/wgEncodeReg/wgEncodeRegMarkH3k27ac
     mkdir /hive/data/genomes/hg38/bed/hg19MassiveLift/wgEncodeReg/wgEncodeRegMarkH34me1
     mkdir /hive/data/genomes/hg38/bed/hg19MassiveLift/wgEncodeReg/wgEncodeRegMarkH3k4me3
     mkdir /hive/data/genomes/hg38/bed/hg19MassiveLift/wgEncodeReg/wgEncodeRegTfbsClusteredV3
     mkdir /hive/data/genomes/hg38/bed/hg19MassiveLift/wgEncodeReg/wgEncodeRegTxn
     mkdir /hive/data/genomes/hg38/bed/hg19MassiveLift/wgEncodeReg
     liftManyBigWigs /cluster/home/ceisenhart/kent/src/hg/utils/liftList/bigWigList.ra
+
+##############################################################################
+# ENCODE Registry of Candidate cis-Regulatory Elements
+#
+# 2020-04-14  kate
+#
+# From ENCODE 3 Data Analysis Center at U Mass Med Center (Zlab)
+# Data contacts:  Henry Pratt, Jill Moore, Zhiping Weng PI
+#
+# RM #24668
+#
+# Download bigBed file (hosted on their integrative hub); the conversion to GRCh38-ccREs.bed (used below) is not recorded here
+
+cd /hive/data/outside/encode3/ccre
+wget http://gcp.wenglab.org/hubs/integrative1/data/GRCh38/CTA/GRCh38-ccREs.bigBed
+
+# Later, Jill asked us to add scores; download the score file
+
+wget -nd https://users.wenglab.org/moorej3/Human-maxZ-DNase.txt.gz
+gunzip Human-maxZ-DNase.txt.gz
+
+# check score distribution
+textHistogram -real -col=2 Human-maxZ-DNase.txt
+1.000000 ***************** 102913
+2.000000 ************************************************************ 362463
+3.000000 ************************************************* 294351
+4.000000 ********************* 128201
+5.000000 ***** 29345
+6.000000 * 6991
+7.000000  2096
+8.000000  168
+9.000000  5
+10.000000  2
+
+# noting that order of accessions in score file doesn't match bed file ;-(
+sort Human-maxZ-DNase.txt > Human-maxZ-DNase.sorted.txt
+sort -k 4 GRCh38-ccREs.bed > GRCh38-ccREs.sorted.bed
+
+paste GRCh38-ccREs.sorted.bed Human-maxZ-DNase.sorted.txt > ccres.prescored.bed
+# manually sanity check start and end of file to see that accessions match
+
+# score using zscore: min(zscore*100, 1000), and reformat
+awk '{OFS="\t"; print $1, $2, $3, $4, ($13>10)? 1000 : int($13 * 100), $6, $7, $8, $9, $10, $13}' \
+        ccres.prescored.bed | bedSort stdin ccres.scored.bed
+
+# Reformat to add fields for filtering and mouseover, etc.
+set f = encodeCcreCombined
+perl makeCcreCombined.pl < ccres.scored.bed > $f.bed
+bedToBigBed -tab -type=bed9+6 -as=$f.as $f.bed /hive/data/genomes/hg38/chrom.sizes $f.bb
+ln -s `pwd`/$f.bb /gbdb/hg38/encode3/ccre/
+
+###############################
+