1363e072831a0b48576d349379f32d1b9e5d5076
kate
  Wed Feb 5 12:32:52 2020 -0800
Add user-friendly downloads file for TF Clusters. refs #24848

diff --git src/hg/makeDb/doc/encode3/tfbs.txt src/hg/makeDb/doc/encode3/tfbs.txt
index 7d9699c..0f8597a 100644
--- src/hg/makeDb/doc/encode3/tfbs.txt
+++ src/hg/makeDb/doc/encode3/tfbs.txt
@@ -12,30 +12,32 @@
 Here are links to tarballs for all the narrowPeak files we have used for motif discovery in GRCh38, hg19, and mm10. Each one has a metadata.tsv with file ID, experiment ID, antibody ID, factor name, donor ID, cell type, and lab for each narrowPeak. Does this look good? Let me know if I need to tweak anything.
 
 
 http://users.wenglab.org/pratth/tf.GRCh38.tar.gz
 
 http://users.wenglab.org/pratth/tf.hg19.tar.gz
 
 http://users.wenglab.org/pratth/tf.mm10.tar.gz
 
 
 Henry
 
 ###############
 # Download files and metadata
 
+cd /hive/data/outside/encode3/tfbs/dac
+
 mkdir hg38 hg19 mm10
 
 wget -nd -P hg38 http://users.wenglab.org/pratth/tf.GRCh38.tar.gz
 wget -nd -P hg19 http://users.wenglab.org/pratth/tf.hg19.tar.gz
 wget -nd -P mm10 http://users.wenglab.org/pratth/tf.mm10.tar.gz
 
 # retrieve and unroll narrowpeak files
 
 cd hg19
 gunzip *.gz
 tar xvfz tf.hg19.tar
 ls *.bed.gz | wc -l
 # 1400
 
 ls *.bed.gz | head -1
@@ -653,15 +655,45 @@
 cd ..
 mkdir bigScoredPeaks
 
 cat > makeBigs.csh << 'EOF'
 set files = "encode3TfbsPkENCFF512IAI encode3TfbsPkENCFF403BWK encode3TfbsPkENCFF389ULP encode3TfbsPkENCFF869YGK encode3TfbsPkENCFF193DQZ encode3TfbsPkENCFF765NAN"
 set sizes = /hive/data/genomes/hg38/chrom.sizes
 foreach f ($files)
     echo $f
     zcat scoredPeaks/$f.bed.gz > bigScoredPeaks/$f.bed
     sort -k1,1 -k2,2n  bigScoredPeaks/$f.bed > bigScoredPeaks/$f.sorted.bed
     bedToBigBed -as=$HOME/kent/src/hg/lib/bigNarrowPeak.as -type=bed6+4 \
                         bigScoredPeaks/$f.sorted.bed $sizes bigScoredPeaks/$f.bb
     ln -s `pwd`/bigScoredPeaks/$f.bb /gbdb/hg38/encode3/tfbs
 end
 'EOF'
+
+################################################
+# Create BED5+ user-friendly download file with clusters and cell info
+# 2020-01-27 kate
+
+# Start in the DAC TFBS work dir; assembly dirs below are entered by relative path.
+cd /hive/data/outside/encode3/tfbs/dac
+
+# t = downloads subdirectory, f = output file basename, d = downloads root
+set t = encRegTfbsClustered
+set f = ${t}WithCells
+set d = /data/apache/htdocs-hgdownload/goldenPath
+
+# Same steps for each assembly:
+#   1. run clusterAddSources.pl on clusters.bed and clusters.inputs.tab
+#   2. create the downloads dir if it is missing
+#   3. write a gzipped copy there (gzip -c keeps $f.bed in the work dir)
+foreach db (hg38 hg19)
+    cd $db
+    clusterAddSources.pl clusters.bed clusters.inputs.tab > $f.bed
+    mkdir -p $d/$db/$t
+    gzip -c $f.bed > $d/$db/$t/$f.$db.bed.gz
+    cd ..
+end
+
+# TODO: add README.txt to downloads dirs (no commands recorded yet)
+
+
+
+