61b98e84a58f8049465452e435fa9b6048777bf5
kate
  Thu May 16 11:52:43 2019 -0700
Add track tables for ENCODE 3 TFBS Clusters track. refs #21139

diff --git src/hg/makeDb/doc/encode3/tfbs.txt src/hg/makeDb/doc/encode3/tfbs.txt
index 3cfd472..1c42f25 100644
--- src/hg/makeDb/doc/encode3/tfbs.txt
+++ src/hg/makeDb/doc/encode3/tfbs.txt
@@ -145,31 +145,34 @@
 
 perl ../makeInputs.pl < fileCellAbTarget.tab > clusters.inputs.tab
 hgLoadSqlTab hg19 encode3RegTfbsClusterInput \
         ~/kent/src/hg/lib/clusterInputTrackTable5.sql clusters.inputs.tab
 
 # Compact to factorSource format
 set tools = ~/kent/src/hg/makeDb/hgBedsToBedExps
 (date; $tools/bedExpsToFactorSource.pl clusters.bed > clusters.factorSource.bed; date) >& makeFactorSource.log &
 # Mon Apr  1 15:01:59 PDT 2019
 # Mon Apr  1 16:19:20 PDT 2019
 
 # check for max score issue (RM #13224)
 $tools/factorSourceCheckScore.pl < clusters.factorSource.bed
 # Errors: 0 in 10560472 lines
 
-hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/factorSource.sql -renameSqlTable \
+#hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/factorSource.sql -renameSqlTable \
+    #hg19 encode3RegTfbsCluster clusters.factorSource.bed
+# use the bed5SourceVals schema (instead of factorSource) for compatibility with previous tracks
+hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/bed5SourceVals.sql -renameSqlTable \
     hg19 encode3RegTfbsCluster clusters.factorSource.bed
 
 # Read 10560472 elements of size 8 from clusters.factorSource.bed
 
 # compare coverage
 # TODO
 featureBits hg19 -noRandom -enrichment wgEncodeRegTfbsClusteredV3 encode3RegTfbsCluster
 #wgEncodeRegTfbsClusteredV3 12.740%, encode3RegTfbsCluster 27.457%, both 11.716%, cover 91.96%, enrich 3.35x
 
 # list cells
 awk '{print $2}' fileCellAbTarget.tab | sed 's/+.*//' | sort | uniq > cells.txt
 wc -l cells.txt
 #130 cells.txt
 
 # list factors for trackDb filterBy
@@ -196,30 +199,31 @@
     url http://www.factorbook.org/mediawiki/index.php/$$
     idInUrlSql select value from factorbookGeneAlias where name='%s'
     controlledVocabulary encode/cv.ra cellType=cell treatment=treatment lab=lab
     visibility dense
     useScore 1
     priority 1.71
     maxWindowToDraw 10000000
     dataVersion ENCODE Mar 2012 Freeze
     filterBy name:factor=\
 'EOF'
 
 # rename tables for consistency with earlier regulatory supertrack tracks (but distinguish from
 # ENCODE 2 by prefix)
 
 hgsql hg19 -e "alter table encode3RegTfbsCluster rename to encRegTfbsClustered"
+hgsql hg19 -e "alter table encode3RegTfbsCluster rename to encRegTfbsClustered"
 hgsql hg19 -e "alter table encode3RegTfbsClusterInput rename to encRegTfbsClusteredInputs"
 hgsql hg19 -e "alter table encode3RegTfbsExp rename to encRegTfbsClusteredSources"
 
 ###############
 # hg38
 
 # (2019-03-25 kate) 
 
 cd ../hg38
 mkdir peaks
 mv tf.GRCh38.tar peaks
 cd peaks
 tar xvf tf.GRCh38.tar
 cd ..
 mv peaks/metadata.tsv .
@@ -276,44 +280,49 @@
 
 # Compact to factorSource format
 set tools = ~/kent/src/hg/makeDb/hgBedsToBedExps
 (date; $tools/bedExpsToFactorSource.pl clusters.bed > clusters.factorSource.bed; date) >& makeFactorSource.log &
 
 cat makeFactorSource.log
 #Mon Apr  1 13:39:39 PDT 2019
 #Mon Apr  1 14:56:53 PDT 2019
 
 # Elapsed 1:17
 
 # check for max score issue (RM #13224)
 $tools/factorSourceCheckScore.pl < clusters.factorSource.bed
 # Errors: 0 in 10565630 lines
 
-hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/factorSource.sql -renameSqlTable \
+#hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/factorSource.sql -renameSqlTable \
+    #hg38 encode3RegTfbsCluster clusters.factorSource.bed
+hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/bed5SourceVals.sql -renameSqlTable \
     hg38 encode3RegTfbsCluster clusters.factorSource.bed
 # Read 10565630 elements of size 8 from clusters.factorSource.bed
 
 # load experiments tables
 hgLoadSqlTab hg38 encode3RegTfbsExp ~/kent/src/hg/lib/expRecord.sql clusters.exps
 
 # Create inputTrackTable with columns to match trackDb setting 'inputTableFieldDisplay'.
 # e.g. cell factor treatment lab
 # - 7 columns: <table> <source> <factor> <antibody> <cell> <treatment> <lab>
 
 perl ../makeInputs.pl < fileCellAbTarget.tab > clusters.inputs.tab
 hgLoadSqlTab hg38 encode3RegTfbsClusterInput \
         ~/kent/src/hg/lib/clusterInputTrackTable5.sql clusters.inputs.tab
+# next time, load the inputs table with the clusterInputTrackEncode3Tfbs.sql schema instead:
+#hgLoadSqlTab hg38 encode3RegTfbsClusterInput \
+        #~/kent/src/hg/lib/clusterInputTrackEncode3Tfbs.sql clusters.inputs.tab
 
 # list factors for trackDb filterBy
 awk '{print $4}' fileCellAbTarget.tab | sort | uniq | sed 's/$/,\\/' > factors.trackDb
 wc -l factors.trackDb
 # 340 factors.trackDb
 
 # add to trackDb filterBy setting
 
 # list cells
 awk '{print $2}' fileCellAbTarget.tab | sed 's/+.*//' | sort | uniq > cells.txt
 wc -l cells.txt
 #129 cells.txt
 
 # rename tables for consistency with earlier regulatory supertrack tracks (but distinguish from
 # ENCODE 2 by prefix)