61b98e84a58f8049465452e435fa9b6048777bf5 kate Thu May 16 11:52:43 2019 -0700 Add track tables for ENCODE 3 TFBS Clusters track. refs #21139 diff --git src/hg/makeDb/doc/encode3/tfbs.txt src/hg/makeDb/doc/encode3/tfbs.txt index 3cfd472..1c42f25 100644 --- src/hg/makeDb/doc/encode3/tfbs.txt +++ src/hg/makeDb/doc/encode3/tfbs.txt @@ -145,31 +145,34 @@ perl ../makeInputs.pl < fileCellAbTarget.tab > clusters.inputs.tab hgLoadSqlTab hg19 encode3RegTfbsClusterInput \ ~/kent/src/hg/lib/clusterInputTrackTable5.sql clusters.inputs.tab # Compact to factorSource format set tools = ~/kent/src/hg/makeDb/hgBedsToBedExps (date; $tools/bedExpsToFactorSource.pl clusters.bed > clusters.factorSource.bed; date) >& makeFactorSource.log & # Mon Apr 1 15:01:59 PDT 2019 # Mon Apr 1 16:19:20 PDT 2019 # check for max score issue (RM #13224) $tools/factorSourceCheckScore.pl < clusters.factorSource.bed # Errors: 0 in 10560472 lines -hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/factorSource.sql -renameSqlTable \ +#hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/factorSource.sql -renameSqlTable \ + #hg19 encode3RegTfbsCluster clusters.factorSource.bed +# use this schema for compatibility with previous tracks +hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/bed5SourceVals.sql -renameSqlTable \ hg19 encode3RegTfbsCluster clusters.factorSource.bed # Read 10560472 elements of size 8 from clusters.factorSource.bed # compare coverage # TODO featureBits hg19 -noRandom -enrichment wgEncodeRegTfbsClusteredV3 encode3RegTfbsCluster #wgEncodeRegTfbsClusteredV3 12.740%, encode3RegTfbsCluster 27.457%, both 11.716%, cover 91.96%, enrich 3.35x # list cells awk '{print $2}' fileCellAbTarget.tab | sed 's/+.*//' | sort | uniq > cells.txt wc -l cells.txt #130 cells.txt # list factors for trackDb filterBy @@ -196,30 +199,31 @@ url http://www.factorbook.org/mediawiki/index.php/$$ idInUrlSql select value from factorbookGeneAlias where name='%s' controlledVocabulary encode/cv.ra cellType=cell treatment=treatment lab=lab visibility 
dense useScore 1 priority 1.71 maxWindowToDraw 10000000 dataVersion ENCODE Mar 2012 Freeze filterBy name:factor=\ 'EOF' # rename tables for consistency with earlier regulatory supertrack tracks (but distinguish from # ENCODE 2 by prefix) hgsql hg19 -e "alter table encode3RegTfbsCluster rename to encRegTfbsClustered" +hgsql hg19 -e "alter table encode3RegTfbsCluster rename to encRegTfbsClustered" hgsql hg19 -e "alter table encode3RegTfbsClusterInput rename to encRegTfbsClusteredInputs" hgsql hg19 -e "alter table encode3RegTfbsExp rename to encRegTfbsClusteredSources" ############### # hg38 # (2019-03-25 kate) cd ../hg38 mkdir peaks mv tf.GRCh38.tar peaks cd peaks tar xvf tf.GRCh38.tar cd .. mv peaks/metadata.tsv . @@ -276,44 +280,49 @@ # Compact to factorSource format set tools = ~/kent/src/hg/makeDb/hgBedsToBedExps (date; $tools/bedExpsToFactorSource.pl clusters.bed > clusters.factorSource.bed; date) >& makeFactorSource.log & cat makeFactorSource.log #Mon Apr 1 13:39:39 PDT 2019 #Mon Apr 1 14:56:53 PDT 2019 # Elapsed 1:17 # check for max score issue (RM #13224) $tools/factorSourceCheckScore.pl < clusters.factorSource.bed # Errors: 0 in 10565630 lines -hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/factorSource.sql -renameSqlTable \ +#hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/factorSource.sql -renameSqlTable \ + #hg38 encode3RegTfbsCluster clusters.factorSource.bed +hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/bed5SourceVals.sql -renameSqlTable \ hg38 encode3RegTfbsCluster clusters.factorSource.bed # Read 10565630 elements of size 8 from clusters.factorSource.bed # load experiments tables hgLoadSqlTab hg38 encode3RegTfbsExp ~/kent/src/hg/lib/expRecord.sql clusters.exps # Create inputTrackTable with columns to match trackDb setting 'inputTableFieldDisplay'. # e.g. 
cell factor treatment lab - 7 columns: <table> <source> <factor> <antibody> <cell> <treatment> <lab> perl ../makeInputs.pl < fileCellAbTarget.tab > clusters.inputs.tab hgLoadSqlTab hg38 encode3RegTfbsClusterInput \ ~/kent/src/hg/lib/clusterInputTrackTable5.sql clusters.inputs.tab +# next time, use this: +#hgLoadSqlTab hg38 encode3RegTfbsClusterInput \ + #~/kent/src/hg/lib/clusterInputTrackEncode3Tfbs.sql clusters.inputs.tab # list factors for trackDb filterBy awk '{print $4}' fileCellAbTarget.tab | sort | uniq | sed 's/$/,\\/' > factors.trackDb wc -l factors.trackDb # 340 factors.trackDb # add to trackDb filterBy setting # list cells awk '{print $2}' fileCellAbTarget.tab | sed 's/+.*//' | sort | uniq > cells.txt wc -l cells.txt #129 cells.txt # rename tables for consistency with earlier regulatory supertrack tracks (but distinguish from # ENCODE 2 by prefix