7b3269bd1e75158ddf049cd109f7f4e1383da0ec kate Tue May 14 10:36:23 2019 -0700 Color subtracks by DNase similarity. refs #23243 diff --git src/hg/makeDb/doc/encode3/tfbs.txt src/hg/makeDb/doc/encode3/tfbs.txt index be450bf..f60f3ec 100644 --- src/hg/makeDb/doc/encode3/tfbs.txt +++ src/hg/makeDb/doc/encode3/tfbs.txt @@ -363,30 +363,54 @@ # Fix up subGroup members w/ punctuation and initial numbers: # Peyer's_patch -> Peyers_patch # NT2/D1 -> NT2_D1 # 22Rv1 -> X22Rv1 # MM.1S -> MM_1S # rename tables (encode3Tfbs -> encTfChipPk) hgsql hg19 -e 'show tables like "encode3TfbsPk%"' > tables.old.txt sed -e 's/^/alter table /' -e 's/$/ rename to /' tables.old.txt > rename.1.sql sed -e 's/encode3TfbsPk/encTfChipPk/' tables.old.txt | paste rename.1.sql - | \ sed 's/$/;/' > rename.sql hgsql hg19 < rename.sql +# add colors to tier1-3 cell experiments (using ENCODE 2 color conventions) +# +# GM12878 color 153,38,0 +# H1-hESC color 0,107,27 +# K562 color 46,0,184 +# HeLa-S3 color 0,119,158 +# HepG2 color 189,0,157 +# HUVEC color 224,75,0 + +raToLines encode3.ra encode3.lines +sed -e '/GM12878/s/$/| color 153,38,0/' \ + -e '/H1-hESC/s/$/| color 0,107,27/' \ + -e '/K562/s/$/| color 46,0,184/' \ + -e '/HeLa-S3/s/$/| color 0,119,158/' \ + -e '/HepG2/s/$/| color 189,0,157/' \ + -e '/HUVEC/s/$/| color 224,75,0/' \ + encode3.lines > encode3.color.lines + +# buf size exceeded on linesToRa, so prune down to just composite subtracks + +linesToRa encode3.color.lines encode3.color.ra + +# concatenate to orig ra file + # reload cluster input table hgLoadSqlTab hg19 encode3RegTfbsClusterInput \ ~/kent/src/hg/lib/clusterInputTrackTable5.sql clusters.inputs.tab # rename field in cluster input table # NOTE syntax change in MariaDb (now requires type) hgsql -e hg19 "alter table encRegTfbsClusteredInputs change treatment experiment varchar(255)" ############### # hg38 cd ../hg38 mkdir scoredPeaks cd peaks @@ -409,30 +433,46 @@ # Fix up subGroup members w/ punctuation and initial numbers: # 
Peyer's_patch -> Peyers_patch # NT2/D1 -> NT2_D1 # 22Rv1 -> X22Rv1 # MM.1S -> MM_1S # rename tables (encode3Tfbs -> encTfChipPk) hgsql hg38 -e 'show tables like "encode3TfbsPk%"' > tables.old.txt sed -e 's/^/alter table /' -e 's/$/ rename to /' tables.old.txt > rename.1.sql sed -e 's/encode3TfbsPk/encTfChipPk/' tables.old.txt | paste rename.1.sql - | \ sed 's/$/;/' > rename.sql hgsql hg38 < rename.sql +# add colors, using DNase similarity track +raToLines encode3.ra encode3.lines + +# edit to leave only subtracks +# generate edit script from DNase similarity .ra file + +cd ~/kent/src/hg/makeDb/trackDb/human/hg38 +csh addColors.csh encode3.lines > encode3.color.lines +linesToRa encode3.color.lines encode3.color.ra + +# missed a few (punctuation diffs, etc.) +csh addColors2.csh encode3.color.lines encode3.color2.lines +linesToRa encode3.color2.lines encode3.color.ra + +# merge into encode3.ra + # reload cluster input table hgLoadSqlTab hg38 encode3RegTfbsClusterInput \ ~/kent/src/hg/lib/clusterInputTrackTable5.sql clusters.inputs.tab # rename field in cluster input table # NOTE syntax change in MariaDb (now requires type) hgsql -e hg19 "alter table encRegTfbsClusteredInputs change treatment experiment varchar(255)" #################### # Motifs (hg38) from Henry Pratt at Zlab # 2019-03-13 # Notes from Henry: #The structure is mostly the same as the existing tables: canonical.tsv contains lists