src/hg/makeDb/doc/hg18.txt 1.371

1.371 2009/07/25 02:40:16 kate
44way phyloP update from Adam Siepel
Index: src/hg/makeDb/doc/hg18.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/hg18.txt,v
retrieving revision 1.370
retrieving revision 1.371
diff -b -B -U 4 -r1.370 -r1.371
--- src/hg/makeDb/doc/hg18.txt	21 Jul 2009 21:01:42 -0000	1.370
+++ src/hg/makeDb/doc/hg18.txt	25 Jul 2009 02:40:16 -0000	1.371
@@ -25664,8 +25664,12 @@
     $PHASTBIN/tree_doctor \
         --prune=monDom4,ornAna1,taeGut1,petMar1,galGal3,anoCar1,xenTro2,gasAcu1,danRer5,tetNig1,fr2,oryLat2 \
                 tree_4d.44way.nh > tree_4d.44way.placental.nh
 
+    # chrX-only for placental subset (requested by 2X project)
+    set PHASTBIN=/cluster/bin/phast.2008-12-18
+    $PHASTBIN/phyloFit --EM --precision MED --msa-format FASTA --subst-mod REV --tree ../tree-commas.nh --out-root 4d.chrX mfa/chrX.mfa
+
 #############################################################################
 # phastCons 44-way (DONE - 2008-12-23 - 2009-01-02 - Hiram)
 
     # split 44way mafs into 10M chunks and generate sufficient statistics 
@@ -26515,8 +26519,13 @@
     # NOTE: this corresponds well to Hiram's GC values from his phyloFit runs
     # on the 44-way ss files
     $PHASTBIN/modFreqs ../../4d/phyloFit.all.mod $gc > ../../4d/44way.all.mod
 
+    # repeat for chrX only tree
+    cd /cluster/data/hg18/bed/multiz44way/4d
+    $PHASTBIN/modFreqs 4d.chrX.mod $gc > 44way.chrX.mod
+    ln -s `pwd`/44way.chrX.mod /usr/local/apache/goldenPath/hg18/phastCons44way
+
 cat > doPhyloP.csh << 'EOF'
     set f = $1
     set out = $2
     set c = $f:r:r
@@ -26850,8 +26859,61 @@
 
     ln -s  /cluster/data/hg18/bed/multiz44way/consPhyloP/run.phyloP/glire-ls/wig/phyloP44wayGlireLs.wib /gbdb/hg18/multiz44way/phyloP44wayGlireLs.wib
     hgLoadWiggle -pathPrefix=/gbdb/hg18/multiz44way hg18 phyloP44wayGlireLs phyloP44wayGlireLs.wig
 
+#########################################################################
+# Update phyloP44way tables from Adam Siepel, Melissa Hubisz at Cornell
+# This version uses a different neutral tree model for chrX
+# and will replace the original version as the default view on the Conservation track
+# ( 2009-06-30  kate)
+    mkdir /hive/data/genomes/hg18/bed/multiz44way/chrX.phyloP
+    cd /hive/data/genomes/hg18/bed/multiz44way/chrX.phyloP
+    mkdir  primates
+    cd primates
+    wget --timestamping ftp://siepellab:siepellab@ftp.biotech.cornell.edu/2x/phyloP/44way/primates/\*
+    cd ..
+
+    mkdir placental
+    cd placental
+    wget --timestamping ftp://siepellab:siepellab@ftp.biotech.cornell.edu/2x/phyloP/44way/placental/\*
+    cd ..
+
+    mkdir all
+    cd all
+    wget --timestamping ftp://siepellab:siepellab@ftp.biotech.cornell.edu/2x/phyloP/44way/all/\*
+    cd ..
+
+    zcat all/*.wigFix.gz | wigEncode stdin phyloP44way_v2.wig phyloP44way_v2.wib
+    zcat primates/*.wigFix.gz | wigEncode stdin phyloP44wayPrimates_v2.wig phyloP44wayPrimates_v2.wib
+    zcat placental/*.wigFix.gz | wigEncode stdin phyloP44wayPlacental_v2.wig phyloP44wayPlacental_v2.wib
+
+    ln -s `pwd`/*.wib /gbdb/hg18/multiz44way
+    time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/hg18/multiz44way hg18  phyloP44way_v2 phyloP44way_v2.wig
+    time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/hg18/multiz44way hg18  phyloP44wayPrimates_v2 phyloP44wayPrimates_v2.wig
+    time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/hg18/multiz44way hg18  phyloP44wayPlacental_v2 phyloP44wayPlacental_v2.wig
+
+# Lineage specific phyloP
+# These updated tables will appear in the Lineage Cons track
+
+    mkdir glires-ls
+    cd glires-ls
+    wget --timestamping ftp://siepellab:siepellab@ftp.biotech.cornell.edu/2x/phyloP/44way/glires-ls/\*
+    cd ..
+
+    mkdir primates-ls
+    cd primates-ls
+    wget --timestamping ftp://siepellab:siepellab@ftp.biotech.cornell.edu/2x/phyloP/44way/primates-ls/\*
+    cd ..
+
+    zcat glires-ls/*.wigFix.gz | wigEncode stdin phyloP44wayGliresLs_v2.wig phyloP44wayGliresLs_v2.wib
+    zcat primates-ls/*.wigFix.gz | wigEncode stdin phyloP44wayPrimatesLs_v2.wig phyloP44wayPrimatesLs_v2.wib
+
+    ln -s `pwd`/phyloP44wayGliresLs_v2.wib /gbdb/hg18/multiz44way
+    nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/hg18/multiz44way hg18 phyloP44wayGliresLs_v2 phyloP44wayGliresLs_v2.wig
+
+    ln -s `pwd`/phyloP44wayPrimatesLs_v2.wib /gbdb/hg18/multiz44way
+    nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/hg18/multiz44way hg18 phyloP44wayPrimatesLs_v2 phyloP44wayPrimatesLs_v2.wig
+
 
 ######################################################################
 # downloads for 44-way (DONE - 2009-01-09 - Hiram)
     mkdir -p /hive/data/genomes/hg18/bed/multiz44way/downloads/maf
@@ -27891,8 +27953,44 @@
       -tab -renameSqlTable -sqlTable=$HOME/kent/src/hg/lib/hapmapPhaseIIISummary.sql
 #Loaded 4166007 elements of size 18
 #33.401u 3.275s 1:46.95 34.2%    0+0k 0+0io 0pf+0w
 
+
+#############################################################################
+# GERP Conservation scoring and elements for Ensembl 31-way alignments
+# From Javier Guerroro
+# ENCODE-related data (requested by Margulies, for use by ENCODE analysis group)
+# (2009-03-05 kate)
+
+    ssh hgwdev
+    cd /cluster/data/hg18/bed
+    mkdir -p ensembl31wayGerp/lab
+    cd ensembl31wayGerp/lab
+    wget -r ftp://ftp.ebi.ac.uk/pub/databases/ensembl/encode/31way_msa/
+    cd ..
+    bzcat lab/31way_gerp_elements.bed.bz2 | \
+        tail -n +2 | \
+        sed 's/31way_gerp_elem_365000000/gerp31./' | \
+        hgLoadBed hg18 ensembl31wayGerpElements stdin \
+            -sqlTable=$HOME/kent/src/hg/lib/encode/broadPeak.sql -renameSqlTable
+    # Loaded 1464897 elements of size 9
+
+cat > we.csh << 'EOF'
+    foreach f (lab/*.wig.bz2)
+        echo $f
+        bzcat $f | tail -n +2 | wigEncode stdin temp.wig temp.wib
+    end
+'EOF'
+
+    bzcat lab/*.wig.bz2 | tail -n +2 | \
+        wigEncode stdin ensembl31wayGerpScores.wig ensembl31wayGerpScores.wib
+
+    #   load database
+    mkdir /gbdb/hg18/wib
+    ln -s `pwd`/ensembl31wayGerpScores.wib /gbdb/hg18/wib
+    hgLoadWiggle -pathPrefix=/gbdb/hg18/wib hg18 ensembl31wayGerpScores ensembl31wayGerpScores.wig
+
+
 ############################################################################
 # VEGA GENES UPDATE (BUILD 33) (DONE 2008-03-11 Andy)
     mkdir  /cluster/data/hg18/bed/vega33
     cd  /cluster/data/hg18/bed/vega33