src/hg/makeDb/doc/felCatV17e.txt 1.9

1.9 2010/04/27 19:20:00 chinhli
Add multiz 6 ways support
Index: src/hg/makeDb/doc/felCatV17e.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/felCatV17e.txt,v
retrieving revision 1.8
retrieving revision 1.9
diff -b -B -U 4 -r1.8 -r1.9
--- src/hg/makeDb/doc/felCatV17e.txt	9 Apr 2010 18:41:24 -0000	1.8
+++ src/hg/makeDb/doc/felCatV17e.txt	27 Apr 2010 19:20:00 -0000	1.9
@@ -587,21 +587,27 @@
     cd /hive/data/genomes/ailMel1/bed/lastzFelCatV17e.2010-03-22
     cat fb.felCatV17e.chainAilMel1Link.txt
     # 1503647735 bases of 1990635005 (75.536%) in intersection
 
+    #   re-run swap 04-26
     mkdir /hive/data/genomes/felCatV17e/bed/blastz.ailMel1.swap
     cd /hive/data/genomes/felCatV17e/bed/blastz.ailMel1.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/ailMel1/bed/lastzFelCatV17e.2010-03-22/DEF \
         -swap -syntenicNet -noDbNameCheck \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     # real    155m57.566s
+    cd /hive/data/genomes/ailMel1/bed/blastz.felCatV17e.swap
     cat fb.ailMel1.chainFelCatV17eLink.txt
     # 1507273252 bases of 2245312831 (67.130%) in intersection
 
-
-
+    #   Due to the SEQ1 and SEQ2 spec'ed in
+    # /hive/data/genomes/ailMel1/bed/lastzFelCatV17e.2010-03-22/DEF
+    # need to copy the swap directory from ailMel1 to felCatv17e
+    #  also need to rename from ailMel1.felCatV17e, or 6way step will have
+    #  problem
+    #  Ping hiram for this 
 
 #####################################################################
 # LASTZ Oppsom  Swap (DONE - 2010-03-22 - Chin)
     cd /hive/data/genomes/monDom5/bed/lastzFelCatV17e.2010-03-22
@@ -713,8 +719,127 @@
 
     #   reset to hguser in .hg.conf.beta
 
 
+#####################################################################
+## 6-Way Multiz (working - 2010-04-19 - Chin)
+# use /cluster/home/chinhli/kent/src/hg/utils/phyloTrees/49way.nh 
+    mkdir /hive/data/genomes/felCatV17e/bed/multiz6way
+    cd /hive/data/genomes/felCatV17e/bed/multiz6way
+
+    /cluster/bin/phast/tree_doctor \
+      --prune-all-but=felCat3,hg19,mm9,canFam2,monDom5,ailMel1 \
+      --rename="felCat3 -> felCatV17e  " \
+/cluster/home/chinhli/kent/src/hg/utils/phyloTrees/49way.nh > 6way.nh 
+
+    #   rearrange felCatV17e to the top, get some help from tree_doctor:
+    /cluster/bin/phast/tree_doctor --name-ancestors --reroot felCatV17e \
+           --with-branch 6way.nh
+    #   edit out the ancestors, and move felCatV17e from the bottom to
+    #   the top, resulting in this tree:
+
+(((felCatV17e:0.098612,(canFam2:0.052458,ailMel1:0.050000):0.050000):0.093470,(hg19:0.144018,mm9:0.356483):0.020593):0.258392,monDom5:0.340786);
+
+    #   more rearranging after seeing what the distance table looks like
+    #   below to get them appearing as much as possible in their
+    #   distance order top to bottom: (TO_DO)
+
+    # use the phyloGif to get the tree image
+   /cluster/bin/x86_64/phyloGif -phyloGif_tree=6way.nh \
+   -phyloGif_width=260  -phyloGif_height=260 > 6way.gif
+
+    #   more rearranging after seeing what the distance table looks like
+    #   below to get them appearing as much as possible in their
+    #   distance order top to bottom: (TO_DO)
+
+
+
+    #   usee this specification in the phyloGif tool after changing the names:
+    /cluster/bin/phast/tree_doctor \
+--rename="felCatV17e -> Cat ;  hg19 -> Human ; ailMel1 -> Panda ; canFam2 -> Dog ; mm9 -> Mouse ; monDom5 -> Opossum " 6way.nh
+
+    #   http://genome.ucsc.edu/cgi-bin/phyloGif
+    #   to obtain a gif image for htdocs/images/phylo/felCatV17e_6way.gif
+   /cluster/bin/x86_64/phyloGif -phyloGif_tree=6way.nh \
+   -phyloGif_width=260  -phyloGif_height=260 > felCatV17e_6way.gif
+
+    /cluster/bin/phast/all_dists 6way.nh > 6way.distances.txt
+    # make sure all symlinks lastz.DB -> lastzDb-date
+    #   exist here and at the swap locations, the perl script expects this
+    #   in order to find featureBits numbers.
+    cd /hive/data/genomes/felCatV17e/bed]
+    ln -s blastz.hg19.swap lastz.hg19
+    ln -s blastz.canFam2.swap lastz.canFam2
+    ln -s blastz.mm9.swap lastz.mm9
+    ln -s blastz.monDom5.swap lastz.monDom5
+    # ln -s blastz.ailMel1.swap lastz.ailMel1 (we switched SEQ1 and SEQ2)
+    ln -s /hive/data/genomes/ailMel1/bed/blastz.felCatV17e.swap lastz.ailMel1
+
+
+    cd /hive/data/genomes/felCatV17e/bed/multiz6way
+    #   Use 6way.distances.txt to create the table below
+    #   with this perl script:
+    cat << '_EOF_' > sizeStats.pl
+#!/usr/bin/env perl
+use strict;
+use warnings;
+
+
+open (FH, "grep -y felCatV17e 6way.distances.txt | sort -k3,3n|") or
+        die "can not read 6way.distances.txt";
+
+my $count = 0;
+while (my $line = <FH>) {
+    chomp $line;
+    my ($felCatV17e, $D, $dist) = split('\s+', $line);
+    my $chain = "chain" . ucfirst($D);
+    my $B="/hive/data/genomes/felCatV17e/bed/lastz.$D/fb.felCatV17e." .
+        $chain . "Link.txt";
+    my $chainLinkMeasure =
+        `awk '{print \$5}' ${B} 2> /dev/null | sed -e "s/(//; s/)//"`;
+    chomp $chainLinkMeasure;
+    $chainLinkMeasure = 0.0 if (length($chainLinkMeasure) < 1);
+    $chainLinkMeasure =~ s/\%//;
+    my $swapFile="/hive/data/genomes/${D}/bed/lastz.felCatV17e/fb.${D}.chainCalJac3Link.txt";
+    my $swapMeasure = "N/A";
+    if ( -s $swapFile ) {
+        $swapMeasure =
+            `awk '{print \$5}' ${swapFile} 2> /dev/null | sed -e "s/(//; s/)//"`;
+        chomp $swapMeasure;
+        $swapMeasure = 0.0 if (length($swapMeasure) < 1);
+        $swapMeasure =~ s/\%//;
+    }
+    my $orgName=
+    `hgsql -N -e "select organism from dbDb where name='$D';" hgcentraltest`;
+    chomp $orgName;
+    if (length($orgName) < 1) {
+        $orgName="N/A";
+    }
+    ++$count;
+   ++$count;
+    if ($swapMeasure eq "N/A") {
+        printf "# %02d  %.4f - %s %s\t(%% %.3f) (%s)\n", $count, $dist,
+            $orgName, $D, $chainLinkMeasure, $swapMeasure
+    } else {
+        printf "# %02d  %.4f - %s %s\t(%% %.3f) (%% %.3f)\n", $count, $dist,
+            $orgName, $D, $chainLinkMeasure, $swapMeasure
+    }
+}
+close (FH);
+'_EOF_'
+    # << happy emacs
+    chmod +x ./sizeStats.pl
+    ./sizeStats.pl
+###### 04-27 stop here due to wrong output from sizeStats.pl 
+###### pending move/rename aelMel1/felcatV17e lastz swap results
+# 02  0.1986 - Panda ailMel1    (% 0.000) (N/A)
+# 04  0.2011 - Dog canFam2      (% 73.720) (N/A)
+# 06  0.3567 - Human hg19       (% 60.870) (N/A)
+# 08  0.5692 - Mouse mm9        (% 30.972) (N/A)
+# 10  0.7913 - Opossum monDom5  (% 8.364) (N/A)
+
+
+
 
 ####################################################################