src/hg/makeDb/doc/mm9.txt 1.121

1.121 2010/01/25 19:16:12 hiram
panda aligned to dog,rat,mouse,human,opossum pig aligned to mouse,human,opossum,cow bosTau4 to rn4, rabbit to rat,mouse,opossum,guineaPig
Index: src/hg/makeDb/doc/mm9.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/mm9.txt,v
retrieving revision 1.120
retrieving revision 1.121
diff -b -B -U 4 -r1.120 -r1.121
--- src/hg/makeDb/doc/mm9.txt	15 Jan 2010 23:49:38 -0000	1.120
+++ src/hg/makeDb/doc/mm9.txt	25 Jan 2010 19:16:12 -0000	1.121
@@ -9910,5 +9910,182 @@
     featureBits mm9 vegaGene
     # 53838752 bases of 2620346127 (2.055%) in intersection
     featureBits mm9 vegaPseudoGene
     # 3060300 bases of 2620346127 (0.117%) in intersection
+
 ######################################################################## 
+# Blastz Rabbit oryCun2 (DONE - 2010-01-15 - Hiram)
+    ssh hgwdev
+    screen # use screen to control this job
+    mkdir /hive/data/genomes/mm9/bed/lastzOryCun2.2010-01-15
+    cd /hive/data/genomes/mm9/bed/lastzOryCun2.2010-01-15
+
+    cat << '_EOF_' > DEF
+# Mouse vs. Rabbit
+BLASTZ_M=50
+
+# TARGET: Mouse Mm9
+SEQ1_DIR=/scratch/data/mm9/mm9.2bit
+SEQ1_LEN=/cluster/data/mm9/chrom.sizes
+SEQ1_CHUNK=10000000
+SEQ1_LAP=10000
+
+# QUERY: Rabbit - with a chunk size of 20,000,000, all but 36 contigs fit in a single job
+SEQ2_DIR=/scratch/data/oryCun2/oryCun2.2bit
+SEQ2_LEN=/scratch/data/oryCun2/chrom.sizes
+SEQ2_CTGDIR=/scratch/data/oryCun2/oryCun2.contigs.2bit
+SEQ2_CTGLEN=/scratch/data/oryCun2/oryCun2.contigs.sizes
+SEQ2_LIFT=/hive/data/genomes/oryCun2/contigs/oryCun2.contigs.lift
+SEQ2_CHUNK=20000000
+SEQ2_LIMIT=400
+SEQ2_LAP=0
+
+BASE=/hive/data/genomes/mm9/bed/lastzOryCun2.2010-01-15
+TMPDIR=/scratch/tmp
+'_EOF_'
+    # << this line keeps emacs coloring happy
+
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+	`pwd`/DEF \
+	-noLoadChainSplit -chainMinScore=3000 -chainLinearGap=medium \
+	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
+	> do.log 2>&1 &
+    cat fb.mm9.chainOryCun2Link.txt
+    #	670229789 bases of 2620346127 (25.578%) in intersection
+
+    #	496428446 bases of 2620346127 (18.945%) in intersection
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+	`pwd`/DEF \
+	-noLoadChainSplit -chainMinScore=3000 -chainLinearGap=medium \
+	-continue=syntenicNet -bigClusterHub=swarm \
+	-syntenicNet > syntenicNet.log 2>&1 &
+    #	about 20 minutes
+
+    #	create reciprocal best chains/nets
+    ssh hgwdev
+    cd /hive/data/genomes/mm9/bed/lastzOryCun2.2010-01-15
+    #	this needs blastz.oryCun2 symlink to function
+    time nice -n +19 doRecipBest.pl mm9 oryCun2 > rbest.log 2>&1 &
+    #	real    37m32.151s
+
+    mkdir /hive/data/genomes/oryCun2/bed/blastz.mm9.swap
+    cd /hive/data/genomes/oryCun2/bed/blastz.mm9.swap
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+	/hive/data/genomes/mm9/bed/lastzOryCun2.2010-01-15/DEF \
+	-noLoadChainSplit -chainMinScore=3000 -chainLinearGap=medium \
+	-swap -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
+	> swap.log 2>&1 &
+    #	real    84m6.571s
+    cat fb.oryCun2.chainMm9Link.txt 
+    #	669602734 bases of 2604023284 (25.714%) in intersection
+
+#########################################################################
+# ailMel1 Panda alignment (DONE - 2010-01-21 - Hiram)
+    mkdir /hive/data/genomes/mm9/bed/lastzAilMel1.2010-01-21
+    cd /hive/data/genomes/mm9/bed/lastzAilMel1.2010-01-21
+
+    cat << '_EOF_' > DEF
+# Mouse vs. Panda
+#	lastz parameters as described in the supplemental material
+#	of the Panda paper
+BLASTZ_K=2200
+BLASTZ_Y=3400
+BLASTZ_L=6000
+BLASTZ_H=2000
+BLASTZ_C=2
+BLASTZ_T=2
+
+# our usual M
+BLASTZ_M=50
+
+# TARGET: Mouse Mm9
+SEQ1_DIR=/scratch/data/mm9/nib
+SEQ1_LEN=/scratch/data/mm9/chrom.sizes
+SEQ1_CHUNK=10000000
+SEQ1_LAP=10000
+
+# QUERY: Panda
+SEQ2_DIR=/scratch/data/ailMel1/ailMel1.2bit
+SEQ2_LEN=/scratch/data/ailMel1/chrom.sizes
+SEQ2_CHUNK=10000000
+SEQ2_LIMIT=50
+SEQ2_LAP=0
+
+BASE=/hive/data/genomes/mm9/bed/lastzAilMel1.2010-01-21
+TMPDIR=/scratch/tmp
+'_EOF_'
+    # << this line keeps emacs coloring happy
+
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+	`pwd`/DEF \
+	-noLoadChainSplit -syntenicNet \
+	-workhorse=hgwdev -smallClusterHub=pk -bigClusterHub=swarm \
+	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
+    #	real    49m23.409s
+    #	the run failed during the cat step; continuing from that step:
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+	-continue=cat `pwd`/DEF \
+	-noLoadChainSplit -syntenicNet \
+	-workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=swarm \
+	-chainMinScore=3000 -chainLinearGap=medium > cat.log 2>&1 &
+    #	real    79m11.840s
+    cat fb.mm9.chainAilMel1Link.txt 
+    #	748670674 bases of 2620346127 (28.571%) in intersection
+
+    mkdir /hive/data/genomes/ailMel1/bed/blastz.mm9.swap
+    cd /hive/data/genomes/ailMel1/bed/blastz.mm9.swap
+    time doBlastzChainNet.pl -verbose=2 \
+	/hive/data/genomes/mm9/bed/lastzAilMel1.2010-01-21/DEF \
+	-swap -noLoadChainSplit -bigClusterHub=swarm -smallClusterHub=memk \
+	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
+    #	real    48m59.568s
+    cat fb.ailMel1.chainMm9Link.txt 
+    #	736638976 bases of 2225124764 (33.106%) in intersection
+
+############################################################################
+# susScr1 Pig BLASTZ/CHAIN/NET (WORKING - 2010-01-21 - Hiram)
+    screen # use a screen to manage this multi-day job
+    mkdir /hive/data/genomes/mm9/bed/lastzSusScr1.2010-01-21
+    cd /hive/data/genomes/mm9/bed/lastzSusScr1.2010-01-21
+
+    cat << '_EOF_' > DEF
+# Mouse vs. Pig
+BLASTZ_M=50
+
+# TARGET: Mouse Mm9
+SEQ1_DIR=/scratch/data/mm9/nib
+SEQ1_LEN=/scratch/data/mm9/chrom.sizes
+SEQ1_CHUNK=10000000
+SEQ1_LAP=10000
+
+# QUERY: Pig SusScr1
+SEQ2_DIR=/scratch/data/susScr1/susScr1.2bit
+SEQ2_LEN=/scratch/data/susScr1/chrom.sizes
+SEQ2_CHUNK=10000000
+SEQ2_LAP=0
+
+BASE=/hive/data/genomes/mm9/bed/lastzSusScr1.2010-01-21
+TMPDIR=/scratch/tmp
+'_EOF_'
+    # << this line keeps emacs coloring happy
+
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+	`pwd`/DEF \
+	-noLoadChainSplit -syntenicNet \
+	-workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=pk \
+	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
+    #	real    875m26.114s
+    cat fb.mm9.chainSusScr1Link.txt 
+    #	616833828 bases of 2620346127 (23.540%) in intersection
+
+    mkdir /hive/data/genomes/susScr1/bed/blastz.mm9.swap
+    cd /hive/data/genomes/susScr1/bed/blastz.mm9.swap
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+	/hive/data/genomes/mm9/bed/lastzSusScr1.2010-01-21/DEF \
+	-swap -noLoadChainSplit -syntenicNet \
+	-workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=pk \
+	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
+    #	real    69m27.221s
+    cat fb.susScr1.chainMm9Link.txt 
+    #	656445475 bases of 2231332019 (29.419%) in intersection
+
+#########################################################################