src/hg/makeDb/doc/hg19.txt 1.14

1.14 2009/05/14 17:37:13 hiram
lastz runs status updated
Index: src/hg/makeDb/doc/hg19.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/hg19.txt,v
retrieving revision 1.13
retrieving revision 1.14
diff -b -B -U 4 -r1.13 -r1.14
--- src/hg/makeDb/doc/hg19.txt	14 May 2009 04:56:59 -0000	1.13
+++ src/hg/makeDb/doc/hg19.txt	14 May 2009 17:37:13 -0000	1.14
@@ -549,9 +549,9 @@
     #	adding -normScore
     hgLoadChain -normScore -tIndex hg19 chainSelf hg19.hg19.all.chain.gz
 
 ############################################################################
-# Chimp Lastz run (WORKING - 2009-03-19 - Hiram)
+# Chimp Lastz run (DONE - 2009-03-19 - Hiram)
     mkdir /hive/data/genomes/hg19/bed/lastzPanTro2.2009-03-19
     cd /hive/data/genomes/hg19/bed/lastzPanTro2.2009-03-19
     cat << '_EOF_'
 # human vs chimp
@@ -605,8 +605,10 @@
 	-chainLinearGap=medium \
 	-workhorse=hgwdev -smallClusterHub=pk -bigClusterHub=swarm \
 	> load.log 2>&1 &
     #	real    47m17.871s
+    cat fb.hg19.chainPanTro2Link.txt 
+    #	2747983350 bases of 2897316137 (94.846%) in intersection
 
     #	running the swap
     ssh swarm
     mkdir /hive/data/genomes/panTro2/bed/blastz.hg19.swap
@@ -706,9 +708,9 @@
     #	1723432141 bases of 2897316137 (59.484%) in intersection
     doRecipBest.pl -buildDir=`pwd` hg19 gorGor1 > rbest.log 2>&1
 
 ############################################################################
-# PREPARE LINEAGE SPECIFIC REPEAT FILES FOR BLASTZ (DONE - 2009-04-02 - Hiram)
+# PREPARE LINEAGE SPECIFIC REPEAT FILES FOR LASTZ (DONE - 2009-04-02 - Hiram)
     ssh pk
     mkdir /hive/data/genomes/hg19/bed/linSpecRep
     cd /hive/data/genomes/hg19/bed/linSpecRep
     #	create individual .out files from the master record in ../repeatMasker
@@ -2100,9 +2102,9 @@
     hgsql hg19 \
 -e 'LOAD DATA LOCAL INFILE "ensemblLift.tab" INTO TABLE ensemblLift'
 
 ##############################################################################
-# BLASTZ MOUSE Mm9 (DONE - 2009-05-13 - Hiram)
+# LASTZ MOUSE Mm9 (DONE - 2009-05-13 - Hiram)
     mkdir /hive/data/genomes/hg19/bed/lastzMm9.2009-05-13
     cd /hive/data/genomes/hg19/bed/lastzMm9.2009-05-13
 
     cat << '_EOF_' > DEF
@@ -2134,28 +2136,22 @@
 	`pwd`/DEF \
 	-noLoadChainSplit -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
-XXX - running Wed May 13 16:51:48 PDT 2009
-
-    #	fixed up some bugs in the doBlastzChainNet.pl script, finished
-    #	the lastz run manually, then continuing:
-    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
-	`pwd`/DEF \
-	-continue=cat \
-	-noLoadChainSplit \
-	-workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=swarm \
-	-chainMinScore=3000 -chainLinearGap=medium > cat.log 2>&1 &
     cat fb.hg19.chainMm9Link.txt 
-    #	1022723573 bases of 2897316137 (35.299%) in intersection
+    #	1022734273 bases of 2897316137 (35.299%) in intersection
+
+    #	and the swap
+    mkdir /hive/data/genomes/mm9/bed/blastz.hg19.swap
+    cd /hive/data/genomes/mm9/bed/blastz.hg19.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
-	`pwd`/DEF \
-	-noLoadChainSplit -continue=syntenicNet -syntenicNet \
+	/hive/data/genomes/hg19/bed/lastzMm9.2009-05-13/DEF \
+	-swap -noLoadChainSplit -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=swarm \
-	-chainMinScore=3000 -chainLinearGap=medium > synNet.log 2>&1 &
+	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
 
 #########################################################################
-# BLASTZ Dog CanFam2 (DONE - 2009-05-13 - Hiram)
+# LASTZ Dog CanFam2 (DONE - 2009-05-13 - Hiram)
     mkdir /hive/data/genomes/hg19/bed/lastzCanFam2.2009-05-13
     cd /hive/data/genomes/hg19/bed/lastzCanFam2.2009-05-13
 
     cat << '_EOF_' > DEF
@@ -2188,38 +2184,13 @@
 	`pwd`/DEF \
 	-noLoadChainSplit -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=pk \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
-XXX - running Wed May 13 16:48:16 PDT 2009
-
-    #	fixed up some bugs in the doBlastzChainNet.pl script, finished
-    #	the lastz run manually, then continuing:
-    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
-	`pwd`/DEF \
-	-continue=cat \
-	-noLoadChainSplit \
-	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=pk \
-	-chainMinScore=3000 -chainLinearGap=medium > cat.log 2>&1 &
-    #	fixup some data missing problems on encodek, then continuing
-    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
-	`pwd`/DEF \
-	-continue=chainMerge \
-	-noLoadChainSplit \
-	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=pk \
-	-chainMinScore=3000 -chainLinearGap=medium > chainMerge.log 2>&1 &
-    #	real    91m9.723s
     cat fb.hg19.chainCanFam2Link.txt 
-    #	1532071680 bases of 2897316137 (52.879%) in intersection
-
-    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
-	`pwd`/DEF \
-	-noLoadChainSplit -continue=syntenicNet -syntenicNet \
-	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=pk \
-	-chainMinScore=3000 -chainLinearGap=medium > synNet.log 2>&1 &
-XXX - running Wed May 13 12:51:14 PDT 2009
+    #	1532073507 bases of 2897316137 (52.879%) in intersection
 
 #########################################################################
-# BLASTZ Chicken GalGal3 (WORKING - 2009-05-13 - Hiram)
+# LASTZ Chicken GalGal3 (DONE - 2009-05-13 - Hiram)
     mkdir /hive/data/genomes/hg19/bed/lastzGalGal3.2009-05-13
     cd /hive/data/genomes/hg19/bed/lastzGalGal3.2009-05-13
 
     cat << '_EOF_' > DEF
@@ -2256,13 +2227,13 @@
 	-syntenicNet \
 	-noLoadChainSplit \
 	-workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=pk \
 	-chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1
-XXX - running Wed May 13 10:21:42 PDT 2009
+    cat fb.hg19.chainGalGal3Link.txt 
+    #	104053179 bases of 2897316137 (3.591%) in intersection
 
 #########################################################################
-# BLASTZ Macaca Mulatta RheMac2 (WORKING - 2009-05-13 - Hiram)
-
+# LASTZ Macaca Mulatta RheMac2 (DONE - 2009-05-13 - Hiram)
     mkdir /hive/data/genomes/hg19/bed/lastzRheMac2.2009-05-13
     cd /hive/data/genomes/hg19/bed/lastzRheMac2.2009-05-13
 
     cat << '_EOF_' > DEF
@@ -2303,12 +2274,14 @@
 	-syntenicNet \
 	-noLoadChainSplit -chainMinScore=5000 -chainLinearGap=medium \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	> do.log 2>&1 &
-XXX - running Wed May 13 10:41:20 PDT 2009
+    #	real    760m22.810s
+    cat fb.hg19.chainRheMac2Link.txt 
+    #	2397361211 bases of 2897316137 (82.744%) in intersection
 
 #########################################################################
-# BLASTZ Rat Rn4 (WORKING - 2009-05-13 - Hiram)
+# LASTZ Rat Rn4 (DONE - 2009-05-13 - Hiram)
     mkdir /hive/data/genomes/hg19/bed/lastzRn4.2009-05-13
     cd /hive/data/genomes/hg19/bed/lastzRn4.2009-05-13
 
     cat << '_EOF_' > DEF
@@ -2340,15 +2313,16 @@
 	`pwd`/DEF \
 	-syntenicNet -noLoadChainSplit \
 	-workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
-XXX - running Wed May 13 16:48:04 PDT 2009
     #	real    314m18.227s
+    cat fb.hg19.chainRn4Link.txt 
+    #	952605822 bases of 2897316137 (32.879%) in intersection
 
 ##############################################################################
-# BLASTZ Rat Rn4 (WORKING - 2009-05-13 - Hiram)
-    mkdir /hive/data/genomes/hg19/bed/lastzRn4.2009-05-13
-    cd /hive/data/genomes/hg19/bed/lastzRn4.2009-05-13
+# LASTZ Orangutan PonAbe2 (DONE - 2009-05-13 - Hiram)
+    mkdir /hive/data/genomes/hg19/bed/lastzPonAbe2.2009-05-13
+    cd /hive/data/genomes/hg19/bed/lastzPonAbe2.2009-05-13
 
     cat << '_EOF_' > DEF
 # human vs orangutan
 BLASTZ=lastz
@@ -2389,7 +2363,98 @@
 	-syntenicNet \
 	-noLoadChainSplit -chainMinScore=5000 -chainLinearGap=medium \
 	-workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=pk \
 	> do.log 2>&1 &
-XXX - running Wed May 13 12:44:33 PDT 2009
+    cat fb.hg19.chainPonAbe2Link.txt 
+    #	2646687531 bases of 2897316137 (91.350%) in intersection
+
+##############################################################################
+# LASTZ Lamprey PetMar1 (WORKING - 2009-05-14 - Hiram)
+    mkdir /hive/data/genomes/hg19/bed/lastzPetMar1.2009-05-14
+    cd /hive/data/genomes/hg19/bed/lastzPetMar1.2009-05-14
+
+    cat << '_EOF_' > DEF
+# Human vs. Lamprey
+BLASTZ_Y=3400
+BLASTZ_L=6000
+BLASTZ_K=2200
+BLASTZ_M=50
+BLASTZ_Q=/scratch/data/blastz/HoxD55.q
+
+# TARGET: Human Hg19
+SEQ1_DIR=/scratch/data/hg19/hg19.2bit
+SEQ1_LEN=/scratch/data/hg19/chrom.sizes
+SEQ1_CHUNK=100000000
+SEQ1_LAP=10000
+SEQ2_LIMIT=5
+
+# QUERY: Lamprey petMar1
+SEQ2_DIR=/scratch/data/petMar1/petMar1.2bit
+SEQ2_LEN=/scratch/data/petMar1/chrom.sizes
+SEQ2_CHUNK=20000000
+SEQ2_LIMIT=300
+SEQ2_LAP=0
+
+BASE=/hive/data/genomes/hg19/bed/lastzPetMar1.2009-05-14
+TMPDIR=/scratch/tmp
+'_EOF_'
+    # << happy emacs
+
+    #	establish a screen to control this job
+    screen
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+	`pwd`/DEF \
+	-qRepeats=windowmaskerSdust \
+	-noLoadChainSplit -chainMinScore=5000 -chainLinearGap=loose \
+	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=pk \
+	> do.log 2>&1 &
+XXX - running Thu May 14 10:18:36 PDT 2009
+
+##############################################################################
+# LASTZ Fugu Fr2 (WORKING - 2009-05-14 - Hiram)
+    mkdir /hive/data/genomes/hg19/bed/lastzFr2.2009-05-14
+    cd /hive/data/genomes/hg19/bed/lastzFr2.2009-05-14
+
+    cat << '_EOF_' > DEF
+# Human vs. Fugu
+# Try "human-fugu" (more distant, less repeat-killed than mammal) params
+# +M=50:
+BLASTZ_Y=3400
+BLASTZ_L=6000
+BLASTZ_K=2200
+BLASTZ_M=50
+BLASTZ_Q=/scratch/data/blastz/HoxD55.q
+
+# TARGET: Human Hg19
+SEQ1_DIR=/scratch/data/hg19/hg19.2bit
+SEQ1_LEN=/scratch/data/hg19/chrom.sizes
+SEQ1_CHUNK=10000000
+SEQ1_LAP=10000
+SEQ1_LIMIT=5
+
+# QUERY: Fugu fr2
+#       Align to the scaffolds, results lifted up to chrUn.sdTrf coordinates
+SEQ2_DIR=/scratch/data/fr2/fr2.2bit
+SEQ2_LEN=/hive/data/genomes/fr2/chrom.sizes
+SEQ2_CTGDIR=/hive/data/genomes/fr2/noUn/fr2.scaffolds.2bit
+SEQ2_CTGLEN=/hive/data/genomes/fr2/noUn/fr2.scaffolds.sizes
+SEQ2_LIFT=/hive/data/genomes/fr2/jkStuff/liftAll.lft
+SEQ2_CHUNK=20000000
+SEQ2_LIMIT=30
+SEQ2_LAP=0
+
+BASE=/hive/data/genomes/hg19/bed/lastzFr2.2009-05-14
+TMPDIR=/scratch/tmp
+'_EOF_'
+    # << happy emacs
+
+    #	establish a screen to control this job
+    screen
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+	`pwd`/DEF \
+	-qRepeats=windowmaskerSdust \
+	-noLoadChainSplit -chainMinScore=5000 -chainLinearGap=loose \
+	-workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=encodek \
+	> do.log 2>&1 &
+XXX - running Thu May 14 10:18:26 PDT 2009
 
 ##############################################################################