src/hg/makeDb/doc/tetNig2.txt 1.6

1.6 2009/09/18 22:15:32 hiram
Completed tetNig2 chains and nets to everything
Index: src/hg/makeDb/doc/tetNig2.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/tetNig2.txt,v
retrieving revision 1.5
retrieving revision 1.6
diff -b -B -U 4 -r1.5 -r1.6
--- src/hg/makeDb/doc/tetNig2.txt	11 Aug 2009 17:16:53 -0000	1.5
+++ src/hg/makeDb/doc/tetNig2.txt	18 Sep 2009 22:15:32 -0000	1.6
@@ -102,9 +102,9 @@
 	-strand=+ ../../tetNig2.2bit > findMotif.txt 2>&1
     #	real    0m7.967s
     grep "^#GAP " findMotif.txt | sed -e "s/^#GAP //" > allGaps.bed
     featureBits tetNig2 -not gap -bed=notGap.bed
-    featureBits tetNig2 allGaps.bed notGap.bed -bed=net.gaps.bed
+    featureBits tetNig2 allGaps.bed notGap.bed -bed=new.gaps.bed
     #	what is the last index in the existing gap table:
     hgsql -N -e "select ix from gap;" tetNig2 | sort -n | tail -1
     #	34284
     cat << '_EOF_' > mkGap.pl
@@ -551,4 +551,199 @@
     cat fb.tetNig2.chainHg19Link.txt 
     #	42910930 bases of 302314788 (14.194%) in intersection
 
 ##############################################################################
+# BLASTZ/CHAIN/NET oryLat2 swap (DONE - 2009-09-15 - Hiram)
+    #	original
+    cd /hive/data/genomes/oryLat2/bed/blastzTetNig2.2009-09-14
+    cat fb.oryLat2.chainTetNig2Link.txt
+    #	162783854 bases of 700386597 (23.242%) in intersection
+
+    #	And, for the swap:
+    mkdir /hive/data/genomes/tetNig2/bed/blastz.oryLat2.swap
+    cd /hive/data/genomes/tetNig2/bed/blastz.oryLat2.swap
+    time doBlastzChainNet.pl -chainMinScore=3000 -chainLinearGap=medium \
+	/hive/data/genomes/oryLat2/bed/blastzTetNig2.2009-09-14/DEF \
+	-swap -tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
+	-noLoadChainSplit -verbose=2 -smallClusterHub=pk -bigClusterHub=pk \
+	> swap.log 2>&1 &
+    #	real    49m49.335s
+    cat fb.tetNig2.chainOryLat2Link.txt
+    #	136115939 bases of 302314788 (45.025%) in intersection
+
+#########################################################################
+# BLASTZ/CHAIN/NET gasAcu1 swap (DONE - 2009-09-15 - Hiram)
+    #	original
+    cd /hive/data/genomes/gasAcu1/bed/lastzTetNig2.2009-08-10
+    featureBits gasAcu1 chainTetNig2Link >&fb.gasAcu1.chainTetNig2Link.txt
+    cat fb.gasAcu1.chainTetNig2Link.txt
+    #	134497679 bases of 446627861 (30.114%) in intersection
+
+    mkdir /hive/data/genomes/tetNig2/bed/blastz.gasAcu1.swap
+    cd /hive/data/genomes/tetNig2/bed/blastz.gasAcu1.swap
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+	/hive/data/genomes/gasAcu1/bed/lastzTetNig2.2009-08-10/DEF \
+	-swap -qRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
+	-noLoadChainSplit -chainMinScore=2000 -chainLinearGap=medium \
+	-workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=swarm \
+	> swap.log 2>&1 &
+    #	real    33m6.310s
+    cat fb.tetNig2.chainGasAcu1Link.txt 
+    #	123285586 bases of 302314788 (40.781%) in intersection
+
+#########################################################################
+# LASTZ/CHAIN/NET danRer6 (DONE - 2009-09-15,18 - Hiram)
+    mkdir /hive/data/genomes/tetNig2/bed/lastzDanRer6.2009-09-15
+    cd /hive/data/genomes/tetNig2/bed/lastzDanRer6.2009-09-15
+    cat << '_EOF_' > DEF
+# Tetraodon vs. Zebrafish
+
+# using the "close" genome alignment parameters
+#	see also: http://genomewiki.ucsc.edu/index.php/Mm9_multiple_alignment
+BLASTZ_Y=9400
+BLASTZ_L=3000
+BLASTZ_K=3000
+BLASTZ_M=50
+BLASTZ_Q=/scratch/data/blastz/HoxD55.q
+
+# TARGET: Tetraodon TetNig2 - single chunk big enough to run single largest item
+SEQ1_DIR=/scratch/data/tetNig2/tetNig2.2bit
+SEQ1_LEN=/scratch/data/tetNig2/chrom.sizes
+SEQ1_CTGDIR=/scratch/data/tetNig2/tetNig2.contigs.2bit
+SEQ1_CTGLEN=/scratch/data/tetNig2/tetNig2.contigs.sizes
+SEQ1_LIFT=/scratch/data/tetNig2/tetNig2.contigs.lift
+SEQ1_CHUNK=20000000
+SEQ1_LAP=10000
+SEQ1_LIMIT=50
+
+# QUERY: Zebrafish danRer6, chunk large enough to run largest piece
+SEQ2_DIR=/scratch/data/danRer6/danRer6.2bit
+SEQ2_LEN=/scratch/data/danRer6/chrom.sizes
+SEQ2_CTGDIR=/hive/data/genomes/danRer6/contigs/danRer6.contigs.2bit
+SEQ2_CTGLEN=/hive/data/genomes/danRer6/contigs/danRer6.contigs.sizes
+SEQ2_LIFT=/hive/data/genomes/danRer6/contigs/danRer6.contigs.lift
+SEQ2_CHUNK=10000000
+SEQ2_LIMIT=100
+SEQ2_LAP=0
+
+BASE=/hive/data/genomes/tetNig2/bed/lastzDanRer6.2009-09-15
+TMPDIR=/scratch/tmp
+'_EOF_'
+    # << this line keeps emacs coloring happy
+
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+	`pwd`/DEF \
+	-tRepeats=windowmaskerSdust \
+	-noLoadChainSplit -chainMinScore=2000 -chainLinearGap=medium \
+	-workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=swarm \
+	> do.log 2>&1 &
+    #	real    754m56.563s
+    cat fb.tetNig2.chainDanRer6Link.txt 
+    #	70626082 bases of 302314788 (23.362%) in intersection
+
+    #	and the swap to danRer6:
+    mkdir /hive/data/genomes/danRer6/bed/blastz.tetNig2.swap
+    cd /hive/data/genomes/danRer6/bed/blastz.tetNig2.swap
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+	/hive/data/genomes/tetNig2/bed/lastzDanRer6.2009-09-15/DEF \
+	-swap -tRepeats=windowmaskerSdust \
+	-noLoadChainSplit -chainMinScore=2000 -chainLinearGap=medium \
+	-workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=swarm \
+	> swap.log 2>&1 &
+    #	real    26m40.310s
+    cat fb.danRer6.chainTetNig2Link.txt 
+    #	93349443 bases of 1506896106 (6.195%) in intersection
+
+#########################################################################
+# LASTZ/CHAIN/NET fr2 (DONE - 2009-09-15,18 - Hiram)
+    mkdir /hive/data/genomes/tetNig2/bed/lastzFr2.2009-09-15
+    cd /hive/data/genomes/tetNig2/bed/lastzFr2.2009-09-15
+    cat << '_EOF_' > DEF
+# Tetraodon vs. Fugu
+
+# using the "close" genome alignment parameters
+#	see also: http://genomewiki.ucsc.edu/index.php/Mm9_multiple_alignment
+BLASTZ_Y=9400
+BLASTZ_L=3000
+BLASTZ_K=3000
+BLASTZ_M=50
+BLASTZ_Q=/scratch/data/blastz/HoxD55.q
+
+# TARGET: Tetraodon TetNig2 - single chunk big enough to run single largest item
+SEQ1_DIR=/scratch/data/tetNig2/tetNig2.2bit
+SEQ1_LEN=/scratch/data/tetNig2/chrom.sizes
+SEQ1_CTGDIR=/scratch/data/tetNig2/tetNig2.contigs.2bit
+SEQ1_CTGLEN=/scratch/data/tetNig2/tetNig2.contigs.sizes
+SEQ1_LIFT=/scratch/data/tetNig2/tetNig2.contigs.lift
+SEQ1_CHUNK=20000000
+SEQ1_LAP=10000
+SEQ1_LIMIT=50
+
+# QUERY: Fugu fr2, chunk large enough to run largest piece
+SEQ2_DIR=/scratch/data/fr2/fr2.2bit
+SEQ2_LEN=/scratch/data/fr2/chrom.sizes
+SEQ2_CTGDIR=/hive/data/genomes/fr2/noUn/fr2.scaffolds.2bit
+SEQ2_CTGLEN=/hive/data/genomes/fr2/noUn/fr2.scaffolds.sizes
+SEQ2_LIFT=/hive/data/genomes/fr2/jkStuff/liftAll.lft
+SEQ2_CHUNK=10000000
+SEQ2_LIMIT=100
+SEQ2_LAP=0
+
+BASE=/hive/data/genomes/tetNig2/bed/lastzFr2.2009-09-15
+TMPDIR=/scratch/tmp
+'_EOF_'
+    # << this line keeps emacs coloring happy
+
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+	`pwd`/DEF \
+	-tRepeats=windowmaskerSdust \
+	-noLoadChainSplit -chainMinScore=2000 -chainLinearGap=medium \
+	-workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=pk \
+	> do.log 2>&1 &
+    #	real    1226m51.875s
+
+    #	the run above broke down during lastz due to pk problems; finished
+    #	the lastz cluster run manually, then continued from the cat step:
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+	`pwd`/DEF \
+	-continue=cat -tRepeats=windowmaskerSdust \
+	-noLoadChainSplit -chainMinScore=2000 -chainLinearGap=medium \
+	-workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=pk \
+	> cat.log 2>&1 &
+    #	real    63m23.911s
+    cat fb.tetNig2.chainFr2Link.txt 
+    #	243965150 bases of 302314788 (80.699%) in intersection
+
+    #	and the swap to fr2:
+    mkdir /hive/data/genomes/fr2/bed/blastz.tetNig2.swap
+    cd /hive/data/genomes/fr2/bed/blastz.tetNig2.swap
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+	/hive/data/genomes/tetNig2/bed/lastzFr2.2009-09-15/DEF \
+	-swap -tRepeats=windowmaskerSdust \
+	-noLoadChainSplit -chainMinScore=2000 -chainLinearGap=medium \
+	-workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=pk \
+	> swap.log 2>&1 &
+    #	real    21m56.576s
+    cat fb.fr2.chainTetNig2Link.txt 
+    #	248984008 bases of 393312790 (63.304%) in intersection
+
+#########################################################################
+# BLASTZ/CHAIN/NET mm9/Mouse swap (DONE - 2009-09-15 - Hiram)
+    #	the original alignment to the mouse sequence
+    cd /hive/data/genomes/mm9/bed/lastzTetNig2.2009-09-15
+    cat fb.mm9.chainTetNig2Link.txt 
+    #	45642112 bases of 2620346127 (1.742%) in intersection
+
+    #	running the swap to here, tetNig2:
+    mkdir /hive/data/genomes/tetNig2/bed/blastz.mm9.swap
+    cd /hive/data/genomes/tetNig2/bed/blastz.mm9.swap
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+	/hive/data/genomes/mm9/bed/lastzTetNig2.2009-09-15/DEF \
+	-qRepeats=windowmaskerSdust \
+	-noLoadChainSplit -chainMinScore=5000 -chainLinearGap=loose \
+	-workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=swarm \
+	-swap > swap.log 2>&1 &
+    #	real    10m34.797s
+    cat fb.tetNig2.chainMm9Link.txt 
+    #	41176381 bases of 302314788 (13.620%) in intersection
+
+#########################################################################