src/hg/makeDb/doc/hg19.txt 1.12
1.12 2009/05/13 20:51:33 hiram
Running lastz alignments
Index: src/hg/makeDb/doc/hg19.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/hg19.txt,v
retrieving revision 1.11
retrieving revision 1.12
diff -b -B -U 4 -r1.11 -r1.12
--- src/hg/makeDb/doc/hg19.txt 11 May 2009 20:50:57 -0000 1.11
+++ src/hg/makeDb/doc/hg19.txt 13 May 2009 20:51:33 -0000 1.12
@@ -701,10 +701,11 @@
time nice -n +19 doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
-noLoadChainSplit -chainMinScore=5000 -chainLinearGap=medium \
-workhorse=hgwdev -smallClusterHub=pk -bigClusterHub=swarm \
> do.log 2>&1 &
-# XXX running
-Sat Mar 21 22:22:18 PDT 2009
+ cat fb.hg19.chainGorGor1Link.txt
+ # 1723432141 bases of 2897316137 (59.484%) in intersection
+ doRecipBest.pl -buildDir=`pwd` hg19 gorGor1 > rbest.log 2>&1
############################################################################
# PREPARE LINEAGE SPECIFIC REPEAT FILES FOR BLASTZ (DONE - 2009-04-02 - Hiram)
ssh pk
@@ -2130,11 +2131,26 @@
# establish a screen to control this job
screen
time nice -n +19 doBlastzChainNet.pl -verbose=2 \
`pwd`/DEF \
+ -noLoadChainSplit \
-workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=swarm \
-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
-XXX - running Mon May 11 13:40:46 PDT 2009
+ # fixed up some bugs in the doBlastzChainNet.pl script, finished
+ # the lastz run manually, then continuing:
+ time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+ `pwd`/DEF \
+ -continue=cat \
+ -noLoadChainSplit \
+ -workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=swarm \
+ -chainMinScore=3000 -chainLinearGap=medium > cat.log 2>&1 &
+ cat fb.hg19.chainMm9Link.txt
+ # 1022723573 bases of 2897316137 (35.299%) in intersection
+ time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+ `pwd`/DEF \
+ -noLoadChainSplit -continue=syntenicNet -syntenicNet \
+ -workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=swarm \
+ -chainMinScore=3000 -chainLinearGap=medium > synNet.log 2>&1 &
#########################################################################
# BLASTZ Dog CanFam2 (DONE - 2009-05-11 - Hiram)
mkdir /hive/data/genomes/hg19/bed/lastzCanFam2.2009-05-11
@@ -2167,9 +2183,207 @@
# establish a screen to control this job
screen
time nice -n +19 doBlastzChainNet.pl -verbose=2 \
`pwd`/DEF \
- -workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=swarm \
+ -noLoadChainSplit \
+ -workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=pk \
-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
-XXX - running Mon May 11 13:40:46 PDT 2009
+ # fixed up some bugs in the doBlastzChainNet.pl script, finished
+ # the lastz run manually, then continuing:
+ time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+ `pwd`/DEF \
+ -continue=cat \
+ -noLoadChainSplit \
+ -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=pk \
+ -chainMinScore=3000 -chainLinearGap=medium > cat.log 2>&1 &
+ # fixed up some missing-data problems on encodek, then continuing:
+ time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+ `pwd`/DEF \
+ -continue=chainMerge \
+ -noLoadChainSplit \
+ -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=pk \
+ -chainMinScore=3000 -chainLinearGap=medium > chainMerge.log 2>&1 &
+ # real 91m9.723s
+ cat fb.hg19.chainCanFam2Link.txt
+ # 1532071680 bases of 2897316137 (52.879%) in intersection
+
+ time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+ `pwd`/DEF \
+ -noLoadChainSplit -continue=syntenicNet -syntenicNet \
+ -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=pk \
+ -chainMinScore=3000 -chainLinearGap=medium > synNet.log 2>&1 &
+XXX - running Wed May 13 12:51:14 PDT 2009
+
+#########################################################################
+# BLASTZ Chicken GalGal3 (WORKING - 2009-05-13 - Hiram)
+ mkdir /hive/data/genomes/hg19/bed/lastzGalGal3.2009-05-13
+ cd /hive/data/genomes/hg19/bed/lastzGalGal3.2009-05-13
+
+ cat << '_EOF_' > DEF
+# human vs chicken
+# Specific settings for chicken (per Webb email to Brian Raney)
+BLASTZ_H=2000
+BLASTZ_Y=3400
+BLASTZ_L=10000
+BLASTZ_K=2200
+BLASTZ_Q=/scratch/data/blastz/HoxD55.q
+BLASTZ_ABRIDGE_REPEATS=1
+
+# TARGET: Human hg19
+SEQ1_DIR=/scratch/data/hg19/nib
+SEQ1_SMSK=/scratch/data/hg19/lineageSpecificRepeats
+SEQ1_LEN=/scratch/data/hg19/chrom.sizes
+SEQ1_CHUNK=10000000
+SEQ1_LAP=10000
+
+# QUERY: Chicken galGal3 - single chunk big enough to run entire chrom
+SEQ2_DIR=/scratch/data/galGal3/nib
+SEQ2_LEN=/scratch/data/galGal3/chrom.sizes
+SEQ2_SMSK=/scratch/data/galGal3/linSpecRep
+SEQ2_CHUNK=200000000
+SEQ2_LAP=0
+
+BASE=/hive/data/genomes/hg19/bed/lastzGalGal3.2009-05-13
+TMPDIR=/scratch/tmp
+'_EOF_'
+ # << happy emacs
+
+ time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+ `pwd`/DEF \
+ -syntenicNet \
+ -noLoadChainSplit \
+ -workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=pk \
+ -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1
+XXX - running Wed May 13 10:21:42 PDT 2009
#########################################################################
+# BLASTZ Macaca Mulatta RheMac2 (WORKING - 2009-05-13 - Hiram)
+
+ mkdir /hive/data/genomes/hg19/bed/lastzRheMac2.2009-05-13
+ cd /hive/data/genomes/hg19/bed/lastzRheMac2.2009-05-13
+
+ cat << '_EOF_' > DEF
+# human vs macaca mulatta
+BLASTZ=lastz
+# maximum M allowed with lastz is only 254
+BLASTZ_M=254
+BLASTZ_Q=/scratch/data/blastz/human_chimp.v2.q
+# and place those items here
+BLASTZ_O=600
+BLASTZ_E=150
+# other parameters from panTro2 vs hg18 lastz on advice from Webb
+BLASTZ_K=4500
+BLASTZ_Y=15000
+BLASTZ_T=2
+
+# TARGET: Human Hg19
+SEQ1_DIR=/scratch/data/hg19/hg19.2bit
+SEQ1_LEN=/scratch/data/hg19/chrom.sizes
+SEQ1_CHUNK=10000000
+SEQ1_LAP=0
+SEQ1_IN_CONTIGS=0
+
+# QUERY: Macaca Mulatta RheMac2
+SEQ2_DIR=/scratch/data/rheMac2/rheMac2.2bit
+SEQ2_LEN=/scratch/data/rheMac2/chrom.sizes
+SEQ2_CHUNK=20000000
+SEQ2_LAP=10000
+SEQ2_IN_CONTIGS=0
+
+BASE=/hive/data/genomes/hg19/bed/lastzRheMac2.2009-05-13
+TMPDIR=/scratch/tmp
+'_EOF_'
+ # << happy emacs
+
+ time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+ `pwd`/DEF \
+ -syntenicNet \
+ -noLoadChainSplit -chainMinScore=5000 -chainLinearGap=medium \
+ -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
+ > do.log 2>&1 &
+XXX - running Wed May 13 10:41:20 PDT 2009
+
+#########################################################################
+# BLASTZ Rat Rn4 (WORKING - 2009-05-13 - Hiram)
+ mkdir /hive/data/genomes/hg19/bed/lastzRn4.2009-05-13
+ cd /hive/data/genomes/hg19/bed/lastzRn4.2009-05-13
+
+ cat << '_EOF_' > DEF
+# human vs rat
+BLASTZ_ABRIDGE_REPEATS=1
+
+# TARGET: Human Hg19
+SEQ1_DIR=/scratch/data/hg19/nib
+SEQ1_SMSK=/scratch/data/hg19/lineageSpecificRepeats
+SEQ1_LEN=/scratch/data/hg19/chrom.sizes
+SEQ1_CHUNK=10000000
+SEQ1_LAP=0
+
+# QUERY: Rat Rn4
+SEQ2_DIR=/scratch/data/rn4/nib
+SEQ2_SMSK=/scratch/data/rn4/linSpecRep.notInHuman
+SEQ2_LEN=/scratch/data/rn4/chrom.sizes
+SEQ2_CHUNK=10000000
+SEQ2_LAP=10000
+
+BASE=/hive/data/genomes/hg19/bed/lastzRn4.2009-05-13
+TMPDIR=/scratch/tmp
+'_EOF_'
+ # << happy emacs
+
+ # establish a screen to control this job
+ screen
+ time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+ `pwd`/DEF \
+ -syntenicNet -noLoadChainSplit \
+ -workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=swarm \
+ -chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
+
+##############################################################################
+# BLASTZ Orangutan PonAbe2 (WORKING - 2009-05-13 - Hiram)
+ mkdir /hive/data/genomes/hg19/bed/lastzPonAbe2.2009-05-13
+ cd /hive/data/genomes/hg19/bed/lastzPonAbe2.2009-05-13
+
+ cat << '_EOF_' > DEF
+# human vs orangutan
+BLASTZ=lastz
+# maximum M allowed with lastz is only 254
+BLASTZ_M=254
+BLASTZ_Q=/scratch/data/blastz/human_chimp.v2.q
+# and place those items here
+BLASTZ_O=600
+BLASTZ_E=150
+# other parameters from panTro2 vs hg18 lastz on advice from Webb
+BLASTZ_K=4500
+BLASTZ_Y=15000
+BLASTZ_T=2
+
+# TARGET: Human Hg19
+SEQ1_DIR=/scratch/data/hg19/hg19.2bit
+SEQ1_LEN=/scratch/data/hg19/chrom.sizes
+SEQ1_CHUNK=10000000
+SEQ1_LAP=0
+SEQ1_IN_CONTIGS=0
+
+# QUERY: Orangutan PonAbe2
+SEQ2_DIR=/scratch/data/ponAbe2/ponAbe2.2bit
+SEQ2_LEN=/scratch/data/ponAbe2/chrom.sizes
+SEQ2_CHUNK=20000000
+SEQ2_LAP=10000
+SEQ2_IN_CONTIGS=0
+
+BASE=/hive/data/genomes/hg19/bed/lastzPonAbe2.2009-05-13
+TMPDIR=/scratch/tmp
+'_EOF_'
+ # << happy emacs
+
+ # establish a screen to control this job
+ screen
+ time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+ `pwd`/DEF \
+ -syntenicNet \
+ -noLoadChainSplit -chainMinScore=5000 -chainLinearGap=medium \
+ -workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=pk \
+ > do.log 2>&1 &
+XXX - running Wed May 13 12:44:33 PDT 2009
+
+##############################################################################