src/hg/makeDb/doc/droMoj3.txt 1.4
1.4 2009/04/07 16:55:47 angie
Swapped chains/nets from dm3 to get liftOver chains to dm3 -- user request.
Index: src/hg/makeDb/doc/droMoj3.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/droMoj3.txt,v
retrieving revision 1.3
retrieving revision 1.4
diff -b -B -U 1000000 -r1.3 -r1.4
--- src/hg/makeDb/doc/droMoj3.txt 19 Sep 2008 06:24:58 -0000 1.3
+++ src/hg/makeDb/doc/droMoj3.txt 7 Apr 2009 16:55:47 -0000 1.4
@@ -1,199 +1,210 @@
# for emacs: -*- mode: sh; -*-
# Drosophila mojavensis -- Agencourt "CAF1" via Eisen's 12-fly site
# THIS IS ONLY TO GET MASKED SEQUENCE -- NOT A BROWSER AT THIS POINT
#########################################################################
# DOWNLOAD SEQUENCE (DONE 9/21/06 angie)
ssh kkstore05
mkdir /cluster/store12/droMoj3
ln -s /cluster/store12/droMoj3 /cluster/data/droMoj3
mkdir /cluster/data/droMoj3/downloads
cd /cluster/data/droMoj3/downloads
wget http://rana.lbl.gov/drosophila/caf1/dmoj_caf1.tar.gz
tar xvzf dmoj_caf1.tar.gz
cd dmoj
faSize scaffolds.bases
#193826310 bases (13618479 N's 180207831 real 180207831 upper 0 lower) in 6841 sequences in 1 files
#Total size: mean 28333.0 sd 756578.0 min 101 (scaffold_5710) max 34148556 (scaffold_6540) median 1654
#N count: mean 1990.7 sd 16742.8
#U count: mean 26342.3 sd 746409.9
#L count: mean 0.0 sd 0.0
#########################################################################
# MAKE GENOME DB *UP TO DB STEP ONLY* (DONE 9/21/06 angie)
ssh kkstore05
cd /cluster/data/droMoj3
cat > droMoj3.config.ra <<EOF
# Config parameters for makeGenomeDb.pl:
db droMoj3
clade insect
scientificName Drosophila mojavensis
assemblyDate Feb. 2006
assemblyLabel Agencourt CAF1
orderKey 57
mitoAcc none
fastaFiles /cluster/data/droMoj3/downloads/dmoj/scaffolds.bases
agpFiles /cluster/data/droMoj3/downloads/dmoj/assembly.agp
dbDbSpeciesDir drosophila
EOF
# Stop after the db step so we can use featureBits; skip the dbDb and trackDb
# steps because we're not building an actual browser for now.
makeGenomeDb.pl droMoj3.config.ra -stop=db \
>& makeGenomeDb.log & tail -f makeGenomeDb.log
#########################################################################
# REPEATMASKER (DONE 9/21/06 angie)
ssh kkstore05
# Run with -debug first to create the dir structure and preview the scripts:
doRepeatMasker.pl droMoj3 -verbose 3 -debug
# Run it for real and tail the log:
doRepeatMasker.pl droMoj3 -species drosophila -verbose 3 \
>& /cluster/data/droMoj3/bed/RepeatMasker.2006-09-21/do.log &
tail -f /cluster/data/droMoj3/bed/RepeatMasker.2006-09-21/do.log
# RepeatMasker and lib version from do.log:
# March 20 2006 (open-3-1-5) version of RepeatMasker
#CC RELEASE 20060315; *
# Compare coverage to previous assembly:
featureBits -chrom=scaffold_6540 droMoj3 rmsk
#3474987 bases of 33912366 (10.247%) in intersection
featureBits -chrom=scaffold_6540 droMoj2 rmsk
#3448791 bases of 33921626 (10.167%) in intersection
#########################################################################
# SIMPLE REPEATS (TRF) (DONE 9/21/06 angie)
ssh kolossus
nice tcsh
mkdir /cluster/data/droMoj3/bed/simpleRepeat
cd /cluster/data/droMoj3/bed/simpleRepeat
twoBitToFa ../../droMoj3.unmasked.2bit stdout \
| trfBig -trf=/cluster/bin/i386/trf stdin /dev/null \
-bedAt=simpleRepeat.bed -tempDir=/tmp \
>& trf.log & tail -f trf.log
# Took ~50 minutes (longer than D. mel, presumably because of the 6841 scaffolds)
# Make a filtered version for sequence masking (keep only rows whose 5th field is <= 12):
awk '{if ($5 <= 12) print;}' simpleRepeat.bed > trfMask.bed
# Load unfiltered repeats into the database:
ssh hgwdev
hgLoadBed droMoj3 simpleRepeat \
/cluster/data/droMoj3/bed/simpleRepeat/simpleRepeat.bed \
-sqlTable=$HOME/kent/src/hg/lib/simpleRepeat.sql
# Compare coverage to previous assembly:
featureBits -chrom=scaffold_6540 droMoj3 simpleRepeat
#826554 bases of 33912366 (2.437%) in intersection
featureBits -chrom=scaffold_6540 droMoj2 simpleRepeat
#829580 bases of 33921626 (2.446%) in intersection
#########################################################################
# MASK SEQUENCE WITH FILTERED TRF IN ADDITION TO RM (DONE 9/21/06 angie)
ssh kolossus
cd /cluster/data/droMoj3
time twoBitMask droMoj3.rmsk.2bit -add bed/simpleRepeat/trfMask.bed droMoj3.2bit
# This warning is OK -- the extra fields are not block coordinates.
#Warning: BED file bed/simpleRepeat/trfMask.bed has >=13 fields which means it might contain block coordinates, but this program uses only the first three fields (the entire span -- no support for blocks).
#0.321u 0.274s 0:01.97 29.9% 0+0k 0+0io 0pf+0w
# Because this is a no-browser build (just masking for alignment),
# I did not make the usual /gbdb/$db/$db.2bit link.
###########################################################################
# BLASTZ/CHAIN/NET DROVIR3 (DONE 10/4/06 angie)
ssh kkstore05
mkdir /cluster/data/droMoj3/bed/blastz.droVir3.2006-10-02
cd /cluster/data/droMoj3/bed/blastz.droVir3.2006-10-02
cat << '_EOF_' > DEF
# D. mojavensis vs. D. virilis
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q
# TARGET - D. mojavensis
SEQ1_DIR=/iscratch/i/droMoj3/droMoj3.2bit
SEQ1_CHUNK=10000000
SEQ1_LAP=10000
SEQ1_LEN=/cluster/data/droMoj3/chrom.sizes
# QUERY - D. virilis
SEQ2_DIR=/iscratch/i/droVir3/droVir3.2bit
SEQ2_CHUNK=10000000
SEQ2_LAP=10000
SEQ2_LEN=/cluster/data/droVir3/chrom.sizes
BASE=/cluster/data/droMoj3/bed/blastz.droVir3.2006-10-02
'_EOF_'
# << this line keeps emacs coloring happy
doBlastzChainNet.pl DEF \
-blastzOutRoot /panasas/store/droMoj3droVir3 >& do.log &
tail -f do.log
ln -s blastz.droVir3.2006-10-02 /cluster/data/droMoj3/bed/blastz.droVir3
###########################################################################
# BLASTZ/CHAIN/NET DROGRI2 (DONE 10/4/06 angie)
ssh kkstore05
mkdir /cluster/data/droMoj3/bed/blastz.droGri2.2006-10-02
cd /cluster/data/droMoj3/bed/blastz.droGri2.2006-10-02
cat << '_EOF_' > DEF
# D. mojavensis vs. D. grimshawi
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q
# TARGET - D. mojavensis
SEQ1_DIR=/iscratch/i/droMoj3/droMoj3.2bit
SEQ1_CHUNK=10000000
SEQ1_LAP=10000
SEQ1_LEN=/cluster/data/droMoj3/chrom.sizes
# QUERY - D. grimshawi
SEQ2_DIR=/iscratch/i/droGri2/droGri2.2bit
SEQ2_CHUNK=10000000
SEQ2_LAP=10000
SEQ2_LEN=/cluster/data/droGri2/chrom.sizes
BASE=/cluster/data/droMoj3/bed/blastz.droGri2.2006-10-02
'_EOF_'
# << this line keeps emacs coloring happy
doBlastzChainNet.pl DEF \
-blastzOutRoot /cluster/bluearc/droMoj3droGri2 >& do.log &
tail -f do.log
ln -s blastz.droGri2.2006-10-02 /cluster/data/droMoj3/bed/blastz.droGri2
#########################################################################
# MAKE 11.OOC FILE FOR BLAT (DONE 9/18/08 angie)
# Use -repMatch=100 like droMoj2.
ssh kolossus
blat /hive/data/genomes/droMoj3/droMoj3.2bit /dev/null /dev/null \
-tileSize=11 -makeOoc=/hive/data/genomes/droMoj3/11.ooc -repMatch=100
#Wrote 17148 overused 11-mers to /hive/data/genomes/droMoj3/11.ooc
#########################################################################
# LIFTOVER TO DROMOJ2 (DONE 9/18/08)
doSameSpeciesLiftOver.pl -bigClusterHub=swarm -workhorse=kolossus \
droMoj3 droMoj2 -debug
# *** Steps were performed in /cluster/data/droMoj3/bed/blat.droMoj2.2008-09-18
cd /cluster/data/droMoj3/bed/blat.droMoj2.2008-09-18
doSameSpeciesLiftOver.pl -bigClusterHub=swarm -workhorse=kolossus \
-ooc=/hive/data/genomes/droMoj3/11.ooc droMoj3 droMoj2 >& do.log &
tail -f do.log
#########################################################################
+# SWAP DM3 CHAIN/NET (DONE 4/6/09 angie)
+ mkdir /hive/data/genomes/droMoj3/bed/blastz.dm3.swap
+ cd /hive/data/genomes/droMoj3/bed/blastz.dm3.swap
+ doBlastzChainNet.pl -swap -bigClusterHub swarm -smallClusterHub memk \
+ -workhorse kolossus \
+ /hive/data/genomes/dm3/bed/blastz.droMoj3/DEF.hive >& do.log &
+ tail -f do.log
+ ln -s blastz.dm3.swap /hive/data/genomes/droMoj3/bed/blastz.dm3
+
+
+#########################################################################