src/hg/makeDb/doc/droEre2.txt 1.2
1.2 2009/04/07 16:55:45 angie
Swapped chains/nets from dm3 to get liftOver chains to dm3 -- user request.
Index: src/hg/makeDb/doc/droEre2.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/droEre2.txt,v
retrieving revision 1.1
retrieving revision 1.2
diff -b -B -U 1000000 -r1.1 -r1.2
--- src/hg/makeDb/doc/droEre2.txt 22 Sep 2006 00:45:03 -0000 1.1
+++ src/hg/makeDb/doc/droEre2.txt 7 Apr 2009 16:55:45 -0000 1.2
@@ -1,108 +1,120 @@
# for emacs: -*- mode: sh; -*-
# Drosophila erecta -- Agencourt "CAF1" via Eisen's 12-fly site
# THIS IS ONLY TO GET MASKED SEQUENCE -- NOT A BROWSER AT THIS POINT
#########################################################################
# DOWNLOAD SEQUENCE (DONE 9/20/06 angie)
ssh kkstore05
mkdir /cluster/store12/droEre2
ln -s /cluster/store12/droEre2 /cluster/data/droEre2
mkdir /cluster/data/droEre2/downloads
cd /cluster/data/droEre2/downloads
wget http://rana.lbl.gov/drosophila/caf1/dere_caf1.tar.gz
tar xvzf dere_caf1.tar.gz
cd dere
faSize scaffolds.bases
#152712140 bases (7628121 N's 145084019 real 145084019 upper 0 lower) in 5124 sequences in 1 files
#Total size: mean 29803.3 sd 721223.0 min 154 (scaffold_5056) max 26641161 (scaffold_4929) median 1729
#N count: mean 1488.7 sd 9323.5
#U count: mean 28314.6 sd 717773.6
#L count: mean 0.0 sd 0.0
#########################################################################
# MAKE GENOME DB *UP TO DB STEP ONLY* (DONE 9/20/06 angie)
ssh kkstore05
cd /cluster/data/droEre2
cat > droEre2.config.ra <<EOF
# Config parameters for makeGenomeDb.pl:
db droEre2
clade insect
scientificName Drosophila erecta
assemblyDate Feb. 2006
assemblyLabel Agencourt CAF1
orderKey 53
mitoAcc none
fastaFiles /cluster/data/droEre2/downloads/dere/scaffolds.bases
agpFiles /cluster/data/droEre2/downloads/dere/assembly.agp
dbDbSpeciesDir drosophila
EOF
# Stop at db step so we can use featureBits, but don't do dbDb and trackDb
# because we're not building an actual browser for now.
makeGenomeDb.pl droEre2.config.ra -stop=db \
>& makeGenomeDb.log & tail -f makeGenomeDb.log
#########################################################################
# REPEATMASKER (DONE 9/20/06 angie)
ssh kkstore05
# Run -debug to create the dir structure and preview the scripts:
doRepeatMasker.pl droEre2 -verbose 3 -debug
# Run it for real and tail the log:
doRepeatMasker.pl droEre2 -species drosophila -verbose 3 \
>& /cluster/data/droEre2/bed/RepeatMasker.2006-09-20/do.log &
tail -f /cluster/data/droEre2/bed/RepeatMasker.2006-09-20/do.log
# RepeatMasker and lib version from do.log:
# March 20 2006 (open-3-1-5) version of RepeatMasker
#CC RELEASE 20060315; *
# Compare coverage to previous assembly:
featureBits droEre2 rmsk
#18539872 bases of 145084019 (12.779%) in intersection
featureBits droEre1 rmsk
#18339464 bases of 145196048 (12.631%) in intersection
#########################################################################
# SIMPLE REPEATS (TRF) (DONE 9/20/06 angie)
ssh kolossus
nice tcsh
mkdir /cluster/data/droEre2/bed/simpleRepeat
cd /cluster/data/droEre2/bed/simpleRepeat
twoBitToFa ../../droEre2.unmasked.2bit stdout \
| trfBig -trf=/cluster/bin/i386/trf stdin /dev/null \
-bedAt=simpleRepeat.bed -tempDir=/tmp \
>& trf.log & tail -f trf.log
# ~52 minutes (longer than for D. melanogaster -- presumably because of the many small scaffolds)
# Make a filtered version (keep only repeats with period <= 12) for sequence masking:
awk '{if ($5 <= 12) print;}' simpleRepeat.bed > trfMask.bed
# Load unfiltered repeats into the database:
ssh hgwdev
hgLoadBed droEre2 simpleRepeat \
/cluster/data/droEre2/bed/simpleRepeat/simpleRepeat.bed \
-sqlTable=$HOME/kent/src/hg/lib/simpleRepeat.sql
# Compare coverage to previous assembly:
featureBits droEre2 simpleRepeat
#9028488 bases of 145084019 (6.223%) in intersection
featureBits droEre1 simpleRepeat
#9071572 bases of 145196048 (6.248%) in intersection
#########################################################################
# MASK SEQUENCE WITH FILTERED TRF IN ADDITION TO RM (DONE 9/20/06 angie)
ssh kolossus
cd /cluster/data/droEre2
time twoBitMask droEre2.rmsk.2bit -add bed/simpleRepeat/trfMask.bed droEre2.2bit
# This warning is OK -- the extra fields are not block coordinates.
#Warning: BED file bed/simpleRepeat/trfMask.bed has >=13 fields which means it might contain block coordinates, but this program uses only the first three fields (the entire span -- no support for blocks).
#0.161u 0.368s 0:01.61 32.2% 0+0k 0+0io 1pf+0w
# Because this is a no-browser build (just masking for alignment)
# I did not make the usual /gbdb/$db/$db.2bit link.
+#########################################################################
+# SWAP DM3 CHAIN/NET (DONE 4/3/09 angie)
+ mkdir /hive/data/genomes/droEre2/bed/blastz.dm3.swap
+ cd /hive/data/genomes/droEre2/bed/blastz.dm3.swap
+ doBlastzChainNet.pl -swap -bigClusterHub swarm -smallClusterHub memk \
+ -workhorse kolossus \
+ /hive/data/genomes/dm3/bed/blastz.droEre2/DEF >& do.log &
+ tail -f do.log
+ ln -s blastz.dm3.swap /hive/data/genomes/droEre2/bed/blastz.dm3
+
+
+#########################################################################