src/hg/makeDb/doc/macEug1.txt 1.2
1.2 2010/01/13 18:20:06 hiram
running genbank for Wallaby
Index: src/hg/makeDb/doc/macEug1.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/macEug1.txt,v
retrieving revision 1.1
retrieving revision 1.2
diff -b -B -U 1000000 -r1.1 -r1.2
--- src/hg/makeDb/doc/macEug1.txt 8 Dec 2008 08:18:00 -0000 1.1
+++ src/hg/makeDb/doc/macEug1.txt 13 Jan 2010 18:20:06 -0000 1.2
@@ -1,79 +1,123 @@
# for emacs: -*- mode: sh; -*-
# Creating the assembly for Macropus eugenii
# Tammar wallaby
# http://www.hgsc.bcm.tmc.edu/projects/wallaby/
#
# This isn't a full browser, just a 2x coverage genome
# for use in comparative genomics.
#########################################################################
# DOWNLOAD SEQUENCE, ASSEMBLE, MAKE DB (2008-12-04 Andy)
ssh hgwdev
mkdir -p /hive/data/genomes/macEug1/baylor/chroms
cd /hive/data/genomes/macEug1/baylor
wget --timestamping "ftp://ftp.hgsc.bcm.tmc.edu/pub/data/Meugenii/fasta/Meug20071125/contigs/Meug20071125-contigs*"
cd chroms/
wget --timestamping "ftp://ftp.hgsc.bcm.tmc.edu/pub/data/Meugenii/fasta/Meug20071125/linearScaffolds/*"
cd ../
zcat Meug20071125-contigs.fa.gz | grep "^>" | \
sed 's/^>//' | tr '|' '\t' | awk 'BEGIN{OFS="\t"}{print $2, $1;}' > contigs.lst
tabGrep -replace contigs.lst 6 Meug20071125-contigs.agp.gz > macEug1.agp
zcat Meug20071125-contigs.fa.gz | sed 's/^>\(Contig[[:digit:]]\+\).*/>\1/' \
| gzip -c > macEug1.contigs.fa.gz
cut -f6 macEug1.agp > contigs.agp.lst
faSomeRecords macEug1.contigs.fa.gz contigs.agp.lst stdout | gzip -c > tmp.fa.gz
mv tmp.fa.gz macEug1.contigs.fa.gz
cd ../
cat << '_EOF_' > macEug1.config.ra
db macEug1
clade mammal
scientificName Macropus eugenii
assemblyDate Nov. 2007
assemblyLabel Baylor Release 1.0
orderKey 278
dbDbSpeciesDir wallaby
mitoAcc none
agpFiles /hive/data/genomes/macEug1/baylor/macEug1.agp
fastaFiles /hive/data/genomes/macEug1/baylor/macEug1.contigs.fa.gz
commonName Wallaby
'_EOF_'
# << emacs
tcsh
~/kent/src/hg/utils/automation/makeGenomeDb.pl -stop db -workhorse=hgwdev
###########################################################################
# WINDOWMASKER (DONE 2008-12-05 Andy)
ssh hgwdev
mkdir /hive/data/genomes/macEug1/bed/WindowMasker
screen -S macEug1_WindowMasker
ssh kolossus
cd /hive/data/genomes/macEug1/bed/WindowMasker
tcsh
~/kent/src/hg/utils/automation/doWindowMasker.pl macEug1 -buildDir=`pwd` -workhorse=hgwdev >& wm.log
rm macEug1.wmsk.2bit
cd ../../
ln -s bed/WindowMasker/macEug1.wmsk.sdust.2bit macEug1.wmsk.2bit
#########################################################################
## SIMPLE REPEATS TRF (DONE 2008-11-26 - Andy)
ssh hgwdev
screen -S macEug1_trf
mkdir /hive/data/genomes/macEug1/bed/simpleRepeat
cd /hive/data/genomes/macEug1/bed/simpleRepeat
tcsh
time $HOME/kent/src/hg/utils/automation/doSimpleRepeat.pl -buildDir=`pwd` macEug1 > do.log
# *** All done!
# *** Steps were performed in /hive/data/genomes/macEug1/bed/simpleRepeat
#0.559u 0.401s 6:56:42.34 0.0% 0+0k 0+0io 2pf+0w
cat fb.simpleRepeat
# 38751844 bases of 2541767339 (1.525%) in intersection
cd ../../
twoBitMask macEug1.wmsk.2bit -add bed/simpleRepeat/trfMask.bed macEug1.2bit
## can safely ignore warning about >=13 fields in bed file
twoBitToFa macEug1.2bit stdout | faSize stdin > macEug1.2bit.faSize.txt
#
# %31.28 masked total, %36.24 masked real
## link to gbdb
ln -s `pwd`/macEug1.2bit /gbdb/macEug1
+#########################################################################
+## Genbank gene run (DONE - 2009-09-25 - Hiram)
+ cd /hive/data/genomes/macEug1
+ # MAKE 11.OOC FILE FOR BLAT
+ blat macEug1.2bit \
+ /dev/null /dev/null -tileSize=11 -makeOoc=jkStuff/macEug1.11.ooc \
+ -repMatch=1024
+ # Wrote 24249 overused 11-mers to jkStuff/macEug1.11.ooc
+
+ mkdir /hive/data/staging/data/macEug1
+ cp -p macEug1.2bit chrom.sizes jkStuff/macEug1.11.ooc \
+ /hive/data/staging/data/macEug1
+
+ # add the following two lines to src/lib/gbGenome.c
+ # static char *macEugNames[] = {"Macropus eugenii", NULL};
+ # {"macEug", macEugNames},
+
+ cvs ci -m "Adding Wallaby Macropus eugenii" src/lib/gbGenome.c
+ make install-server
+
+ ssh genbank
+ screen # control this business with a screen since it takes a while
+ cd /cluster/data/genbank
+
+ time nice -n +19 bin/gbAlignStep -initial macEug1 &
+ # logFile: var/build/logs/2009.09.25-14:54:03.macEug1.initalign.log
+ # real 2613m8.027s
+
+ # load database when finished
+ ssh hgwdev
+ cd /cluster/data/genbank
+ time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad macEug1
+ # logFile: var/dbload/hgwdev/logs/2009.09.28-09:23:52.dbload.log
+ # real 15m28.552s
+
+ # enable daily alignment and update of hgwdev
+ cd ~/kent/src/hg/makeDb/genbank
+ cvsup
+ # add macEug1 to:
+ #   etc/align.dbs
+ #   etc/hgwdev.dbs
+ cvs ci -m "Added macEug1 - Macropus eugenii - Wallaby" etc/align.dbs etc/hgwdev.dbs
+ make etc-update
+ # done - 2009-09-28 - Hiram