src/hg/makeDb/doc/macEug1.txt 1.2

1.2 2010/01/13 18:20:06 hiram
running genbank for Wallaby
Index: src/hg/makeDb/doc/macEug1.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/macEug1.txt,v
retrieving revision 1.1
retrieving revision 1.2
diff -b -B -U 1000000 -r1.1 -r1.2
--- src/hg/makeDb/doc/macEug1.txt	8 Dec 2008 08:18:00 -0000	1.1
+++ src/hg/makeDb/doc/macEug1.txt	13 Jan 2010 18:20:06 -0000	1.2
@@ -1,79 +1,123 @@
 # for emacs: -*- mode: sh; -*-
 
 #	Creating the assembly for Macropus eugenii
 #	Tammar wallaby
 #       http://www.hgsc.bcm.tmc.edu/projects/wallaby/
 #       
 #       This isn't a full browser.  Just a 2x coverage genome
 #       for use in comparative genomics.
 
 #########################################################################
 # DOWNLOAD SEQUENCE, ASSEMBLE, MAKE DB (2008-12-04 Andy) 
        ssh hgwdev
        mkdir -p /hive/data/genomes/macEug1/baylor/chroms
        cd /hive/data/genomes/macEug1/baylor
        wget --timestamping "ftp://ftp.hgsc.bcm.tmc.edu/pub/data/Meugenii/fasta/Meug20071125/contigs/Meug20071125-contigs*"
        cd chroms/
        wget --timestamping "ftp://ftp.hgsc.bcm.tmc.edu/pub/data/Meugenii/fasta/Meug20071125/linearScaffolds/*"
        cd ../
        zcat Meug20071125-contigs.fa.gz | grep "^>" | \
            sed 's/^>//' | tr '|' '\t' | awk 'BEGIN{OFS="\t"}{print $2, $1;}' > contigs.lst
        tabGrep -replace contigs.lst 6 Meug20071125-contigs.agp.gz > macEug1.agp
        zcat Meug20071125-contigs.fa.gz | sed 's/^>\(Contig[[:digit:]]\+\).*/>\1/' \
            | gzip -c > macEug1.contigs.fa.gz
        cut -f6 macEug1.agp > contigs.agp.lst
        faSomeRecords macEug1.contigs.fa.gz contigs.agp.lst stdout | gzip -c > tmp.fa.gz
        mv tmp.fa.gz macEug1.contigs.fa.gz
        cd ../
        cat << '_EOF_' > macEug1.config.ra
 db macEug1
 clade mammal
 scientificName Macropus eugenii
 assemblyDate Nov. 2007
 assemblyLabel Baylor Release 1.0
 orderKey 278
 dbDbSpeciesDir wallaby
 mitoAcc none
 agpFiles /hive/data/genomes/macEug1/baylor/macEug1.agp
 fastaFiles /hive/data/genomes/macEug1/baylor/macEug1.contigs.fa.gz
 commonName Wallaby
 '_EOF_'
 # <<   emacs
        tcsh 
        ~/kent/src/hg/utils/automation/makeGenomeDb.pl -stop db -workhorse=hgwdev
 
 ###########################################################################
 # WINDOWMASKER (DONE 2008-12-05 Andy)
     ssh hgwdev
     mkdir /hive/data/genomes/macEug1/bed/WindowMasker
     screen -S macEug1_WindowMasker
     ssh kolossus
     cd /hive/data/genomes/macEug1/bed/WindowMasker
     tcsh
     ~/kent/src/hg/utils/automation/doWindowMasker.pl macEug1 -buildDir=`pwd` -workhorse=hgwdev >& wm.log
     rm macEug1.wmsk.2bit
     cd ../../
     ln -s bed/WindowMasker/macEug1.wmsk.sdust.2bit macEug1.wmsk.2bit
 
 #########################################################################
 ## SIMPLE REPEATS TRF (DONE 2008-11-26 - Andy)
     ssh hgwdev
     screen -S macEug1_trf
     mkdir /hive/data/genomes/macEug1/bed/simpleRepeat
     cd /hive/data/genomes/macEug1/bed/simpleRepeat
     tcsh
     time $HOME/kent/src/hg/utils/automation/doSimpleRepeat.pl -buildDir=`pwd` macEug1 > do.log
 # *** All done!
 # *** Steps were performed in /hive/data/genomes/macEug1/bed/simpleRepeat
 #0.559u 0.401s 6:56:42.34 0.0%   0+0k 0+0io 2pf+0w
     cat fb.simpleRepeat
 # 38751844 bases of 2541767339 (1.525%) in intersection
     cd ../../
     twoBitMask macEug1.wmsk.2bit -add bed/simpleRepeat/trfMask.bed macEug1.2bit
     ##  can safely ignore warning about >=13 fields in bed file
     twoBitToFa macEug1.2bit stdout | faSize stdin > macEug1.2bit.faSize.txt
 # 
 # %31.28 masked total, %36.24 masked real
     ##   link to gbdb
     ln -s `pwd`/macEug1.2bit /gbdb/macEug1
 
+#########################################################################
+## Genbank gene run (DONE - 2009-09-25 - Hiram)
+    cd /hive/data/genomes/macEug1
+    # MAKE 11.OOC FILE FOR BLAT
+    blat macEug1.2bit \
+	 /dev/null /dev/null -tileSize=11 -makeOoc=jkStuff/macEug1.11.ooc \
+	-repMatch=1024
+    #	Wrote 24249 overused 11-mers to jkStuff/macEug1.11.ooc
+
+    mkdir /hive/data/staging/data/macEug1
+    cp -p macEug1.2bit chrom.sizes jkStuff/macEug1.11.ooc \
+	/hive/data/staging/data/macEug1
+
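+    #	in the genbank source tree (~/kent/src/hg/makeDb/genbank), add the
+    #	following two lines to src/lib/gbGenome.c; the cvs ci and
+    #	make install-server below are presumably run from that directory: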
+    #	static char *macEugNames[] = {"Macropus eugenii", NULL};
+    #	{"macEug", macEugNames},
+
+    cvs ci -m "Adding Wallaby Macropus eugenii" src/lib/gbGenome.c
+    make install-server
+
+    ssh genbank
+    screen  # control this business with a screen since it takes a while
+    cd /cluster/data/genbank
+
+    time nice -n +19 bin/gbAlignStep -initial macEug1 &
+    #	logFile: var/build/logs/2009.09.25-14:54:03.macEug1.initalign.log
+    #	real    2613m8.027s
+
+    # load database when finished
+    ssh hgwdev
+    cd /cluster/data/genbank
+    time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad macEug1
+    #	logFile:  var/dbload/hgwdev/logs/2009.09.28-09:23:52.dbload.log
+    #	real    15m28.552s
+
+    # enable daily alignment and update of hgwdev
+    cd ~/kent/src/hg/makeDb/genbank
+    cvsup
+    # add macEug1 to these two files:
+    #	etc/align.dbs
+    #	etc/hgwdev.dbs
+    cvs ci -m "Added macEug1 - Macropus eugenii - Wallaby" etc/align.dbs etc/hgwdev.dbs
+    make etc-update
+    #	done - 2009-09-28 - Hiram