src/hg/makeDb/doc/oviAri1.txt 1.3
1.3 2010/04/15 23:37:48 chinhli
re-do the genbank task
Index: src/hg/makeDb/doc/oviAri1.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/oviAri1.txt,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 4 -r1.2 -r1.3
--- src/hg/makeDb/doc/oviAri1.txt 12 Apr 2010 21:33:00 -0000 1.2
+++ src/hg/makeDb/doc/oviAri1.txt 15 Apr 2010 23:37:48 -0000 1.3
@@ -15,9 +15,9 @@
# ftp://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/
# Ovis_aries/Ovis_aries_1.0
##########################################################################
-# Download sequence (DONE - 2010-03-22 Chin)
+# Download sequence (DONE - 2010-03-22 2010-04-14 Chin)
mkdir /hive/data/genomes/oviAri1
cd /hive/data/genomes/oviAri1
mkdir genbank
cd genbank
@@ -58,23 +59,26 @@
# 2784748484 bases (1600136831 N's 1184611653 real 1184611653 upper
# 0 lower) in 27 sequences in 27 files
# For unplaced scalfolds, named them as chrUn_xxxxxxxx
- # where xxxxxx is the original access id as: chrUn_ACBE01000381.1
- # and put it into chrUn.* files
- # copy all the comment lines (start with #)
+ # where xxxxxx is the original access id as: chrUn_GL340781.1
+ # The ".1" at the end needs to be filtered out since
+ # MySQL does not allow "." as part of the table name and
+ # it will cause problems in the genbank task step later
+
export S=Primary_Assembly/unplaced_scaffolds
zcat ${S}/AGP/unplaced.scaf.agp.gz | grep "^#" > ucscChr/chrUn.agp
# append the gap records
zcat ${S}/AGP/unplaced.scaf.agp.gz | grep -v "^#" \
- | sed -e "s/^/chrUn_/" >> ucscChr/chrUn.agp
+ | sed -e "s/^/chrUn_/" -e "s/\.1//" >> ucscChr/chrUn.agp
gzip ucscChr/chrUn.agp &
zcat ${S}/FASTA/unplaced.scaf.fa.gz \
- | sed -e "s#^>.*|gb|#>chrUn_#; s#|.*##" | gzip > ucscChr/chrUn.fa.gz
+ | sed -e "s#^>.*|gb|#>chrUn_#; s#|.*##" -e "s/\.1//" \
+ | gzip > ucscChr/chrUn.fa.gz
# about 1190 sequences in the unplaced
zcat ucscChr/chrUn.fa.gz | grep "^>" | wc
- # 1190 1190 21420
+ # 1190 1190 19040
# Check them with faSize
faSize Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz
# 75747883 bases (59104875 N's 16643008 real 16643008 upper 0 lower)
@@ -83,9 +87,9 @@
# 75747883 bases (59104875 N's 16643008 real 16643008 upper 0 lower)
# in 1190 sequences in 1 files
#########################################################################
-# Initial makeGenomeDb.pl (DONE - 2010-04-01 - Chin)
+# Initial makeGenomeDb.pl (DONE - 2010-04-14 - Chin)
cd /hive/data/genomes/oviAri1
cat << '_EOF_' > oviAri1.config.ra
# Config parameters for makeGenomeDb.pl:
db oviAri1
@@ -106,36 +110,47 @@
'_EOF_'
# << happy emacs
time makeGenomeDb.pl -noGoldGapSplit -workhorse=hgwdev oviAri1.config.ra \
> makeGenomeDb.log 2>&1 &
- # real 10m50.210s
+ # real 12m42.419s
# add the trackDb entries to the source tree, and the 2bit link:
ln -s `pwd`/oviAri1.unmasked.2bit /gbdb/oviAri1/oviAri1.2bit
+
+ # Per instructions in makeGenomeDb.log:
+ # - cvs add sheep/oviAri1
+ # - cvs add sheep/oviAri1/*.{ra,html}
+ # - cvs ci -m "Added oviAri1 to DBS." makefile
+ # - cvs ci -m "Initial descriptions for oviAri1." sheep/oviAri1
+ # - (if necessary) cvs ci sheep
+ # - Run make update DBS=oviAri1 and make alpha when done.
+ # - (optional) Clean up /cluster/data/oviAri1/TemporaryTrackDbCheckout
+ # - cvsup your ~/kent/src/hg/makeDb/trackDb and make future edits there.
# browser should function now
- # per instructions at end of makeGenomeDb.log, edit the html file
# and checkin the *.ra and *.html files. in
# /cluster/home/chinhli/kent/src/hg/makeDb/trackDb/sheep/oviAri1
#########################################################################
-# RepeatMasker (DONE - 2010-04-2 - Chin)
+# RepeatMasker (DONE - 2010-04-14 - Chin)
mkdir /hive/data/genomes/oviAri1/bed/repeatMasker
cd /hive/data/genomes/oviAri1/bed/repeatMasker
+
time nice -n +19 doRepeatMasker.pl -buildDir=`pwd` \
-workhorse=hgwdev -bigClusterHub=swarm -noSplit oviAri1 > do.log 2>&1 &
- # real 317m15.920s
+ # real 178m52.467s
cat faSize.rmsk.txt
# 2860512983 bases (1659241706 N's 1201271277 real 954826276 upper
# 246445001 lower) in 1218 sequences in 1 files
# %8.62 masked total, %20.52 masked real
#########################################################################
-# simpleRepeats ( DONE` - 2010-04-02 - Chin)
+# simpleRepeats ( DONE - 2010-04-14 - Chin)
mkdir /hive/data/genomes/oviAri1/bed/simpleRepeat
cd /hive/data/genomes/oviAri1/bed/simpleRepeat
+
time nice -n +19 doSimpleRepeat.pl -buildDir=`pwd` -workhorse=hgwdev \
-bigClusterHub=pk -smallClusterHub=pk oviAri1 > do.log 2>&1 &
- # real 3m18.652s
+ # real 3m23.411s
cat fb.simpleRepeat
# 4278474 bases of 1201962925 (0.356%) in intersection
# add to the repeatMasker
@@ -149,22 +164,24 @@
# %8.62 masked total, %20.53 masked real
#########################################################################
# Marking *all* gaps - they are not all in the AGP file
-# (DONE - 2010-04-02 - Chin)
+# (DONE - 2010-04-14 - Chin)
mkdir /hive/data/genomes/oviAri1/bed/allGaps
cd /hive/data/genomes/oviAri1/bed/allGaps
+
time nice -n +19 findMotif -motif=gattaca -verbose=4 \
-strand=+ ../../oviAri1.unmasked.2bit > findMotif.txt 2>&1
- # real 1m56.688s
+ # real 1m40.366s
grep "^#GAP " findMotif.txt | sed -e "s/^#GAP //" > allGaps.bed
featureBits oviAri1 -not gap -bed=notGap.bed
- # 1201271277 bases of 1201271277 (100.000%) in intersection
+ # 1201962925 bases of 1201962925 (100.000%) in intersection
featureBits oviAri1 allGaps.bed notGap.bed -bed=new.gaps.bed
+ # 691648 bases of 1201962925 (0.058%) in intersection
# 0 bases of 1201271277 (0.000%) in intersection zero?????
# what is the highest index in the existing gap table:
hgsql -N -e "select ix from gap;" oviAri1 | sort -n | tail -1
- # 959944
+ # 484408
# use tcsh and ctrl-c to create the here doc
cat << '_EOF_' > mkGap.pl
#!/usr/bin/env perl
@@ -205,11 +222,11 @@
# == 2350123 + 475536
########################################################################
-# Create kluster run files (DONE - 2010-04-05 - Chin)
+# Create kluster run files (DONE - 2010-04-15 - Chin)
# numerator is oviAri1 gapless bases "real" as reported by:
- # featureBits -noRandom -noHap oviAri1 gap
+ featureBits -noRandom -noHap oviAri1 gap
# 1600136831 bases of 1184628269 (135.075%) in intersection
# denominator is hg19 gapless bases as reported by:
# featureBits -noRandom -noHap hg19 gap
@@ -232,15 +249,18 @@
-e 'select * from gap where bridge="no" order by size;' oviAri1 \
| sort -k7,7nr
# most gaps have size > 100,000
# decide on a minimum gap for this break
- gapToLift -verbose=2 -minGap=cw20000 oviAri1 jkStuff/nonBridged.lft \
+ gapToLift -verbose=2 -minGap=20000 oviAri1 jkStuff/nonBridged.lft \
-bedFile=jkStuff/nonBridged.bed
cp -p jkStuff/nonBridged.lft \
/hive/data/staging/data/oviAri1/oviAri1.nonBridged.lft
+ # ask cluster-admin to copy (every time any file has changed)
+ # /hive/data/staging/data/oviAri1 directory to cluster nodes
+ # /scratch/data/oviAri1
########################################################################
-# GENBANK AUTO UPDATE (working - 2010-04-12 - Chin)
+# GENBANK AUTO UPDATE (working - 2010-04-15 - Chin)
ssh hgwdev
cd $HOME/kent/src/hg/makeDb/genbank
cvsup
@@ -276,34 +296,30 @@
cvs ci -m "adding oviAri1 Sheep" src/lib/gbGenome.c
make install-server
-
ssh genbank
screen # control this business with a screen since it takes a while
+ cd $HOME/kent/src/hg/makeDb/genbank
-XXXX 04-12
+
+XXXX 04-15 evening
cd /cluster/data/genbank
time nice -n +19 bin/gbAlignStep -initial oviAri1 &
- # logFile: var/build/logs/2010.04.12-12:18:21.oviAri1.initalign.log
- # real ???? 127m50.706s
+ # logFile: var/build/logs/2010.04.15-13:49:37.oviAri1.initalign.log
+ # real ???? 368m17.919s
+command failed: gbAlignRun -workdir=work/initial.oviAri1/align at /cluster/genbank/genbank/bin/../lib/gbCommon.pm line 272. at /cluster/genbank/genbank/bin/../lib/gbCommon.pm line 272.
+
# load database when finished
ssh hgwdev
cd /cluster/data/genbank
- # use local copy: XXXX 04-09 still failed with
-XXXX 04-09
-hgwdev 2010.04.09-08:57:17 dbload: begin
-hgwdev 2010.04.09-08:57:17 dbload: command: /cluster/home/chinhli/kent/src/hg/makeDb/genbank/bin/gbDbLoadStep -drop -initialLoad oviAri1
--initialLoad specified and no sequences were found to load
-command failed: gbLoadRna -workdir=work/hgwdev/dbload -initialLoad oviAri1 at /cluster/home/chinhli/kent/src/hg/makeDb/genbank/bin/../lib/gbCommon.pm line 268. at /cluster/home/chinhli/kent/src/hg/makeDb/genbank/bin/../lib/gbCommon.pm line 268.
-
- time nice -19 \
- /cluster/home/chinhli/kent/src/hg/makeDb/genbank/bin/gbDbLoadStep \
- -drop -initialLoad oviAri1 &
- # time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad oviAri1 &
- # logFile: var/dbload/hgwdev/logs/2010.03.26-15:38:17.dbload.log
- # real ???? 68m
+ time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad oviAri1 &
+ # var/dbload/hgwdev/logs/2010.04.13-14:43:37.dbload.log
+ # real 11m7.008s
+
+
+
# enable daily alignment and update of hgwdev
cd ~/kent/src/hg/makeDb/genbank
cvsup
@@ -311,21 +327,20 @@
etc/align.dbs
etc/hgwdev.dbs
cvs ci -m "Added oviAri1 - Sheep" etc/align.dbs etc/hgwdev.dbs
make etc-update
- # DONE 2010-03-31 ????
#########################################################################
# reset position to RHO location as found from blat of hg19 RHO gene
-# (DONE - 2010-04-08 - Chin)
+# (DONE - 2010-04-15 - Chin)
hgsql -e \
'update dbDb set defaultPos="chr13:57394166-57402412" where name="oviAri1";' \
hgcentraltest
############################################################################
# ctgPos2 track - showing clone sequence locations on chromosomes
-# (DONE 2010-04-08 - Chin)
-# NOTE XXXX need to create entry in all.joiner since this is a new species
+# (working 2010-04-15 - Chin)
+# NOTE - create oviAri1 entry in all.joiner since this is a new species
mkdir /hive/data/genomes/oviAri1/bed/ctgPos2
cd /hive/data/genomes/oviAri1/bed/ctgPos2
cat << '_EOF_' > agpToCtgPos2.pl
#!/usr/bin/env perl
@@ -367,9 +382,9 @@
hgLoadSqlTab oviAri1 ctgPos2 $HOME/kent/src/hg/lib/ctgPos2.sql ctgPos2.tab
############################################################################
-# oviAri1 Sheep BLASTZ/CHAIN/NET (working 04-09-2010 - Chin)
+# oviAri1 Sheep BLASTZ/CHAIN/NET (working 04-16-2010 - Chin)
# request to copy /hive/data/staging.oviAri1 over to /scratch/data/oviAri1
screen # use a screen to manage this multi-day job
mkdir /hive/data/genomes/bosTau4/bed/lastzOviAri1.2010-04-12
@@ -548,9 +563,9 @@
cat fb.bosTau4.chainOviAri1Link.txt
# 1383557633 bases of 2731830700 (50.646%) in intersection
#########################################################################
-# SWAP mm9 lastz (working - 2010-04-12 - Chin)
+# SWAP mm9 lastz (DONE - 2010-04-12 - Chin)
# original alignment
cd /hive/data/genomes/mm9/bed/lastzOviAri1.2010-04-09
cat fb.mm9.chainOviAri1Link.txt
# 406407377 bases of 2620346127 (15.510%) in intersection