src/hg/makeDb/doc/oviAri1.txt 1.2
1.2 2010/04/12 21:33:00 chinhli
Adding oviAri1 lastz swap support
Index: src/hg/makeDb/doc/oviAri1.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/oviAri1.txt,v
retrieving revision 1.1
retrieving revision 1.2
diff -b -B -U 4 -r1.1 -r1.2
--- src/hg/makeDb/doc/oviAri1.txt 3 Apr 2010 06:17:47 -0000 1.1
+++ src/hg/makeDb/doc/oviAri1.txt 12 Apr 2010 21:33:00 -0000 1.2
@@ -8,18 +8,13 @@
# file template copied from susScr2.txt
-# Felis catus (NCBI Project ID: 10703, Accession: GCA_000005525.1)
+# Ovis aries (NCBI Project ID: 10709, Accession: GCA_000005525.1)
# by International Sheep Genomics Consortium (ISGC)
# assembly] sequence:
# ftp://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/
# Ovis_aries/Ovis_aries_1.0
-# Felis catus
-
-
-# Sus scrofa - SGSC Sscrofa9.2 NCBI project 10718, CM000812
-# ftp://ftp.ncbi.nlm.nih.gov:genbank/genomes/Eukaryotes/vertebrates_mammals/Sus_scrofa/Sscrofa9.2/
##########################################################################
# Download sequence (DONE - 2010-03-22 Chin)
mkdir /hive/data/genomes/oviAri1
@@ -88,9 +83,9 @@
# 75747883 bases (59104875 N's 16643008 real 16643008 upper 0 lower)
# in 1190 sequences in 1 files
#########################################################################
-# Initial makeGenomeDb.pl (working - 2010-04-01 - Chin)
+# Initial makeGenomeDb.pl (DONE - 2010-04-01 - Chin)
cd /hive/data/genomes/oviAri1
cat << '_EOF_' > oviAri1.config.ra
# Config parameters for makeGenomeDb.pl:
db oviAri1
@@ -98,9 +93,9 @@
genomeCladePriority 31
scientificName Ovis aries
commonName Sheep
assemblyDate Feb. 2010
-assemblyLabel ISGC (NCBI project 10703, accession GCA_000005525.1)
+assemblyLabel ISGC (NCBI project 10709, accession GCA_000005525.1)
assemblyShortLabel ISGC Ovis_aries_1.0
orderKey 236
mitoAcc NC_001941
fastaFiles /hive/data/genomes/oviAri1/genbank/ucscChr/chr*.fa.gz
@@ -115,8 +110,11 @@
# real 10m50.210s
# add the trackDb entries to the source tree, and the 2bit link:
ln -s `pwd`/oviAri1.unmasked.2bit /gbdb/oviAri1/oviAri1.2bit
# browser should function now
+ # per instructions at end of makeGenomeDb.log, edit the html file
+ # and check in the *.ra and *.html files in
+ # /cluster/home/chinhli/kent/src/hg/makeDb/trackDb/sheep/oviAri1
#########################################################################
# RepeatMasker (DONE - 2010-04-2 - Chin)
@@ -159,14 +157,14 @@
-strand=+ ../../oviAri1.unmasked.2bit > findMotif.txt 2>&1
# real 1m56.688s
grep "^#GAP " findMotif.txt | sed -e "s/^#GAP //" > allGaps.bed
featureBits oviAri1 -not gap -bed=notGap.bed
- # 1201962925 bases of 1201962925 (100.000%) in intersection
+ # 1201271277 bases of 1201271277 (100.000%) in intersection
featureBits oviAri1 allGaps.bed notGap.bed -bed=new.gaps.bed
- # 691648 bases of 1201962925 (0.058%) in intersection
+ # 0 bases of 1201271277 (0.000%) in intersection zero?????
# what is the highest index in the existing gap table:
hgsql -N -e "select ix from gap;" oviAri1 | sort -n | tail -1
- # 484408
+ # 959944
# use tcsh and ctrl-c to create the here doc
cat << '_EOF_' > mkGap.pl
#!/usr/bin/env perl
@@ -205,10 +203,11 @@
hgsql -e "select count(*) from gap;" oviAri1
# 2825659
# == 2350123 + 475536
+
########################################################################
-# Create kluster run files (working - 2010-04-03 - Chin)
+# Create kluster run files (DONE - 2010-04-05 - Chin)
# numerator is oviAri1 gapless bases "real" as reported by:
# featureBits -noRandom -noHap oviAri1 gap
# 1600136831 bases of 1184628269 (135.075%) in intersection
@@ -231,24 +230,24 @@
# check non-bridged gaps to see what the typical size is:
hgsql -N \
-e 'select * from gap where bridge="no" order by size;' oviAri1 \
| sort -k7,7nr
-XXXX 04/03
+ # most gaps have size > 100,000
# decide on a minimum gap for this break
- gapToLift -verbose=2 -minGap=5000 oviAri1 jkStuff/nonBridged.lft \
+ gapToLift -verbose=2 -minGap=20000 oviAri1 jkStuff/nonBridged.lft \
-bedFile=jkStuff/nonBridged.bed
cp -p jkStuff/nonBridged.lft \
/hive/data/staging/data/oviAri1/oviAri1.nonBridged.lft
########################################################################
-# GENBANK AUTO UPDATE (DONE - 2010-03-26 - Hiram)
+# GENBANK AUTO UPDATE (working - 2010-04-12 - Chin)
ssh hgwdev
- cd ~/kent/src/hg/makeDb/genbank
+ cd $HOME/kent/src/hg/makeDb/genbank
cvsup
- # edit etc/genbank.conf to add oviAri1 just before susScr1
+ # edit etc/genbank.conf to add oviAri1 just before susScr2
-# oviAri1 (Pig)
+# oviAri1 (Sheep)
oviAri1.serverGenome = /hive/data/genomes/oviAri1/oviAri1.2bit
oviAri1.clusterGenome = /scratch/data/oviAri1/oviAri1.2bit
oviAri1.ooc = /scratch/data/oviAri1/oviAri1.11.ooc
oviAri1.lift = /scratch/data/oviAri1/oviAri1.nonBridged.lft
@@ -264,45 +264,68 @@
cvs ci -m "Added oviAri1" etc/genbank.conf
# update /cluster/data/genbank/:
make etc-update
+
+# Edit src/lib/gbGenome.c to add new species. With these two lines:
+# static char *oviAriNames[] = {"Ovis aries", NULL};
+# ... later ...
+# {"oviAri", oviAriNames},
+# gbGenome.c is in
+# /cluster/home/chinhli/kent/src/hg/makeDb/genbank/src/lib
+# make and checkin
+
+ cvs ci -m "adding oviAri1 Sheep" src/lib/gbGenome.c
+ make install-server
+
+
ssh genbank
- screen # use a screen to manage this job
+ screen # control this business with a screen since it takes a while
+
+XXXX 04-12
cd /cluster/data/genbank
time nice -n +19 bin/gbAlignStep -initial oviAri1 &
- # logFile: var/build/logs/2010.03.26-10:25:04.oviAri1.initalign.log
- # real 304m38.588s
+ # logFile: var/build/logs/2010.04.12-12:18:21.oviAri1.initalign.log
+ # real ???? 127m50.706s
# load database when finished
ssh hgwdev
cd /cluster/data/genbank
- time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad oviAri1
+ # use local copy: XXXX 04-09 still failed with
+XXXX 04-09
+hgwdev 2010.04.09-08:57:17 dbload: begin
+hgwdev 2010.04.09-08:57:17 dbload: command: /cluster/home/chinhli/kent/src/hg/makeDb/genbank/bin/gbDbLoadStep -drop -initialLoad oviAri1
+-initialLoad specified and no sequences were found to load
+command failed: gbLoadRna -workdir=work/hgwdev/dbload -initialLoad oviAri1 at /cluster/home/chinhli/kent/src/hg/makeDb/genbank/bin/../lib/gbCommon.pm line 268. at /cluster/home/chinhli/kent/src/hg/makeDb/genbank/bin/../lib/gbCommon.pm line 268.
+
+ time nice -19 \
+ /cluster/home/chinhli/kent/src/hg/makeDb/genbank/bin/gbDbLoadStep \
+ -drop -initialLoad oviAri1 &
+ # time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad oviAri1 &
# logFile: var/dbload/hgwdev/logs/2010.03.26-15:38:17.dbload.log
- # real 68m
+ # real ???? 68m
# enable daily alignment and update of hgwdev
cd ~/kent/src/hg/makeDb/genbank
cvsup
# add oviAri1 to:
etc/align.dbs
etc/hgwdev.dbs
- cvs ci -m "Added oviAri1 - Pig" etc/align.dbs etc/hgwdev.dbs
+ cvs ci -m "Added oviAri1 - Sheep" etc/align.dbs etc/hgwdev.dbs
make etc-update
- # DONE 2010-03-31
+ # DONE 2010-03-31 ????
#########################################################################
# reset position to RHO location as found from blat of hg19 RHO gene
-# (DONE - 2010-03-31 - Hiram)
+# (DONE - 2010-04-08 - Chin)
hgsql -e \
'update dbDb set defaultPos="chr13:57394166-57402412" where name="oviAri1";' \
hgcentraltest
- # and make this the default genome for Pig
- hgsql -e 'update defaultDb set name="oviAri1" where name="susScr1";' \
- hgcentraltest
############################################################################
# ctgPos2 track - showing clone sequence locations on chromosomes
-# (DONE - 2010-03-26 - Hiram)
+# (DONE 2010-04-08 - Chin)
+# NOTE XXXX need to create entry in all.joiner since this is a new species
mkdir /hive/data/genomes/oviAri1/bed/ctgPos2
cd /hive/data/genomes/oviAri1/bed/ctgPos2
cat << '_EOF_' > agpToCtgPos2.pl
#!/usr/bin/env perl
@@ -344,41 +367,73 @@
hgLoadSqlTab oviAri1 ctgPos2 $HOME/kent/src/hg/lib/ctgPos2.sql ctgPos2.tab
############################################################################
-# oviAri1 Pig BLASTZ/CHAIN/NET (DONE - 2010-03-27 - Hiram)
+# oviAri1 Sheep BLASTZ/CHAIN/NET (working 04-09-2010 - Chin)
+
+# request to copy /hive/data/staging/data/oviAri1 over to /scratch/data/oviAri1
screen # use a screen to manage this multi-day job
- mkdir /hive/data/genomes/oviAri1/bed/lastzBosTau4.2010-03-27
- cd /hive/data/genomes/oviAri1/bed/lastzBosTau4.2010-03-27
+ mkdir /hive/data/genomes/bosTau4/bed/lastzOviAri1.2010-04-12
+ cd /hive/data/genomes/bosTau4/bed/lastzOviAri1.2010-04-12
cat << '_EOF_' > DEF
-# Cow vs. Pig
+# Cow vs. Sheep
BLASTZ_M=50
-# TARGET: Pig SusScr2
-SEQ1_DIR=/scratch/data/oviAri1/oviAri1.2bit
-SEQ1_LEN=/scratch/data/oviAri1/chrom.sizes
-SEQ1_CHUNK=10000000
+# TARGET: Cow BosTau4
+SEQ1_DIR=/scratch/data/bosTau4/bosTau4.2bit
+SEQ1_LEN=/scratch/data/bosTau4/chrom.sizes
+SEQ1_CHUNK=20000000
SEQ1_LAP=10000
-SEQ1_LIMIT=100
+SEQ1_LIMIT=50
-# QUERY: Cow BosTau4
-SEQ2_DIR=/scratch/data/bosTau4/bosTau4.2bit
-SEQ2_LEN=/scratch/data/bosTau4/chrom.sizes
+# QUERY: Sheep OviAri1
+SEQ2_DIR=/scratch/data/oviAri1/oviAri1.2bit
+SEQ2_LEN=/scratch/data/oviAri1/chrom.sizes
SEQ2_CHUNK=10000000
+SEQ2_LIMIT=100
SEQ2_LAP=0
-BASE=/hive/data/genomes/oviAri1/bed/lastzBosTau4.2010-03-27
+BASE=/hive/data/genomes/bosTau4/bed/lastzOviAri1.2010-04-12
TMPDIR=/scratch/tmp
'_EOF_'
# << this line keeps emacs coloring happy
+XXXX 04-12 working
time nice -n +19 doBlastzChainNet.pl -verbose=2 \
`pwd`/DEF \
-noLoadChainSplit -syntenicNet \
-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
- # real 2422m32.203s
+ # real ???? 2422m32.203s
+
+ # and the swap
+ mkdir /hive/data/genomes/oviAri1/bed/blastz.bosTau4.swap
+ cd /hive/data/genomes/oviAri1/bed/blastz.bosTau4.swap
+ time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+ /hive/data/genomes/bosTau4/bed/lastzOviAri1.2010-04-12/DEF \
+ -swap -noLoadChainSplit -syntenicNet \
+ -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
+ -chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
+ # Elapsed time: 63m4s
+ cat fb.oviAri1.chainBosTau4Link.txt
+ # ???? 656444411 bases of 2231298548 (29.420%) in intersection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
# failed during the netChainSubset | chainStitchId out of memory
# finish that manually with ulimits to allow more memory on hgwdev:
export sizeG=188743680
ulimit -d $sizeG
@@ -411,9 +466,9 @@
-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
-chainMinScore=3000 -chainLinearGap=medium > download.log 2>&1 &
XXX - running Tue Mar 30 13:18:09 PDT 2010
# creating a bigWig graph to see the chain pileups:
- cd /hive/data/genomes/oviAri1/bed/lastzBosTau4.2010-03-27/axtChain
+ cd /hive/data/genomes/oviAri1/bed/lastzBosTau4.2010-4-09/axtChain
zcat oviAri1.bosTau4.all.chain.gz | grep "^chain " \
| awk '{printf "%s\t%d\t%d\t%s\t%s\t%s\n", $3, $6, $7, $8, $2, $5}' \
> all.bed
# find the largest score:
@@ -489,68 +544,93 @@
limit datasize 163840m
limit vmemoryuse 163840m
# real 498m5.861s
- cat fb.bosTau4.chainSusScr2Link.txt
+ cat fb.bosTau4.chainOviAri1Link.txt
# 1383557633 bases of 2731830700 (50.646%) in intersection
#########################################################################
-# SWAP mm9 lastz (DONE - 2010-03-27 - Hiram)
+# SWAP mm9 lastz (working - 2010-04-12 - Chin)
# original alignment
- cd /hive/data/genomes/mm9/bed/lastzSusScr2.2010-03-26
- cat fb.mm9.chainSusScr2Link.txt
- # 616615408 bases of 2620346127 (23.532%) in intersection
+ cd /hive/data/genomes/mm9/bed/lastzOviAri1.2010-04-09
+ cat fb.mm9.chainOviAri1Link.txt
+ # 406407377 bases of 2620346127 (15.510%) in intersection
# and the swap
mkdir /hive/data/genomes/oviAri1/bed/blastz.mm9.swap
cd /hive/data/genomes/oviAri1/bed/blastz.mm9.swap
time nice -n +19 doBlastzChainNet.pl -verbose=2 \
- /hive/data/genomes/mm9/bed/lastzSusScr2.2010-03-26/DEF \
+ /hive/data/genomes/mm9/bed/lastzOviAri1.2010-04-09/DEF \
-swap -noLoadChainSplit -syntenicNet \
-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
- # Elapsed time: 63m4s
+ # real 37m25.345s
cat fb.oviAri1.chainMm9Link.txt
- # 656444411 bases of 2231298548 (29.420%) in intersection
+ # 383753361 bases of 1201271277 (31.946%) in intersection
############################################################################
-# SWAP hg19 lastz (DONE - 2010-03-27 - Hiram)
+# SWAP hg19 lastz (DONE 2010-04-12 - Chin)
# original alignment
- cat fb.hg19.chainSusScr2Link.txt
- # 1198794058 bases of 2897316137 (41.376%) in intersection
+ cat fb.hg19.chainOviAri1Link.txt
+ # 878545517 bases of 2897316137 (30.323%) in intersection
# and the swap
mkdir /hive/data/genomes/oviAri1/bed/blastz.hg19.swap
cd /hive/data/genomes/oviAri1/bed/blastz.hg19.swap
time nice -n +19 doBlastzChainNet.pl -verbose=2 \
- /hive/data/genomes/hg19/bed/lastzSusScr2.2010-03-26/DEF \
+ /hive/data/genomes/hg19/bed/lastzOviAri1.2010-04-09/DEF \
-swap -noLoadChainSplit -syntenicNet \
-workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=pk \
-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
- # Elapsed time: 112m40s
+ # real 71m37.020s
cat fb.oviAri1.chainHg19Link.txt
- # 1272785114 bases of 2231298548 (57.042%) in intersection
+ # 824310420 bases of 1201271277 (68.620%) in intersection
#########################################################################
-# SWAP monDom5 lastz (DONE - 2010-03-27 - Hiram)
+# SWAP monDom5 lastz (working 2010-04-12 Chin)
# original alignment
- cat fb.monDom5.chainSusScr2Link.txt
- # 179898307 bases of 3501660299 (5.138%) in intersection
+ cat fb.monDom5.chainOviAri1Link.txt
+ # 133534458 bases of 3501660299 (3.813%) in intersection
+XXXX 04-12
# and the swap
mkdir /hive/data/genomes/oviAri1/bed/blastz.monDom5.swap
cd /hive/data/genomes/oviAri1/bed/blastz.monDom5.swap
time nice -n +19 doBlastzChainNet.pl -verbose=2 \
- /hive/data/genomes/monDom5/bed/lastzSusScr2.2010-03-26/DEF \
+ /hive/data/genomes/monDom5/bed/lastzOviAri1.2010-04-09/DEF \
-swap -noLoadChainSplit -syntenicNet \
- -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=pk \
+ -workhorse=hgwdev -smallClusterHub=memn -bigClusterHub=pk \
-chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
# Elapsed time: 82m55s
cat fb.oviAri1.chainMonDom5Link.txt
# 182834643 bases of 2231298548 (8.194%) in intersection
+
+#########################################################################
+# SWAP equCab2 lastz (DONE 2010-04-12 Chin)
+ # original alignment
+ cat fb.equCab2.chainOviAri1Link.txt
+ # 1012763540 bases of 2428790173 (41.698%) in intersection
+
+
+ # and the swap
+ mkdir /hive/data/genomes/oviAri1/bed/blastz.equCab2.swap
+ cd /hive/data/genomes/oviAri1/bed/blastz.equCab2.swap
+ time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+ /hive/data/genomes/equCab2/bed/lastzOviAri1.2010-04-09/DEF \
+ -swap -noLoadChainSplit -syntenicNet \
+ -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=pk \
+ -chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
+ # Elapsed time: ???? 63m4s
+ # real 100m49.583s
+
+
+ cat fb.oviAri1.chainEquCab2Link.txt
+ # 940763026 bases of 1201271277 (78.314%) in intersection
+
+
############################################################################
# running cpgIsland business (DONE - 2010-03-31 - Hiram)
mkdir /hive/data/genomes/oviAri1/bed/cpgIsland
cd /hive/data/genomes/oviAri1/bed/cpgIsland