src/hg/makeDb/doc/ce8.txt 1.2
1.2 2009/07/28 21:55:26 hiram
5-way alignments done on ce7 and ce8, liftOvers done for ce6 to ce7 and ce7 to ce8
Index: src/hg/makeDb/doc/ce8.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/ce8.txt,v
retrieving revision 1.1
retrieving revision 1.2
diff -b -B -U 1000000 -r1.1 -r1.2
--- src/hg/makeDb/doc/ce8.txt 24 Jul 2009 22:54:44 -0000 1.1
+++ src/hg/makeDb/doc/ce8.txt 28 Jul 2009 21:55:26 -0000 1.2
@@ -1,167 +1,524 @@
# for emacs: -*- mode: sh; -*-
# Caenorhabditis elegans
# Washington University School of Medicine GSC and Sanger Institute WS204
# $Id$
#########################################################################
# DOWNLOAD SEQUENCE (DONE - 2009-07-22 - Hiram)
mkdir /hive/data/genomes/ce8
cd /hive/data/genomes/ce8
mkdir ws204
cd ws204
TOP=/hive/data/genomes/ce8/ws204
export TOP
for D in annotation genome_feature_tables/GFF2 \
genome_feature_tables/SUPPLEMENTARY_GFF sequences/dna \
sequences/protein sequences/rna
do
mkdir -p ${D}
cd ${D}
wget --timestamping \
ftp://ftp.sanger.ac.uk/pub2/wormbase/WS204/genomes/c_elegans/${D}/*.*
cd ${TOP}
done
# about 1h24m
#########################################################################
# NORMALIZE SEQUENCE NAMES TO BEGIN WITH chr (DONE - 2009-07-24 - Hiram)
mkdir /hive/data/genomes/ce8/sanger
cd /hive/data/genomes/ce8/sanger
# Fix fasta names:
zcat ../ws204/sequences/dna/CHR*[XVAI].dna.gz \
| sed -e '/^$/ d; s/^>CHROMOSOME_MtDNA/>chrM/; s/^>CHROMOSOME_/>chr/;' \
| gzip -c > UCSC.fa.gz
faSize -detailed UCSC.fa.gz
# chrI 15072421
# chrII 15279323
# chrIII 13783685
# chrIV 17493784
# chrM 13794
# chrV 20924143
# chrX 17718854
# chrII(+1) and chrII(+3) are slightly different than WS200
# Make sure we get the same sizes from this command:
zcat ../ws204/sequences/dna/CHR*[XVAI].dna.gz | sed -e '/^$/ d;' \
| faSize -detailed stdin
faCount UCSC.fa.gz
#seq len A C G T N cpg
# chrI 15072421 4835939 2695879 2692150 4848453 0 503521
# chrII 15279323 4878195 2769216 2762198 4869714 0 492149
# chrIII 13783685 4444652 2449141 2466322 4423570 0 459669
# chrIV 17493784 5711040 3034767 3017008 5730969 0 522372
# chrM 13794 4335 1225 2055 6179 0 110
# chrV 20924143 6750393 3712058 3701397 6760295 0 638983
# chrX 17718854 5747199 3119702 3117868 5734085 0 514715
# total 100286004 32371753 17781988 17758998 323732650 3131519
# WS200:
# chrII 15279324 4878196 2769216 2762198 4869714 0 492149
# chrIII 13783682 4444652 2449139 2466321 4423570 0 459669
# total 100286002 32371754 17781986 17758997 323732650 3131519
# Fix AGP names:
sed -e 's/^/chr/' ../ws204/sequences/dna/CHR*.agp > UCSC.agp
# And add a fake mitochondrial AGP entry for the sake of downstream
# tools (make sure the GenBank sequence is identical to given):
echo -e "chrM\t1\t13794\t1\tF\tNC_001328.1\t1\t13794\t+" >> UCSC.agp
#########################################################################
# run the makeGenomeDb procedure to create the db and unmasked sequence
# (DONE - 2009-07-22 - Hiram)
cd /hive/data/genomes/ce8
cat << '_EOF_' > ce8.config.ra
# Config parameters for makeGenomeDb.pl:
db ce8
clade worm
genomeCladePriority 10
scientificName Caenorhabditis elegans
commonName C. elegans
assemblyDate Jun 2009
assemblyLabel Washington University School of Medicine GSC and Sanger Institute WS204
orderKey 824
mitoAcc none
fastaFiles /hive/data/genomes/ce8/sanger/UCSC.fa.gz
agpFiles /hive/data/genomes/ce8/sanger/UCSC.agp
# qualFiles /dev/null
dbDbSpeciesDir worm
taxId 6239
'_EOF_'
# << emacs
mkdir jkStuff
# run just to AGP to make sure things are sane first
nice -n +19 makeGenomeDb.pl ce8.config.ra -stop=agp \
> jkStuff/makeGenomeDb.agp.log 2>&1
# now, continuing to make the Db and all
time nice -n +19 makeGenomeDb.pl ce8.config.ra -continue=db \
> jkStuff/makeGenomeDb.db.log 2>&1
# real 1m33.036s
# take the trackDb business there and check it into the source tree
# fixup the description, gap and gold html page descriptions
#########################################################################
# REPEATMASKER (DONE - 2009-07-24 - Hiram)
screen # use screen to control the job
mkdir /hive/data/genomes/ce8/bed/repeatMasker
cd /hive/data/genomes/ce8/bed/repeatMasker
time nice -n +19 doRepeatMasker.pl -bigClusterHub=swarm \
-buildDir=`pwd` ce8 > do.log 2>&1 &
# real 35m58.812s
# from the do.log:
# June 4 2009 (open-3-2-8) version of RepeatMasker
# CC RELEASE 20090604;
cat faSize.rmsk.txt
# 100286004 bases (0 N's 100286004 real 87035623 upper 13250381 lower)
# in 7 sequences in 1 files
# %13.21 masked total, %13.21 masked real
#########################################################################
# SIMPLE REPEATS (DONE - 2009-07-24 - Hiram)
screen # use screen to control the job
mkdir /hive/data/genomes/ce8/bed/simpleRepeat
cd /hive/data/genomes/ce8/bed/simpleRepeat
time nice -n +19 doSimpleRepeat.pl -smallClusterHub=encodek \
-buildDir=`pwd` ce8 > do.log 2>&1 &
# real 18m30.323s
cat fb.simpleRepeat
# 4331076 bases of 100286004 (4.319%) in intersection
#########################################################################
# MASK SEQUENCE WITH RM+TRF (DONE - 2009-07-24 - Hiram)
# Since both doRepeatMasker.pl and doSimpleRepeats.pl have completed,
# now it's time to combine the masking into the final ce8.2bit,
# following the instructions at the end of doSimpleRepeat's output.
cd /hive/data/genomes/ce8
twoBitMask ce8.rmsk.2bit -add bed/simpleRepeat/trfMask.bed ce8.2bit
# You can safely ignore the warning about extra BED columns
twoBitToFa ce8.2bit stdout | faSize stdin > faSize.ce8.2bit.txt
cat faSize.ce8.2bit.txt
# 100286004 bases (0 N's 100286004 real 86863769 upper 13422235 lower)
# in 7 sequences in 1 files
# %13.38 masked total, %13.38 masked real
# set the symlink on hgwdev to /gbdb/ce8
rm -f /gbdb/ce8/ce8.2bit
ln -s `pwd`/ce8.2bit /gbdb/ce8/ce8.2bit
#########################################################################
# MAKE 11.OOC FILE FOR BLAT (DONE - 2009-07-22 - Hiram)
# Use -repMatch=100 (based on size -- for human we use 1024, and
# worm size is ~3.4% of human judging by gapless ce4 vs. hg18 genome
# size from featureBits. So we would use 34, but that yields a very
# high number of tiles to ignore, especially for a small more compact
# genome. Bump that up a bit to be more conservative.
cd /hive/data/genomes/ce8
blat ce8.2bit /dev/null /dev/null -tileSize=11 \
-makeOoc=jkStuff/ce8.11.ooc -repMatch=100
# Wrote 8514 overused 11-mers to jkStuff/ce8.11.ooc
# copy all of this stuff to the klusters:
mkdir /hive/data/staging/data/ce8
cp -p jkStuff/ce8.11.ooc chrom.sizes ce8.2bit /hive/data/staging/data/ce8
# request push of that data to kluster nodes /scratch/data/ce8/
#########################################################################
+## LASTZ caePb2 (DONE - 2009-07-28 - Hiram)
+ screen # use screen to control the job
+ mkdir /hive/data/genomes/ce8/bed/lastzCaePb2.2009-07-28
+ cd /hive/data/genomes/ce8/bed/lastzCaePb2.2009-07-28
+
+ cat << '_EOF_' > DEF
+# ce8 vs caePb2
+BLASTZ_H=2000
+BLASTZ_M=50
+
+# TARGET: elegans Ce8
+SEQ1_DIR=/scratch/data/ce8/ce8.2bit
+SEQ1_LEN=/scratch/data/ce8/chrom.sizes
+SEQ1_CHUNK=1000000
+SEQ1_LAP=10000
+
+# QUERY: C. PB2801 caePb2
+SEQ2_DIR=/scratch/data/caePb2/caePb2.2bit
+SEQ2_LEN=/scratch/data/caePb2/chrom.sizes
+SEQ2_CTGDIR=/scratch/data/caePb2/caePb2.supercontigs.2bit
+SEQ2_CTGLEN=/scratch/data/caePb2/caePb2.supercontigs.sizes
+SEQ2_LIFT=/scratch/data/caePb2/caePb2.supercontigs.lift
+SEQ2_CHUNK=1000000
+SEQ2_LAP=0
+SEQ2_LIMIT=50
+
+BASE=/hive/data/genomes/ce8/bed/lastzCaePb2.2009-07-28
+TMPDIR=/scratch/tmp
+'_EOF_'
+ # << happy emacs
+
+ time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+ `pwd`/DEF -verbose=2 -bigClusterHub=swarm -workhorse=hgwdev \
+ -qRepeats=windowmaskerSdust -noLoadChainSplit -smallClusterHub=memk \
+ > do.log 2>&1 &
+ # about 50 minutes
+ cat fb.ce8.chainCaePb2Link.txt
+ # 40793017 bases of 100286004 (40.677%) in intersection
+
+ # swap, this is also in caePb2.txt
+ mkdir /hive/data/genomes/caePb2/bed/blastz.ce8.swap
+ cd /hive/data/genomes/caePb2/bed/blastz.ce8.swap
+ time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+ -workhorse=hgwdev -qRepeats=windowmaskerSdust \
+ /hive/data/genomes/ce8/bed/lastzCaePb2.2009-07-28/DEF \
+ -bigClusterHub=swarm -smallClusterHub=encodek -swap > swap.log 2>&1 &
+ # real 3m16.709s
+ cat fb.caePb2.chainCe8Link.txt
+ # 55084580 bases of 170473138 (32.313%) in intersection
+
+#########################################################################
+## BLASTZ caeJap2 (DONE - 2009-07-28 - Hiram)
+ screen # use screen to control the job
+ mkdir /hive/data/genomes/ce8/bed/lastzCaeJap2.2009-07-28
+ cd /hive/data/genomes/ce8/bed/lastzCaeJap2.2009-07-28
+
+ cat << '_EOF_' > DEF
+# ce8 vs caeJap2
+BLASTZ_H=2000
+BLASTZ_M=50
+
+# TARGET: elegans Ce8
+SEQ1_DIR=/scratch/data/ce8/ce8.2bit
+SEQ1_LEN=/scratch/data/ce8/chrom.sizes
+SEQ1_CHUNK=1000000
+SEQ1_LAP=10000
+
+# QUERY: C. japonica caeJap2
+SEQ2_DIR=/scratch/data/caeJap2/caeJap2.2bit
+SEQ2_LEN=/scratch/data/caeJap2/chrom.sizes
+SEQ2_CTGDIR=/scratch/data/caeJap2/caeJap2.supers.2bit
+SEQ2_CTGLEN=/scratch/data/caeJap2/caeJap2.supers.sizes
+SEQ2_LIFT=/scratch/data/caeJap2/caeJap2.chrUn.lift
+SEQ2_CHUNK=1000000
+SEQ2_LAP=0
+SEQ2_LIMIT=50
+
+BASE=/hive/data/genomes/ce8/bed/lastzCaeJap2.2009-07-28
+TMPDIR=/scratch/tmp
+'_EOF_'
+ # << happy emacs
+
+ time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+ `pwd`/DEF \
+ -bigClusterHub=swarm -noLoadChainSplit -qRepeats=windowmaskerSdust \
+ -workhorse=hgwdev -smallClusterHub=memk > do.log 2>&1 &
+ # about 42 minutes
+ cat fb.ce8.chainCaeJap2Link.txt
+ # 27270052 bases of 100286004 (27.192%) in intersection
+
+ # swap, this is also in caeJap2.txt
+ mkdir /hive/data/genomes/caeJap2/bed/blastz.ce8.swap
+ cd /hive/data/genomes/caeJap2/bed/blastz.ce8.swap
+ time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+ -qRepeats=windowmaskerSdust -bigClusterHub=swarm -noLoadChainSplit \
+ /hive/data/genomes/ce8/bed/lastzCaeJap2.2009-07-28/DEF \
+ -smallClusterHub=encodek -swap > swap.log 2>&1 &
+ # real 3m44.671s
+ cat fb.caeJap2.chainCe8Link.txt
+ # 26440993 bases of 129295754 (20.450%) in intersection
+
+############################################################################
+## BLASTZ cb3 (DONE - 2009-07-28 - Hiram)
+ screen # use screen to control the job
+ mkdir /hive/data/genomes/ce8/bed/lastzCb3.2009-07-28
+ cd /hive/data/genomes/ce8/bed/lastzCb3.2009-07-28
+
+ cat << '_EOF_' > DEF
+# ce8 vs cb3
+BLASTZ_H=2000
+BLASTZ_M=50
+
+# TARGET: elegans Ce8
+SEQ1_DIR=/scratch/data/ce8/ce8.2bit
+SEQ1_LEN=/scratch/data/ce8/chrom.sizes
+SEQ1_CHUNK=1000000
+SEQ1_LAP=10000
+
+# QUERY: C. briggsae cb3
+SEQ2_DIR=/hive/data/genomes/cb3/cb3.rmskTrf.2bit
+SEQ2_LEN=/hive/data/genomes/cb3/chrom.sizes
+SEQ2_CHUNK=1000000
+SEQ2_LAP=0
+
+BASE=/hive/data/genomes/ce8/bed/lastzCb3.2009-07-28
+TMPDIR=/scratch/tmp
+'_EOF_'
+ # << happy emacs
+
+ time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+ `pwd`/DEF \
+ -workhorse=hgwdev -bigClusterHub=swarm -noLoadChainSplit \
+ -smallClusterHub=memk > do.log 2>&1 &
+ # about 40 minutes
+ cat fb.ce8.chainCb3Link.txt
+ # 42421395 bases of 100286004 (42.300%) in intersection
+
+ # swap, this is also in cb3.txt
+ mkdir /hive/data/genomes/cb3/bed/blastz.ce8.swap
+ cd /hive/data/genomes/cb3/bed/blastz.ce8.swap
+ time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+ /hive/data/genomes/ce8/bed/lastzCb3.2009-07-28/DEF \
+ -workhorse=hgwdev -bigClusterHub=swarm -noLoadChainSplit \
+ -smallClusterHub=encodek -swap > swap.log 2>&1 &
+ # real 3m46.700s
+
+ cat fb.cb3.chainCe8Link.txt
+ # 43115929 bases of 108433446 (39.763%) in intersection
+
+############################################################################
+## BLASTZ caeRem3 (DONE - 2009-07-28,09 - Hiram)
+ screen # use screen to control the job
+ mkdir /hive/data/genomes/ce8/bed/lastzCaeRem3.2009-07-28
+ cd /hive/data/genomes/ce8/bed/lastzCaeRem3.2009-07-28
+
+ cat << '_EOF_' > DEF
+# ce8 vs caeRem3
+BLASTZ_H=2000
+BLASTZ_M=50
+
+# TARGET: elegans Ce8
+SEQ1_DIR=/scratch/data/ce8/ce8.2bit
+SEQ1_LEN=/scratch/data/ce8/chrom.sizes
+SEQ1_CHUNK=1000000
+SEQ1_LAP=10000
+
+# QUERY: C. remanei caeRem3
+SEQ2_DIR=/scratch/data/caeRem3/caeRem3.2bit
+SEQ2_LEN=/scratch/data/caeRem3/chrom.sizes
+SEQ2_CTGDIR=/scratch/data/caeRem3/caeRem3.supercontigs.2bit
+SEQ2_CTGLEN=/scratch/data/caeRem3/caeRem3.supercontigs.sizes
+SEQ2_LIFT=/scratch/data/caeRem3/caeRem3.chrUn.lift
+SEQ2_CHUNK=1000000
+SEQ2_LAP=0
+SEQ2_LIMIT=50
+
+BASE=/hive/data/genomes/ce8/bed/lastzCaeRem3.2009-07-28
+TMPDIR=/scratch/tmp
+'_EOF_'
+ # << happy emacs
+
+ time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+ `pwd`/DEF \
+ -workhorse=hgwdev -bigClusterHub=swarm -noLoadChainSplit \
+ -qRepeats=windowmaskerSdust -smallClusterHub=memk > do.log 2>&1 &
+XXX - running Tue Jul 28 11:14:53 PDT 2009
+ # real 28m14.168s
+ cat fb.ce8.chainCaeRem3Link.txt
+ # 41841184 bases of 100286004 (41.722%) in intersection
+
+ # swap, this is also in caeRem3.txt
+ mkdir /hive/data/genomes/caeRem3/bed/blastz.ce8.swap
+ cd /hive/data/genomes/caeRem3/bed/blastz.ce8.swap
+ time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+ -qRepeats=windowmaskerSdust \
+ -workhorse=hgwdev -noLoadChainSplit \
+ /hive/data/genomes/ce8/bed/lastzCaeRem3.2009-07-28/DEF \
+ -bigClusterHub=swarm -smallClusterHub=encodek -swap > swap.log 2>&1 &
+ # real 3m10.033s
+ cat fb.caeRem3.chainCe8Link.txt
+ # 46320775 bases of 138406388 (33.467%) in intersection
+
+############################################################################
+## 5-Way multiple alignment (DONE - 2009-07-28 - Hiram)
+
+ mkdir /cluster/data/ce8/bed/multiz5way
+ cd /cluster/data/ce8/bed/multiz5way
+ # See notes in ce6.txt for 6-way alignment. This is the tree from
+ # there.
+
+ cat << '_EOF_' > 5way.nh
+((C._elegans_ce8:0.003000,
+ (C._brenneri_caePb2:0.013000,
+ (C._remanei_caeRem3:0.003000,C._briggsae_cb3:0.005000):0.004000)
+ :0.002000):0.001000,
+ C._japonica_caeJap2:0.023000);
+'_EOF_'
+ # << happy emacs
+
+ /cluster/bin/phast/x86_64/all_dists 5way.nh > 5way.distances.txt
+ grep -i ce8 5way.distances.txt | sort -k3,3n
+ # Use this output for reference, and use the calculated
+ # distances in the table below to order the organisms and check
+ # the button order on the browser.
+ # And if you can fill in the table below entirely, you have
+ # succeeded in finishing all the alignments required.
+ #
+# featureBits chainLink measures
+# chaince8Link chain linearGap
+# distance on Ce8 on other minScore
+# 1 0.0120 - remanei_caeRem3 (% 41.722) (% 33.467) 1000 loose
+# 2 0.0140 - briggsae_cb3 (% 42.300) (% 39.763) 1000 loose
+# 3 0.0180 - brenneri_caePb2 (% 40.677) (% 32.313) 1000 loose
+# 3 0.0270 - japonica_caeJap2 (% 27.192) (% 20.450) 1000 loose
+
+ cd /cluster/data/ce8/bed/multiz5way
+ # bash shell syntax here ...
+ export H=/cluster/data/ce8/bed
+ mkdir mafLinks
+ for G in caeRem3 cb3 caePb2 caeJap2
+ do
+ mkdir mafLinks/$G
+ if [ ! -d ${H}/lastz.${G}/mafNet ]; then
+ echo "missing directory lastz.${G}/mafNet"
+ fi
+ ln -s ${H}/lastz.$G/mafNet/*.maf.gz ./mafLinks/$G
+ done
+
+ # these are x86_64 binaries
+ mkdir penn
+ cp -p /cluster/bin/penn/multiz.2008-11-25/multiz penn
+ cp -p /cluster/bin/penn/multiz.2008-11-25/maf_project penn
+ cp -p /cluster/bin/penn/multiz.2008-11-25/autoMZ penn
+
+ # the autoMultiz cluster run
+ ssh memk
+ cd /hive/data/genomes/ce8/bed/multiz5way/
+
+ # create species list and stripped down tree for autoMZ
+ sed -e \
+'s/[a-z][a-z0-9]*_//ig; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d; s/C._//g' \
+ 5way.nh > tmp.nh
+ echo `cat tmp.nh` > tree-commas.nh
+ echo `cat tree-commas.nh` | sed 's/ //g; s/,/ /g' > tree.nh
+ sed 's/[()]//g; s/,/ /g' tree.nh > species.list
+
+ mkdir maf run
+ cd run
+
+ # NOTE: set the db and pairs directories in this script
+ cat > autoMultiz.csh << '_EOF_'
+#!/bin/csh -ef
+set db = ce8
+set c = $1
+set result = $2
+set run = `pwd`
+set tmp = $run/tmp/$db/multiz.$c
+set nway = /hive/data/genomes/ce8/bed/multiz5way
+set pairs = $nway/mafLinks
+/bin/rm -fr $tmp
+/bin/mkdir -p $tmp
+/bin/cp -p $nway/tree.nh $nway/species.list $tmp
+pushd $tmp
+foreach s (`sed -e "s/ $db//" species.list`)
+ set in = $pairs/$s/$c.maf
+ set out = $db.$s.sing.maf
+ if (-e $in.gz) then
+ /bin/zcat $in.gz > $out
+ if (! -s $out) then
+ echo "##maf version=1 scoring=autoMZ" > $out
+ endif
+ else if (-e $in) then
+ ln -s $in $out
+ else
+ echo "##maf version=1 scoring=autoMZ" > $out
+ endif
+end
+set path = ($nway/penn $path); rehash
+$nway/penn/autoMZ + T=$tmp E=$db "`cat tree.nh`" $db.*.sing.maf $c.maf
+popd
+/bin/rm -f $result
+/bin/cp -p $tmp/$c.maf $result
+/bin/rm -fr $tmp
+/bin/rmdir --ignore-fail-on-non-empty $run/tmp/$db
+/bin/rmdir --ignore-fail-on-non-empty $run/tmp
+'_EOF_'
+# << happy emacs
+ chmod +x autoMultiz.csh
+
+ cat << '_EOF_' > template
+#LOOP
+./autoMultiz.csh $(root1) {check out line+ /hive/data/genomes/ce8/bed/multiz5way/maf/$(root1).maf}
+#ENDLOOP
+'_EOF_'
+# << happy emacs
+
+ awk '{print $1}' /cluster/data/ce8/chrom.sizes > chrom.lst
+ gensub2 chrom.lst single template jobList
+ para create jobList
+ para -maxNode=1 push
+ para check ... push ... etc ...
+# Completed: 7 of 7 jobs
+# CPU time in finished jobs: 1517s 25.28m 0.42h 0.02d 0.000 y
+# IO & Wait Time: 124s 2.07m 0.03h 0.00d 0.000 y
+# Average job time: 234s 3.91m 0.07h 0.00d
+# Longest finished job: 334s 5.57m 0.09h 0.00d
+# Submission to last job: 348s 5.80m 0.10h 0.00d
+
+ cd /hive/data/genomes/ce8/bed/multiz5way
+ time nice -n +19 catDir maf > multiz5way.maf
+ # a few seconds to produce:
+ # -rw-rw-r-- 1 321149265 Jul 28 14:53 multiz5way.maf
+
+ # before getting to the annotation, load this up so we can get
+ # a first view of the track. This will be replaced with the annotated
+ # mafs
+ ssh hgwdev
+ cd /hive/data/genomes/ce8/bed/multiz5way
+ mkdir /gbdb/ce8/multiz5way
+ ln -s /hive/data/genomes/ce8/bed/multiz5way/multiz5way.maf \
+ /gbdb/ce8/multiz5way
+
+ # this load creates a large file, do that on local disk:
+ cd /scratch/tmp
+ time nice -n +19 hgLoadMaf ce8 multiz5way
+ # a few seconds to load:
+ # Loaded 327164 mafs in 1 files from /gbdb/ce8/multiz5way
+
+ time nice -n +19 hgLoadMafSummary -minSize=10000 -mergeGap=500 \
+ -maxSize=50000 ce8 multiz5waySummary /gbdb/ce8/multiz5way/multiz5way.maf
+ # a few seconds to load:
+ # Created 111894 summary blocks from 850001 components and 327164 mafs
+ # from /gbdb/ce8/multiz5way/multiz5way.maf
+
+ # remove the temporary .tab files:
+ rm multiz5*.tab
+############################################################################