src/hg/makeDb/doc/gasAcu1.txt 1.39

1.39 2009/09/24 20:55:31 hiram
Finish tetNig2 chain/net
Index: src/hg/makeDb/doc/gasAcu1.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/gasAcu1.txt,v
retrieving revision 1.38
retrieving revision 1.39
diff -b -B -U 1000000 -r1.38 -r1.39
--- src/hg/makeDb/doc/gasAcu1.txt	20 Sep 2009 17:16:43 -0000	1.38
+++ src/hg/makeDb/doc/gasAcu1.txt	24 Sep 2009 20:55:31 -0000	1.39
@@ -1,2697 +1,2791 @@
 # for emacs: -*- mode: sh; -*-
 
 # Gasterosteus aculeatus - Broad Institute 1.0
 
 # Jeff McKinnon gave permission for photo 8/16/06 and asked for notification 
 # when the browser is public --
 # e-mail: mckinnoj@uww.edu   http://facstaff.uww.edu/mckinnoj/mckinnon.html
 
 
 #########################################################################
 # DOWNLOAD SEQUENCE (DONE 8/15/06 angie)
     ssh kkstore05
     mkdir /cluster/store12/gasAcu1
     ln -s /cluster/store12/gasAcu1 /cluster/data/gasAcu1
     mkdir /cluster/data/gasAcu1/downloads
     cd /cluster/data/gasAcu1/downloads
     foreach f (Stickleback1.0.agp \
                Stickleback1.0.agp.chromosome.fasta.gz \
                Stickleback1.0.agp.chromosome.qual.gz \
                assembly.agp \
                assembly.bases.gz \
                assembly.ps)
       wget --timestamping \
         ftp://ftp.broad.mit.edu/pub/assemblies/fish/stickleback/gasAcu1/$f
     end
     faSize Stickleback1.0.agp.chromosome.fasta.gz 
 #463338706 bases (16726587 N's 446612119 real 446612119 upper 0 lower) in 105 sequences in 1 files
 #Total size: mean 4412749.6 sd 1314228.9 min 83130 (XIII.20000001-20083130) max 5000000 (I.1-5000000) median 5000000
 #N count: mean 159300.8 sd 213368.9
 #U count: mean 4253448.8 sd 1311287.4
 #L count: mean 0.0 sd 0.0
     # Doh!  Their chromosome fasta is chunked into 5M pieces.  Not so 
     # useful for us.
     # AGP and FA use different names, too.  AGP has "groupI" and FA has "I".
     # A lot of our software really wants to see "chr" at the beginning.
     # So make AGP with substituted names and use that to construct the 
     # chrom FA.
     sed -e 's/^group/chr/' Stickleback1.0.agp > UCSC.gasAcu1.agp
     # Their chromosome qual is chunked into 5M pieces too.  (There is also
     # an assembly.agp.qual.gz that has *scaffolds* chunked.)  Unchunk, 
     # relying on the fact that chunks are in order in the file:
     zcat Stickleback1.0.agp.chromosome.qual.gz \
     | perl -wpe 's/^>(\S+)\.1-\d+.*/>chr$1/;  s/^>\S+\.\d+-\d+.*\n$//;' \
       > UCSC.gasAcu1.qual
 
 
 #########################################################################
 # MAKE GENOME DB (DONE 8/17/06 angie)
     ssh kkstore05
     cd /cluster/data/gasAcu1
     cat > gasAcu1.config.ra <<EOF
 # Config parameters for makeGenomeDb.pl:
 db gasAcu1
 clade vertebrate
 genomeCladePriority 120
 scientificName Gasterosteus aculeatus
 commonName Stickleback
 assemblyDate Feb. 2006
 assemblyLabel Broad Institute gasAcu1 (1.0)
 orderKey 40
 mitoAcc 16356995
 fastaFiles /cluster/data/gasAcu1/downloads/assembly.bases.gz
 agpFiles /cluster/data/gasAcu1/downloads/UCSC.gasAcu1.agp
 qualFiles /cluster/data/gasAcu1/downloads/UCSC.gasAcu1.qual
 dbDbSpeciesDir stickleback
 EOF
 
     # Run with -debug to see what it would do / check for issues:
     ~kent/src/utils/makeGenomeDb.pl gasAcu1.config.ra -debug -verbose=3
 
     # Run it for real:
     ~kent/src/utils/makeGenomeDb.pl gasAcu1.config.ra \
       >& makeGenomeDb.log & tail -f makeGenomeDb.log
     # Follow the directions at the end of the log after
 #NOTES -- STUFF THAT YOU WILL HAVE TO DO --
 
 
 #########################################################################
 # REPEATMASKER (DONE (but might need better libs??) 8/16/06 angie)
     ssh kkstore05
     # Run -debug to create the dir structure and preview the scripts:
     ~/kent/src/utils/doRepeatMasker.pl gasAcu1 -verbose 3 -debug
     # Run it for real and tail the log:
     cd /cluster/data/gasAcu1/bed/RepeatMasker.2006-08-16
     ~/kent/src/utils/doRepeatMasker.pl gasAcu1 -verbose 3 \
       >& do.log & tail -f do.log
     # RM version info from do.log:
 #grep version of RepeatMasker$ /cluster/bluearc/RepeatMasker/RepeatMasker
 ##    March 20 2006 (open-3-1-5) version of RepeatMasker
 #grep RELEASE /cluster/bluearc/RepeatMasker/Libraries/RepeatMaskerLib.embl
 #CC   RELEASE 20060315;                                            *
     # The cluster job crashed because no repeats were found on chrM 
     # so check out line+ failed.  NBD.  Manually made run.cluster/run.time
     # and continued:
     ~/kent/src/utils/doRepeatMasker.pl gasAcu1 -verbose 3 -continue cat \
       >>& do.log & tail -f do.log
     ssh hgwdev
     featureBits gasAcu1 rmsk
 #11005259 bases of 446627861 (2.464%) in intersection
     # Wow, that is awfully skimpy.  Stickleback is a ~675M vertebrate genome,
     # probably pretty compact, but still...
     # RepeatMaskerLib.embl has only 2 stickleback-specific repeats.
 
 
 #########################################################################
 # REPEATMASKER - FUGU LIBS (DONE 8/17/06 angie)
     # RepeatMasker with stickleback as the species had very low coverage.
     # I found a paper on stickleback sex determination where they studied
     # repeats and they said they used fugu libs, so give that a try...
     # PMID: 15324658
     # http://www.sciencedirect.com/science?_ob=ArticleURL&_udi=B6VRT-4D56PX0-N&_coverDate=08%2F24%2F2004&_alid=435874407&_rdoc=1&_fmt=&_orig=search&_qd=1&_cdi=6243&_sort=d&view=c&_acct=C000059601&_version=1&_urlVersion=0&_userid=4428&md5=e5d62090265bd7b92b95f8f4cda5e079
     ssh kkstore05
     # Run -debug to create the dir structure and preview the scripts:
     ~/kent/src/utils/doRepeatMasker.pl gasAcu1 -species fugu -verbose 3 -debug
     # Run it for real and tail the log:
     cd /cluster/data/gasAcu1/bed/RepeatMasker.2006-08-17
     ~/kent/src/utils/doRepeatMasker.pl gasAcu1 -species fugu -verbose 3 \
       >& do.log & tail -f do.log
     # RM version info from do.log:
 #grep version of RepeatMasker$ /cluster/bluearc/RepeatMasker/RepeatMasker
 ##    March 20 2006 (open-3-1-5) version of RepeatMasker
 #grep RELEASE /cluster/bluearc/RepeatMasker/Libraries/RepeatMaskerLib.embl
 #CC   RELEASE 20060315;                                            *
     featureBits gasAcu1 rmsk
 #16563793 bases of 446627861 (3.709%) in intersection
     # Still seems rather slim, but it's an improvement.
 
 
 #########################################################################
 # SIMPLE REPEATS (TRF) (DONE 8/16/06 angie)
     ssh kolossus
     nice tcsh
     mkdir /cluster/data/gasAcu1/bed/simpleRepeat
     cd /cluster/data/gasAcu1/bed/simpleRepeat
     twoBitToFa ../../gasAcu1.unmasked.2bit stdout \
     | trfBig -trf=/cluster/bin/i386/trf stdin /dev/null \
       -bedAt=simpleRepeat.bed -tempDir=/tmp \
     >& trf.log & tail -f trf.log
     # ~40 minutes
     # Make a filtered version for sequence masking:
     awk '{if ($5 <= 12) print;}' simpleRepeat.bed > trfMask.bed
     splitFileByColumn trfMask.bed trfMaskChrom
 
     # Load unfiltered repeats into the database:
     ssh hgwdev
     hgLoadBed gasAcu1 simpleRepeat \
       /cluster/data/gasAcu1/bed/simpleRepeat/simpleRepeat.bed \
       -sqlTable=$HOME/kent/src/hg/lib/simpleRepeat.sql
     featureBits gasAcu1 simpleRepeat
 #11062299 bases of 446627861 (2.477%) in intersection
 
 
 #########################################################################
 # MASK SEQUENCE WITH FILTERED TRF IN ADDITION TO RM (DONE 8/16/06 angie)
     ssh kolossus
     cd /cluster/data/gasAcu1
     time twoBitMask gasAcu1.rmsk.2bit -add bed/simpleRepeat/trfMask.bed gasAcu1.2bit
     # This warning is OK -- the extra fields are not block coordinates.
 #Warning: BED file bed/simpleRepeat/trfMask.bed has >=13 fields which means it might contain block coordinates, but this program uses only the first three fields (the entire span -- no support for blocks).
 #0.372u 0.716s 0:04.34 24.8%     0+0k 0+0io 0pf+0w
     # Link to it from /gbdb:
     ssh hgwdev
     ln -s /cluster/data/gasAcu1/gasAcu1.2bit /gbdb/gasAcu1/gasAcu1.2bit
 
 
 #########################################################################
 # MAKE DOWNLOADABLE / GOLDENPATH FILES (DONE 8/17/06 angie)
     cd /cluster/data/gasAcu1
     ~/kent/src/utils/makeDownloads.pl gasAcu1 -verbose=2 \
       >& jkStuff/downloads.log & tail -f jkStuff/downloads.log
     # Edit README.txt files when done:
 # *** Edit each README.txt to resolve any notes marked with "***":
 #     /cluster/data/gasAcu1/goldenPath/database/README.txt
 #     /cluster/data/gasAcu1/goldenPath/bigZips/README.txt
 #     /cluster/data/gasAcu1/goldenPath/chromosomes/README.txt
 
 
 #########################################################################
 # BLASTZ/CHAIN/NET DANRER4 (DONE 10/17/06 angie)
     ssh kkstore05
     mkdir /cluster/data/gasAcu1/bed/blastz.danRer4.2006-10-16
     cd /cluster/data/gasAcu1/bed/blastz.danRer4.2006-10-16
     cat << '_EOF_' > DEF
 # Stickleback vs. Zebrafish
 
 # Try "human-fugu" (more distant, less repeat-killed than mammal) params +M=50:
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_M=50
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET: Stickleback gasAcu1
 SEQ1_DIR=/san/sanvol1/scratch/gasAcu1/gasAcu1.2bit
 SEQ1_CHUNK=10000000
 SEQ1_LAP=10000
 SEQ1_LEN=/cluster/data/gasAcu1/chrom.sizes
 
 # QUERY: Zebrafish danRer4
 SEQ2_DIR=/san/sanvol1/scratch/danRer4/danRer4.2bit
 SEQ2_CTGDIR=/san/sanvol1/scratch/danRer4/danRer4ChrUnNAScafs.2bit
 SEQ2_LIFT=/san/sanvol1/scratch/danRer4/liftNAandUnScaffoldsToChrom.lft
 SEQ2_LEN=/cluster/data/danRer4/chrom.sizes
 SEQ2_CTGLEN=/san/sanvol1/scratch/danRer4/chromsUnNAScafs.sizes
 SEQ2_CHUNK=30000000
 SEQ2_LAP=0
 SEQ2_LIMIT=100
 
 BASE=/cluster/data/gasAcu1/bed/blastz.danRer4.2006-10-16
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << this line keeps emacs coloring happy
     doBlastzChainNet.pl DEF -chainMinScore=5000 -chainLinearGap=loose \
       -bigClusterHub pk \
       -blastzOutRoot /cluster/bluearc/gasAcu1danRer4 >& do.log &
     tail -f do.log
     ln -s blastz.danRer4.2006-10-16 /cluster/data/gasAcu1/bed/blastz.danRer4
     nice featureBits gasAcu1 -chrom=chrI chainDanRer4Link
 #9296061 bases of 27557028 (33.734%) in intersection
 
     time nice -n 19 featureBits gasAcu1 chainDanRer4Link \
 	> fb.gasAcu1.chainDanRer4Link.txt 2>&1
     #	157049518 bases of 446627861 (35.163%) in intersection
 
     #	I'm curious what a swap would look like on this one
     #	(DONE - 2006-12-08 - Hiram)
     ssh kkstore04
     mkdir /cluster/data/danRer4/bed/blastz.gasAcu1.swap
     cd /cluster/data/danRer4/bed/blastz.gasAcu1.swap
     time doBlastzChainNet.pl \
 	/cluster/data/gasAcu1/bed/blastz.danRer4.2006-10-16/DEF \
 	-chainMinScore=5000 -chainLinearGap=loose \
 	-verbose=2 -swap -bigClusterHub=pk \
 	-blastzOutRoot /cluster/bluearc/gasAcu1danRer4 > swap.log 2>&1 &
     #	real    38m51.527s
 
     ssh hgwdev
     cd /cluster/data/danRer4/bed/blastz.gasAcu1.swap
     time nice -n 19 featureBits danRer4 chainGasAcu1Link \
 	> fb.danRer4.chainGasAcu1Link.txt 2>&1
     #	213083380 bases of 1626093931 (13.104%) in intersection
 
 #########################################################################
 # BLASTZ/CHAIN/NET TETNIG1 (DONE 10/17/06 angie)
     ssh kkstore05
     mkdir /cluster/data/gasAcu1/bed/blastz.tetNig1.2006-10-17
     cd /cluster/data/gasAcu1/bed/blastz.tetNig1.2006-10-17
     cat << '_EOF_' > DEF
 # Stickleback vs. Tetraodon
 
 # Try "human-fugu" (more distant, less repeat-killed than mammal) params
 # +M=50:
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_M=50
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET: Stickleback gasAcu1
 SEQ1_DIR=/san/sanvol1/scratch/gasAcu1/gasAcu1.2bit
 SEQ1_CHUNK=10000000
 SEQ1_LAP=10000
 SEQ1_LEN=/cluster/data/gasAcu1/chrom.sizes
 
 # QUERY: Tetraodon tetNig1
 SEQ2_DIR=/san/sanvol1/scratch/tetNig1/tetNig1.2bit
 SEQ2_CTGDIR=/san/sanvol1/scratch/tetNig1/chromsAndScafs/tetNig1ChromsRandomScafs.2bit
 SEQ2_LIFT=/san/sanvol1/scratch/tetNig1/chromsAndScafs/chromsAndScafs.lft
 SEQ2_LEN=/cluster/data/tetNig1/chrom.sizes
 SEQ2_CTGLEN=/san/sanvol1/scratch/tetNig1/chromsAndScafs/chromsAndScafs.sizes
 SEQ2_CHUNK=30000000
 SEQ2_LAP=0
 SEQ2_LIMIT=100
 
 BASE=/cluster/data/gasAcu1/bed/blastz.tetNig1.2006-10-17
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << this line keeps emacs coloring happy
     doBlastzChainNet.pl DEF -chainMinScore=5000 -chainLinearGap=loose \
       -bigClusterHub pk \
       -blastzOutRoot /cluster/bluearc/gasAcu1tetNig1 >& do.log &
     tail -f do.log
     ln -s blastz.tetNig1.2006-10-17 /cluster/data/gasAcu1/bed/blastz.tetNig1
     nice featureBits gasAcu1 -chrom=chrI chainTetNig1Link
 #11175314 bases of 27557028 (40.553%) in intersection
 
     time nice -n 19 featureBits gasAcu1 chainTetNig1Link \
 	> fb.gasAcu1.chainTetNig1Link.txt 2>&1 &
     #	187779775 bases of 446627861 (42.044%) in intersection
 
     #	I'm curious what a swap would look like on this one
     #	(DONE - 2006-12-08 - Hiram)
     ssh kkstore04
     mkdir /cluster/data/tetNig1/bed/blastz.gasAcu1.swap
     cd /cluster/data/tetNig1/bed/blastz.gasAcu1.swap
     doBlastzChainNet.pl \
 	/cluster/data/gasAcu1/bed/blastz.tetNig1.2006-10-17/DEF \
 	-chainMinScore=5000 -chainLinearGap=loose \
 	-verbose=2 -swap -bigClusterHub pk \
 	-blastzOutRoot /cluster/bluearc/gasAcu1tetNig1 > swap.log 2>&1 &
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/blastz.gasAcu1.swap
     time nice -n 19 featureBits tetNig1 chainGasAcu1Link \
 	> fb.tetNig1.chainGasAcu1Link.txt 2>&1 &
     #	171216660 bases of 342403326 (50.004%) in intersection
 
 #########################################################################
 # BLASTZ/CHAIN/NET HG18 (DONE 10/17/06 angie)
     ssh kkstore05
     mkdir /cluster/data/gasAcu1/bed/blastz.hg18.2006-10-17
     cd /cluster/data/gasAcu1/bed/blastz.hg18.2006-10-17
     cat << '_EOF_' > DEF
 # Stickleback vs. Human
 
 # Try "human-fugu" (more distant, less repeat-killed than mammal) params
 # +M=50:
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_M=50
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET: Stickleback gasAcu1
 SEQ1_DIR=/san/sanvol1/scratch/gasAcu1/gasAcu1.2bit
 SEQ1_CHUNK=10000000
 SEQ1_LAP=10000
 SEQ1_LEN=/cluster/data/gasAcu1/chrom.sizes
 
 # QUERY: Human hg18
 SEQ2_DIR=/scratch/hg/hg18/hg18.2bit
 SEQ2_LEN=/cluster/data/hg18/chrom.sizes
 SEQ2_CHUNK=30000000
 SEQ2_LAP=0
 SEQ2_LIMIT=100
 
 BASE=/cluster/data/gasAcu1/bed/blastz.hg18.2006-10-17
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << this line keeps emacs coloring happy
     doBlastzChainNet.pl DEF -chainMinScore=2000 -chainLinearGap=loose \
       -bigClusterHub pk \
       -blastzOutRoot /cluster/bluearc/gasAcu1hg18 >& do.log &
     tail -f do.log
     ln -s blastz.hg18.2006-10-17 /cluster/data/gasAcu1/bed/blastz.hg18
     nice featureBits gasAcu1 -chrom=chrI chainHg18Link
 #2973001 bases of 27557028 (10.789%) in intersection
 
     ssh hgwdev
     cd /cluster/data/gasAcu1/bed/blastz.hg18.2006-10-17
     time nice -n 19 featureBits gasAcu1 chainHg18Link \
 	> fb.gasAcu1.chainHg18Link.txt 2>&1 &
     #	50705450 bases of 446627861 (11.353%) in intersection
 
     #	Been swapped
     ssh hgwdev
     cd /cluster/data/hg18/bed/blastz.gasAcu1.swap
     time nice -n 19 featureBits hg18 chainGasAcu1Link \
 	> fb.hg18.chainGasAcu1Link.txt 2>&1 &
     #	55424609 bases of 2881515245 (1.923%) in intersection
 
 #########################################################################
 # BLASTZ/CHAIN/NET MM8 (DONE 10/17/06 angie)
     ssh kkstore05
     mkdir /cluster/data/gasAcu1/bed/blastz.mm8.2006-10-17
     cd /cluster/data/gasAcu1/bed/blastz.mm8.2006-10-17
     cat << '_EOF_' > DEF
 # Stickleback vs. Mouse
 
 # Try "human-fugu" (more distant, less repeat-killed than mammal) params
 # +M=50:
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_M=50
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET: Stickleback gasAcu1
 SEQ1_DIR=/san/sanvol1/scratch/gasAcu1/gasAcu1.2bit
 SEQ1_CHUNK=10000000
 SEQ1_LAP=10000
 SEQ1_LEN=/cluster/data/gasAcu1/chrom.sizes
 
 # QUERY: Mouse mm8
 SEQ2_DIR=/scratch/hg/mm8/mm8.2bit
 SEQ2_LEN=/cluster/data/mm8/chrom.sizes
 SEQ2_CHUNK=30000000
 SEQ2_LAP=0
 SEQ2_LIMIT=100
 
 BASE=/cluster/data/gasAcu1/bed/blastz.mm8.2006-10-17
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << this line keeps emacs coloring happy
     doBlastzChainNet.pl DEF -chainMinScore=2000 -chainLinearGap=loose \
       -bigClusterHub pk \
       -blastzOutRoot /cluster/bluearc/gasAcu1mm8 >& do.log &
     tail -f do.log
     ln -s blastz.mm8.2006-10-17 /cluster/data/gasAcu1/bed/blastz.mm8
     nice featureBits gasAcu1 -chrom=chrI chainMm8Link
 #2981604 bases of 27557028 (10.820%) in intersection
 
     ssh hgwdev
     cd /cluster/data/gasAcu1/bed/blastz.mm8.2006-10-17
     time nice -n 19 featureBits gasAcu1 chainMm8Link \
 	> fb.gasAcu1.chainMm8Link.txt 2>&1 &
 
 #########################################################################
 # MAKE 11.OOC FILE FOR BLAT (DONE 11/14/06 angie)
     # Use -repMatch=200 (based on size -- for human we use 1024, and 
     # stickleback size is ~15% of human judging by gapless gasAcu1 vs. hg18 
     # genome sizes from featureBits.  Bump it up a bit to be conservative.
     ssh kolossus
     blat /cluster/data/gasAcu1/gasAcu1.2bit /dev/null /dev/null -tileSize=11 \
       -makeOoc=/cluster/bluearc/gasAcu1/11.ooc -repMatch=200
 #Wrote 5700 overused 11-mers to /cluster/bluearc/gasAcu1/11.ooc
 
 
 #########################################################################
 # GENBANK AUTO UPDATE (DONE 11/15/06 angie)
     # Make a liftAll.lft that specifies 5M chunks for genbank:
     ssh kkstore05
     cd /cluster/data/gasAcu1
     simplePartition.pl gasAcu1.2bit 5000000 /tmp/gasAcu1P
     cat /tmp/gasAcu1P/*/*.lft > jkStuff/liftAll.lft
     rm -r /tmp/gasAcu1P
 
     # align with latest genbank process.
     ssh hgwdev
     cd ~/kent/src/hg/makeDb/genbank
     cvsup
     # edit etc/genbank.conf to add gasAcu1 just after dm2...
 
 # gasAcu1 (G. Aculeatus)
 gasAcu1.serverGenome = /cluster/data/gasAcu1/gasAcu1.2bit
 gasAcu1.clusterGenome = /iscratch/i/gasAcu1/gasAcu1.2bit
 gasAcu1.refseq.mrna.native.pslCDnaFilter  = ${ordered.refseq.mrna.native.pslCDnaFilter}
 gasAcu1.refseq.mrna.xeno.pslCDnaFilter    = ${ordered.refseq.mrna.xeno.pslCDnaFilter}
 gasAcu1.genbank.mrna.native.pslCDnaFilter = ${ordered.genbank.mrna.native.pslCDnaFilter}
 gasAcu1.genbank.mrna.xeno.pslCDnaFilter   = ${ordered.genbank.mrna.xeno.pslCDnaFilter}
 gasAcu1.genbank.est.native.pslCDnaFilter  = ${ordered.genbank.est.native.pslCDnaFilter}
 gasAcu1.ooc = /cluster/bluearc/gasAcu1/11.ooc
 gasAcu1.lift = /cluster/data/gasAcu1/jkStuff/liftAll.lft
 gasAcu1.genbank.mrna.xeno.load = yes
 gasAcu1.downloadDir = gasAcu1
 
     cvs ci -m "Added gasAcu1." etc/genbank.conf
     # update /cluster/data/genbank/:
     make etc-update
 
     # Edit src/lib/gbGenome.c to add new species.
     cvs ci -m "Added G. aculeatus (stickleback)." src/lib/gbGenome.c
     make install-server
 
     ssh kkstore02
     cd /cluster/data/genbank
     nice bin/gbAlignStep -initial gasAcu1 &
     # load database when finished
     ssh hgwdev
     cd /cluster/data/genbank
     nice ./bin/gbDbLoadStep -drop -initialLoad gasAcu1 &
 
     # enable daily alignment and update of hgwdev
     cd ~/kent/src/hg/makeDb/genbank
     cvsup
     # add gasAcu1 to:
         etc/align.dbs
         etc/hgwdev.dbs
     cvs ci -m "Added gasAcu1." etc/align.dbs etc/hgwdev.dbs
     make etc-update
 
 
 #########################################################################
 # EXTRACT CHROM TO SCAFFOLD LIFTUP (DONE 3/6/07 angie)
 # Ensembl genes use scaffold_* instead of chrUn.  Broad doesn't seem to 
 # provide a scaffold->chrom mapping in their download files, but here's
 # a way to extract one from ctg->chrom and ctg->scaffold agp...
     cd /cluster/data/gasAcu1/downloads
     # Distill contig coordinates to a single field that will be the same
     # for ctg->chrom and ctg->scaf maps:
     awk '$5 != "N" {print $6 ":" $7 ":" $8 "\t" $1 "\t" $2 "\t" $3 "\t" $4 "\t" $5 "\t" $9;}' \
       UCSC.gasAcu1.agp \
     | sort > chrom.tmp
     awk '$5 != "N" {print $6 ":" $7 ":" $8 "\t" $1 "\t" $2 "\t" $3 "\t" $4 "\t" $5 "\t" $9;}' \
       assembly.agp \
     | sort > scaf.tmp
     # Join the maps into what is AGP except it has a bunch of lines for 
     # each scaffold that must be collapsed into one line per scaffold:
     join -t "	" -o '1.2 1.3 1.4 1.5 1.6 2.2 2.3 2.4 1.7' chrom.tmp scaf.tmp \
     | sort -k 1,1 -k 2n,2n \
       > tmp.agp
     # Make sure the join went right (same #lines in all):
     wc -l chrom.tmp scaf.tmp tmp.agp
 #  16967 chrom.tmp
 #  16967 scaf.tmp
 #  16967 tmp.agp
     # Now collapse the many per-contig lines for each scaffold into a single
     # line per scaffold.  This produces gapless AGP with sequence numbers 
     # that aren't consecutive (but still monotonically increasing):
     perl -we 'while (<>) { \
                 chomp; @w = split; \
                 if (! defined $lastScaf) { \
                   ($chr, $chrS, $chrE, $seq, $type, \
                    $lastScaf, $firstScafS, $firstScafE, $strand) = @w; \
                   $lastScafS = $firstScafS;  $lastScafE = $firstScafE; \
                 } elsif ($lastScaf ne $w[5]) { \
                   $scafS = ($firstScafS < $lastScafS) ? $firstScafS : $lastScafS;\
                   $scafE = ($firstScafE > $lastScafE) ? $firstScafE : $lastScafE;\
                   print "$chr\t$chrS\t$chrE\t$seq\t$type\t" . \
                         "$lastScaf\t$scafS\t$scafE\t$strand\n"; \
                   ($chr, $chrS, $chrE, $seq, $type, \
                    $lastScaf, $firstScafS, $firstScafE, $strand) = @w; \
                   $lastScafS = $firstScafS;  $lastScafE = $firstScafE; \
                 } else { \
                   ($thisChr,undef,$chrE,undef,undef, \
                    undef,$lastScafS,$lastScafE,undef) = @w; \
                   die "$lastScaf $lastScafE: $thisChr != $chr" \
                     if ($thisChr ne $chr); \
                 } \
               } \
               $scafS = ($firstScafS < $lastScafS) ? $firstScafS : $lastScafS;\
               $scafE = ($firstScafE > $lastScafE) ? $firstScafE : $lastScafE;\
               print "$chr\t$chrS\t$chrE\t$seq\t$type\t" . \
                     "$lastScaf\t$scafS\t$scafE\t$strand\n"; \
               ' tmp.agp \
       > UCSC.chromToScaffoldSansGaps.agp
     # fortunately agpToLift doesn't need gap lines in its input, just frags:
     agpToLift -revStrand < UCSC.chromToScaffoldSansGaps.agp \
       > ../jkStuff/UCSC.chromToScaffoldSansGaps.lft
 
 
 #########################################################################
 # ENSEMBL GENES (DONE 11/27/06 angie - RELOADED 3/8/07 angie)
     ssh hgwdev
     mkdir /cluster/data/gasAcu1/bed/ensembl
     cd /cluster/data/gasAcu1/bed/ensembl
     # Go to www.ensembl.org and click around their evolving interface 
     # to get the following types of data:
     # 1. a tab-separated file that relates Ensembl gene, transcript and
     #    protein IDs.  Save as ensGtp.txt.gz  
     # 2. peptide fasta with transcript IDs in header -> ensemblPep.fa.gz
 
     # They have started dumping GTF files so we can download that directly:
     wget 'ftp://ftp.ensembl.org/pub/current_gasterosteus_aculeatus/data/gtf/*'
 
     # Add "chr" to front of each line in the gene data gtf file to make 
     # it compatible with our software.
     # Also need to lift scaffold coords up to chrUn:
     gunzip -c Gasterosteus_aculeatus.BROADS1.43.gtf.gz \
     | sed -e 's/^group/chr/; s/^MT/chrM/;' \
     | liftUp -type=.gtf stdout ../../jkStuff/UCSC.chromToScaffoldSansGaps.lft \
        carry stdin \
     > ensGene.gtf
     ldHgGene -gtf -genePredExt gasAcu1 ensGene \
       /cluster/data/gasAcu1/bed/ensembl/ensGene.gtf
 
     # strip header line:
     gunzip -c ensGtp.txt.gz | tail +2 > ensGtp.txt
     hgLoadSqlTab gasAcu1 ensGtp ~/kent/src/hg/lib/ensGtp.sql ensGtp.txt
 
     gunzip -c ensemblPep.fa.gz \
     | perl -wpe 's/\|.*//' \
     > ensPep.fa
     # ONE TIME ONLY... seven protein sequences were chopped up in the file 
     # from BioMart and the fragments appeared as competing duplicate 
     # sequences.  Ensembl helpdesk is still looking at it but sent the 
     # correct whole sequences for the seven, so I'm manually editing those 
     # in.  The ID's are ENSGACT00000000021, ENSGACT00000000176, 
     # ENSGACT00000017572, ENSGACT00000011000, ENSGACT00000000042, 
     # ENSGACT00000000087, ENSGACT00000022185.
     hgPepPred gasAcu1 generic ensPep ensPep.fa
 
 
 #########################################################################
 #########################################################################
 
 #########################################################################
 # BLASTZ/CHAIN/NET FR1 (DONE - 2006-12-20 - Hiram)
 ##  no chrUn in gasAcu1, and align to fr1 scaffolds,
 ##	results lifted to fr1 chrUn coordinates
     ssh kkstore04
     mkdir /cluster/data/gasAcu1/bed/blastz.fr1.2006-12-08
     cd /cluster/data/gasAcu1/bed/blastz.fr1.2006-12-08
     cat << '_EOF_' > DEF
 # Stickleback vs. Fugu
 
 # Try "human-fugu" (more distant, less repeat-killed than mammal) params
 # +M=50:
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_M=50
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET: Stippleback gasAcu1, no chrUn in this run
 SEQ1_DIR=/san/sanvol1/scratch/gasAcu1/gasAcu1.noUn.sdTrf.2bit
 SEQ1_LEN=/san/sanvol1/scratch/gasAcu1/gasAcu1.noUn.sdTrf.sizes
 SEQ1_CHUNK=40000000
 SEQ1_LIMIT=30
 SEQ1_LAP=0
 
 # QUERY: Fugu fr1
 #       Align to the scaffolds, results lifed up to chrUn.sdTrf coordinates
 SEQ2_DIR=/san/sanvol1/scratch/fr1/chrUn.sdTrf.2bit
 SEQ2_LEN=/san/sanvol1/scratch/fr1/chrom.sizes
 SEQ2_CTGDIR=/san/sanvol1/scratch/fr1/fr1.UnScaffolds.sdTrf.2bit
 SEQ2_CTGLEN=/san/sanvol1/scratch/fr1/scaffold.sizes
 SEQ2_LIFT=/san/sanvol1/scratch/fr1/UnScaffolds/ordered.lft
 SEQ2_CHUNK=20000000
 SEQ2_LIMIT=30
 SEQ2_LAP=0
 
 BASE=/cluster/data/gasAcu1/bed/blastz.fr1.2006-12-08
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << this line keeps emacs coloring happy
 
     time doBlastzChainNet.pl DEF -chainMinScore=2000 -chainLinearGap=loose \
 	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
 	-bigClusterHub=pk \
 	-blastzOutRoot /cluster/bluearc/gasAcu1Fr1 > do.log 2>&1 &
     #	real    272m33.395s
     #	user    0m0.189s
     #	sys     0m0.163s
 
     ssh hgwdev
     cd /cluster/data/gasAcu1/bed
     ln -s blastz.fr1.2006-12-08 blastz.fr1
     cd blastz.fr1
     time nice -n 19 featureBits gasAcu1 chainFr1Link \
 	> fb.gasAcu1.chainFr1Link.txt 2>&1 &
     #	146655780 bases of 446627861 (32.836%) in intersection
 
     mkdir /cluster/data/fr1/bed/blastz.gasAcu1.swap
     cd /cluster/data/fr1/bed/blastz.gasAcu1.swap
     time doBlastzChainNet.pl \
 	/cluster/data/gasAcu1/bed/blastz.fr1.2006-12-08/DEF \
 	-chainMinScore=2000 -chainLinearGap=loose \
 	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
 	-swap -bigClusterHub=pk > do.log 2>&1 &
 
     time doBlastzChainNet.pl \
 	/cluster/data/gasAcu1/bed/blastz.fr1.2006-12-08/DEF \
 	-continue=net -chainMinScore=2000 -chainLinearGap=loose \
 	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
 	-swap -bigClusterHub=pk > net.log 2>&1 &
     #	real    24m33.761s
     #	user    0m0.125s
     #	sys     0m0.080s
 
     ssh hgwdev
     cd /cluster/data/fr1/bed/blastz.gasAcu1.swap
     time nice -n 19 featureBits fr1 chainGasAcu1Link \
 	> fb.fr1.chainGasAcu1Link.txt 2>&1 &
     #	148005715 bases of 315518167 (46.909%) in intersection
 
 #######################################################################
 ## Swap of oryLat1 alignments to gasAcu1 (DONE - 2006-12-08 - Hiram)
 ## This sequence duplicated in oryLat1.txt too
     ssh kkstore04
     cd /cluster/data/gasAcu1/bed
     rm blastz.oryLat1.swap
     mkdir /cluster/data/gasAcu1/bed/blastz.oryLat1.swap
     cd /cluster/data/gasAcu1/bed/blastz.oryLat1.swap
     time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl -verbose=2 \
 	/cluster/data/oryLat1/bed/blastz.gasAcu1.2006-12-08/DEF \
 	-chainMinScore=2000 -chainLinearGap=loose \
 	-tRepeats=repeats -qRepeats=windowmaskerSdust -bigClusterHub=pk \
 	-swap > swap.log 2>&1 &
     #	real    26m32.549s
 
     ssh hgwdev
     cd /cluster/data/gasAcu1/bed/blastz.oryLat1.swap
     nice -n 19 featureBits -noRandom gasAcu1 chainOryLat1Link \
 	>fb.gasAcu1.oryLat1.txt 2>&1 &
     #	148335558 bases of 391398261 (37.899%) in intersection
 
 ###########################################################################
 ## checking measurements to determine which fish is close to which fish
 ## 2006-12-08 - Hiram
 
 #                         featureBits chainLink measures
 #                                           chainGasAcu1Link  chain linearGap
 #    distance                      on gasAcu1    on other   minScore
 #  1  0.xxxx - tetraodon tetNig1   (% 42.044) (% 50.004)       5000     loose
 #  3  0.xxxx - zebrafish danRer4   (% 35.163) (% 13.104)       5000     loose
 #  4  0.xxxx - fugu fr1            (% 32.836) (% 46.909)       2000     loose
 #  5  0.xxxx - human hg18          (% 11.353) (%  1.923)       2000     loose
 #  6  0.xxxx - mouse mm8           (% 11.070) (%  x.923)       2000
 # from oryLat1.txt:
 #  2  0.xxxx - medaka oryLat1      (% 37.899) (% 28.110)       2000     loose
 ######
 danRer4 #	157049518 bases of 446627861 (35.163%) in intersection
 tetNig1 #	187779775 bases of 446627861 (42.044%) in intersection
 hg18 #	50705450 bases of 446627861 (11.353%) in intersection
 on hg18 #	55424609 bases of 2881515245 (1.923%) in intersection
 on tetNig1 #	171216660 bases of 342403326 (50.004%) in intersection
 on danRer4 #	213083380 bases of 1626093931 (13.104%) in intersection
 fr1 #	146655780 bases of 446627861 (32.836%) in intersection
 on fr1 #	148005715 bases of 315518167 (46.909%) in intersection
 
 #######################################################################
 ## BLASTZ oryLat1 chrUn contigs/scaffolds only (DONE - 2006-12-20 - Hiram)
 ##  This was an experiment, not used
     ssh kkstore05
     mkdir /cluster/data/gasAcu1/bed/blastz.oryLat1.Un.2006-12-20
     cd /cluster/data/gasAcu1/bed/blastz.oryLat1.Un.2006-12-20
     cat << '_EOF_' > DEF
 # Stickleback vs. Medaka, both chrUn random contigs/scaffolds only
 
 # Try "human-fugu" (more distant, less repeat-killed than mammal) params
 # +M=50:
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_M=50
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
     
 # TARGET: Stickleback gasAcu1 chrUn only
 #	chrUn in contigs for this alignment run
 #	The largest is 418,000 bases and there are 5,000 of them.
 SEQ1_DIR=/san/sanvol1/scratch/gasAcu1/gasAcu1.chrUn.sdTrf.2bit
 SEQ1_LEN=/san/sanvol1/scratch/gasAcu1/gasAcu1.chrUn.sdTrf.sizes
 SEQ1_CTGDIR=/san/sanvol1/scratch/gasAcu1/gasAcu1.chrUnContigsOnly.sdTrf.2bit
 SEQ1_CTGLEN=/san/sanvol1/scratch/gasAcu1/gasAcu1.chrUnContigsOnly.sdTrf.sizes
 SEQ1_LIFT=/san/sanvol1/scratch/gasAcu1/chrUn.extraCloneGap.lift
 SEQ1_CHUNK=1000000
 SEQ1_LIMIT=100
 SEQ1_LAP=0
 
 # QUERY: Medaka oryLat1 chrUn only
 #       chrUn in scaffolds for this alignment run
 #	The largest scaffold is only 76,000 bases, there are 36,000 of them
 #	and a total size of 119 million.
 SEQ2_DIR=/san/sanvol1/scratch/oryLat1/oryLat1.chrUn.sdTrf.2bit
 SEQ2_LEN=/san/sanvol1/scratch/oryLat1/oryLat1.chrUn.sdTrf.sizes
 SEQ2_CTGDIR=/san/sanvol1/scratch/oryLat1/oryLat1.UnScaffoldsOnly.sdTrf.2bit
 SEQ2_CTGLEN=/san/sanvol1/scratch/oryLat1/oryLat1.UnScaffoldsOnly.sdTrf.sizes
 SEQ2_LIFT=/san/sanvol1/scratch/oryLat1/chrUn.lift
 SEQ2_CHUNK=1000000
 SEQ2_LIMIT=100
 SEQ2_LAP=0
 
 
 BASE=/cluster/data/gasAcu1/bed/blastz.oryLat1.Un.2006-12-20
 TMPDIR=/scratch/tmp
 '_EOF_'
 
     time doBlastzChainNet.pl -verbose=2 DEF \
 	-chainMinScore=2000 -chainLinearGap=loose \
 	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
 	-blastzOutRoot /cluster/bluearc/gasAcu1OryLat1 \
 	-bigClusterHub=kk > do.log 2>&1 &
     #	real    106m42.410s
     #	user    0m0.087s
     #	sys     0m0.062s
     #	Had some difficulty with the kk kluster, continuing after getting the
     #	jobs completed
     time doBlastzChainNet.pl -verbose=2 DEF \
 	-chainMinScore=2000 -chainLinearGap=loose \
 	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
 	-blastzOutRoot /cluster/bluearc/gasAcu1OryLat1 \
 	-continue=cat -bigClusterHub=kk > cat.log 2>&1 &
 
 #######################################################################
 ## BLASTZ Stickleback chroms vs Medaka random contigs
 ##	(DONE - 2006-12-20 - Hiram)
 ##	This was an experiment, not used
     ssh kkstore05
     mkdir /cluster/data/gasAcu1/bed/blastz.oryLat1.ChrUn.2006-12-20
     cd /cluster/data/gasAcu1/bed/blastz.oryLat1.ChrUn.2006-12-20
     cat << '_EOF_' > DEF
 # Stickleback chroms vs. Medaka random contigs
 
 # Try "human-fugu" (more distant, less repeat-killed than mammal) params
 # +M=50:
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_M=50
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
     
 # TARGET: Stickleback gasAcu1 no chrUn sequence, only chroms
 #       The largest is 32 million bases, there are 22 of them
 SEQ1_DIR=/san/sanvol1/scratch/gasAcu1/gasAcu1.noUn.sdTrf.2bit
 SEQ1_LEN=/san/sanvol1/scratch/gasAcu1/gasAcu1.noUn.sdTrf.sizes
 SEQ1_CHUNK=40000000
 SEQ1_LAP=10000
 
 # QUERY: Medaka oryLat1 chrUn only
 #       chrUn in scaffolds for this alignment run
 #       The largest scaffold is only 76,000 bases, there are 36,000 of them
 #       and a total size of 119 million.
 SEQ2_DIR=/san/sanvol1/scratch/oryLat1/oryLat1.chrUn.sdTrf.2bit
 SEQ2_LEN=/san/sanvol1/scratch/oryLat1/oryLat1.chrUn.sdTrf.sizes
 SEQ2_CTGDIR=/san/sanvol1/scratch/oryLat1/oryLat1.UnScaffoldsOnly.sdTrf.2bit
 SEQ2_CTGLEN=/san/sanvol1/scratch/oryLat1/oryLat1.UnScaffoldsOnly.sdTrf.sizes
 SEQ2_LIFT=/san/sanvol1/scratch/oryLat1/chrUn.lift
 SEQ2_CHUNK=1000000
 SEQ2_LIMIT=40
 SEQ2_LAP=0
 
 
 BASE=/cluster/data/gasAcu1/bed/blastz.oryLat1.ChrUn.2006-12-20
 TMPDIR=/scratch/tmp
 '_EOF_'
 
     time doBlastzChainNet.pl -verbose=2 DEF \
 	-chainMinScore=2000 -chainLinearGap=loose \
 	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
 	-blastzOutRoot /cluster/bluearc/gasAcu1ChrOryLat1Un \
 	-bigClusterHub=kk > do.log 2>&1 &
 
 #######################################################################
 ## BLASTZ Stickleback chroms vs Medaka chroms and random contigs
 ##	(DONE - 2006-12-22 - Hiram)
     ssh kkstore05
     mkdir /cluster/data/gasAcu1/bed/blastz.oryLat1.2006-12-22
     cd /cluster/data/gasAcu1/bed/blastz.oryLat1.2006-12-22
     cat << '_EOF_' > DEF
 # Stickleback chroms vs. Medaka random contigs
 
 # Try "human-fugu" (more distant, less repeat-killed than mammal) params
 # +M=50:
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_M=50
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
     
 # TARGET: Stickleback gasAcu1 no chrUn sequence, only chroms
 #       The largest is 32 million bases, there are 22 of them
 SEQ1_DIR=/san/sanvol1/scratch/gasAcu1/gasAcu1.noUn.sdTrf.2bit
 SEQ1_LEN=/san/sanvol1/scratch/gasAcu1/gasAcu1.noUn.sdTrf.sizes
 SEQ1_CHUNK=40000000
 SEQ1_LAP=10000
 
 # QUERY: Medaka oryLat1 chroms and chrUn in scaffolds
 #       The largest scaffold is only 76,000 bases, there are 36,000 of them
 #       and a total size of 119 million.
 #       The biggest chrom is just under 40 million
 SEQ2_DIR=/san/sanvol1/scratch/oryLat1/oryLat1.sdTrf.2bit
 SEQ2_LEN=/san/sanvol1/scratch/oryLat1/chrom.sizes
 SEQ2_CTGDIR=/san/sanvol1/scratch/oryLat1/oryLat1UnScaffolds.2bit
 SEQ2_CTGLEN=/san/sanvol1/scratch/oryLat1/oryLat1UnScaffolds.sizes
 SEQ2_LIFT=/san/sanvol1/scratch/oryLat1/chrUn.lift
 SEQ2_CHUNK=10000000
 SEQ2_LIMIT=20
 SEQ2_LAP=0
 
 BASE=/cluster/data/gasAcu1/bed/blastz.oryLat1.2006-12-22
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time doBlastzChainNet.pl -verbose=2 DEF \
 	-chainMinScore=2000 -chainLinearGap=loose \
 	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
 	-blastzOutRoot /cluster/bluearc/gasAcu1ChrOryLat1ChrUn \
 	-bigClusterHub=pk > do.log 2>&1 &
     #	real    132m54.403s
     #	user    0m0.213s
     #	sys     0m0.138s
 
     ssh hgwdev
     cd /cluster/data/gasAcu1/bed/blastz.oryLat1.2006-12-22
     time featureBits gasAcu1 chainOryLat1Link \
 	> fb.gasAcu1.chainOryLat1Link.txt 2>&1
     #	164155231 bases of 446627861 (36.754%) in intersection
 
 #########################################################################
 ##  Reorder Fish organisms (DONE - 2006-12-22 - Hiram)
     hgsql -h genome-testdb hgcentraltest \
 	-e "update dbDb set orderKey = 470 where name = 'gasAcu1';"
 
 #########################################################################
 ## 7-Way Multiz (DONE - 2006-12-22 - Hiram)
 ##  replaced by 8-Way done below
     ssh hgwdev
     mkdir /cluster/data/gasAcu1/bed/multiz7way
     cd /cluster/data/gasAcu1/bed/multiz7way
     #	Constructed this tree by pruining the 17-way tree from mm8
     #	multiz17way, and then adding in gasAcu1 and oryLat1 branch
     #	Arbitrarily set 0.2 distances for this added branch
     #	All other distances remain as specified in the 17way.nh
     cat << '_EOF_' > 7way.nh
 ((human_hg18:0.054922,mouse_mm8:0.412790):0.754413,
 (
     ((tetraodon_tetNig1:0.199381,fugu_fr1:0.239894):0.2,
     (stickleback_gasAcu1:0.2,medaka_oryLat1:0.2):0.2):0.292961,
 zebrafish_danRer4:0.782561):0.156067);
 '_EOF_'
     # << happy emacs
 
     /cluster/bin/phast/draw_tree 7way.nh > 7way.ps
     /cluster/bin/phast/all_dists 7way.nh > 7way.distances.txt
     #	Use this output to create the table below
     grep -y gasAcu1 7way.distances.txt | sort -k3,3n
 #
 #	If you can fill in all the numbers in this table, you are ready for
 #	the multiple alignment procedure
 #
 #                         featureBits chainLink measures
 #                                           chainGasAcu1Link  chain linearGap
 #    distance                      on gasAcu1    on other   minScore
 #  1  0.4000 - medaka oryLat1      (% 38.754) (% 27.044)       2000     loose
 #  2  0.7994 - tetraodon tetNig1   (% 42.044) (% 50.004)       5000     loose
 #  3  0.8399 - fugu fr1            (% 32.836) (% 49.909)       2000     loose
 #  4  1.4755 - zebrafish danRer4   (% 35.163) (% 13.104)       5000     loose
 #  6  1.6584 - human hg18          (% 11.353) (%  1.923)       2000     loose
 #  7  2.0162 - mouse mm8           (% 11.070) (%  2.056)       2000
 
     cd /cluster/data/gasAcu1/bed/multiz7way
     #	bash shell syntax here ...
     export H=/cluster/data/gasAcu1/bed
     mkdir mafLinks
     for G in oryLat1 tetNig1 fr1 danRer4 hg18 mm8
     do
 	mkdir mafLinks/$G
 	if [ ! -d ${H}/blastz.${G}/mafNet ]; then
 	echo "missing directory blastz.${G}/mafNet"
 		exit 255
 	fi
 	ln -s ${H}/blastz.$G/mafNet/*.maf.gz ./mafLinks/$G
     done
 
     #	Copy MAFs to some appropriate NFS server for kluster run
     ssh kkstore05
     mkdir /san/sanvol1/scratch/gasAcu1/multiz7way
     cd /san/sanvol1/scratch/gasAcu1/multiz7way
     time rsync -a --copy-links --progress \
 	/cluster/data/gasAcu1/bed/multiz7way/mafLinks/ .
 
     mkdir penn
     cp -p /cluster/bin/penn/multiz.v11.x86_64/multiz-tba/multiz penn
     cp -p /cluster/bin/penn/multiz.v11.x86_64/multiz-tba/maf_project penn
     cp -p /cluster/bin/penn/multiz.v11.x86_64/multiz-tba/autoMZ penn
 
     # the autoMultiz cluster run
     ssh pk
     cd /cluster/data/gasAcu1/bed/multiz7way/
 
     # create species list and stripped down tree for autoMZ
     sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
 	7way.nh > tmp.nh
     echo `cat tmp.nh` > tree-commas.nh
     echo `cat tree-commas.nh` | sed 's/ //g; s/,/ /g' > tree.nh
     sed 's/[()]//g; s/,/ /g' tree.nh > species.lst
 
     mkdir run maf
     cd run
 
     #	NOTE: you need to set the db and multiz dirname properly in this script
     cat > autoMultiz << '_EOF_'
 #!/bin/csh -ef
 set db = gasAcu1
 set c = $1
 set maf = $2
 set binDir = /san/sanvol1/scratch/$db/multiz7way/penn
 set tmp = /scratch/tmp/$db/multiz.$c
 set pairs = /san/sanvol1/scratch/$db/multiz7way
 rm -fr $tmp
 mkdir -p $tmp
 cp ../{tree.nh,species.lst} $tmp
 pushd $tmp
 foreach s (`cat species.lst`)
     set in = $pairs/$s/$c.maf
     set out = $db.$s.sing.maf
     if ($s == $db) then
 	continue
     endif
     if (-e $in.gz) then
 	zcat $in.gz > $out
     else if (-e $in) then
 	cp $in $out
     else
 	echo "##maf version=1 scoring=autoMZ" > $out
     endif
 end
 set path = ($binDir $path); rehash
 $binDir/autoMZ + T=$tmp E=$db "`cat tree.nh`" $db.*.sing.maf $c.maf
 popd
 cp $tmp/$c.maf $maf
 rm -fr $tmp
 '_EOF_'
     # << happy emacs
     chmod +x autoMultiz
 
 cat  << '_EOF_' > template
 #LOOP
 autoMultiz $(root1) {check out line+ /cluster/data/gasAcu1/bed/multiz7way/maf/$(root1).maf}
 #ENDLOOP
 '_EOF_'
     # << happy emacs
 
     awk '{print $1}' /cluster/data/gasAcu1/chrom.sizes > chrom.lst
     gensub2 chrom.lst single template jobList
     para create jobList
     # 23 jobs
     para try ... check ... push ... etc ...
 # Completed: 23 of 23 jobs
 # CPU time in finished jobs:       9086s     151.43m     2.52h    0.11d  0.000 y
 # IO & Wait Time:                   194s       3.24m     0.05h    0.00d  0.000 y
 # Average job time:                 403s       6.72m     0.11h    0.00d
 # Longest finished job:             707s      11.78m     0.20h    0.01d
 # Submission to last job:           707s      11.78m     0.20h    0.01d
 
     #	combine results into a single file for loading and gbdb reference
     ssh kkstore05
     cd /cluster/data/gasAcu1/bed/multiz7way
     time catDir maf > multiz7way.maf
     #	real    0m6.467s
     #	makes an 1.5 Gb file:
     #	-rw-rw-r--  1 1577409197 Dec 22 14:49 multiz7way.maf
 
     #	Create per-chrom individual maf files for downloads
     ssh kkstore05
     cd /cluster/data/gasAcu1/bed/multiz7way
     mkdir mafDownloads
     time for M in maf/chr*.maf
     do
 	B=`basename $M`
 	cp -p ${M} mafDownloads/${B}
 	gzip mafDownloads/${B}
 	echo ${B} done
     done
     #	real    4m58.923s
     #	user    4m41.178s
     #	sys     0m14.387s
 
     #	deliver to downloads
     ssh hgwdev
     ln -s /cluster/data/gasAcu1/bed/multiz7way/mafDownloads \
 	/usr/local/apache/htdocs/goldenPath/gasAcu1/multiz7way
 
     # Load into database
     ssh hgwdev
     cd /cluster/data/gasAcu1/bed/multiz7way
     mkdir /gbdb/gasAcu1/multiz7way
     ln -s /cluster/data/gasAcu1/bed/multiz7way/multiz7way.maf \
 	/gbdb/gasAcu1/multiz7way
     time nice -n +19 hgLoadMaf gasAcu1 multiz7way
     #	Loaded 1925835 mafs in 1 files from /gbdb/gasAcu1/multiz7way
     #	real    0m45.970s
     #	user    0m25.438s
     #	sys     0m8.453s
 
     time nice -n +19 hgLoadMafSummary -minSize=10000 -mergeGap=500 \
 	-maxSize=50000 gasAcu1 multiz7waySummary multiz7way.maf
     #	Created 684283 summary blocks from 5182575 components
     #		and 1925835 mafs from multiz7way.maf
     #	real    0m54.289s
     #	user    0m48.004s
     #	sys     0m1.779s
 
     #	Create tree image for details page
     #	You can get a better image from the phyloGif tool:
     #	http://genome.ucsc.edu/cgi-bin/phyloGif
     cp 7way.nh treeImage.nh
     #	Edit treeImage.nh to make the names as desired
     #	Submit this treeImage.nh to the phyloGif program, or:
     /cluster/bin/phast/draw_tree -b -s treeImage.nh > treeImage.ps
 
 ############################################################################
 # ANNOTATE MULTIZ7WAY MAF AND LOAD TABLES (DONE - 2006-12-22 - Hiram)
 ##  replaced by 8-Way done below
     ssh kolossus
     mkdir /cluster/data/gasAcu1/bed/multiz7way/anno
     cd /cluster/data/gasAcu1/bed/multiz7way/anno
     mkdir maf run
     cd run
     rm -f sizes nBeds
     ln -s  /cluster/data/gasAcu1/chrom.sizes gasAcu1.len
     twoBitInfo -nBed /cluster/data/gasAcu1/gasAcu1.sdTrf.{2bit,N.bed}
     ln -s  /cluster/data/gasAcu1/gasAcu1.sdTrf.N.bed gasAcu1.bed
 
     for DB in `grep -v gasAcu1 /cluster/data/gasAcu1/bed/multiz7way/species.lst`
     do
         ln -s  /cluster/data/${DB}/chrom.sizes ${DB}.len
         ln -s  /cluster/data/${DB}/${DB}.N.bed ${DB}.bed
 	echo ${DB}.bed  >> nBeds
 	echo ${DB}.len  >> sizes
 	echo $DB
     done
 
     echo '#!/bin/csh -ef' > jobs.csh
     echo date >> jobs.csh
     # do smaller jobs first so you can see some progress immediately:
     for F in `ls -1rS ../../maf/*.maf`
     do
      echo nice mafAddIRows -nBeds=nBeds -sizes=sizes $F \
       /cluster/data/gasAcu1/gasAcu1.sdTrf.2bit ../maf/`basename $F` >> jobs.csh
      echo "echo $F" >> jobs.csh
     done 
     echo date >> jobs.csh
     chmod +x jobs.csh
     time ./jobs.csh > jobs.log 2>&1 &
     #	to watch progress;
     tail -f jobs.log
     #	real    19m20.705s
     #	user    7m11.818s
     #	sys     11m31.508s
 
     # Load anno/maf
     ssh hgwdev
     cd /cluster/data/gasAcu1/bed/multiz7way/anno/maf
     mkdir -p /gbdb/gasAcu1/multiz7way/anno/maf
     ln -s /cluster/data/gasAcu1/bed/multiz7way/anno/maf/*.maf \
       /gbdb/gasAcu1/multiz7way/anno/maf
     time nice -n +19 hgLoadMaf \
 	-pathPrefix=/gbdb/gasAcu1/multiz7way/anno/maf gasAcu1 multiz7way
     #	Loaded 2269386 mafs in 23 files from /gbdb/gasAcu1/multiz7way/anno/maf
     #	real    0m52.053s
     #	user    0m35.608s
     #	sys     0m3.762s
 
     # Do the computation-intensive part of hgLoadMafSummary on a workhorse 
     # machine and then load on hgwdev:
     ssh kolossus
     cd /cluster/data/gasAcu1/bed/multiz7way/anno/maf
     time cat *.maf | \
     	nice -n +19 hgLoadMafSummary gasAcu1 -minSize=30000 -mergeGap=1500 \
 	-maxSize=200000 -test multiz7waySummary stdin
 
 #######################################################################
 ## BLASTZ Stickleback chroms vs chicken chroms and random contigs
 ##	(DONE - 2006-12-28 - 2007-01-05 - Hiram)
     ssh kkstore05
     mkdir /cluster/data/gasAcu1/bed/blastz.galGal3.2006-12-28
     cd /cluster/data/gasAcu1/bed/blastz.galGal3.2006-12-28
     cat << '_EOF_' > DEF
 # Stickleback chroms vs. Chicken random contigs
 
 # Try "human-fugu" (more distant, less repeat-killed than mammal) params
 # +M=50:
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_M=50
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
     
 # TARGET: Stickleback gasAcu1 no chrUn sequence, only chroms
 #	The largest is 32 million bases, there are 22 of them
 SEQ1_DIR=/san/sanvol1/scratch/gasAcu1/gasAcu1.noUn.sdTrf.2bit
 SEQ1_LEN=/san/sanvol1/scratch/gasAcu1/gasAcu1.noUn.sdTrf.sizes
 SEQ1_CHUNK=40000000
 SEQ1_LAP=10000
 
 # QUERY: Chicken galGal3 chroms and randoms in contigs
 #	The largest chrom is 200M bases, the largest contig 122,228
 #	The smallest chrom is 1028 bases, smallest contig 54 bases
 #	there are 25,991 chroms and contig pieces
 #	total size 1,089,690,673 bases
 #	47107538 N's 1042583135 real 834274605 upper 208308530 lower
 SEQ2_DIR=/san/sanvol1/scratch/galGal3/galGal3.sdTrf.2bit
 SEQ2_LEN=/san/sanvol1/scratch/galGal3/galGal3.sdTrf.sizes
 SEQ2_CTGDIR=/san/sanvol1/scratch/galGal3/galGal3.randomContigs.sdTrf.2bit
 SEQ2_CTGLEN=/san/sanvol1/scratch/galGal3/galGal3.randomContigs.sdTrf.sizes
 SEQ2_LIFT=/san/sanvol1/scratch/galGal3/randomContigs.lft
 SEQ2_CHUNK=10000000
 SEQ2_LIMIT=20
 SEQ2_LAP=0
 
 BASE=/cluster/data/gasAcu1/bed/blastz.galGal3.2006-12-28
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time doBlastzChainNet.pl -verbose=2 DEF \
 	-smallClusterHub=pk \
 	-chainMinScore=2000 -chainLinearGap=loose \
 	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
 	-blastzOutRoot /cluster/bluearc/gasAcu1ChrGalGal3ChrUn \
 	-bigClusterHub=pk > do.log 2>&1 &
     #	real    53m30.713s
     #	failed during the load step due to difficulties on hgwdev
     #	continuing 2007-01-02
     cd axtChain
     ./loadUp.csh
     cd ..
     time doBlastzChainNet.pl -verbose=2 DEF \
 	-smallClusterHub=pk \
 	-chainMinScore=2000 -chainLinearGap=loose \
 	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
 	-blastzOutRoot /cluster/bluearc/gasAcu1ChrGalGal3ChrUn \
 	-continue=download -bigClusterHub=pk > download.log 2>&1 &
 
     #	And, to swap:
     mkdir /cluster/data/galGal3/bed/blastz.gasAcu1.swap
     cd /cluster/data/galGal3/bed/blastz.gasAcu1.swap
     time doBlastzChainNet.pl -verbose=2 \
 	/cluster/data/gasAcu1/bed/blastz.galGal3.2006-12-28/DEF \
 	-smallClusterHub=pk \
         -chainMinScore=2000 -chainLinearGap=loose \
         -tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
         -swap -bigClusterHub=pk > swap.log 2>&1 &
     #	failed on the net step:
 # HgStepManager: executing step 'net'.
 # netChains: looks like previous stage was not successful
 #	(can't find [galGal3.gasAcu1.]all.chain[.gz]).
     time doBlastzChainNet.pl -verbose=2 \
 	/cluster/data/gasAcu1/bed/blastz.galGal3.2006-12-28/DEF \
 	-smallClusterHub=pk \
         -chainMinScore=2000 -chainLinearGap=loose \
         -tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
         -continue=net -swap -bigClusterHub=pk > net_swap.log 2>&1 &
     #	real    7m11.286s
 
     featureBits gasAcu1 chainGalGal3Link > fb.gasAcu1.chainGalGal2Link.txt 
     #	37154459 bases of 446627861 (8.319%) in intersection
     featureBits galGal3 chainGasAcu1Link > fb.galGal3.chainGasAcu1Link.txt
     #	32781658 bases of 1042591351 (3.144%) in intersection
 
     #  Adding the chrUn result from the galGal3 swapped alignments
     ssh kkstore05
     cd /cluster/data/gasAcu1/bed/blastz.galGal3.2006-12-28/axtChain
     chainSplit chain gasAcu1.galGal3.all.chain.gz
     cp -p ../../blastz.galGal3.swap/axtChain/chain/chrUn.chain chain/chrUn.chain
     ssh hgwdev
     cd /cluster/data/gasAcu1/bed/blastz.galGal3.2006-12-28/axtChain/chain
     hgLoadChain gasAcu1 chrUn_chainGalGal3 chrUn.chain
     Loading 13904 chains into gasAcu1.chrUn_chainGalGal3
 
     ssh kkstore05
     cd /cluster/data/gasAcu1/bed/blastz.galGal3.2006-12-28/axtChain/chain
     chainMergeSort *.chain | gzip -c > ../gasAcu1.galGal3.all.chain.gz
     #	This changes the IDs, but nothing else
     #	reusing the existing netChains with some alterations
     cp netChains.csh netChainsUn.csh
     #	fixup the reference to chrom.sizes and the 2bit file in this script
     #	and running it after moving existing items out of the way
 
 
 #########################################################################
 ## 8-Way Multiz (DONE - 2007-01-02 - 2007-01-05 - Hiram)
 ##  re-done with new chrUn.maf 2007-01-13 - Hiram
 ##  re-done with fr2 instead of fr1 - 2007-02-02 - Hiram
     ssh hgwdev
     mkdir /cluster/data/gasAcu1/bed/multiz8way
     cd /cluster/data/gasAcu1/bed/multiz8way
 
     /cluster/bin/phast/tree_doctor --prune chimp_panTro2,macaque_rheMac2,rat_rn4,rabbit_oryCun1,cow_bosTau2,dog_canFam2,armadillo_dasNov1,elephant_loxAfr1,tenrec_echTel1,monodelphis_monDom4,xenopus_xenTro2 17way.nh
 
     #	use the output of that to manually construct this tree.
     #	Arbitrarily set 0.2 distances for this added branch
     #	All other distances remain as specified in the 17way.nh
 
     cat << '_EOF_' > 8way.nh
 (((human_hg18:0.054922,mouse_mm8:0.412790):0.475049,
 	chicken_galGal3:0.454691):0.279364,
   (((tetraodon_tetNig1:0.199381,fugu_fr2:0.239894):0.2,
     (stickleback_gasAcu1:0.2,medaka_oryLat1:0.2):0.2):0.292961,
 	zebrafish_danRer4:0.782561):0.156067);
 '_EOF_'
     # << happy emacs
 
     #	Use this specification in the phyloGif tool:
     #	http://genome.ucsc.edu/cgi-bin/phyloGif
     #	to obtain a gif image for htdocs/images/phylo/gasAcu1_8way.gif
 
     /cluster/bin/phast/all_dists 8way.nh > 8way.distances.txt
     #	Use this output to create the table below
     grep -y gasAcu1 8way.distances.txt | sort -k3,3n
 #
 #	If you can fill in all the numbers in this table, you are ready for
 #	the multiple alignment procedure
 #
 #                         featureBits chainLink measures
 #                                           chainGasAcu1Link  chain linearGap
 #    distance                      on gasAcu1    on other   minScore
 #  1  0.4000 - medaka oryLat1      (% 38.308) (% 27.044)       2000     loose
 #  2  0.7994 - tetraodon tetNig1   (% 42.044) (% 50.004)       5000     loose
 #  3  0.8399 - fugu fr2            (% 37.574) (% 40.269)       2000     loose
 #  3  0.8399 - fugu fr1            (% 35.925) (% 46.909)       2000     loose
 #  4  1.4755 - zebrafish danRer4   (% 35.163) (% 13.104)       5000     loose
 #  6  1.5831 - chicken galGal3     (%  9.181) (%  3.144)       2000     loose
 #  7  1.6584 - human hg18          (% 11.353) (%  1.923)       2000     loose
 #  8  2.0162 - mouse mm8           (% 11.070) (%  2.056)       2000     loose
 
     cd /cluster/data/gasAcu1/bed/multiz8way
     #	bash shell syntax here ...
     export H=/cluster/data/gasAcu1/bed
     mkdir mafLinks
     for G in oryLat1 tetNig1 fr2 danRer4 galGal3 hg18 mm8
     do
 	mkdir mafLinks/$G
 	if [ ! -d ${H}/blastz.${G}/mafNet ]; then
 	echo "missing directory blastz.${G}/mafNet"
 		exit 255
 	fi
 	ln -s ${H}/blastz.$G/mafNet/*.maf.gz ./mafLinks/$G
     done
 
     #	Copy MAFs to some appropriate NFS server for kluster run
     ssh kkstore05
     mkdir /san/sanvol1/scratch/gasAcu1/multiz8way
     cd /san/sanvol1/scratch/gasAcu1/multiz8way
     time rsync -a --copy-links --progress \
 	/cluster/data/gasAcu1/bed/multiz8way/mafLinks/ .
 
     mkdir penn
     cp -p /cluster/bin/penn/multiz.v11.x86_64/multiz-tba/multiz penn
     cp -p /cluster/bin/penn/multiz.v11.x86_64/multiz-tba/maf_project penn
     cp -p /cluster/bin/penn/multiz.v11.x86_64/multiz-tba/autoMZ penn
 
     # the autoMultiz cluster run
     ssh pk
     cd /cluster/data/gasAcu1/bed/multiz8way/
 
     # create species list and stripped down tree for autoMZ
     sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
 	8way.nh > tmp.nh
     echo `cat tmp.nh` > tree-commas.nh
     echo `cat tree-commas.nh` | sed 's/ //g; s/,/ /g' > tree.nh
     sed 's/[()]//g; s/,/ /g' tree.nh > species.lst
 
     mkdir run maf
     cd run
 
     #	NOTE: you need to set the db and multiz dirname properly in this script
     cat > autoMultiz << '_EOF_'
 #!/bin/csh -ef
 set db = gasAcu1
 set c = $1
 set maf = $2
 set binDir = /san/sanvol1/scratch/$db/multiz8way/penn
 set tmp = /scratch/tmp/$db/multiz.$c
 set pairs = /san/sanvol1/scratch/$db/multiz8way
 rm -fr $tmp
 mkdir -p $tmp
 cp ../{tree.nh,species.lst} $tmp
 pushd $tmp
 foreach s (`cat species.lst`)
     set in = $pairs/$s/$c.maf
     set out = $db.$s.sing.maf
     if ($s == $db) then
 	continue
     endif
     if (-e $in.gz) then
 	zcat $in.gz > $out
     else if (-e $in) then
 	cp $in $out
     else
 	echo "##maf version=1 scoring=autoMZ" > $out
     endif
 end
 set path = ($binDir $path); rehash
 $binDir/autoMZ + T=$tmp E=$db "`cat tree.nh`" $db.*.sing.maf $c.maf
 popd
 cp $tmp/$c.maf $maf
 rm -fr $tmp
 '_EOF_'
     # << happy emacs
     chmod +x autoMultiz
 
 cat  << '_EOF_' > template
 #LOOP
 autoMultiz $(root1) {check out line+ /cluster/data/gasAcu1/bed/multiz8way/maf/$(root1).maf}
 #ENDLOOP
 '_EOF_'
     # << happy emacs
 
     awk '{print $1}' /cluster/data/gasAcu1/chrom.sizes > chrom.lst
     gensub2 chrom.lst single template jobList
     para create jobList
     # 23 jobs
     para try ... check ... push ... etc ...
 # Completed: 23 of 23 jobs
 # CPU time in finished jobs:      11075s     184.58m     3.08h    0.13d  0.000 y
 # IO & Wait Time:                   155s       2.58m     0.04h    0.00d  0.000 y
 # Average job time:                 488s       8.14m     0.14h    0.01d
 # Longest finished job:            1123s      18.72m     0.31h    0.01d
 # Submission to last job:          1123s      18.72m     0.31h    0.01d
 
     #	combine results into a single file for loading and gbdb reference
     ssh kkstore05
     cd /cluster/data/gasAcu1/bed/multiz8way
     time nice -n +19 catDir maf > multiz8way.maf
     #	real    0m7.656s
     #	makes an 1.7 Gb file:
     #	-rw-rw-r--  1 1742307109 Feb  2 16:58 multiz8way.maf
 
     #	Create per-chrom individual maf files for downloads
     #	NOT NECESSARY HERE - DONE LATER WITH THE ANNOTATED MAFS
     ssh kkstore05
     cd /cluster/data/gasAcu1/bed/multiz8way
     mkdir mafDownloads
     time for M in maf/chr*.maf
     do
 	B=`basename $M`
 	cp -p ${M} mafDownloads/${B}
 	gzip mafDownloads/${B}
 	echo ${B} done
     done
     #	real    5m9.273
 
     #	deliver to downloads *!* NOT NECESSARY HERE - DONE LATER WITH
     #		THE ANNOTATED MAFS
     ssh hgwdev
     ln -s /cluster/data/gasAcu1/bed/multiz8way/mafDownloads \
 	/usr/local/apache/htdocs/goldenPath/gasAcu1/multiz8way
 
     # Load into database
     ssh hgwdev
     cd /cluster/data/gasAcu1/bed/multiz8way
     mkdir /gbdb/gasAcu1/multiz8way
     ln -s /cluster/data/gasAcu1/bed/multiz8way/multiz8way.maf \
 	/gbdb/gasAcu1/multiz8way
     time nice -n +19 hgLoadMaf gasAcu1 multiz8way
     #	Loaded 1989046 mafs in 1 files from /gbdb/gasAcu1/multiz8way
     #	real    0m47.121s
 
     time nice -n +19 hgLoadMafSummary -minSize=10000 -mergeGap=500 \
 	-maxSize=50000 gasAcu1 multiz8waySummary multiz8way.maf
     #	Created 775687 summary blocks from 6070904 components
     #		and 2047322 mafs from multiz8way.maf
     #	real    1m0.331s
 
     #	Create tree image for details page
     #	You can get a better image from the phyloGif tool:
     #	http://genome.ucsc.edu/cgi-bin/phyloGif
     # cp 8way.nh treeImage.nh
     #	Edit treeImage.nh to make the names as desired
     #	Submit this treeImage.nh to the phyloGif program, or:
     # /cluster/bin/phast/draw_tree -b -s treeImage.nh > treeImage.ps
 
 ############################################################################
 # ANNOTATE MULTIZ8WAY MAF AND LOAD TABLES (DONE - 2007-01-05 - Hiram)
 ##  re-done with new chrUn.maf 2007-01-13 - Hiram
 ##  re-done with fr2 instead of fr1 - 2007-02-02 - Hiram
 ##  re-done with correct nBeds and sizes files, 2007-03-27 - Hiram
     ssh kolossus
     mkdir /cluster/data/gasAcu1/bed/multiz8way/anno
     cd /cluster/data/gasAcu1/bed/multiz8way/anno
     mkdir maf run
     cd run
     rm -f sizes nBeds
     ln -s  /cluster/data/gasAcu1/chrom.sizes gasAcu1.len
     #	This were done before during the 7-way
     # twoBitInfo -nBed /cluster/data/gasAcu1/gasAcu1.sdTrf.{2bit,N.bed}
     ln -s  /cluster/data/gasAcu1/gasAcu1.sdTrf.N.bed gasAcu1.bed
     for DB in \
 `cat /cluster/data/gasAcu1/bed/multiz8way/species.lst | sed -e "s/ gasAcu1//"`
     do
         ln -s  /cluster/data/${DB}/chrom.sizes ${DB}.len
         ln -s  /cluster/data/${DB}/${DB}.N.bed ${DB}.bed
 	echo ${DB}.bed  >> nBeds
 	echo ${DB}.len  >> sizes
 	echo $DB
     done
 
     echo '#!/bin/csh -ef' > jobs.csh
     echo date >> jobs.csh
     # do smaller jobs first so you can see some progress immediately:
     for F in `ls -1rS ../../maf/*.maf`
     do
      echo mafAddIRows -nBeds=nBeds -sizes=sizes $F \
       /cluster/data/gasAcu1/gasAcu1.sdTrf.2bit ../maf/`basename $F` >> jobs.csh
      echo "echo $F" >> jobs.csh
     done 
     echo date >> jobs.csh
     chmod +x jobs.csh
     time nice -n +19 ./jobs.csh > jobs.log 2>&1 &
     #	to watch progress;
     tail -f jobs.log
     #	this was run on titan (can not do this for larger genomes on titan)
     #	real    48m40.965s
     # Load anno/maf
     ssh hgwdev
     cd /cluster/data/gasAcu1/bed/multiz8way/anno/maf
     mkdir -p /gbdb/gasAcu1/multiz8way/anno/maf
     ln -s /cluster/data/gasAcu1/bed/multiz8way/anno/maf/*.maf \
       /gbdb/gasAcu1/multiz8way/anno/maf
     time nice -n +19 hgLoadMaf \
 	-pathPrefix=/gbdb/gasAcu1/multiz8way/anno/maf gasAcu1 multiz8way
     #	Loaded 2395584 mafs in 23 files from /gbdb/gasAcu1/multiz8way/anno/maf
     #	real    2m16.918s
 
     # Do the computation-intensive part of hgLoadMafSummary on a workhorse 
     # machine and then load on hgwdev:
     ssh kolossus
     cd /cluster/data/gasAcu1/bed/multiz8way/anno/maf
     time cat *.maf | \
     	nice -n +19 hgLoadMafSummary gasAcu1 -minSize=30000 -mergeGap=1500 \
 	-maxSize=200000 -test multiz8waySummary stdin
     #	Created 322344 summary blocks from 6111763 components
     #	and 2395584 mafs from stdin
     #	real    23m49.073s
     #	-rw-rw-r--  1  15063781 Mar 27 12:02 multiz8waySummary.tab
 
     ssh hgwdev
     cd /cluster/data/gasAcu1/bed/multiz8way/anno/maf
     sed -e 's/mafSummary/multiz8waySummary/' ~/kent/src/hg/lib/mafSummary.sql \
       > /tmp/multiz8waySummary.sql
     time nice -n +19 hgLoadSqlTab gasAcu1 multiz8waySummary \
 	~/kent/src/hg/lib/mafSummary.sql multiz8waySummary.tab
     #	real    0m4.525
 
 #########################################################################
+# Adding automatic generation of upstream files (DONE - 2009-08-13 - Hiram)
+    # edit src/hg/makeDb/genbank/genbank.conf to add:
+gasAcu1.upstreamGeneTbl = ensGene
+gasAcu1.upstreamMaf = multiz8way /hive/data/genomes/gasAcu1/bed/multiz8way/species.lst
+
+#########################################################################
 # MULTIZ8WAY DOWNLOADABLES (DONE - 2007-01-05 - Hiram)
 ##  re-done with new chrUn.maf 2007-01-13 - Hiram
 ##  re-done with fr2 in place of fr1 - 2007-02-03 - Hiram
     # Annotated MAF is now documented, so use anno/maf for downloads/
     ssh hgwdev
     mkdir /cluster/data/gasAcu1/bed/multiz8way/mafDownloads
     cd /cluster/data/gasAcu1/bed/multiz8way
 
     # upstream mafs 
     #	rebuilt 2007-12-21 to fix difficulty in mafFrags when species.lst
     #	did not have mm8 as the first one
     #	There isn't any refGene table, using ensGene instead
 for S in 1000 2000 5000
 do
     echo "making upstream${S}.maf"
     nice -n +19 $HOME/bin/$MACHTYPE/featureBits -verbose=2 gasAcu1 \
 	ensGene:upstream:${S} -fa=/dev/null -bed=stdout \
     	| perl -wpe 's/_up[^\t]+/\t0/' | sort -k1,1 -k2,2n \
 	| $HOME/kent/src/hg/ratStuff/mafFrags/mafFrags gasAcu1 multiz8way \
 		stdin stdout -orgs=species.lst \
 	| gzip -c > mafDownloads/ensGene.upstream${S}.maf.gz
     echo "done ensGene.upstream${S}.maf.gz"
 done
 
     ## re-done after correction to anno/maf files 2007-03-27 - Hiram
     ssh kkstore05
     cd /cluster/data/gasAcu1/bed/multiz8way
     time for M in anno/maf/chr*.maf
     do
 	B=`basename $M`
 	nice -n +19 gzip -c ${M} > mafDownloads/${B}.gz
 	echo ${B}.gz done
     done
     #	real    6m1.454
 
     cd mafDownloads
     nice -n +19 md5sum *.maf.gz > md5sum.txt
     # Make a README.txt
 
     ssh hgwdev
     mkdir /usr/local/apache/htdocs/goldenPath/gasAcu1/multiz8way
     cd /usr/local/apache/htdocs/goldenPath/gasAcu1/multiz8way
     ln -s /cluster/data/gasAcu1/bed/multiz8way/mafDownloads/{*.gz,*.txt} .
 
 #######################################################################
 # MULTIZ8WAY MAF FRAMES (DONE - 2007-01-10 - Hiram)
 ##  re-done with new chrUn.maf 2007-01-13 - Hiram
 ##  re-donw with fr2 in place of fr1 - 2007-02-03 - Hiram
     ssh hgwdev
     mkdir /cluster/data/gasAcu1/bed/multiz8way/frames
     cd /cluster/data/gasAcu1/bed/multiz8way/frames
     mkdir genes
     # The following is adapted from the mm8 sequence
 
     #------------------------------------------------------------------------
     # get the genes for all genomes
     # mRNAs with CDS.  single select to get cds+psl, then split that up and
     # create genePred
     # using ensGene for gasAcu1 oryLat1
     for qDB in danRer4 galGal3
     do
 	geneTbl=refGene
       echo hgsql -N -e \"'select * from '$geneTbl\;\" ${qDB}
       hgsql -N -e "select * from $geneTbl" ${qDB} | cut -f 2-100 \
       | genePredSingleCover stdin stdout | gzip -2c \
         > /scratch/tmp/$qDB.tmp.gz
       mv /scratch/tmp/$qDB.tmp.gz genes/$qDB.gp.gz
       rm -f $tmpExt
     done
 
     # using knownGene for mm8 hg18
     # using genscan for tetNig1
     # using ensGene for gasAcu1, oryLat1 and fr2
     # genePreds; (must keep only the first 10 columns for knownGene)
     for qDB in mm8 hg18 gasAcu1 oryLat1 fr2 tetNig1
     do
       if [ $qDB = "gasAcu1" -o $qDB = "oryLat1" -o $qDB = "fr2" ]; then
         geneTbl=ensGene
       elif [ $qDB = "tetNig1" ]; then
         geneTbl=genscan
       else
         geneTbl=knownGene
       fi
       echo hgsql -N -e \"'select * from '$geneTbl\;\" ${qDB}
       hgsql -N -e "select * from $geneTbl" ${qDB} | cut -f 1-10 \
       | genePredSingleCover stdin stdout | gzip -2c \
         > /scratch/tmp/$qDB.tmp.gz
       mv /scratch/tmp/$qDB.tmp.gz genes/$qDB.gp.gz
       rm -f $tmpExt
     done
 
     ###
     
     ssh kkstore05
     cd /cluster/data/gasAcu1/bed/multiz8way/frames
     time cat ../maf/*.maf | nice -n +19 genePredToMafFrames gasAcu1 stdin stdout gasAcu1 genes/gasAcu1.gp.gz oryLat1 genes/oryLat1.gp.gz fr2 genes/fr2.gp.gz tetNig1 genes/tetNig1.gp.gz danRer4 genes/danRer4.gp.gz galGal3 genes/galGal3.gp.gz mm8 genes/mm8.gp.gz hg18 genes/hg18.gp.gz | gzip > multiz8way.mafFrames.gz
     #	real    0m55.706
 
     ssh hgwdev
     cd /cluster/data/gasAcu1/bed/multiz8way/frames
     time nice -n +19 hgLoadMafFrames gasAcu1 multiz8wayFrames \
 	multiz8way.mafFrames.gz
     #	real    0m32.220s
 
 ############################################################################
 # CREATE CONSERVATION WIGGLE WITH PHASTCONS
 #		(DONE - 2007-01-10 - 2007-01-17 - Hiram)
 ## re-done with fr2 in place of fr1 - 2007-02-03 - Hiram
 
 # Estimate phastCons parameters
     ssh kkstore05
     mkdir /cluster/data/gasAcu1/bed/multiz8way/cons
     cd /cluster/data/gasAcu1/bed/multiz8way/cons
 
     twoBitToFa -seq=chrIV ../../../gasAcu1.sdTrf.2bit chrIV.fa
     # Create a starting-tree.mod based on chrIV (the largest one)
     time nice -n +19 /cluster/bin/phast/$MACHTYPE/msa_split ../maf/chrIV.maf \
 	--refseq ./chrIV.fa --in-format MAF \
 	--windows 100000000,1000 --out-format SS \
 	--between-blocks 5000 --out-root s1
     #	26 seconds
 
     time nice -n +19 /cluster/bin/phast/$MACHTYPE/phyloFit -i SS s1.*.ss \
 --tree "(((hg18,mm8),galGal3),(((tetNig1,fr2),(gasAcu1,oryLat1)),danRer4))" \
     --out-root starting-tree
     #	Fitting tree model to s1.1-32632948.ss using REV ...
     #	Writing model to starting-tree.mod ...
     #	real    3m53.084
 
     rm s1.*.ss
     # add up the C and G:
     grep BACKGROUND starting-tree.mod | awk '{printf "%0.3f\n", $3 + $4;}'
     #	0.449
     #	This 0.449 is used in the --gc argument below
 
     ## the fa files are needed for the sequence and they are created during
     #	this loop if they haven't been done before
     # Create SS files on san filesystem
     #	Using a size of 10,000,000 to slow down the phastCons pk jobs
     ssh kkstore05
     mkdir -p  /san/sanvol1/scratch/gasAcu1/cons/ss
     cd  /san/sanvol1/scratch/gasAcu1/cons/ss
     time for C in `awk '{print $1}' /cluster/data/gasAcu1/chrom.sizes`
     do
       if [ -s /cluster/data/gasAcu1/bed/multiz8way/maf/${C}.maf ]; then
 	mkdir -p ${C}
 	mkdir -p /cluster/data/gasAcu1/bed/multiz8way/cons/${C}
 	echo msa_split $C
 	if [ ! -s /cluster/data/gasAcu1/bed/multiz8way/cons/${C}/${C}.fa ]; then
 	    twoBitToFa -seq=${C} /cluster/data/gasAcu1/gasAcu1.sdTrf.2bit \
 		/cluster/data/gasAcu1/bed/multiz8way/cons/${C}/${C}.fa
 	fi
 	chrN=${C/chr/}
 	chrN=${chrN/_random/}
 	nice -n +19 /cluster/bin/phast/$MACHTYPE/msa_split \
 	    /cluster/data/gasAcu1/bed/multiz8way/maf/${C}.maf \
 	    --refseq /cluster/data/gasAcu1/bed/multiz8way/cons/${C}/${C}.fa \
 	    --in-format MAF --windows 10000000,0 --between-blocks 5000 \
 	    --out-format SS --out-root ${C}/${C}
       fi
     done &
     #	real    6m31.721
 
     # Create a random list of 50 1 mb regions  (do not use the _randoms)
     cd /san/sanvol1/scratch/gasAcu1/cons/ss
     ls -1l chr*/chr*.ss | grep -v Un | \
 	awk '$5 > 4000000 {print $9;}' | randomLines stdin 50 ../randomSs.list
 
     # Set up parasol directory to calculate trees on these 50 regions
     ssh pk
     mkdir /san/sanvol1/scratch/gasAcu1/cons/treeRun6
     cd /san/sanvol1/scratch/gasAcu1/cons/treeRun6
     mkdir tree log
 
     #	Tuning this loop should come back to here to recalculate 
     # Create little script that calls phastCons with right arguments
     #	--target-coverage of 0.20 is about right for mouse, will be
     #	tuned exactly below
     cat > makeTree.csh << '_EOF_'
 #!/bin/csh -fe
 set C=$1:h
 mkdir -p log/${C} tree/${C}
     /cluster/bin/phast/$MACHTYPE/phastCons ../ss/$1 \
       /cluster/data/gasAcu1/bed/multiz8way/cons/starting-tree.mod \
       --gc 0.449 --nrates 1,1 --no-post-probs --ignore-missing \
       --expected-length 10 --target-coverage 0.20 \
       --quiet --log log/$1 --estimate-trees tree/$1
 '_EOF_'
     #	<< happy emacs
     chmod a+x makeTree.csh
 
     # Create gensub file
     cat > template << '_EOF_'
 #LOOP
 ./makeTree.csh $(path1)
 #ENDLOOP
 '_EOF_'
     #	<< happy emacs
 
     # Make cluster job and run it
     gensub2 ../randomSs.list single template jobList
     para create jobList
     para try/push/check/etc
 # Completed: 47 of 47 jobs
 # CPU time in finished jobs:      78013s    1300.22m    21.67h    0.90d  0.002 y
 # IO & Wait Time:                   224s       3.73m     0.06h    0.00d  0.000 y
 # Average job time:                1665s      27.74m     0.46h    0.02d
 # Longest finished job:            2123s      35.38m     0.59h    0.02d
 # Submission to last job:          2123s      35.38m     0.59h    0.02d
 
     # Now combine parameter estimates.  We can average the .mod files
     # using phyloBoot.  This must be done separately for the conserved
     # and nonconserved models
     ssh pk
     cd /san/sanvol1/scratch/gasAcu1/cons/treeRun6
     ls -1 tree/chr*/*.cons.mod > cons.list
     /cluster/bin/phast/$MACHTYPE/phyloBoot --read-mods '*cons.list' \
 	--output-average ave.cons.mod > cons_summary.txt 2>&1 &
     ls -1 tree/chr*/*.noncons.mod > noncons.list
     /cluster/bin/phast/$MACHTYPE/phyloBoot --read-mods '*noncons.list' \
 	--output-average ave.noncons.mod > noncons_summary.txt
     cp -p ave.*.mod ..
     cd ..
     cp -p ave.*.mod /cluster/data/gasAcu1/bed/multiz8way/cons
 
     #	measuring entropy
     #	consEntopy <target coverage> <expected lengths>
     #		 ave.cons.mod ave.noncons.mod --NH 9.78
     #	never stops with the --NH argument
     time /cluster/bin/phast/$MACHTYPE/consEntropy --NH 9.7834 \
 	0.20 10 ave.{cons,noncons}.mod
 ## 0.20 10 with fr2
 ( Solving for new omega: 10.000000 12.945684 12.467518 12.456325 )
 
 Transition parameters: gamma=0.200000, omega=10.000000, mu=0.100000, nu=0.025000
 Relative entropy: H=1.364850 bits/site
 Expected min. length: L_min=6.767655 sites
 Expected max. length: L_max=4.721873 sites
 Phylogenetic information threshold: PIT=L_min*H=9.236837 bits
 Recommended expected length: omega=12.456325 sites (for L_min*H=9.783400)
 
 ## 0.20 10 
 ( Solving for new omega: 10.000000 12.922391 12.450309 12.439360 )
 
 Transition parameters: gamma=0.200000, omega=10.000000, mu=0.100000, nu=0.025000
 Relative entropy: H=1.358227 bits/site
 Expected min. length: L_min=6.803722 sites
 Expected max. length: L_max=4.741215 sites
 Phylogenetic information threshold: PIT=L_min*H=9.241002 bits
 Recommended expected length: omega=12.439360 sites (for L_min*H=9.783400)
 
 ## 0.01 12 does not convert, trying without --NH
 Transition parameters: gamma=0.010000, omega=12.000000, mu=0.083333, nu=0.000842
 Relative entropy: H=1.012920 bits/site
 Expected min. length: L_min=15.386530 sites
 Expected max. length: L_max=10.103812 sites
 Phylogenetic information threshold: PIT=L_min*H=15.585327 bits
 
 ## 0.10 12
 # ( Solving for new omega: 12.000000 7.303911 5.603783 4.937054 4.700330 4.651391 4.648962 4.648956 )
 
 # Transition parameters: gamma=0.100000, omega=12.000000, mu=0.083333, nu=0.009259
 # Relative entropy: H=1.200128 bits/site
 # Expected min. length: L_min=9.375672 sites
 # Expected max. length: L_max=6.463613 sites
 # Phylogenetic information threshold: PIT=L_min*H=11.252009 bits
 # Recommended expected length: omega=4.648956 sites (for L_min*H=9.783400)
 
 ## 0.17 12
 # ( Solving for new omega: 12.000000 10.568744 10.445646 10.444628 )
 
 # Trans parameters: gamma=0.170000, omega=12.000000, mu=0.083333, nu=0.017068
 # Relative entropy: H=1.267443 bits/site
 # Expected min. length: L_min=7.976979 sites
 # Expected max. length: L_max=5.664374 sites
 # Phylogenetic information threshold: PIT=L_min*H=10.110368 bits
 # Recommended expected length: omega=10.444628 sites (for L_min*H=9.783400)
 
     /cluster/bin/phast/$MACHTYPE/consEntropy .17 12 \
                         ave.cons.mod ave.noncons.mod
 Transition parameters: gamma=0.170000, omega=12.000000, mu=0.083333, nu=0.017068
 Relative entropy: H=1.253111 bits/site
 Expected min. length: L_min=8.076184 sites
 Expected max. length: L_max=5.736142 sites
 Phylogenetic information threshold: PIT=L_min*H=10.120357 bits
 
 #Transition parameters:gamma=0.100000, omega=12.000000, mu=0.083333, nu=0.009259
 # Relative entropy: H=1.454874 bits/site
 # Required length: N=7.596943 sites
 # Total entropy: NH=11.052595 bits
 
     #	SKIP to here passing by the tuning numbers
     ssh pk
     # Create cluster dir to do main phastCons run
     mkdir /san/sanvol1/scratch/gasAcu1/cons/consRun5
     cd /san/sanvol1/scratch/gasAcu1/cons/consRun5
     mkdir ppRaw bed
 
     # Create script to run phastCons with right parameters
     #	This job is I/O intensive in its output files, thus it is all
     #	working over in /scratch/tmp/
     cat > doPhast << '_EOF_'
 #!/bin/csh -fe
 mkdir /scratch/tmp/${2}
 cp -p ../ss/${1}/${2}.ss ../ave.{cons,noncons}.mod /scratch/tmp/${2}
 pushd /scratch/tmp/${2} > /dev/null
 /cluster/bin/phast/${MACHTYPE}/phastCons ${2}.ss ave.cons.mod,ave.noncons.mod \
    --expected-length 10 --target-coverage 0.20 --quiet \
 	--seqname ${1} --idpref ${1} --viterbi ${2}.bed --score > ${2}.pp
 popd > /dev/null
 mkdir -p ppRaw/${1}
 mkdir -p bed/${1}
 mv /scratch/tmp/${2}/${2}.pp ppRaw/${1}
 mv /scratch/tmp/${2}/${2}.bed bed/${1}
 rm /scratch/tmp/${2}/ave.{cons,noncons}.mod
 rm /scratch/tmp/${2}/${2}.ss
 rmdir /scratch/tmp/${2}
 '_EOF_'
     # << happy emacs
     chmod a+x doPhast
 
     #	root1 == chrom name, file1 == ss file name without .ss suffix
     # Create gsub file
     cat > template << '_EOF_'
 #LOOP
 ./doPhast $(root1) $(file1)
 #ENDLOOP
 '_EOF_'
     #	<< happy emacs
 
     # Create parasol batch and run it
     ls -1 ../ss/chr*/chr*.ss | sed 's/.ss$//' > in.list
 
     gensub2 in.list single template jobList
     para create jobList
     para try/check/push/etc.
     #	These jobs are very fast and very I/O intensive, even on the san
     #	they will hang it up as they work at full tilt.
 # Completed: 58 of 58 jobs
 # CPU time in finished jobs:       1739s      28.99m     0.48h    0.02d  0.000 y
 # IO & Wait Time:                   302s       5.03m     0.08h    0.00d  0.000 y
 # Average job time:                  35s       0.59m     0.01h    0.00d
 # Longest finished job:              47s       0.78m     0.01h    0.00d
 # Submission to last job:            77s       1.28m     0.02h    0.00d
 
     # combine predictions and transform scores to be in 0-1000 interval
     #	it uses a lot of memory, so on kolossus:
     ssh kolossus
     cd /san/sanvol1/scratch/gasAcu1/cons/consRun5
     #	The sed's and the sort get the file names in chrom,start order
     #	You might like to verify it is correct by first looking at the
     #	list it produces:
     find ./bed -type f | sed -e "s#/# x #g; s#\.# y #g; s#-# z #g" \
 	| sort -k7,7 -k9,9n \
 	| sed -e "s# z #-#g; s# y #\.#g; s# x #/#g" | less
     #	if that looks right, then let it run:
     find ./bed -type f | sed -e "s#/# x #g; s#\.# y #g; s#-# z #g" \
 	| sort -k7,7 -k9,9n \
 	| sed -e "s# z #-#g; s# y #\.#g; s# x #/#g" | xargs cat \
 	| awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", $1, $2, $3, $5, $5;}' \
 	| /cluster/bin/scripts/lodToBedScore /dev/stdin > mostConserved8Way.bed
     #	~ 16 seconds
     cp -p mostConserved8Way.bed /cluster/data/gasAcu1/bed/multiz8way
 
     # Figure out how much is actually covered by the bed file as so:
     #	Get the non-n genome size from faSize on all chroms:
     ssh kkstore05
     cd /cluster/data/gasAcu1/bed/multiz8way/cons
     faSize chr*/*.fa
     #	463354448 bases (16726587 N's 446627861 real 348847599
     #	upper 97780262 lower) in 23 sequences in 23 files
 
     cd /san/sanvol1/scratch/gasAcu1/cons/consRun5
     #	The 446627861 comes from the non-n genome as counted above.
     awk '
 {sum+=$3-$2}
 END{printf "%% %.2f = 100.0*%d/446627861\n",100.0*sum/446627861,sum}' \
 	mostConserved8Way.bed
     #	% 11.58 = 100.0*51703035/446627861 --exp-len 10 --tar-cov 0.20 fr2
     #	% 11.49 = 100.0*51326297/446627861 --exp-len 10 --tar-cov 0.20
     #	%  8.36 = 100.0*37347218/446627861 --exp-len 12 --tar-cov 0.01
     #	% 10.24 = 100.0*45733412/446627861 --exp-len 12 --tar-cov 0.10
     #	% 11.45 = 100.0*51136218/446627861 --exp-len 12 --tar-cov 0.17
 
     #	Aiming for %70 coverage in
     #	the following featureBits measurement on CDS:
     # Beware of negative scores when too high.  The logToBedScore
     # will output an error on any negative scores.
 
     HGDB_CONF=~/.hg.conf.read-only time nice -n +19 featureBits gasAcu1 \
 	-enrichment ensGene:cds mostConserved8Way.bed
     #	--expected-length 10 --target-coverage 0.20 fr2
     #	ensGene:cds 6.618%, mostConserved8Way.bed 11.576%, both 4.070%, cover
     #	61.49%, enrich 5.31x
     #	--expected-length 10 --target-coverage 0.20
     #	ensGene:cds 6.618%, mostConserved8Way.bed 11.492%, both 4.041%, cover
     #	61.07%, enrich 5.31x
     #	--expected-length 12 --target-coverage 0.01
     #	ensGene:cds 6.618%, mostConserved8Way.bed 8.362%, both 3.628%, cover
     #	54.82%, enrich 6.56x
     #	--expected-length 12 --target-coverage 0.10
     #	ensGene:cds 6.618%, mostConserved8Way.bed 10.240%, both 3.920%, cover
     #	59.24%, enrich 5.79x
     #	--expected-length 12 --target-coverage 0.17
     #	ensGene:cds 6.618%, mostConserved8Way.bed 11.449%, both 4.092%, cover
     #	61.84%, enrich 5.40x
 
     # Load most conserved track into database
     ssh hgwdev
     cd /cluster/data/gasAcu1/bed/multiz8way
     # ended up using the set: --expected-length 10 --target-coverage 0.20
     time nice -n +19 hgLoadBed -strict gasAcu1 phastConsElements8way \
 	mostConserved8Way.bed
     #	Loaded 1263039 elements of size 5
     #	real    0m29.697
 
     #	should measure the same as above
     time nice -n +19 \
 	featureBits gasAcu1 -enrichment ensGene:cds phastConsElements8way
     #	At: --expected-length 10 --target-coverage 0.20 fr2
     #	ensGene:cds 6.618%, phastConsElements8way 11.576%, both 4.070%, cover
     #	61.49%, enrich 5.31x
     #	At: --expected-length 10 --target-coverage 0.20
     #	ensGene:cds 6.618%, phastConsElements8way 11.492%, both 4.041%, cover
     #	61.07%, enrich 5.31x
     #	At: --expected-length 12 --target-coverage 0.10 result:
     #	ensGene:cds 6.618%, phastConsElements8way 10.240%, both 3.920%, cover
     #	59.24%, enrich 5.79x
     #	At: --expected-length 12 --target-coverage 0.17 result:
     #	ensGene:cds 6.618%, phastConsElements8way 11.449%,
     #	both 4.092%, cover 61.84%, enrich 5.40x
 
     # Create merged posterier probability file and wiggle track data files
     ssh pk
     cd /san/sanvol1/scratch/gasAcu1/cons/consRun5
     # the sed business gets the names sorted by chromName, chromStart
     #	so that everything goes in numerical order into wigEncode
     #	This was verified above to be correct
     time nice -n +19 find ./ppRaw -type f \
 	| sed -e "s#/# x #g; s#\.# y #g; s#-# z #g" \
 	| sort -k7,7 -k9,9n \
 	| sed -e "s# z #-#g; s# y #\.#g; s# x #/#g" | xargs cat \
 	    | $HOME/bin/$MACHTYPE/wigEncode -noOverlap stdin \
 		phastCons8.wig phastCons8.wib
     #	Converted stdin, upper limit 1.00, lower limit 0.00
     #	real    3m55.780
     #	-rw-rw-r--   1 297832681 Feb  3 14:50 phastCons8.wib
     #	-rw-rw-r--   1  49168315 Feb  3 14:50 phastCons8.wig
     time nice -n +19 cp -p phastCons8.wi? /cluster/data/gasAcu1/bed/multiz8way/
 
     #	prepare compressed copy of ascii data values for downloads
     ssh pk
     cd /san/sanvol1/scratch/gasAcu1/cons/consRun5
     cat << '_EOF_' > gzipAscii.sh
 #!/bin/sh
 
 TOP=`pwd`
 export TOP
 
 mkdir -p phastCons8Scores
 
 for D in ppRaw/chr*
 do
     C=${D/ppRaw\/}
     out=phastCons8Scores/${C}.data.gz
     echo "========================== ${C} ${D}"
     find ./${D} -type f | sed -e "s#/# x #g; s#\.# y #g; s#-# z #g" \
 	| sort -k7,7 -k9,9n \
 	| sed -e "s# z #-#g; s# y #\.#g; s# x #/#g" | xargs cat |
 	    gzip > ${out}
 done
 '_EOF_'
     #	<< happy emacs
     chmod +x gzipAscii.sh
     time nice -n +19 ./gzipAscii.sh
     #	real    5m31.855s
 
     #	copy them for downloads
     ssh kkstore05
     #	this directory is actually a symlink from store9 to store8 to
     #	avoid the data full problem on store9
     mkdir /cluster/data/gasAcu1/bed/multiz8way/phastCons8Scores
     cd /cluster/data/gasAcu1/bed/multiz8way/phastCons8Scores
     cp -p  /san/sanvol1/scratch/gasAcu1/cons/consRun5/phastCons8Scores/* .
     # make a README.txt file here, and an md5sum
 
     ssh hgwdev
     mkdir /usr/local/apache/htdocs/goldenPath/gasAcu1/phastCons8Scores
     cd /usr/local/apache/htdocs/goldenPath/gasAcu1/phastCons8Scores
     ln -s /cluster/data/gasAcu1/bed/multiz8way/phastCons8Scores/* .
 
     # Load gbdb and database with wiggle.
     ssh hgwdev
     cd /cluster/data/gasAcu1/bed/multiz8way
     ln -s `pwd`/phastCons8.wib /gbdb/gasAcu1/wib/phastCons8.wib
     # ended up using the set: --expected-length 10 --target-coverage 0.20
     time nice -n +19 hgLoadWiggle gasAcu1 phastCons8 phastCons8.wig
     #	real    0m9.256s
 
     #  Create histogram to get an overview of all the data
     ssh hgwdev
     cd /cluster/data/gasAcu1/bed/multiz8way
     time nice -n +19 hgWiggle -doHistogram \
 	-hBinSize=0.001 -hBinCount=1000 -hMinVal=0.0 -verbose=2 \
 	    -db=gasAcu1 phastCons8 > histogram.data 2>&1
     #	real    0m30.744
 
     #	create plot of histogram:
 
     cat << '_EOF_' | gnuplot > histo.png
 set terminal png small color \
         xffffff x000000 x000000 x444444 xaa4400 x00ffff xff0000
 set size 1.4, 0.8
 set key left box
 set grid noxtics
 set grid ytics
 set title " Stickleback gasAcu1 Histogram phastCons8 track"
 set xlabel " phastCons8 score - --expected-length 10 --target-coverage 0.20"
 set ylabel " Relative Frequency"
 set y2label " Cumulative Relative Frequency (CRF)"
 set y2range [0:1]
 set y2tics
 set yrange [0:0.02]
 
 plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
         "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
 '_EOF_'
     #	<< happy emacs
 
     display histo.png &
 
     #	The mostConserved can also be characterized by a histogram
     awk '{print $3-$2}' mostConserved8Way.bed > mostCons.txt
     textHistogram -verbose=2 -autoScale=1000 -pValues mostCons.txt \
 	> mostCons.histo.txt
 
     cat << '_EOF_' | gnuplot > mostCons.png
 set terminal png small color \
         xffffff x000000 x000000 x444444 xaa4400 x00ffff xff0000
 set size 1.4, 0.8
 set key left box
 set grid noxtics
 set grid ytics
 set title " Stickleback gasAcu1 histogram: lengths of mostConserved track elements"
 set xlabel " mostConserved element length - --expected-length 10 --target-coverage 0.20"
 set ylabel " # of elements at this length"
 set y2label " Cumulative Relative Frequency (CRF)"
 set y2range [0:1]
 set xrange [0:200]
 set y2tics
 set boxwidth 2
 set style fill solid
 
 plot "mostCons.histo.txt" using 2:3 title " # of elements" with boxes, \
         "mostCons.histo.txt" using 2:6 axes x1y2 title " CRF" with lines
 '_EOF_'
 #	<< happy emacs
 
 ##############################################################################
 ## Medaka oryLat1 swap (DONE - 2007-01-12 - Hiram)
     ## And the swap
     ssh kkstore05
     mkdir /cluster/data/gasAcu1/bed/blastz.orylat1.swap
     cd /cluster/data/gasAcu1/bed/blastz.orylat1.swap
     time doBlastzChainNet.pl -verbose=2 -smallClusterHub=pk \
 	/cluster/data/oryLat1/bed/blastz.gasAcu1.2007-01-11/DEF \
 	-chainMinScore=2000 -chainLinearGap=loose \
 	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
 	-swap -stop=net -bigClusterHub=pk > swap.log 2>&1 &
 
     time doBlastzChainNet.pl -verbose=2 -smallClusterHub=pk \
 	/cluster/data/oryLat1/bed/blastz.gasAcu1.2007-01-11/DEF \
 	-chainMinScore=2000 -chainLinearGap=loose \
 	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
 	-continue=net -swap -stop=net -bigClusterHub=pk > net-swap.log 2>&1 &
     #	real    18m21.951s
 
     ## Taking this chrUn chain result over to the other global result
     ssh kkstore05
     #	Note: previous result files temporarily moved out of the way
     cd /cluster/data/gasAcu1/bed/blastz.oryLat1.2006-12-22/axtChain
     chainSplit chain gasAcu1.oryLat1.all.chain.gz
     cp -p ../../blastz.orylat1.swap/axtChain/chain/chrUn.chain ./chain
     cd chain
     chainMergeSort *.chain | gzip -c > ../gasAcu1.oryLat1.all.chain.gz
     cd ..
     rm -fr chain
     #	they have new IDs
     chainSplit chain gasAcu1.oryLat1.all.chain.gz
     #	And then reusing the netChains.csh script, fixing up references
     #	to the appropriate chrom.sizes and 2bit file for gasAcu1
     cp netChains.csh netChainsUn.csh
     time ./netChainsUn.csh
     #	real    30m9.385s
 
     ## reloading, reuse the load.csh script, fixup reference to correct
     ##	chrom.sizes
     ssh hgwdev
     cd /cluster/data/gasAcu1/bed/blastz.oryLat1.2006-12-22/axtChain
 
 ###########################################################################
 ## SWAP FR1 BLASTZ (DONE - 2007-01-13 - Hiram)
     ssh kkstore02
     mkdir /cluster/data/gasAcu1/bed/blastz.fr1.swap
     cd /cluster/data/gasAcu1/bed/blastz.fr1.swap
     time doBlastzChainNet.pl -verbose=2 \
 	/cluster/data/fr1/bed/blastz.gasAcu1.2007-01-11/DEF \
 	-chainMinScore=2000 -chainLinearGap=loose \
 	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
 	-swap -stop=net -bigClusterHub=pk > swap.log 2>&1 &
     #	real    130m31.028s
 
     ## Take that chrUn result into the primary fr1 blastz result
     cd /cluster/data/gasAcu1/bed/blastz.fr1.2006-12-08/axtChain
     chainSplit chain gasAcu1.fr1.all.chain.gz
     cd chain
     cp -p /cluster/data/gasAcu1/bed/blastz.fr1.swap/axtChain/chain/chrUn.chain .
     chainMergeSort *.chain | gzip -c > ../gasAcu1.fr1.all.chain.gz
     cd ..
     chainSplit chain gasAcu1.fr1.all.chain.gz
     #	And then reusing the netChains.csh script, fixing up references
     #	to the appropriate chrom.sizes and 2bit file for gasAcu1
     ssh hgwdev
     cd /cluster/data/gasAcu1/bed/blastz.fr1.2006-12-08/axtChain
     cp netChains.csh netChainsUn.csh
     time ./netChainsUn.csh
     #	real    122m36.719s
     #  And then reloading these, reusing the loadUp.csh script, fixup
     #	reference to chrom.sizes
     cp loadUp.csh reLoad.csh
     time ./reLoad.csh > reLoad.out 2>&1
     #	real    3m38.799s
     #	After moving previous stuff out of the way
     ./makeMd5sum.csh
     ./installDownloads.csh
 
 #########################################################################
 # BLASTZ/CHAIN/NET FR2 (DONE - 2007-01-23 - Hiram)
 ##  no chrUn in gasAcu1, and align to fr2 scaffolds,
 ##	results lifted to fr2 chrUn coordinates
     ssh kkstore05
     mkdir /cluster/data/gasAcu1/bed/blastz.fr2.2007-01-23
     cd /cluster/data/gasAcu1/bed/blastz.fr2.2007-01-23
     cat << '_EOF_' > DEF
 # Stickleback vs. Fugu
 
 # Try "human-fugu" (more distant, less repeat-killed than mammal) params
 # +M=50:
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_M=50
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET: Stippleback gasAcu1, no chrUn in this run
 SEQ1_DIR=/san/sanvol1/scratch/gasAcu1/gasAcu1.noUn.sdTrf.2bit
 SEQ1_LEN=/san/sanvol1/scratch/gasAcu1/gasAcu1.noUn.sdTrf.sizes
 SEQ1_CHUNK=40000000
 SEQ1_LIMIT=30
 SEQ1_LAP=0
 
 # QUERY: Fugu fr2
 #       Align to the scaffolds, results lifed up to chrUn.sdTrf coordinates
 SEQ2_DIR=/san/sanvol1/scratch/fr2/fr2.2bit
 SEQ2_LEN=/san/sanvol1/scratch/fr2/chrom.sizes
 SEQ2_CTGDIR=/san/sanvol1/scratch/fr2/fr2.scaffolds.2bit
 SEQ2_CTGLEN=/san/sanvol1/scratch/fr2/fr2.scaffolds.sizes
 SEQ2_LIFT=/san/sanvol1/scratch/fr2/liftAll.lft
 SEQ2_CHUNK=20000000
 SEQ2_LIMIT=30
 SEQ2_LAP=0
 
 BASE=/cluster/data/gasAcu1/bed/blastz.fr2.2007-01-23
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << this line keeps emacs coloring happy
 
     time doBlastzChainNet.pl DEF -chainMinScore=2000 -chainLinearGap=loose \
 	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
 	-verbose=2 -bigClusterHub=pk \
 	-blastzOutRoot /cluster/bluearc/gasAcu1Fr2 > do.log 2>&1 &
     #	real    27m33.395s
     #	user    0m0.189s
     #	sys     0m0.163s
     # Error because of inconsistent chrom sizes. We had attempted to modify
     # our lift file to remove a gap inserted at the end of the entire
     # sequence, which we should not have done. 
     time doBlastzChainNet.pl DEF -chainMinScore=2000 -chainLinearGap=loose \
         -tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
         -verbose=2 -continue=chainRun -bigClusterHub=pk \
         -blastzOutRoot /cluster/bluearc/gasAcu1Fr2 > chainRun.log 2>&1 &
 
 ######################################################
 ## Now that it's finished, let's see how it did.  This already
 ## set up chain and net tracks for fr2 on gasAcu1, though a modification
 ## to ~/kent/src/hg/makeDb/trackDb/trackDb.ra needed to be done to make
 ## sure fr2 got displayed in the browser.
 ##
 ## track chainFr2
 ## shortLabel $o_db Chain
 ## longLabel $o_Organism ($o_date/$o_db) Chained Alignments
 ## group compGeno
 ## priority 140
 ## visibility hide
 ## color 100,50,0
 ## altColor 255,240,200
 ## matrix 16 91,-90,-25,-100,-90,100,-100,-25,-25,-100,100,-90,-100,-25,-90,91
 ## spectrum on
 ## type chain fr2
 ## otherDb fr2
 
 ## track netFr2
 ## shortLabel $o_db Net
 ## longLabel $o_Organism ($o_date/$o_db) Alignment Net
 ## group compGeno
 ## priority 140.1
 ## visibility hide
 ## spectrum on
 ## type netAlign fr2 chainFr2
 ## otherDb fr2
 
 ## To see how the matching did, let's use featureBits to find percentage of matching bases.
 ## This compares the gasAcu1 entries to only those in the Fr2 chain links that were created.
     ssh hgwdev
     cd /cluster/data/gasAcu1/bed
     ln -s blastz.fr2.2007-01-23 blastz.fr2
     cd blastz.fr2
     time nice -n 19 featureBits gasAcu1 chainFr2Link \
 	> fb.gasAcu1.chainFr2Link.txt 2>&1 &
     #   153570946 bases of 446627861 (34.385%) in intersection
 
 ## In addition, we want to put the chains and nets from gasAcu1 onto the
 ## fr2 browser, done below:
     # This is going to make the link back on the fr2 browser to
     # gasAcu1.
     mkdir /cluster/data/fr2/bed/blastz.gasAcu1.swap
     cd /cluster/data/fr2/bed/blastz.gasAcu1.swap
     time doBlastzChainNet.pl \
 	/cluster/data/gasAcu1/bed/blastz.fr2.2007-01-23/DEF \
 	-chainMinScore=2000 -chainLinearGap=loose \
 	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
 	-swap -bigClusterHub=pk > swap.log 2>&1 &
     #	real    24m33.761s
     #	user    0m0.125s
     #	sys     0m0.080s
 
     ## Now let's see how well these were matched back to Fr2.
     ssh hgwdev
     cd /cluster/data/fr2/bed/blastz.gasAcu1.swap
     time nice -n 19 featureBits fr2 chainGasAcu1Link \
 	> fb.fr2.chainGasAcu1Link.txt 2>&1 &
     # 158383996 bases of 393312790 (40.269%) in intersection
 
 ###########################################################################
 #  Manually fixing up the fr2 alignments to gasAcu1 to get chrUn results
 ##	(DONE - 2007-01-31 - 2007-02-02 - Hiram)
 ##	Ran the blastz on fr2 and aligned just the chrUn sequence to fr2
     ##	swap back to gasAcu1
     mkdir /cluster/data/gasAcu1/bed/blastz.fr2.swap
     cd /cluster/data/gasAcu1/bed/blastz.fr2.swap
     time doBlastzChainNet.pl -verbose=2 \
 	/cluster/data/fr2/bed/blastz.gasAcu1.2007-01-31/DEF \
 	-chainMinScore=2000 -chainLinearGap=loose \
 	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
 	-swap -stop=chainMerge -bigClusterHub=pk > swap.log 2>&1 &
 
     ## Now, with that chain result in hand, place it manually back in with
     ##	the full chroms chains and re-run the nets and so forth.
     ssh kkstore05
     cd /cluster/data/gasAcu1/bed/blastz.fr2.2007-01-23/axtChain
     #	restore the chain directory which was removed by cleanUp.csh
     chainSplit chain gasAcu1.fr2.all.chain.gz
     #	copy into here the chrUn.chain result
     cd chain
     cp -p \
 	/cluster/data/gasAcu1/bed/blastz.fr2.swap/axtChain/chain/chrUn.chain .
     time nice -n +19 chainMergeSort *.chain \
 	| gzip -c > ../gasAcu1.fr2.all.chain.gz
     cd ..
     rm -fr chain
     #	they have new IDs
     chainSplit chain gasAcu1.fr2.all.chain.gz
     #	And then reusing the netChains.csh script, fixing up references
     #	to the appropriate chrom.sizes and 2bit file for gasAcu1
     cp netChains.csh netChainsUn.csh
     time nice -n +19 ./netChainsUn.csh
     #	real    30m9.385s
 
     ## reloading, reuse the load.csh script, fixup reference to correct
     ##	chrom.sizes
     ssh hgwdev
     cd /cluster/data/gasAcu1/bed/blastz.fr2.2007-01-23/axtChain
     time nice -n +19 ./reLoad.csh > reLoad.out 2>&1
     #	real    3m53.174s
 
     nice -n +19 featureBits gasAcu1 chainFr2Link \
 	> fb.gasAcu1.chainFr2Link.txt 2>&1
     #	167817405 bases of 446627861 (37.574%) in intersection
     #	without chrUn covered before, it was:
     #	153570946 bases of 446627861 (34.385%) in intersection
 
     #	and fr2 is covered:
     nice -n +19 featureBits fr2 chainGasAcu1Link \
 	> fb.fr2.chainGasAcu1Link.txt 2>&1
     #	158383996 bases of 393312790 (40.269%) in intersection
 
 ###########################################################################
 # HUMAN (hg18) PROTEINS TRACK (DONE braney 2007-01-27)
     ssh kkstore05
     bash # if not using bash shell already
 
     mkdir /cluster/data/gasAcu1/blastDb
     cd /cluster/data/gasAcu1
     awk '/chrUn/ {if ($5 == "W") print $6}' downloads/UCSC.gasAcu1.agp > chrUnContigs.lst
     zcat downloads/assembly.bases.gz | faSomeRecords stdin chrUnContigs.lst chrUnContigs.fa
     cat noUn/chr*fa > temp.fa
     faSplit gap temp.fa 1000000 blastDb/
     faSplit sequence chrUnContigs.fa 100 blastDb/y
     rm temp.fa
     cd blastDb
     for i in *.fa
     do
 	/cluster/bluearc/blast229/formatdb -i $i -p F
     done
     rm *.fa
 
     mkdir -p /san/sanvol1/scratch/gasAcu1/blastDb
     cd /cluster/data/gasAcu1/blastDb
     for i in nhr nin nsq; 
     do 
 	echo $i
 	cp *.$i /san/sanvol1/scratch/gasAcu1/blastDb
     done
 
     mkdir -p /cluster/data/gasAcu1/bed/tblastn.hg18KG
     cd /cluster/data/gasAcu1/bed/tblastn.hg18KG
     echo  /san/sanvol1/scratch/gasAcu1/blastDb/*.nsq | xargs ls -S | sed "s/\.nsq//"  > query.lst
     wc -l query.lst
 # 609 query.lst
 
    # we want around 200000 jobs
    calc `wc /cluster/data/hg18/bed/blat.hg18KG/hg18KG.psl | awk "{print \\\$1}"`/\(200000/`wc query.lst | awk "{print \\\$1}"`\)
 
 # 36727/(200000/609) = 111.833715
 
    mkdir -p /cluster/bluearc/gasAcu1/bed/tblastn.hg18KG/kgfa
    split -l 112 /cluster/data/hg18/bed/blat.hg18KG/hg18KG.psl  /cluster/bluearc/gasAcu1/bed/tblastn.hg18KG/kgfa/kg
    ln -s /cluster/bluearc/gasAcu1/bed/tblastn.hg18KG/kgfa kgfa
    cd kgfa
    for i in *; do 
      nice pslxToFa $i $i.fa; 
      rm $i; 
      done
    cd ..
    ls -1S kgfa/*.fa > kg.lst
    mkdir -p /cluster/bluearc/gasAcu1/bed/tblastn.hg18KG/blastOut
    ln -s /cluster/bluearc/gasAcu1/bed/tblastn.hg18KG/blastOut
    for i in `cat kg.lst`; do  mkdir blastOut/`basename $i .fa`; done
    tcsh
    cd /cluster/data/gasAcu1/bed/tblastn.hg18KG
    cat << '_EOF_' > blastGsub
 #LOOP
 blastSome $(path1) {check in line $(path2)} {check out exists blastOut/$(root2)/q.$(root1).psl }
 #ENDLOOP
 '_EOF_'
 
    cat << '_EOF_' > blastSome
 #!/bin/sh
 BLASTMAT=/cluster/bluearc/blast229/data
 export BLASTMAT
 g=`basename $2`
 f=/tmp/`basename $3`.$g
 for eVal in 0.01 0.001 0.0001 0.00001 0.000001 1E-09 1E-11
 do
 if /cluster/bluearc/blast229/blastall -M BLOSUM80 -m 0 -F no -e $eVal -p tblastn -d $1 -i $2 -o $f.8
 then
         mv $f.8 $f.1
         break;
 fi
 done
 if test -f  $f.1
 then
     if /cluster/bin/i386/blastToPsl $f.1 $f.2
     then
 	liftUp -nosort -type=".psl" -nohead $f.3 /cluster/data/gasAcu1/blastDb.lft carry $f.2
         liftUp -nosort -type=".psl" -pslQ -nohead $3.tmp /cluster/data/hg18/bed/blat.hg18KG/protein.lft warn $f.3
 
         if pslCheck -prot $3.tmp                                                  
         then                                                                      
             mv $3.tmp $3                                                          
             rm -f $f.1 $f.2 $f.3 $f.4
         fi
         exit 0                                                                    
     fi                                                                            
 fi                                                                                
 rm -f $f.1 $f.2 $3.tmp $f.8 $f.3 $f.4
 exit 1
 '_EOF_'
     # << happy emacs
     chmod +x blastSome
     gensub2 query.lst kg.lst blastGsub blastSpec
     exit # back to bash
     
     ssh pk
     cd /cluster/data/gasAcu1/bed/tblastn.hg18KG
     para create blastSpec
 #    para try, check, push, check etc.
 
     para time
 
 # Completed: 199752 of 199752 jobs
 # CPU time in finished jobs:    4730447s   78840.78m  1314.01h   54.75d  0.150 y
 # IO & Wait Time:                929809s   15496.82m   258.28h   10.76d  0.029 y
 # Average job time:                  28s       0.47m     0.01h    0.00d
 # Longest running job:                0s       0.00m     0.00h    0.00d
 # Longest finished job:             519s       8.65m     0.14h    0.01d
 # Submission to last job:         65891s    1098.18m    18.30h    0.76d
 
     ssh kkstore05
     cd /cluster/data/gasAcu1/bed/tblastn.hg18KG
     tcsh
     mkdir chainRun
     cd chainRun
     cat << '_EOF_' > chainGsub
 #LOOP
 chainOne $(path1)
 #ENDLOOP
 '_EOF_'
 
     cat << '_EOF_' > chainOne
 (cd $1; cat q.*.psl | simpleChain -prot -outPsl -maxGap=100000 stdin /cluster/bluearc/gasAcu1/bed/tblastn.hg18KG/blastOut/c.`basename $1`.psl)
 '_EOF_'
     chmod +x chainOne
     ls -1dS /cluster/bluearc/gasAcu1/bed/tblastn.hg18KG/blastOut/kg?? > chain.lst
     gensub2 chain.lst single chainGsub chainSpec
     # do the cluster run for chaining
     ssh kk
     cd /cluster/data/gasAcu1/bed/tblastn.hg18KG/chainRun
     para create chainSpec
     para maxNode 30
     para try, check, push, check etc.
 
 
 # Completed: 328 of 328 jobs
 # CPU time in finished jobs:      25906s     431.77m     7.20h    0.30d  0.001 y
 # IO & Wait Time:                 77171s    1286.18m    21.44h    0.89d  0.002 y
 # Average job time:                 314s       5.24m     0.09h    0.00d
 # Longest finished job:             942s      15.70m     0.26h    0.01d
 # Submission to last job:          3630s      60.50m     1.01h    0.04d
 
     ssh kkstore05
     cd /cluster/data/gasAcu1/bed/tblastn.hg18KG/blastOut
     bash # if using another shell
     for i in kg??
     do
        cat c.$i.psl | awk "(\$13 - \$12)/\$11 > 0.6 {print}" > c60.$i.psl
        sort -rn c60.$i.psl | pslUniq stdin u.$i.psl
        awk "((\$1 / \$11) ) > 0.60 { print   }" c60.$i.psl > m60.$i.psl
        echo $i
     done
     sort -T /tmp -k 14,14 -k 16,16n -k 17,17n u.*.psl m60* | uniq > /cluster/data/gasAcu1/bed/tblastn.hg18KG/unliftBlastHg18KG.psl
     cd ..
     pslCheck unliftBlastHg18KG.psl
     liftUp blastHg18KG.psl ../../Un/chrUn.lft carry unliftBlastHg18KG.psl
 
     # load table 
     ssh hgwdev
     cd /cluster/data/gasAcu1/bed/tblastn.hg18KG
     hgLoadPsl gasAcu1 blastHg18KG.psl
 
     # check coverage
     featureBits gasAcu1 blastHg18KG 
 # 20127551 bases of 446627861 (4.507%) in intersection
 
     featureBits gasAcu1 ensGene:cds blastHg18KG  -enrichment
 # ensGene:cds 6.618%, blastHg18KG 4.507%, both 3.790%, cover 57.26%, enrich 12.71x
 
     ssh kkstore05
     rm -rf /cluster/data/gasAcu1/bed/tblastn.hg18KG/blastOut
     rm -rf /cluster/bluearc/gasAcu1/bed/tblastn.hg18KG/blastOut
 #end tblastn
 
 #######################################################################
 ## BLASTZ LIZARD ANOCAR1 - (DONE - 2007-02-18 - Hiram)
     ssh kkstore05
     mkdir /cluster/data/gasAcu1/bed/blastz.anoCar1.2007-02-18
     cd /cluster/data/gasAcu1/bed/blastz.anoCar1.2007-02-18
 
     cat << '_EOF_' > DEF
 # stickleback vs. lizard
 
 # Use same params as used for gasAcu1-danRer4
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_M=50
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET: Stickleback gasAcu1 - largest chunk big enough
 #	for all chroms except chrUn
 SEQ1_DIR=/san/sanvol1/scratch/gasAcu1/gasAcu1.2bit
 SEQ1_LEN=/cluster/data/gasAcu1/chrom.sizes
 SEQ1_CHUNK=40000000
 SEQ1_LAP=10000
 
 # QUERY: Lizard AnoCar1 - largest chunk big enough for largest scaffold
 SEQ2_DIR=/san/sanvol1/scratch/anoCar1/anoCar1.2bit
 SEQ2_LEN=/san/sanvol1/scratch/anoCar1/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LIMIT=30
 SEQ2_LAP=0
 
 BASE=/cluster/data/gasAcu1/bed/blastz.anoCar1.2007-02-18
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time doBlastzChainNet.pl -verbose=2 DEF \
 	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
 	-bigClusterHub=pk -chainMinScore=5000 -chainLinearGap=loose \
 	-blastzOutRoot=/san/sanvol1/scratch/gasAcu1AnoCar1 > do.log 2>&1 &
     time doBlastzChainNet.pl -verbose=2 DEF \
 	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
 	-bigClusterHub=pk -chainMinScore=5000 -chainLinearGap=loose \
 	-blastzOutRoot=/san/sanvol1/scratch/gasAcu1AnoCar1 \
 	-continue=cleanup > cleanup.log 2>&1 &
 
     ## measuring
     time nice -n +19 featureBits gasAcu1 chainAnoCar1Link \
 	> fb.gasAcu1.chainAnoCar1Link.txt 2>&1
     #	56386298 bases of 446627861 (12.625%) in intersection
 
     ## swap documented in anoCar1.txt
     time doBlastzChainNet.pl -verbose=2 \
 	/cluster/data/gasAcu1/bed/blastz.anoCar1.2007-02-19/DEF \
 	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
 	-bigClusterHub=pk -chainMinScore=5000 -chainLinearGap=loose \
 	-swap > swap.log 2>&1 &
 
     time nice -n +19 featureBits anoCar1 chainGasAcu1Link \
 	> fb.anoCar1.chainGasAcu1Link.txt 2>&1  
     #	real    1m14.245s
     #	54464074 bases of 1741478929 (3.127%) in intersection
 
 
 #######################################################################
 # SELF CHAINS (DONE 3/14/07 angie)
 # Requested by Gill.
     ssh kkstore05
     mkdir /cluster/data/gasAcu1/bed/blastz.gasAcu1.2007-03-13
     cd /cluster/data/gasAcu1/bed/blastz.gasAcu1.2007-03-13
     cat << '_EOF_' > DEF
 # stickleback vs. self
 
 BLASTZ_H=2000
 BLASTZ_M=200
 
 # TARGET
 # Stickleback
 SEQ1_DIR=/iscratch/i/gasAcu1/gasAcu1.noUn.sdTrf.2bit
 SEQ1_LEN=/iscratch/i/gasAcu1/gasAcu1.noUn.sdTrf.sizes
 SEQ1_CHUNK=10000000
 SEQ1_LAP=10000
 
 # QUERY
 # Stickleback
 SEQ2_DIR=/iscratch/i/gasAcu1/gasAcu1.noUn.sdTrf.2bit
 SEQ2_LEN=/iscratch/i/gasAcu1/gasAcu1.noUn.sdTrf.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=10000
 
 BASE=/cluster/data/gasAcu1/bed/blastz.gasAcu1.2007-03-13
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << emacs
     doBlastzChainNet.pl DEF \
       -tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
       -chainMinScore=5000 -chainLinearGap=medium \
       -blastzOutRoot=/cluster/bluearc/gasAcu1Self >& do.log &
     tail -f do.log
     # hgwdev can't see /iscratch, so the loadUp.csh script failed looking
     # for SEQ1_LEN.  So I edited DEF to use /cluster/data location and 
     # continued -load.
 
 ##########################################################################
 # NSCAN track - (2007-06-05 markd)
 # uses danRer4 as informant, no pseudogene masking 
 #
     cd /cluster/data/gasAcu1/bed/nscan/
 
     # obtainedf NSCAN predictions from michael brent's group
     # at WUSTL
     wget -nv http://mblab.wustl.edu/predictions/Gasterosteus_aculeatus/gasAcu1.gtf
     wget -nv http://mblab.wustl.edu/predictions/Gasterosteus_aculeatus/gasAcu1.prot.fa
 
     gzip -9 gasAcu1.*
     chmod a-w *.gz
 
     # load tracks.  Note that these have *utr features, rather than
     # exon features.  currently ldHgGene creates separate genePred exons
     # for these.
     ldHgGene -bin -gtf -genePredExt gasAcu1 nscanGene gasAcu1.gtf.gz
     hgPepPred gasAcu1 generic nscanPep gasAcu1.prot.fa.gz
     rm -f *.tab
 
     # update trackDb; need a gasAcu1-specific page to describe informants
     stickleback/gasAcu1/nscanGene.html    
     stickleback/gasAcu1/trackDb.ra
 
 #########################################################################
 # BLASTZ SWAP from Mouse Mm9 (DONE - 2007-09-07 - Hiram)
 
     #	the original
     cd /cluster/data/mm9/bed/blastzGasAcu1.2007-09-06
     cat fb.mm9.chainGasAcu1Link.txt
     #	48448585 bases of 2620346127 (1.849%) in intersection
 
     #	and the swap
     mkdir /cluster/data/gasAcu1/bed/blastz.mm9.swap
     cd /cluster/data/gasAcu1/bed/blastz.mm9.swap
     time nice -n +19 doBlastzChainNet.pl -chainMinScore=5000 \
 	/cluster/data/mm9/bed/blastzGasAcu1.2007-09-06/DEF \
 	-qRepeats=windowmaskerSdust -chainLinearGap=loose \
 	-swap -bigClusterHub=kk -verbose=2 > swap.log 2>&1 &
     cat fb.gasAcu1.chainMm9Link.txt
     #	43730193 bases of 446627861 (9.791%) in intersection
 
 #########################################################################
 ############################################################################
 # TRANSMAP vertebrate.2008-05-20 build  (2008-05-24 markd)
 
 vertebrate-wide transMap alignments were built  Tracks are created and loaded
 by a single Makefile. This is available from:
    svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2008-05-20
 
 see doc/builds.txt for specific details.
 ############################################################################
 ############################################################################
 # TRANSMAP vertebrate.2008-06-07 build  (2008-06-30 markd)
 
 vertebrate-wide transMap alignments were built  Tracks are created and loaded
 by a single Makefile. This is available from:
    svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2008-06-30
 
 see doc/builds.txt for specific details.
 
 ############################################################################
 # SWAP BLASTZ Medaka oryLat2 (DONE - 2008-08-27 - Hiram)
     ssh kkstore02	# not too important since everything moved to hive
     screen # use screen to control this job
     cd /cluster/data/oryLat2/bed/blastz.gasAcu1.2008-08-26
     cat fb.oryLat2.chainGasAcu1Link.txt
     #	211589769 bases of 700386597 (30.210%) in intersection
 
     #	And, for the swap:
     mkdir /cluster/data/gasAcu1/bed/blastz.oryLat2.swap
     cd /cluster/data/gasAcu1/bed/blastz.oryLat2.swap
     time doBlastzChainNet.pl -chainMinScore=3000 -chainLinearGap=medium -swap \
 	/cluster/data/oryLat2/bed/blastz.gasAcu1.2008-08-26/DEF \
 	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
 	-verbose=2 -smallClusterHub=pk -bigClusterHub=pk > swap.log 2>&1 &
     #	real    31m44.711s
     cat fb.gasAcu1.chainOryLat2Link.txt
     #	189793612 bases of 446627861 (42.495%) in intersection
 
 ############################################################################
 # add NCBI identifiers to the dbDb (DONE - 2008-10-21 - Hiram)
     hgsql -e 'update dbDb set
 sourceName="Broad Institute gasAcu1 (1.0) (NCBI project 13579, AANH01000000)" where name="gasAcu1";' hgcentraltest
 ###########################################################################
 ############################################################################
 # TRANSMAP vertebrate.2009-07-01 build  (2009-07-21 markd)
 
 vertebrate-wide transMap alignments were built  Tracks are created and loaded
 by a single Makefile. This is available from:
    svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2009-07-01
 
 see doc/builds.txt for specific details.
 ############################################################################
+# BLASTZ/CHAIN/NET TetNig2 (DONE - 2009-08-10,09-15 - Hiram)
+    #	create contigs only sequence to align properly to gasAcu1 contigs
+    mkdir /hive/data/genomes/gasAcu1/nonBridged
+    cd /hive/data/genomes/gasAcu1/nonBridged
+    gapToLift -verbose=2 gasAcu1 gasAcu1.contigs.lift \
+	-bedFile=gasAcu1.contigs.bed
+#       chrom count: 23
+# WARNING: gap at end of chromosome not telomere at
+#		chrUn:62549211-62550211, type: clone
+#       found 16945 gaps
+#       bed output requested to gasAcu1.contigs.bed
+#       no gaps on chrom: chrM, size: 15742
+    ~/kent/src/hg/utils/lft2BitToFa.pl ../gasAcu1.2bit gasAcu1.contigs.lift \
+	| gzip -c > gasAcu1.contigs.fa.gz
+    #	make sure nothing was destroyed:
+    faCount *.fa.gz > faCount.contigs.txt 2>&1
+    twoBitToFa ../gasAcu1.2bit stdout | faCount stdin > faCount.2bit.txt 2>&1
+    tail -1 faCount.contigs.txt
+# total   461441448       123670916       99610982        99564587
+#	123781376       14813587        14615136
+    tail -1 faCount.2bit.txt
+# total   463354448       123670916       99610982        99564587
+#	123781376       16726587        14615136
+
+    #	only the total size and N count are different
+    faToTwoBit gasAcu1.contigs.fa.gz gasAcu1.contigs.2bit
+    twoBitInfo gasAcu1.contigs.2bit stdout | sort -k2nr > gasAcu1.contigs.sizes
+    cp -p gasAcu1.contigs.2bit gasAcu1.contigs.sizes gasAcu1.contigs.lift \
+	/hive/data/staging/data/gasAcu1
+
+
+    mkdir /hive/data/genomes/gasAcu1/bed/lastzTetNig2.2009-08-10
+    cd /hive/data/genomes/gasAcu1/bed/lastzTetNig2.2009-08-10
+
+    cat << '_EOF_' > DEF
+# Stickleback vs. Tetraodon
+
+# TARGET: Stickleback gasAcu1, chunk large enough to run largest piece
+SEQ1_DIR=/scratch/data/gasAcu1/gasAcu1.2bit
+SEQ1_LEN=/scratch/data/gasAcu1/chrom.sizes
+SEQ1_CTGDIR=/scratch/data/gasAcu1/gasAcu1.contigs.2bit
+SEQ1_CTGLEN=/scratch/data/gasAcu1/gasAcu1.contigs.sizes
+SEQ1_LIFT=/scratch/data/gasAcu1/gasAcu1.contigs.lift
+SEQ1_CHUNK=22000000
+SEQ1_LAP=10000
+SEQ1_LIMIT=50
+
+# QUERY: Tetraodon TetNig2 - single chunk big enough to run single largest item
+SEQ2_DIR=/scratch/data/tetNig2/tetNig2.2bit
+SEQ2_LEN=/scratch/data/tetNig2/chrom.sizes
+SEQ2_CTGDIR=/scratch/data/tetNig2/tetNig2.contigs.2bit
+SEQ2_CTGLEN=/scratch/data/tetNig2/tetNig2.contigs.sizes
+SEQ2_LIFT=/scratch/data/tetNig2/tetNig2.contigs.lift
+SEQ2_CHUNK=20000000
+SEQ2_LAP=0
+SEQ2_LIMIT=50
+
+BASE=/hive/data/genomes/gasAcu1/bed/lastzTetNig2.2009-08-10
+TMPDIR=/scratch/tmp
+'_EOF_'
+    # << this line keeps emacs coloring happy
+
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+	`pwd`/DEF \
+	-noLoadChainSplit -chainMinScore=2000 -chainLinearGap=medium \
+	-workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=swarm \
+	> do.log 2>&1 &
+    #	about 72 minutes
+    #	forgot to indicate type of repeats, continuing the load:
+    cd /hive/data/genomes/gasAcu1/bed/lastzTetNig2.2009-08-10/axtChain
+    netClass -tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
+        -verbose=0 -noAr noClass.net gasAcu1 tetNig2 gasAcu1.tetNig2.net
+    netFilter -minGap=10 gasAcu1.tetNig2.net \
+	| hgLoadNet -verbose=0 gasAcu1 netTetNig2 stdin
+    cd ..
+    featureBits gasAcu1 chainTetNig2Link >&fb.gasAcu1.chainTetNig2Link.txt
+    cat fb.gasAcu1.chainTetNig2Link.txt
+    #	134497679 bases of 446627861 (30.114%) in intersection
+
+    mkdir /hive/data/genomes/tetNig2/bed/blastz.gasAcu1.swap
+    cd /hive/data/genomes/tetNig2/bed/blastz.gasAcu1.swap
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+	/hive/data/genomes/gasAcu1/bed/lastzTetNig2.2009-08-10/DEF \
+	-swap -qRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
+	-noLoadChainSplit -chainMinScore=2000 -chainLinearGap=medium \
+	-workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=swarm \
+	> swap.log 2>&1 &
+
 ############################################################################
 # TRANSMAP vertebrate.2009-09-13 build  (2009-09-20 markd)
 
 vertebrate-wide transMap alignments were built  Tracks are created and loaded
 by a single Makefile. This is available from:
    svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2009-09-13
 
 see doc/builds.txt for specific details.
 ############################################################################