src/hg/makeDb/doc/calJac1.txt 1.22

1.22 2010/02/12 23:42:33 hiram
liftOver to calJac3 completed
Index: src/hg/makeDb/doc/calJac1.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/calJac1.txt,v
retrieving revision 1.21
retrieving revision 1.22
diff -b -B -U 1000000 -r1.21 -r1.22
--- src/hg/makeDb/doc/calJac1.txt	25 Nov 2009 21:48:38 -0000	1.21
+++ src/hg/makeDb/doc/calJac1.txt	12 Feb 2010 23:42:33 -0000	1.22
@@ -1,1656 +1,1668 @@
 # for emacs: -*- mode: sh; -*-
 
 
 # This file describes browser build for the Marmoset
 # genome, July 2007
 #
 #	"$Id$"
 #
 ######################################################################
 ## DOWNLOAD SEQUENCE (DONE - 2007-08-21 - Hiram)
     ssh kkstore06
     mkdir /cluster/store4/calJac1
     ln -s /cluster/store4/calJac1 /cluster/data/calJac1
     mkdir /cluster/data/calJac1/wustl
     cd /cluster/data/calJac1/wustl
     for F in supercontigs.agp.gz supercontigs.fa.gz contigs.fa.gz contigs.fa.qual.gz
 do
     wget --timestamping \
 /pub/organism/Primates/Callithrix_jacchus/assembly/Callithrix_jacchus-2.0.2/output/${F} \
     -O ${F}
 done
     #	real    50m13.535s
     ls -ogrt
 # -rw-rw-r--  1   6656649 Jun 19 17:03 supercontigs.agp.gz
 # -rw-rw-r--  1 521109271 Jun 19 17:03 contigs.fa.qual.gz
 # -rw-rw-r--  1 781437003 Jun 19 18:30 contigs.fa.gz
 # -rw-rw-r--  1 851641082 Aug 21 13:29 supercontigs.fa.gz
 
 ##########################################################################
 # fetch photograph (DONE - 2007-08-21 - Hiram)
     mkdir /cluster/data/calJac1/photo
     cd /cluster/data/calJac1/photo
     wget --timestamping \
 	http://www.genome.gov/Images/press_photos/highres/82-300.jpg \
 	    -O nhgri.original.82-300.jpg
     convert -geometry 300x200 -quality 80 nhgri.original.82-300.jpg \
 	Callithrix_jacchus.jpg
     # check this .jpg image into the source tree browser/images/ directory
 
 #######################################################################
 ## create config.ra and run makeGenomeDb.pl 
     ssh kkstore06
     cd /cluster/data/calJac1
 
     cat << '_EOF_' > calJac1.config.ra
 # Config parameters for makeGenomeDb.pl:
 db calJac1
 scientificName Callithrix jacchus
 commonName Marmoset
 assemblyDate Jun. 2007
 assemblyLabel WUSTL 2.0.2
 orderKey 40
 clade mammal
 genomeCladePriority 16
 mitoAcc none
 fastaFiles /cluster/data/calJac1/wustl/supercontigs.fa.gz
 agpFiles /cluster/data/calJac1/wustl/supercontigs.agp.gz
 # qualFiles /dev/null
 dbDbSpeciesDir marmoset
 '_EOF_'
     # << happy emacs
     time nice -n +19 ~/kent/src/hg/utils/automation/makeGenomeDb.pl \
 	-stop=agp calJac1.config.ra > makeGenomeDb.out 2>&1 &
     #	real    24m24.468s
     time nice -n +19 ~/kent/src/hg/utils/automation/makeGenomeDb.pl \
 	-continue=db calJac1.config.ra > db.continue.out 2>&1 &
     # add the trackDb files to the source tree and to the trackDb/makefile
 
 ##########################################################################
 ## Repeat masker (DONE - 2007-08-21 - Hiram)
     ssh kkstore06
     ## use screen for this
     mkdir /cluster/data/calJac1/bed/RepeatMasker
     cd /cluster/data/calJac1/bed/RepeatMasker
     time nice -n +19 ~/kent/src/hg/utils/automation/doRepeatMasker.pl \
 	-bigClusterHub=kk \
 	-buildDir=/cluster/data/calJac1/bed/RepeatMasker calJac1 > do.out 2>&1 &
 
 ##############################################################################
 ## simpleRepeat masking (DONE - 2007-09-05 - Hiram)
     ## create a kki kluster run
     ssh kkr1u00
     mkdir /iscratch/i/calJac1
     cd /iscratch/i/calJac1
     cp -p /cluster/data/calJac1/calJac1.unmasked.2bit .
     cp -p /cluster/data/calJac1/chrom.sizes .
     twoBitToFa calJac1.unmasked.2bit calJac1.unmasked.fa
     mkdir split
     #  split sequence into about 1000 files, each about 3,000,000 bases
     time nice -n +19 faSplit about calJac1.unmasked.fa 3000000 split/cj1_
 
     for R in 2 3 4 5 6 7 8
 do
     rsync -a --progress /iscratch/i/calJac1/ kkr${R}u00:/iscratch/i/calJac1/
 done
 
     ssh kki
     mkdir -p /cluster/data/calJac1/bed/simpleRepeat/trf
     cd /cluster/data/calJac1/bed/simpleRepeat/trf
 
     cat << '_EOF_' > runTrf
 #!/bin/csh -fe 
 #
 set C = $1:r
 set SRC = /iscratch/i/calJac1/split/$C.fa
 mkdir -p /scratch/tmp/$C
 cp -p $SRC /scratch/tmp/$C/$C.fa
 pushd /scratch/tmp/$C
 /cluster/bin/i386/trfBig -trf=/cluster/bin/i386/trf $C.fa \
 	/dev/null -bedAt=$C.bed -tempDir=/scratch/tmp/$C
 popd
 rm -f $C.bed
 cp -p /scratch/tmp/$C/$C.bed .
 rm -fr /scratch/tmp/$C
 '_EOF_'
     # << happy emacs
     chmod +x runTrf
 
     cat << '_EOF_' > template
 #LOOP
 ./runTrf $(path1) {check out line $(root1).bed}
 #ENDLOOP
 '_EOF_'
     # << happy emacs
 
     ls /iscratch/i/calJac1/split > part.list
     gensub2 part.list single template jobList
     para create jobList
     para try ... check ... push ... etc ...
 # Completed: 947 of 947 jobs
 # CPU time in finished jobs:      37242s     620.70m    10.35h    0.43d  0.001 y
 # IO & Wait Time:                  2842s      47.36m     0.79h    0.03d  0.000 y
 # Average job time:                  42s       0.71m     0.01h    0.00d
 # Longest finished job:            1318s      21.97m     0.37h    0.02d
 # Submission to last job:          3572s      59.53m     0.99h    0.04d
 
     cat *.bed > ../simpleRepeat.bed
     cd ..
     awk '{if ($5 <= 12) print;}' simpleRepeat.bed > trfMask.bed
 
     ssh hgwdev
     cd /cluster/data/calJac1/bed/simpleRepeat
     time nice -n +19 hgLoadBed calJac1 simpleRepeat \
       simpleRepeat.bed -sqlTable=$HOME/kent/src/hg/lib/simpleRepeat.sql
     #	Loaded 846105 elements of size 16
     #	real    0m24.710s
 
     nice -n +19 featureBits calJac1 simpleRepeat \
 	> fb.simpleRepeat.calJac1.txt 2>&1
     cat fb.simpleRepeat.calJac1.txt
     #	100489601 bases of 2929139385 (3.431%) in intersection
 
     #	add the trfMask to the rmsk masked sequence to get our final
     #	masked sequence
     ssh kkstore06
     cd /cluster/data/calJac1
     time nice -n +19 cat bed/simpleRepeat/trfMask.bed \
 	| twoBitMask -add -type=.bed calJac1.rmsk.2bit stdin calJac1.2bit
     #	measure it
     time nice -n +19 twoBitToFa calJac1.2bit stdout \
 	| faSize stdin > faSize.calJac1.2bit.txt 2>&1
     grep masked faSize.calJac1.2bit.txt
     #	%45.93 masked total, %47.50 masked real
 
     ## clean up the /iscratch/i/calJac1/ directory
     ssh kkr1u00
     cd /iscratch/i/calJac1
     rm -fr *
     for R in 2 3 4 5 6 7 8
 do
     rsync -a --progress --delete --stats /iscratch/i/calJac1/ kkr${R}u00:/iscratch/i/calJac1/
 done
     cd ..
     rmdir calJac1
     for R in 2 3 4 5 6 7 8
 do
     ssh kkr${R}u00 rmdir /iscratch/i/calJac1
 done
 
 ############################################################################
 #  BLATSERVERS ENTRY (DONE - 2007-09-06 - Hiram)
 #	After getting a blat server assigned by the Blat Server Gods,
     ssh hgwdev
 
     hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
 	VALUES ("calJac1", "blat13", "17786", "1", "0"); \
 	INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
 	VALUES ("calJac1", "blat13", "17787", "0", "1");' \
 	    hgcentraltest
     #	test it with some sequence
 
 ############################################################################
 ##  BLASTZ swap from hg18 alignments (2007-11-11 - markd)
     ssh hgwdev
     mkdir /cluster/data/calJac1/bed/blastz.hg18.swap
     cd /cluster/data/calJac1/bed/blastz.hg18.swap
     ln -s blastz.hg18.swap ../blastz.hg18
     /cluster/bin/scripts/doBlastzChainNet.pl \
         -swap /cluster/data/hg18/bed/blastz.calJac1.2007-10-07/DEF >& swap.out&
 
     # fb.calJac1.chainHg18Link.txt:
     #   2426684781 bases of 2929139385 (82.846%) in intersection
 
     #	running syntenic net (DONE - 2007-12-14 - Hiram)
     time nice -n +19 $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl \
     /cluster/data/hg18/bed/blastz.calJac1.2007-10-07/DEF \
       -bigClusterHub=pk -continue=syntenicNet -syntenicNet \
       -swap -chainMinScore=3000 -chainLinearGap=medium > syntenicNet.log 2>&1 &
     #	real    8m24.277s
     #	failed during a chainSplit:
     #	Can't open chain/Contig836.chain to append: Too many open files
 
     #	create reciprocal best chains/nets for 9-way maf alignments
     ssh hgwdev
     cd /cluster/data/calJac1/bed/blastz.hg18.swap
     time nice -n +19 /cluster/bin/scripts/doRecipBest.pl calJac1 hg18 \
 	> rbest.log 2>&1 &
 
 #########################################################################
 ## genscan run (DONE - 2007-11-08 - Hiram)
 ##	create hard masked sequence
     ssh kkstore06
     cd /cluster/data/calJac1
     twoBitToFa calJac1.2bit stdout \
 	| maskOutFa stdin hard stdout | faToTwoBit stdin calJac1.hard.2bit
     #	And, make sure there aren't any sequences in this lot that have
     #	become all N's with no sequence left in them.  This drives genscan nuts
     twoBitToFa calJac1.hard.2bit stdout \
 	| faCount stdin > faCount.hard.txt
     #	the lowest three are:
     egrep -v "^#|^total" faCount.hard.txt \
 	| awk '{print $1,$2-$7}' | sort -k2,2nr | tail -3
     #	there are a lot of these that turned into zero sequence left
     #	will sort this out when making the sequence to use on the Iservers
 
     ssh kkr1u00
     mkdir /iscratch/i/calJac1/hardMasked
     cd /iscratch/i/calJac1/hardMasked
     twoBitToFa /cluster/data/calJac1/calJac1.hard.2bit stdout \
 	| faSplit byname stdin ./
     #	requiring at least 128 bases of sequence results in the removal of
     #	1,208 sequences that are too short, leaving 48,516 sequences
     egrep -v "^#|^total" /cluster/data/calJac1/faCount.hard.txt \
 	| awk '{size=$2-$7; if (size < 128) {print $1}}' | while read F
 do
     rm -f "${F}.fa"
     echo "${F}.fa"
 done
     mkdir ../hardChunks
     cd ../hardChunks
     
     #	chunk them up into 4,000,000 base packages, no sequence is broken
     catDir ../hardMasked \
 	| faSplit about stdin 4000000 c_
     rm -fr ../hardMasked
 
     for R in 2 3 4 5 6 7 8
 do
     rsync -a --progress ./ kkr${R}u00:/iscratch/i/calJac1/hardChunks/
 done
 
     ssh hgwdev
     mkdir /cluster/data/calJac1/bed/genscan
     cd /cluster/data/calJac1/bed/genscan
     # Check out hg3rdParty/genscanlinux to get latest genscan:
     cvs co hg3rdParty/genscanlinux
 
     # Run on small cluster (more mem than big cluster).
     ssh kki
     cd /cluster/data/calJac1/bed/genscan
     # Make 3 subdirectories for genscan to put their output files in
     mkdir gtf pep subopt
     # Generate a list file, genome.list, of all the hard-masked contigs that 
     # *do not* consist of all-N's (which would cause genscan to blow up)
     #	Since we split on gaps, we have no chunks like that.  You can
     #	verify with faCount on the chunks.
     ls -1Sr /iscratch/i/calJac1/hardChunks/c_*.fa > genome.list
 
     # Create script to run gsBig
     cat << '_EOF_' > runGsBig
 #!/bin/csh -fe
 set runDir = `pwd`
 set srcDir = $1
 set inFile = $2
 set fileRoot = $inFile:r
 mkdir /scratch/tmp/$fileRoot
 cp -p $srcDir/$inFile /scratch/tmp/$fileRoot
 pushd /scratch/tmp/$fileRoot
 /cluster/bin/x86_64/gsBig $inFile $fileRoot.gtf -trans=$fileRoot.pep -subopt=$fileRoot.bed -exe=$runDir/hg3rdParty/genscanlinux/genscan -par=$runDir/hg3rdParty/genscanlinux/HumanIso.smat -tmp=/scratch/tmp -window=2400000
 popd
 cp -p /scratch/tmp/$fileRoot/$fileRoot.gtf gtf
 cp -p /scratch/tmp/$fileRoot/$fileRoot.pep pep
 cp -p /scratch/tmp/$fileRoot/$fileRoot.bed subopt
 rm -fr /scratch/tmp/$fileRoot
 '_EOF_'
     # << happy emacs
     chmod +x runGsBig
 
     #	template file for gensub2
     cat << '_EOF_' > template
 #LOOP
 runGsBig /iscratch/i/calJac1/hardChunks $(file1) {check out line gtf/$(root1).gtf} {check out line pep/$(root1).pep} {check out line subopt/$(root1).bed} 
 #ENDLOOP
 '_EOF_'
     # << happy emacs
 
     gensub2 genome.list single template jobList
     para create jobList
     para try, check, push, check, ...
 # Completed: 720 of 720 jobs
 # CPU time in finished jobs:      55325s     922.09m    15.37h    0.64d  0.002 y
 # IO & Wait Time:                  2063s      34.38m     0.57h    0.02d  0.000 y
 # Average job time:                  80s       1.33m     0.02h    0.00d
 # Longest finished job:             132s       2.20m     0.04h    0.00d
 # Submission to last job:         65396s    1089.93m    18.17h    0.76d
 
     # cat and lift the results into single files
     ssh kkstore06
     cd /cluster/data/calJac1/bed/genscan
     cat gtf/c_*.gtf > genscan.gtf
     cat subopt/c_*.bed > genscanSubopt.bed
     cat pep/c_*.pep > genscan.pep
 
     # Load into the database as so:
     ssh hgwdev
     cd /cluster/data/calJac1/bed/genscan
     ldHgGene calJac1 -gtf genscan genscan.gtf
     #	Read 64005 transcripts in 344791 lines in 1 files
     #	64005 groups 23602 seqs 1 sources 1 feature types
     #	64005 gene predictions
 
     hgPepPred calJac1 generic genscanPep genscan.pep
     hgLoadBed calJac1 genscanSubopt genscanSubopt.bed
     #	Loaded 576960 elements of size 6
 
     #	check the numbers
     time nice -n +19 featureBits calJac1 genscan
     #	59205113 bases of 2929139385 (2.021%) in intersection
     #	the next closest genome with a genscan track
     time nice -n +19 featureBits panTro2 genscan
     #	53758386 bases of 2909485072 (1.848%) in intersection
     time nice -n +19 featureBits mm9 genscan
     #	55293837 bases of 2620346127 (2.110%) in intersection
 
 ############################################################################
 # GENBANK AUTO UPDATE (DONE - 2007-11-21 - Hiram)
     #	Create a lift file as per the procedures for Chimp from the AGP:
     ssh kolossus
     cd /cluster/data/calJac1
     # MAKE 11.OOC FILE FOR BLAT
     blat calJac1.2bit \
 	 /dev/null /dev/null -tileSize=11 -makeOoc=11.ooc -repMatch=1024
     #	Wrote 34303 overused 11-mers to 11.ooc
 
     # align with latest genbank process.
     ssh hgwdev
     cd ~/kent/src/hg/makeDb/genbank
     cvsup
     # edit etc/genbank.conf to add calJac1 just after panTro2
     # calJac1
 # Marmoset
 calJac1.serverGenome = /cluster/data/calJac1/calJac1.2bit
 calJac1.clusterGenome = /scratch/data/calJac1/calJac1.2bit
 calJac1.ooc = /cluster/data/calJac1/calJac1/11.ooc
 calJac1.lift = no
 calJac1.refseq.mrna.native.pslCDnaFilter  = ${ordered.refseq.mrna.native.pslCDnaFilter}
 calJac1.refseq.mrna.xeno.pslCDnaFilter    = ${ordered.refseq.mrna.xeno.pslCDnaFilter}
 calJac1.genbank.mrna.native.pslCDnaFilter = ${ordered.genbank.mrna.native.pslCDnaFilter}
 calJac1.genbank.mrna.xeno.pslCDnaFilter   = ${ordered.genbank.mrna.xeno.pslCDnaFilter}
 calJac1.genbank.est.native.pslCDnaFilter  = ${ordered.genbank.est.native.pslCDnaFilter}
 calJac1.genbank.est.xeno.pslCDnaFilter    = ${ordered.genbank.est.xeno.pslCDnaFilter}
 calJac1.downloadDir = calJac1
 calJac1.genbank.est.xeno.load = no
 calJac1.refseq.mrna.native.load  = yes
 calJac1.refseq.mrna.xeno.load = yes
 calJac1.refseq.mrna.xeno.loadDesc  = yes
 
     cvs ci -m "Added calJac1." etc/genbank.conf
     # update /cluster/data/genbank/:
     make etc-update
 
     # Edit src/lib/gbGenome.c to add new species.  With these two lines:
     #	static char *calJacNames[] = {"Callithrix jacchus", NULL};
     #	{"calJac", calJacNames},
 
     cvs ci -m "Added Callithrix jacchus (Marmoset)." src/lib/gbGenome.c
     make install-server
 
     ssh genbank
     screen  # control this business with a screen since it takes a while
     cd /cluster/data/genbank
 
     # This is a call to a script that will push our jobs out to the cluster
     # since it's a big job.  
     time nice -n +19 bin/gbAlignStep -initial calJac1 &
     # logFile: var/build/logs/2007.11.20-11:31:54.calJac1.initalign.log
     #	real    607m38.957s
 
     # load database when finished
     ssh hgwdev
     cd /cluster/data/genbank
     time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad calJac1
     #	logFile: var/dbload/hgwdev/logs/2007.11.25-10:26:23.dbload.log
     #	real    26m30.926s
 
     # enable daily alignment and update of hgwdev (DONE - 2007-11-21 - Hiram)
     cd ~/kent/src/hg/makeDb/genbank
     cvsup
     # add calJac1 to:
         etc/align.dbs
         etc/hgwdev.dbs
     cvs ci -m "Added calJac1." etc/align.dbs etc/hgwdev.dbs
     make etc-update
 
 #########################################################################
 # Blastz Platypus ornAna1 (DONE - 2007-11-14 - Hiram)
 #	this was done a second time, see ornAna1.txt for the second run
 #	since this run produced a null result for some unknown reason.
     ssh kkstore06
     screen # use screen to control this job
     mkdir /cluster/data/calJac1/bed/blastzOrnAna1.2007-11-14
     cd /cluster/data/calJac1/bed/blastzOrnAna1.2007-11-14
 
     cat << '_EOF_' > DEF
 # Marmoset vs. platypus
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 BLASTZ_M=50
 
 # TARGET: Marmoset calJac1
 SEQ1_DIR=/cluster/bluearc/scratch/data/calJac1/calJac1.2bit
 SEQ1_LEN=/cluster/data/calJac1/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ2_LIMIT=400
 SEQ1_LAP=0
 
 # QUERY: Platypus ornAna1
 SEQ2_DIR=/cluster/bluearc/scratch/data/ornAna1/ornAna1.2bit
 SEQ2_LEN=/cluster/data/ornAna1/chrom.sizes
 SEQ2_CHUNK=40000000
 SEQ2_LIMIT=400
 SEQ2_LAP=0
 
 BASE=/cluster/data/calJac1/bed/blastzOrnAna1.2007-11-14
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl DEF -chainMinScore=5000 \
 	-chainLinearGap=loose -bigClusterHub=pk -verbose=2 > do.log 2>&1 &
     #	real    1927m20.962s - to the first pk crash
     #	this was a tough job to get finished.  Several pk crashes,
     #	problems with garbage in the para.results file, and so forth.
     #	But, it did finish as of Monday afternoon 2007-11-19
 # Completed: 899536 of 900180 jobs
 # Crashed: 644 jobs
 # CPU time in finished jobs:  131663141s 2194385.68m 36573.09h 1523.88d  4.175 y
 # IO & Wait Time:              12592457s  209874.29m  3497.90h  145.75d  0.399 y
 # Average job time:                 160s       2.67m     0.04h    0.00d
 # Longest finished job:            1795s      29.92m     0.50h    0.02d
 # Submission to last job:        440290s    7338.17m   122.30h    5.10d
     #	despite the '644 jobs' crashed, they are actually done and all results
     #	are complete
 
     #	continuing:
     time nice -n +19 doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
 	-chainMinScore=5000 -chainLinearGap=loose \
 	-continue=cat -bigClusterHub=pk > cat.log 2>&1 &
     #	real    31m45.069s
     cat fb.calJac1.chainOrnAna1Link.txt
     #	0 bases of 2929139385 (0.000%) in intersection
     #	This error was fixed in the script.  It failed on a command in one of
     #	the ssh scripts that happened to run under the bash shell which did
     #	not detect the error in a set of piped commands.  It was a shell
     #	wild-card expansion problem, changed to a 'find' to avoid that.
 
 ###########################################################################
 ## BLASTZ Mouse Mm9 swap (DONE - 2007-09-07 - Hiram)
     ssh kkstore06
     #	use a screen to control this job
     screen
     #	the original alignment
     cd /cluster/data/mm9/bed/blastzCalJac1.2007-09-06
     cat fb.mm9.chainCalJac1Link.txt
     #	863961573 bases of 2620346127 (32.971%) in intersection
 
     #	the swap
     mkdir /cluster/data/calJac1/bed/blastz.mm9.swap
     cd /cluster/data/calJac1/bed/blastz.mm9.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/cluster/data/mm9/bed/blastzCalJac1.2007-09-06/DEF \
 	-stop=load -chainMinScore=3000 \
 	-swap -chainLinearGap=medium -bigClusterHub=pk > swap.log 2>&1 &
     #	real    217m10.835s
     cat fb.calJac1.chainMm9Link.txt
     #	887586922 bases of 2929139385 (30.302%) in intersection
     time nice -n +19 $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl \
 	-verbose=2 /cluster/data/mm9/bed/blastzCalJac1.2007-09-06/DEF \
 	-continue=download -chainMinScore=3000 \
 	-swap -chainLinearGap=medium -bigClusterHub=pk > download.log 2>&1 &
     #	real    1m9.876s
 
     #	run the syntenic net for multiple alignment (DONE - 2007-12-14 - Hiram)
     time nice -n +19 $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl \
 	-verbose=2 /cluster/data/mm9/bed/blastzCalJac1.2007-09-06/DEF \
 	-syntenicNet -continue=syntenicNet -chainMinScore=3000 \
 	-swap -chainLinearGap=medium -bigClusterHub=pk > syntenicNet.log 2>&1 &
     #	real    7m23.683s
     #	failed during a chainSplit:
     #	Can't open chain/Contig1203.chain to append: Too many open files
 
     #	create reciprocal best chains/nets for 9-way multiple alignment
     ssh hgwdev
     cd /cluster/data/calJac1/bed/blastz.mm9.swap
     time nice -n +19 /cluster/bin/scripts/doRecipBest.pl calJac1 mm9 \
 	> rbest.log 2>&1 &
     time nice -n +19 /cluster/bin/scripts/doRecipBest.pl calJac1 mm9 \
 	-continue=download > rbest.download.log 2>&1 &
 
 ###########################################################################
 # Blastz swap Chimp panTro2 (DONE - 2007-11-14 - Hiram)
     ssh kkstore06
     screen # use screen to manage this job
     cd /cluster/data/panTro2/bed/blastzCalJac1.2007-11-13
     cat fb.panTro2.chainCalJac1Link.txt
     #	2220169777 bases of 2909485072 (76.308%) in intersection
 
     mkdir /cluster/data/calJac1/bed/blastz.panTro2.swap
     cd /cluster/data/calJac1/bed/blastz.panTro2.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/cluster/data/panTro2/bed/blastzCalJac1.2007-11-13/DEF \
 	-chainMinScore=3000 -chainLinearGap=medium \
 	-swap -bigClusterHub=pk > swap.log 2>&1 &
     #	real    320m14.293s
     cat fb.calJac1.chainPanTro2Link.txt
     #	2264115411 bases of 2929139385 (77.296%) in intersection
 
     #	create reciprocal best chains/nets for 9-way maf alignments
     ssh hgwdev
     cd /cluster/data/calJac1/bed/blastz.panTro2.swap
     time nice -n +19 /cluster/bin/scripts/doRecipBest.pl calJac1 panTro2 \
 	> rbest.log 2>&1 &
 
 ###########################################################################
 # SWAP BLASTZ Orangutan ponAbe2 (DONE - 2007-11-29 - Hiram)
     #	primary blastz result
     cd /cluster/data/ponAbe2/bed/blastzCalJac1.2007-11-18
     cat fb.ponAbe2.chainCalJac1Link.txt
     #	2310720863 bases of 3093572278 (74.694%) in intersection
 
     #	and for the swap
     ssh kkstore02
     mkdir /cluster/data/calJac1/bed/blastz.ponAbe2.swap
     cd /cluster/data/calJac1/bed/blastz.ponAbe2.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/cluster/data/ponAbe2/bed/blastzCalJac1.2007-11-18/DEF \
 	-chainMinScore=3000 -chainLinearGap=medium \
 	-swap -bigClusterHub=kk > swap.log 2>&1 &
     #	real    341m54.548s
     cat fb.calJac1.chainPonAbe2Link.txt
     #	2253236255 bases of 2929139385 (76.925%) in intersection
 
     #	reciprocal best for 9-way maf alignments
     ssh hgwdev
     cd /cluster/data/calJac1/bed/blastz.ponAbe2.swap
     time nice -n +19 /cluster/bin/scripts/doRecipBest.pl calJac1 ponAbe2 \
 	> rbest.log 2>&1 &
     #	real    96m17.285s
 
 ###########################################################################
 # SWAP BLASTZ Dog canFam2 (DONE - 2007-11-30 - Hiram)
     # primary blastz result
     cd /cluster/data/canFam2/bed/blastzCalJac1.2007-11-28
     cat fb.canFam2.chainCalJac1Link.txt
     #	1369690756 bases of 2384996543 (57.429%) in intersection
 
     mkdir /cluster/data/calJac1/bed/blastz.canFam2.swap
     cd /cluster/data/calJac1/bed/blastz.canFam2.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/cluster/data/canFam2/bed/blastzCalJac1.2007-11-28/DEF \
 	-chainMinScore=3000 -chainLinearGap=medium \
 	-swap -bigClusterHub=kk > swap.log 2>&1 &
     #	encountered difficulties with /scratch/data/ on kolossus
     #	had to finish the netChains.csh script manually, then continuing:
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/cluster/data/canFam2/bed/blastzCalJac1.2007-11-28/DEF \
 	-continue=load -chainMinScore=3000 -chainLinearGap=medium \
 	-swap -bigClusterHub=kk > load.log 2>&1 &
     #	real    56m44.375s
 
     cat fb.calJac1.chainCanFam2Link.txt
     #	1451345669 bases of 2929139385 (49.549%) in intersection
 
     #	reciprocal best for 9-way maf alignments
     ssh hgwdev
     #	expects blastz.canFam2 to exist
     cd /cluster/data/calJac1/bed
     ln -s blastz.canFam2.swap blastz.canFam2
     cd /cluster/data/calJac1/bed/blastz.canFam2
     time nice -n +19 /cluster/bin/scripts/doRecipBest.pl calJac1 canFam2 \
 	> rbest.log 2>&1 &
     #	real    70m45.324s
 
 ###########################################################################
 # SWAP BLASTZ Rhesus rheMac2 (DONE - 2007-11-18 - Hiram)
     #	primary blastz result
     cd /cluster/data/rheMac2/bed/blastzCalJac1.2007-11-16
     cat fb.rheMac2.chainCalJac1Link.txt
     #	2055107003 bases of 2646704109 (77.648%) in intersection
 
     #	and for the swap
     mkdir /cluster/data/calJac1/bed/blastz.rheMac2.swap
     cd /cluster/data/calJac1/bed/blastz.rheMac2.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/cluster/data/rheMac2/bed/blastzCalJac1.2007-11-16/DEF \
 	-swap -chainMinScore=3000 -chainLinearGap=medium \
 	-bigClusterHub=kk > swap.log 2>&1 &
     #	real    349m36.073s
     cat fb.calJac1.chainRheMac2Link.txt
     #	2191300051 bases of 2929139385 (74.810%) in intersection
 
     #	reciprocal best for 9-way maf alignments
     ssh hgwdev
     #	expects blastz.rheMac2 to exist
     cd /cluster/data/calJac1/bed
     ln -s blastz.rheMac2.swap blastz.rheMac2
     cd /cluster/data/calJac1/bed/blastz.rheMac2
     time nice -n +19 /cluster/bin/scripts/doRecipBest.pl calJac1 rheMac2 \
 	> rbest.log 2>&1 &
     #	real    87m53.651s
 
 #########################################################################
 ## 9-Way Multiz (DONE - 2007-12-21 - Hiram)
 ##
     ssh hgwdev
     mkdir /cluster/data/calJac1/bed/multiz9way
     cd /cluster/data/calJac1/bed/multiz9way
     #	take the 30-way tree from mm9 and eliminate genomes not in
     #	this alignment
     #	rearrange to get calJac1 on the top of the graph
     #	paste this tree into the on-line phyloGif tool:
     #	http://genome.ucsc.edu/cgi-bin/phyloGif
     #	to create the image for the tree diagram
 
     #	select the 9 organisms from the 30-way recently done on mouse mm9
     /cluster/bin/phast/tree_doctor \
 	--prune-all-but Human_hg18,Mouse_mm9,Chimp_panTro2,Marmoset_calJac1,Rhesus_rheMac2,Orangutan_ponAbe2,Dog_canFam2,Platypus_ornAna1,Opossum_monDom4 \
 	/cluster/data/mm9/bed/multiz30way/mm9OnTop.fullNames.nh \
 	> 9-way.fullNames.nh
 
     #	looks something like this:
 ((((Mouse_mm9:0.325818,
 ((((Human_hg18:0.005873,Chimp_panTro2:0.007668):0.013037,
      Orangutan_ponAbe2:0.020000):0.013037,
        Rhesus_rheMac2:0.031973):0.036500,
 Marmoset_calJac1:0.070000):0.058454):0.019763,
 Dog_canFam2:0.187963):0.243550,
 Opossum_monDom4:0.320721):0.088647,Platypus_ornAna1:0.488110);
 
 (((
 (Mouse_mm9:0.325818,
 (Marmoset_calJac1:0.070000,
  (((Human_hg18:0.005873,Chimp_panTro2:0.007668):0.013037,
      Orangutan_ponAbe2:0.020000):0.013037,
        Rhesus_rheMac2:0.031973):0.036500):0.058454):0.019763,
 Dog_canFam2:0.187963):0.243550,
 Opossum_monDom4:0.320721):0.088647,Platypus_ornAna1:0.488110);
 
     #	rearrange to get Marmoset at the top:
     # this leaves us with:
     cat << '_EOF_' > calJac1.9-way.nh
 (((((Marmoset_calJac1:0.070000,
      (((Human_hg18:0.005873,Chimp_panTro2:0.007668):0.013037,
     Orangutan_ponAbe2:0.020000):0.013037,
    Rhesus_rheMac2:0.031973):0.036500):0.058454,
   Mouse_mm9:0.325818):0.019763,
  Dog_canFam2:0.187963):0.243550,
 Opossum_monDom4:0.320721):0.088647,Platypus_ornAna1:0.488110);
 '_EOF_'
     #	<< happy emacs
 
     #	create a species list from that file:
     sed -e 's/[()]//g; s/ /\n/g; s/,/\n/g' calJac1.9-way.nh \
         | sed -e "s/[ \t]*//g; /^[ \t]$/d; /^$/d" | sort -u \
         | sed -e "s/.*_//; s/:.*//" | sort > species.list
     #	verify that has 9 db names in it
     # create a stripped down nh file for use in autoMZ run
     echo \
 `sed 's/[a-zA-Z0-9]*_//g; s/:0.[0-9]*//g; s/[,;]/ /g' calJac1.9-way.nh \
 	| sed -e "s/  / /g"` > tree.9.nh
     #	that looks like, as a single line:
 # (((((calJac1(((hg18 panTro2)ponAbe2)rheMac2)) mm9) canFam2) monDom4) ornAna1)
 
     # verify all blastz's exists
     cat << '_EOF_' > listMafs.csh
 #!/bin/csh -fe
 cd /cluster/data/calJac1/bed/multiz9way
 foreach db (`grep -v calJac1 species.list`)
     set bdir = /cluster/data/calJac1/bed/blastz.$db
     if (-e $bdir/mafRBestNet/calJac1.$db.rbest.maf.gz) then
 	echo "$db mafRBestNet"
     else if (-e $bdir/mafSynNet/calJac1.$db.net.maf.gz) then
 	echo "$db mafSynNet"
     else if (-e $bdir/mafNet/calJac1.$db.net.maf.gz) then
 	echo "$db mafNet"
     else
 	echo "$db mafs not found"
     endif
 end
 '_EOF_'
     # << happy emacs
     chmod +x ./listMafs.csh
     #	see what it says; calJac1 itself is excluded from the loop, so
     #	"mafs not found" should not show up for any of the other species
     ./listMafs.csh
 # canFam2 mafRBestNet
 # hg18 mafRBestNet
 # mm9 mafRBestNet
 # monDom4 mafNet
 # ornAna1 mafNet
 # panTro2 mafRBestNet
 # ponAbe2 mafRBestNet
 # rheMac2 mafRBestNet
 
 
     /cluster/bin/phast/all_dists calJac1.9-way.nh > 9way.distances.txt
     grep -i caljac 9way.distances.txt | sort -k3,3n
 Marmoset_calJac1        Human_hg18      0.138447
 Marmoset_calJac1        Rhesus_rheMac2  0.138473
 Marmoset_calJac1        Orangutan_ponAbe2       0.139537
 Marmoset_calJac1        Chimp_panTro2   0.140242
 Marmoset_calJac1        Dog_canFam2     0.336180
 Marmoset_calJac1        Mouse_mm9       0.454272
 Marmoset_calJac1        Opossum_monDom4 0.712488
 Marmoset_calJac1        Platypus_ornAna1        0.968524
 
     #	use the calculated
     #	distances in the table below to order the organisms and check
     #	the button order on the browser.  Zebrafish ends up before
     #	tetraodon and fugu on the browser despite its distance.
     #	And if you can fill in the table below entirely, you have
     #	succeeded in finishing all the alignments required.
     #
 #                         featureBits chainLink measures
 #                                       chainCalJac1Link   chain   linearGap
 #    distance                     on CalJac1    on other   minScore
 #  1  0.138447 Human_hg18         (% 82.846)   (% 78.351)   3000     medium
 #  2  0.138473 Rhesus_rheMac2     (% 74.810)   (% 77.648)   3000     medium
 #  3  0.139537 Orangutan_ponAbe2  (% 76.925)   (% 74.694)   3000     medium
 #  4  0.140242 Chimp_panTro2      (% 77.296)   (% 76.308)   3000     medium
 #  5  0.336180 Dog_canFam2        (% 49.549)   (% 57.429)   3000     medium
 #  6  0.454272 Mouse_mm9          (% 30.302)   (% 32.971)   3000     medium
 #  7  0.712488 Opossum_monDom4    (% 13.357)   (% 11.050)   5000     loose
 #  8  0.968524 Platypus_ornAna1   (%  7.221)   (% 10.619)   5000     loose
 
     # copy net mafs to cluster-friendly storage, splitting chroms
     mkdir mafLinks
     cd mafLinks
     # hint: obtained these links by altering listMafs.csh above
     #	add an echo statement to output these commands
     ln -s ../../blastz.canFam2/mafRBestNet/calJac1.canFam2.rbest.maf.gz \
 	mafLinks/canFam2.maf.gz
     ln -s ../../blastz.hg18/mafRBestNet/calJac1.hg18.rbest.maf.gz \
 	mafLinks/hg18.maf.gz
     ln -s ../../blastz.mm9/mafRBestNet/calJac1.mm9.rbest.maf.gz \
 	mafLinks/mm9.maf.gz
     ln -s ../../blastz.monDom4/mafNet/calJac1.monDom4.net.maf.gz \
 	mafLinks/monDom4.maf.gz
     ln -s ../../blastz.ornAna1/mafNet/calJac1.ornAna1.net.maf.gz \
 	mafLinks/ornAna1.maf.gz
     ln -s ../../blastz.panTro2/mafRBestNet/calJac1.panTro2.rbest.maf.gz \
 	mafLinks/panTro2.maf.gz
     ln -s ../../blastz.ponAbe2/mafRBestNet/calJac1.ponAbe2.rbest.maf.gz \
 	mafLinks/ponAbe2.maf.gz
     ln -s ../../blastz.rheMac2/mafRBestNet/calJac1.rheMac2.rbest.maf.gz \
 	mafLinks/rheMac2.maf.gz
     #	need to split these things up by Contig number for efficient kluster run
     ssh kkstore06
     cd /cluster/data/calJac1/bed/multiz9way/mafLinks
     mkdir -p /san/sanvol1/scratch/calJac1/multiz9way/contigMaf
     cd /scratch/tmp
     #	split each species' pairwise maf by target Contig on local scratch disk,
     #	then copy the pieces to the san and remove the scratch copy
     for sp in $(grep -v calJac1 /cluster/data/calJac1/bed/multiz9way/species.list); do
     work=/scratch/tmp/${sp}
     mkdir ${work}
     cd ${work}
     mafSplit -verbose=2 /dev/null -byTarget -useSequenceName Contig \
 	/cluster/data/calJac1/bed/multiz9way/mafLinks/${sp}.maf.gz -outDirDepth=2
     rsync -a --progress ./ \
 	/san/sanvol1/scratch/calJac1/multiz9way/contigMaf/${sp}
     cd /scratch/tmp
     rm -fr ${sp}
 done
     #	create a run-time list of contigs to operate on, not all contigs
     #	exist in all alignments, but we want all contig names used in any
     #	alignment:
     cd /san/sanvol1/scratch/calJac1/multiz9way/contigMaf
     for D in *
 do
     cd "${D}"
     find . -type f
     cd ..
 done | sort -u > /tmp/9-way.contig.list
     wc -l /tmp/9-way.contig.list
     #	36707 /tmp/9-way.contig.list
 
     # ready for the multiz run
     ssh pk
     mkdir /cluster/data/calJac1/bed/multiz9way/splitRun
     cd /cluster/data/calJac1/bed/multiz9way/splitRun
     scp -p kkstore06:/tmp/9-way.contig.list .
     mkdir -p maf run
     cd run
     mkdir penn
     # use latest penn utilities
     P=/cluster/bin/penn/multiz.v11.2007-03-19/multiz-tba
     cp -p $P/{autoMZ,multiz,maf_project} penn
 
     #	set the db and pairs directories here
     #	autoMultiz.csh runs autoMZ for one contig.  Arguments:
     #	  $1 - subdirectory below each species directory in contigMaf
     #	  $2 - contig name; the input is $2.maf or $2.maf.gz
     #	  $3 - result maf path; its directory is created when missing
     #	The script works in /scratch/tmp/$db/multiz.$2: it copies tree.9.nh
     #	and species.list there, stages one $db.<species>.sing.maf for every
     #	species other than $db (only a maf header line when no pairwise file
     #	exists), runs autoMZ, copies $2.maf to the result path and then
     #	removes the scratch directory.
     cat > autoMultiz.csh << '_EOF_'
 #!/bin/csh -ef
 set db = calJac1
 set subdir = $1
 set c = $2
 set result = $3
 set resultDir = $result:h
 set run = `pwd`
 set tmp = /scratch/tmp/$db/multiz.$c
 set pairs = /san/sanvol1/scratch/$db/multiz9way/contigMaf
 rm -fr $tmp
 mkdir -p $tmp
 mkdir -p $resultDir
 cp ../../tree.9.nh ../../species.list $tmp
 pushd $tmp
 foreach s (`grep -v $db species.list`)
     set in = $pairs/$s/$subdir/$c.maf
     set out = $db.$s.sing.maf
     if (-e $in.gz) then
 	zcat $in.gz > $out
     else if (-e $in) then
 	cp $in $out
     else
 	echo "##maf version=1 scoring=autoMZ" > $out
     endif
 end
 set path = ($run/penn $path); rehash
 $run/penn/autoMZ + T=$tmp E=$db "`cat tree.9.nh`" $db.*.sing.maf $c.maf
 popd
 cp $tmp/$c.maf $result
 rm -fr $tmp
 rmdir --ignore-fail-on-non-empty /scratch/tmp/$db
 '_EOF_'
 # << happy emacs
     chmod +x autoMultiz.csh
 
     cat  << '_EOF_' > template
 #LOOP
 ./autoMultiz.csh $(dir1) $(root1) {check out line+ /cluster/data/calJac1/bed/multiz9way/splitRun/maf/$(dir1)/$(root1).maf}
 #ENDLOOP
 '_EOF_'
     # << emacs
     #	it is a single job since everything is in the same maf file
     time nice -n +19 ./autoMultiz.csh calJac1
     #	XXX - running 2007-12-21 16:32 on mkr0u3
 
     sed -e "s/^\.\///" ../9-way.contig.list \
 	| gensub2 stdin single template jobList
 
     para create jobList
     para try ... check ... push ... etc
 # Completed: 36707 of 36707 jobs
 # CPU time in finished jobs:     244659s    4077.65m    67.96h    2.83d  0.008 y
 # IO & Wait Time:                115457s    1924.29m    32.07h    1.34d  0.004 y
 # Average job time:                  10s       0.16m     0.00h    0.00d
 # Longest finished job:             249s       4.15m     0.07h    0.00d
 # Submission to last job:          2454s      40.90m     0.68h    0.03d
 
     # put the split maf results back together into a single maf file
     #	eliminate duplicate comments
     ssh kkstore06
     cd /cluster/data/calJac1/bed/multiz9way
     mkdir togetherMaf
     grep "^##maf version" splitRun/maf/0/0/Contig00000.maf \
 	| sort -u > togetherMaf/calJac1.9way.maf
     for F in `find ./splitRun/maf -type f -depth`
 do
     grep -h "^#" "${F}" | egrep -v "maf version=1|eof maf" \
 	| sed -e "s#/_MZ_[^ ]* # #g; s#__[0-9]##g"
 done | sort -u >> togetherMaf/calJac1.9way.maf
     for F in `find ./splitRun/maf -type f -depth`
 do
     grep -v -h "^#" "${F}"
 done >> togetherMaf/calJac1.9way.maf
     grep "^##eof maf" splitRun/maf/0/0/Contig00000.maf \
 	| sort -u >> togetherMaf/calJac1.9way.maf
 
     # load tables for a look
     ssh hgwdev
     mkdir -p /gbdb/calJac1/multiz9way/maf
     ln -s /cluster/data/calJac1/bed/multiz9way/togetherMaf/*.maf \
                 /gbdb/calJac1/multiz9way/maf/multiz9way.maf
     # this generates an immense multiz9way.tab file in the directory
     #	where it is running.  Best to run this over in scratch.
     cd /scratch/tmp
     time nice -n +19 hgLoadMaf \
 	-pathPrefix=/gbdb/calJac1/multiz9way/maf calJac1 multiz9way
     #	real    5m6.330s
     #	Loaded 8484286 mafs in 1 files from /gbdb/calJac1/multiz9way/maf
     # load summary table
     time nice -n +19 cat /gbdb/calJac1/multiz9way/maf/*.maf \
 	| hgLoadMafSummary calJac1 -minSize=30000 -mergeGap=1500 \
 	 -maxSize=200000  multiz9waySummary stdin
     #	real    5m58.150s
     #	Created 121083 summary blocks from 3410157 components
     #	and 693943 mafs from stdin
 
     # Gap Annotation
     # prepare bed files with gap info
     ssh kkstore02
     mkdir /cluster/data/calJac1/bed/multiz9way/anno
     cd /cluster/data/calJac1/bed/multiz9way/anno
     mkdir maf run
 
     #	these actually already all exist from previous multiple alignments
     for DB in `cat ../species.list`
 do
     CDIR="/cluster/data/${DB}"
     if [ ! -f ${CDIR}/${DB}.N.bed ]; then
 	echo "creating ${DB}.N.bed"
 	echo twoBitInfo -nBed ${CDIR}/${DB}.2bit ${CDIR}/${DB}.N.bed
     else
 	ls -og ${CDIR}/${DB}.N.bed
     fi
 done
 
     cd run
     rm -f nBeds sizes
     for DB in `grep -v calJac1 ../../species.list`
 do
     echo "${DB} "
     ln -s  /cluster/data/${DB}/${DB}.N.bed ${DB}.bed
     echo ${DB}.bed  >> nBeds
     ln -s  /cluster/data/${DB}/chrom.sizes ${DB}.len
     echo ${DB}.len  >> sizes
 done
 
     ssh memk
     #	temporarily copy the calJac1.9way.maf file onto the memk
     #	nodes /scratch/data/calJac1/maf/ directory
     #	The assembled alignment is the uncompressed togetherMaf/calJac1.9way.maf
     #	(the name the runOne script reads from /scratch/data/calJac1/maf/),
     #	so copy that file; no .gz version is created by this procedure.
     for R in 0 1 2 3 4 5 6 7
 do
     ssh mkr0u${R} rsync -a --progress \
 /cluster/data/calJac1/bed/multiz9way/togetherMaf/calJac1.9way.maf \
 	/scratch/data/calJac1/maf/
 done
     mkdir /cluster/data/calJac1/bed/multiz9way/anno/splitMaf
     #	need to split up the single maf file into individual
     #	per-scaffold maf files to run annotation on
     cd /cluster/data/calJac1/bed/multiz9way/anno/splitMaf
     #	create bed files to list approximately 1553 scaffolds in
     #	a single list, approximately 33 lists
     #	mkBedLists.pl reads /cluster/data/calJac1/chrom.sizes and writes
     #	file_N.bed lists, one four-column line (name, 0, size, name) per
     #	sequence, switching to a new list file at every 1553rd input line.
     cat << '_EOF_' > mkBedLists.pl
 #!/usr/bin/env perl
 
 use strict;
 use warnings;
 
 my $bedCount = 0;
 my $i = 0;
 
 my $bedFile = sprintf("file_%d.bed", $bedCount);
 
 open (BF,">$bedFile") or die "can not write to $bedFile $!";
 
 open (FH,"</cluster/data/calJac1/chrom.sizes") or
         die "can not read /cluster/data/calJac1/chrom.sizes $!";
 while (my $line = <FH>) {
     chomp $line;
     # every 1553rd line: echo it as a progress marker and start the next list
     if ( (($i + 1) % 1553) == 0 )  {
         printf "%s\n", $line;
         close (BF);
         ++$bedCount;
         $bedFile = sprintf("file_%d.bed", $bedCount);
         open (BF,">$bedFile") or die "can not write to $bedFile $!";
     }
     ++$i;
     my ($chr, $size) = split('\s+',$line);
     printf BF "%s\t0\t%d\t%s\n", $chr, $size, $chr;
 }
 close (FH);
 # close the output list handle (BF); the handle name BH was never opened
 close (BF);
 '_EOF_'
     # << happy emacs
     chmod +x mkBedLists.pl
     ./mkBedLists.pl
 
     #	now, run a mafsInRegion on each one of those lists
     #	runOne takes one list name without its .bed suffix ($1).  It runs
     #	mafsInRegion for the regions in $1.bed against the /scratch/data copy
     #	of calJac1.9way.maf, writing into /scratch/tmp/calJac1/$1, then
     #	rsyncs that directory back to ./$1 and removes the scratch copy.
     cat << '_EOF_' > runOne
 #!/bin/csh -fe
 set runDir = "/cluster/data/calJac1/bed/multiz9way/anno/splitMaf"
 set resultDir = $1
 set bedFile = $resultDir.bed
 mkdir -p $resultDir
 mkdir -p /scratch/tmp/calJac1/$resultDir
 pushd /scratch/tmp/calJac1/$resultDir
 mafsInRegion $runDir/$bedFile -outDir . \
         /scratch/data/calJac1/maf/calJac1.9way.maf
 popd
 rsync -q -a /scratch/tmp/calJac1/$resultDir/ ./$resultDir/
 rm -fr /scratch/tmp/calJac1/$resultDir
 rmdir --ignore-fail-on-non-empty /scratch/tmp/calJac1
 '_EOF_'
     # << happy emacs
     chmod +x runOne
 
     cat << '_EOF_' > template
 #LOOP
 ./runOne $(root1)
 #ENDLOOP
 '_EOF_'
     # << happy emacs
 
     ls file*.bed > runList
     gensub2 runList single template jobList
     para create jobList
     para try ... check ... push ... etc
 # Completed: 33 of 33 jobs
 # CPU time in finished jobs:      11075s     184.58m     3.08h    0.13d  0.000 y
 # IO & Wait Time:                 22992s     383.20m     6.39h    0.27d  0.001 y
 # Average job time:                1032s      17.21m     0.29h    0.01d
 # Longest running job:                0s       0.00m     0.00h    0.00d
 # Longest finished job:            2019s      33.65m     0.56h    0.02d
 # Submission to last job:         22051s     367.52m     6.13h    0.26d
 
     cd /cluster/data/calJac1/bed/multiz9way/anno/run
 
     cat << '_EOF_' > doAnno.csh
 #!/bin/csh -ef
 set outDir = ../maf/$2
 set result = $3
 set input = $1
 mkdir -p $outDir
 cat $input | \
 nice mafAddIRows -nBeds=nBeds stdin /scratch/data/calJac1/calJac1.2bit $result
 '_EOF_'
     # << happy emacs
     chmod +x doAnno.csh
 
     cat << '_EOF_' > template
 #LOOP
 ./doAnno.csh $(path1) $(lastDir1) {check out line+ ../maf/$(lastDir1)/$(root1).maf}
 #ENDLOOP
 '_EOF_'
     # << happy emacs
 
     #	list every per-contig maf under ../splitMaf as input for gensub2
     find ../splitMaf -type f -name "*.maf" > maf.list
     gensub2 maf.list single template jobList
     para create jobList
     para try ... check ... push ... etc.
 # Completed: 36707 of 36707 jobs
 # CPU time in finished jobs:      94093s    1568.22m    26.14h    1.09d  0.003 y
 # IO & Wait Time:                 94674s    1577.90m    26.30h    1.10d  0.003 y
 # Average job time:                   5s       0.09m     0.00h    0.00d
 # Longest finished job:              12s       0.20m     0.00h    0.00d
 # Submission to last job:          6129s     102.15m     1.70h    0.07d
 
     ssh kkstore06
     cd /cluster/data/calJac1/bed/multiz9way/anno
     grep "^##maf version" maf/file_0/Contig0.maf \
 	| sort -u > calJac1.anno.9way.maf
     find ./maf -type f -depth -name "*.maf" | while read F
 do
     grep -v -h "^#" "${F}"
 done >> calJac1.anno.9way.maf
     echo "##eof maf" >> calJac1.anno.9way.maf
 
     ssh hgwdev
     cd /cluster/data/calJac1/bed/multiz9way/anno
     mkdir -p /gbdb/calJac1/multiz9way/anno
     ln -s `pwd`/calJac1.anno.9way.maf \
                 /gbdb/calJac1/multiz9way/anno/multiz9way.maf
     #	by loading this into the table multiz9way, it will replace the
     #	previously loaded table with the unannotated mafs
     #	huge temp files are made, do them on local disk
     cd /scratch/tmp
     time nice -n +19 hgLoadMaf -pathPrefix=/gbdb/calJac1/multiz9way/anno \
                 calJac1 multiz9way
     #	Loaded 9243378 mafs in 1 files from /gbdb/calJac1/multiz9way/anno
     #	real    5m39.367s
 
     #	normally filter this for chrom size > 1,000,000 and only load
     #	those chroms.  But this is a scaffold assembly, load everything:
     hgLoadMafSummary calJac1 -minSize=30000 -mergeGap=1500 \
 	-maxSize=200000  multiz9waySummary \
 	    /gbdb/calJac1/multiz9way/anno/multiz9way.maf
     #	Created 121083 summary blocks from 3410157 components and 749940 mafs
     #	from /gbdb/calJac1/multiz9way/anno/multiz9way.maf
 
     #	by loading this into the table multiz9waySummary, it will replace
     #	the previously loaded table with the unannotated mafs
     #	remove the multiz9way*.tab files in this /scratch/tmp directory
     rm multiz9way*.tab
     #	And, you can remove the previously loaded non-annotated maf file link:
     rm /gbdb/calJac1/multiz9way/maf/multiz9way.maf
     rmdir /gbdb/calJac1/multiz9way/maf
 
 ###########################################################################
 ## Annotate 9-way multiple alignment with gene annotations
 ##		(DONE - 2008-01-08 - Hiram)
     # Gene frames
     ## given previous survey done for 8-way alignment on Orangutan,
     ## try using the following tables for this gene annotation
     #	use knownGene for hg18, mm9
     #	use ensGene for monDom4, ornAna1, panTro2, rheMac2
     #	new try with xenoMrna for ponAbe2, canFam2 and calJac1
 
     ssh hgwdev
     mkdir /cluster/data/calJac1/bed/multiz9way/frames
     cd /cluster/data/calJac1/bed/multiz9way/frames
     mkdir genes
     # knownGene
     for DB in hg18 mm9
 do
     hgsql -N -e "select name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds from knownGene" ${DB} \
       | genePredSingleCover stdin stdout | gzip -2c \
         > /scratch/tmp/${DB}.tmp.gz
     mv /scratch/tmp/${DB}.tmp.gz genes/$DB.gp.gz
     echo "${DB} done"
 done
     # ensGene
     for DB in monDom4 ornAna1 panTro2 rheMac2
 do
     hgsql -N -e "select name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds from ensGene" ${DB} \
       | genePredSingleCover stdin stdout | gzip -2c \
         > /scratch/tmp/${DB}.tmp.gz
     mv /scratch/tmp/${DB}.tmp.gz genes/$DB.gp.gz
     echo "${DB} done"
 done
 
     #	use xenoMrna for ponAbe2, canFam2, calJac1
     #	loxAfr1 oryCun1 ponAbe2
     for DB in ponAbe2 canFam2 calJac1
 do
 tmpExt=`mktemp temp.XXXXXX`
 tmpMrnaCds=${DB}.mrna-cds.${tmpExt}
 tmpMrna=${DB}.mrna.${tmpExt}
 tmpCds=${DB}.cds.${tmpExt}
 hgsql -N -e 'select xenoMrna.qName,cds.name,xenoMrna.* \
 	   from xenoMrna,gbCdnaInfo,cds \
 	   where (xenoMrna.qName = gbCdnaInfo.acc) and \
 	     (gbCdnaInfo.cds != 0) and (gbCdnaInfo.cds = cds.id)' \
 $DB > ${tmpMrnaCds}
 cut -f 1-2  ${tmpMrnaCds} > ${tmpCds}
 cut -f 4-100  ${tmpMrnaCds} > ${tmpMrna}
 mrnaToGene -cdsFile=${tmpCds} -smallInsertSize=8 -quiet ${tmpMrna} stdout | \
 genePredSingleCover stdin stdout | gzip -2c > /scratch/tmp/$DB.tmp.gz
 rm ${tmpMrnaCds} ${tmpMrna} ${tmpCds}
 mv /scratch/tmp/$DB.tmp.gz genes/$DB.gp.gz
 rm -f $tmpExt
 echo "${DB} done"
 done
 
     ls -og genes
 # -rw-rw-r--  1 2697811 Jan  8 15:43 calJac1.gp.gz
 # -rw-rw-r--  1 2551552 Jan  8 15:41 canFam2.gp.gz
 # -rw-rw-r--  1 2008806 Jan  8 15:33 hg18.gp.gz
 # -rw-rw-r--  1 1965274 Jan  8 15:33 mm9.gp.gz
 # -rw-rw-r--  1 1751726 Jan  8 15:33 monDom4.gp.gz
 # -rw-rw-r--  1 1232719 Jan  8 15:33 ornAna1.gp.gz
 # -rw-rw-r--  1 1980696 Jan  8 15:33 panTro2.gp.gz
 # -rw-rw-r--  1 2703247 Jan  8 15:39 ponAbe2.gp.gz
 # -rw-rw-r--  1 1935916 Jan  8 15:33 rheMac2.gp.gz
 
     ssh kkstore06
     cd /cluster/data/calJac1/bed/multiz9way/frames
     #	anything to annotate is in a pair, e.g.: calJac1 genes/calJac1.gp.gz
     time (cat  ../anno/calJac1.anno.9way.maf | nice -n +19 genePredToMafFrames calJac1 stdin stdout calJac1 genes/calJac1.gp.gz hg18 genes/hg18.gp.gz mm9 genes/mm9.gp.gz rheMac2 genes/rheMac2.gp.gz ponAbe2 genes/ponAbe2.gp.gz panTro2 genes/panTro2.gp.gz canFam2 genes/canFam2.gp.gz monDom4 genes/monDom4.gp.gz ornAna1 genes/ornAna1.gp.gz | gzip > multiz9way.mafFrames.gz) > frames.log 2>&1
     # see what it looks like in terms of number of annotations per DB:
     zcat multiz9way.mafFrames.gz | cut -f4 | sort | uniq -c | sort -n
 # 206370 hg18
 # 208834 panTro2
 # 211731 rheMac2
 # 224988 calJac1
 # 225518 canFam2
 # 225632 mm9
 # 261163 ponAbe2
 # 417544 ornAna1
 # 462890 monDom4
 
     #	load the resulting file
     ssh hgwdev
     cd /cluster/data/calJac1/bed/multiz9way/frames
     time nice -n +19 hgLoadMafFrames calJac1 multiz9wayFrames \
 	multiz9way.mafFrames.gz
     #	real    0m38.282s
 
     #	enable the trackDb entries:
 # frames multiz9wayFrames
 # irows on
 #############################################################################
 # phastCons 9-way (DONE - 2007-10-16 - Hiram)
 
     # split 9way mafs into 10M chunks and generate sufficient statistics
     # files for phastCons
     ssh memk
     mkdir /cluster/data/calJac1/bed/multiz9way/msa.split
     cd /cluster/data/calJac1/bed/multiz9way/msa.split
     mkdir -p /san/sanvol1/scratch/calJac1/multiz9way/cons/ss
 
     cat << '_EOF_' > doSplit.csh
 #!/bin/csh -ef
 set MAFS = /cluster/data/calJac1/bed/multiz9way/anno/maf
 set WINDOWS = /san/sanvol1/scratch/calJac1/multiz9way/cons/ss
 pushd $WINDOWS
 set resultDir = $1
 set c = $2
 rm -fr $resultDir/$c
 mkdir -p $resultDir
 twoBitToFa -seq=$c /scratch/data/calJac1/calJac1.2bit /scratch/tmp/calJac1.$c.fa
 /cluster/bin/phast/$MACHTYPE/msa_split $MAFS/$resultDir/$c.maf -i MAF \
     -M /scratch/tmp/calJac1.$c.fa \
     -o SS -r $resultDir/$c -w 10000000,0 -I 1000 -B 5000
 rm -f /scratch/tmp/calJac1.$c.fa
 popd
 mkdir -p $resultDir
 date > $resultDir/$c.out
 '_EOF_'
     # << happy emacs
     chmod +x doSplit.csh
 
     cat << '_EOF_' > template
 #LOOP
 doSplit.csh $(dir1) $(root1) {check out line+ $(dir1)/$(root1).out}
 #ENDLOOP
 '_EOF_'
     # << happy emacs
 
     #	create list of maf files:
     (cd ../anno/maf; find . -type f) | sed -e "s#^./##" > maf.list
 
     gensub2 maf.list single template jobList
     para create jobList
     para try ... check ... etc
 # CPU time in finished jobs:       5250s      87.50m     1.46h    0.06d  0.000 y
 # IO & Wait Time:                 94631s    1577.18m    26.29h    1.10d  0.003 y
 # Average job time:                   3s       0.05m     0.00h    0.00d
 # Longest running job:                0s       0.00m     0.00h    0.00d
 # Longest finished job:               9s       0.15m     0.00h    0.00d
 # Submission to last job:          3697s      61.62m     1.03h    0.04d
 
     # take the cons and noncons trees from the mouse 30-way
 
     #	Estimates are not easy to make, probably more correctly,
     #	take the 30-way .mod file, and re-use it here.
     ssh hgwdev
     cd /cluster/data/calJac1/bed/multiz9way
     cp -p /cluster/data/mm9/bed/multiz30way/mm9.30way.mod .
 
     # Run phastCons
     #	This job is I/O intensive in its output files, thus it is all
     #	working over in /scratch/tmp/
     ssh memk
     mkdir -p /cluster/data/calJac1/bed/multiz9way/cons/run.cons
     cd /cluster/data/calJac1/bed/multiz9way/cons/run.cons
 
     #	there are going to be several different phastCons runs using
     #	this same script.  They trigger off of the current working directory
     #	$cwd:t which is the "grp" in this script.  It is one of:
     #	all gliers placentals
     #	Well, that's what it was when used in the Mm9 30-way,
     #	in this instance, there is only the directory "all"
 
     #	doPhast.csh runs phastCons on one .ss file.  Arguments:
     #	  $1 - sub-directory of the ss file below $san/ss
     #	  $2 - ss file name without the .ss suffix
     #	  $3 $4 $5 - values for --expected-length, --target-coverage and --rho
     #	The group name is the last component of the current directory.
     #	Work is done in /scratch/tmp/$2; the .pp and .bed results are then
     #	moved to $san/<group>/pp/$1 and $san/<group>/bed/$1.
     cat << '_EOF_' > doPhast.csh
 #!/bin/csh -fe
 set PHASTBIN = /cluster/bin/phast.2007-05-04
 set subDir = $1
 set f = $2
 set c = $2:r
 set len = $3
 set cov = $4
 set rho = $5
 set grp = $cwd:t
 set tmp = /scratch/tmp/$f
 set cons = /cluster/data/calJac1/bed/multiz9way/cons
 mkdir -p $tmp
 set san = /san/sanvol1/scratch/calJac1/multiz9way/cons
 # one cp per branch stages the ss file together with the model file(s)
 if (-s $cons/$grp/$grp.non-inf) then
   cp -p $san/ss/$subDir/$f.ss $cons/$grp/$grp.mod $cons/$grp/$grp.non-inf $tmp
 else
   cp -p $san/ss/$subDir/$f.ss $cons/$grp/$grp.mod $tmp
 endif
 pushd $tmp > /dev/null
 if (-s $grp.non-inf) then
   $PHASTBIN/phastCons $f.ss $grp.mod \
     --rho $rho --expected-length $len --target-coverage $cov --quiet \
     --not-informative `cat $grp.non-inf` \
     --seqname $c --idpref $c --most-conserved $f.bed --score > $f.pp
 else
   $PHASTBIN/phastCons $f.ss $grp.mod \
     --rho $rho --expected-length $len --target-coverage $cov --quiet \
     --seqname $c --idpref $c --most-conserved $f.bed --score > $f.pp
 endif
 popd > /dev/null
 mkdir -p $san/$grp/pp/$subDir $san/$grp/bed/$subDir
 sleep 4
 touch $san/$grp/pp/$subDir $san/$grp/bed/$subDir
 rm -f $san/$grp/pp/$subDir/$f.pp
 rm -f $san/$grp/bed/$subDir/$f.bed
 mv $tmp/$f.pp $san/$grp/pp/$subDir
 mv $tmp/$f.bed $san/$grp/bed/$subDir
 rm -fr $tmp
 '_EOF_'
     # << happy emacs
     chmod a+x doPhast.csh
 
     cat << '_EOF_' > template
 #LOOP
 ../doPhast.csh $(root1) $(file1) 45 .3 .31 {check out line+ /san/sanvol1/scratch/calJac1/multiz9way/cons/all/pp/$(root1)/$(file1).pp}
 #ENDLOOP
 '_EOF_'
     # << happy emacs
 
     # Create parasol batch and run it
     pushd /san/sanvol1/scratch/calJac1/multiz9way/cons
     find ./ss -type f -name "*.ss" | sed -e "s#^./##; s/.ss$//" \
 	> /cluster/data/calJac1/bed/multiz9way/cons/ss.list
     #	leave the san again: the relative paths used below
     #	(../../mm9.30way.mod, all/all.mod) are under /cluster/data
     popd
 
     # run for all species
     cd ..
     mkdir -p all run.cons/all
     cd all
     /cluster/bin/phast.new/tree_doctor ../../mm9.30way.mod \
 --prune-all-but=calJac1,hg18,panTro2,rheMac2,ponAbe2,mm9,canFam2,monDom4,ornAna1 \
 	> all.mod
     #	NOTE(review): the template and gensub2 lines that follow refer to
     #	../run.cons/doPhast.csh and ../ss.list, which resolve from cons/all,
     #	not from cons/run.cons/all - confirm where the batch was created
     cd ../run.cons/all
 
     #	root1 == chrom name, file1 == ss file name without .ss suffix
     # Create template file for "all" run
     cat << '_EOF_' > template
 #LOOP
 ../run.cons/doPhast.csh $(lastDir1) $(file1) 45 .3 .31 {check out line+ /san/sanvol1/scratch/calJac1/multiz9way/cons/all/pp/$(lastDir1)/$(file1).pp}
 #ENDLOOP
 '_EOF_'
     # << happy emacs
     gensub2 ../ss.list single template jobList
     para create jobList
     para try ... check ... push ... etc.
 # Completed: 28485 of 28485 jobs
 # CPU time in finished jobs:      14082s     234.70m     3.91h    0.16d  0.000 y
 # IO & Wait Time:                188534s    3142.23m    52.37h    2.18d  0.006 y
 # Average job time:                   7s       0.12m     0.00h    0.00d
 # Longest running job:                0s       0.00m     0.00h    0.00d
 # Longest finished job:              17s       0.28m     0.00h    0.00d
 # Submission to last job:         72420s    1207.00m    20.12h    0.84d
 
     # create Most Conserved track
     ssh kolossus
     cd /san/sanvol1/scratch/calJac1/multiz9way/cons/all
     find ./bed -type f -name "Contig*.bed" | xargs cat \
 	| sort -k1,1 -k2,2n | \
         awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", $1, $2, $3, $5, $5;}' | \
             /cluster/bin/scripts/lodToBedScore /dev/stdin > mostConserved.bed
     #	~ 3 minutes
     cp -p mostConserved.bed /cluster/data/calJac1/bed/multiz9way/cons/all
 
     # load into database
     ssh hgwdev
     cd /cluster/data/calJac1/bed/multiz9way/cons/all
     time nice -n +19 hgLoadBed calJac1 phastConsElements9way mostConserved.bed
     #	Loaded 1297014 elements of size 5
 
     # Try for 5% overall cov, and 70% CDS cov 
     #	We don't have any gene tracks to compare CDS coverage
     #	--rho .31 --expected-length 45 --target-coverage .3
     featureBits calJac1 phastConsElements9way
     #	141561229 bases of 2929139385 (4.833%) in intersection
 
     # Create merged posterior probability file and wiggle track data files
     # currently doesn't matter where this is performed, the san is the same
     # network distance from all machines.
     # sort by chromName, chromStart so that items are in numerical order 
     #  for wigEncode
     cd /san/sanvol1/scratch/calJac1/multiz9way/cons/all
     mkdir -p phastCons9wayScores
 
 # Concatenate the .pp score files of each pp/file_N directory (in ls -S
 # order) into one gzipped phastCons9wayScores/file_N.data.gz, visiting the
 # directories in numeric order of N.
 for ppDir in $(ls -1d pp/file* | sort -t_ -k2n); do
     listName=${ppDir#pp/}
     echo "${ppDir} > ${listName}.data.gz"
     ls -S ${ppDir}/*.pp | xargs cat | gzip > phastCons9wayScores/${listName}.data.gz
 done
 
     #	real    38m22.760s
     #	copy the phastCons9wayScores to:
 # /cluster/data/calJac1/bed/multiz9way/downloads/phastCons9way/phastConsScores
     #	for hgdownload downloads
 
     # Create merged posterior probability file and wiggle track data files
     # currently doesn't matter where this is performed, the san is the same
     # network distance from all machines.
     cd /san/sanvol1/scratch/calJac1/multiz9way/cons/all
     ls -1 phastCons9wayScores/*.data.gz | sort -t_ -k2n | xargs zcat \
 	| wigEncode -noOverlap stdin phastCons9way.wig phastCons9way.wib
     # Converted stdin, upper limit 1.00, lower limit 0.00
     time nice -n +19 cp -p *.wi? /cluster/data/calJac1/bed/multiz9way/cons/all
     #	real    1m4.483s
 
     # Load gbdb and database with wiggle.
     ssh hgwdev
     cd /cluster/data/calJac1/bed/multiz9way/cons/all
     ln -s `pwd`/phastCons9way.wib /gbdb/calJac1/multiz9way/phastCons9way.wib
     time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/calJac1/multiz9way calJac1 \
 	phastCons9way phastCons9way.wig
     #	real    0m56.271s
     # remove garbage
     rm wiggle.tab
 
     #  Create histogram to get an overview of all the data
     ssh hgwdev
     cd /cluster/data/calJac1/bed/multiz9way/cons/all
     time nice -n +19 hgWiggle -doHistogram \
 	-hBinSize=0.001 -hBinCount=1000 -hMinVal=0.0 -verbose=2 \
 	    -db=calJac1 phastCons9way > histogram.data 2>&1
     #	real    5m0.608s
 
     #	create plot of histogram:
 
     #	plot column 5 of histogram.data as impulses (RelFreq) and column 7
     #	as a line on the second y axis (CRF), both against column 2
     cat << '_EOF_' | gnuplot > histo.png
 set terminal png small color x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
 set size 1.4, 0.8
 set key left box
 set grid noxtics
 set grid ytics
 set title " Marmoset CalJac1 Histogram phastCons9way track"
 set xlabel " phastCons9way score"
 set ylabel " Relative Frequency"
 set y2label " Cumulative Relative Frequency (CRF)"
 set y2range [0:1]
 set y2tics
 set yrange [0:0.02]
 
 plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
         "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
 '_EOF_'
     #	<< happy emacs
 
     display histo.png &
 
     #	These trackDb entries turn on the wiggle phastCons data track:
     #	type wigMaf 0.0 1.0
     #	maxHeightPixels 100:40:11
     #	wiggle phastCons9way
     #	spanList 1
     #	autoScale Off
     #	windowingFunction mean
     #	pairwiseHeight 12
     #	yLineOnOff Off
 
 #############################################################################
 #  Downloads (DONE - 2008-01-11 - Hiram)
     #	Let's see if the downloads will work
     ssh hgwdev
     #	the relative paths below (bed/RepeatMasker, jkStuff) are under here
     cd /cluster/data/calJac1
     #	expecting to find repeat masker .out file here:
     ln -s bed/RepeatMasker/calJac1.fa.out .
     time nice -n +19 /cluster/bin/scripts/makeDownloads.pl \
 	-workhorse=hgwdev calJac1 > jkStuff/downloads.log 2>&1
     #	real    24m3.210s
     #	failed making upstream sequences:
     #	featureBits calJac1 mgcGenes:upstream:1000 -fa=stdout
     #	setpriority: Permission denied.
     #	the 'nice' from my bash shell causes trouble inside the csh
     #	script which uses nice.  Finish off the install step manually
     #	with the mgcGenes upstreams ...
 
 #############################################################################
 #  PushQ entries (DONE - 2008-01-11 - Hiram)
     ssh hgwdev
     #	jkStuff/pushQ.sql is written relative to the assembly directory
     cd /cluster/data/calJac1
     /cluster/bin/scripts/makePushQSql.pl calJac1 > jkStuff/pushQ.sql
     #	output warnings:
 # calJac1 does not have seq
 # calJac1 does not have gbMiscDiff
 # Could not tell (from trackDb, all.joiner and hardcoded lists of supporting
 # and genbank tables) which tracks to assign these tables to:
 #	genscanPep
 
 #############################################################################
 #  Create 9-way downloads (DONE - 2008-03-28 - Hiram)
     ssh hgwdev
     mkdir -p /cluster/data/calJac1/bed/multiz9way/downloads/phastCons9way
     cd /cluster/data/calJac1/bed/multiz9way/downloads/phastCons9way
     cp -p \
 /san/sanvol1/scratch/calJac1/multiz9way/cons/all/phastCons9wayScores/* .
     ln -s ../../cons/all/all.mod ./9way.mod
     cp /cluster/data/ponAbe2/bed/multiz8way/downloads/phastCons8way/README.txt .
     # edit that README.txt to be correct for this 9-way alignment
     cd ..
     mkdir multiz9way
     cd multiz9way
     cp -p /cluster/data/ponAbe2/bed/multiz8way/downloads/multiz8way/README.txt .
     # edit that README.txt to be correct for this 9-way alignment
     ssh kkstore06
     mkdir -p /cluster/data/calJac1/bed/multiz9way/downloads/multiz9way
     cd /cluster/data/calJac1/bed/multiz9way/downloads/multiz9way
     ln -s ../../calJac1.9-way.nh ./9way.nh
 
     #	from downloads/multiz9way the annotated maf is two levels up, in
     #	multiz9way/anno
     time nice -n +19 gzip -c ../../anno/calJac1.anno.9way.maf \
 	> calJac1.9way.maf.gz
     #	real    310m12.800s
     #	unusually long time due to nice +19 and conflict with other long-running
     #	jobs on kkstore06
 
     ssh hgwdev
     cd /cluster/data/calJac1/bed/multiz9way/downloads/multiz9way
     #	creating upstream files from xenoRefGene, bash script:
     cat << '_EOF_' > mkUpstream.sh
 #!/bin/bash
 DB=calJac1
 GENE=xenoRefGene
 NWAY=multiz9way
 export DB GENE
 
 for S in 1000 2000 5000
 do
     echo "making upstream${S}.maf"
     featureBits ${DB} ${GENE}:upstream:${S} -fa=/dev/null -bed=stdout \
         | perl -wpe 's/_up[^\t]+/\t0/' | sort -k1,1 -k2,2n \
         | $HOME/kent/src/hg/ratStuff/mafFrags/mafFrags ${DB} ${NWAY} \
                 stdin stdout \
                 -orgs=/cluster/data/${DB}/bed/${NWAY}/species.list \
         | gzip -c > upstream${S}.maf.gz
     echo "done upstream${S}.maf.gz"
 done
 '_EOF_'
     # << happy emacs
     chmod +x ./mkUpstream.sh
     time nice -n +19 ./mkUpstream.sh
     #	real    119m5.562s
 # -rw-rw-r--  1   42975041 Mar 28 14:27 upstream1000.maf.gz
 # -rw-rw-r--  1   76363192 Mar 28 15:03 upstream2000.maf.gz
 # -rw-rw-r--  1  303870318 Mar 28 15:42 upstream5000.maf.gz
 
     #	check the names in these upstream files to ensure sanity:
     zcat upstream1000.maf.gz | grep "^s " | awk '{print $2}' \
 	| sort | uniq -c | sort -rn | less
     #	should be a list of the other 8 species with a high count,
     #	then xenoRefGene names, e.g.:
     #	51588 rheMac2
     #	51588 ponAbe2
     #	51588 panTro2
     #	51588 ornAna1
     #	51588 monDom4
     #	51588 mm9
     #	51588 hg18
     #	51588 canFam2
     #	   18 NM_001033610
     #	   17 NM_016957
     #	   17 NM_000992
     #	   16 NM_181722
 
     ssh kkstore06
     cd /cluster/data/calJac1/bed/multiz9way/downloads/multiz9way
     md5sum *.maf.gz > md5sum.txt
     cd ../phastCons9way
     md5sum *.data.gz *.mod > md5sum.txt
 
     ssh hgwdev
     mkdir /usr/local/apache/htdocs/goldenPath/calJac1/multiz9way
     mkdir /usr/local/apache/htdocs/goldenPath/calJac1/phastCons9way
     cd /cluster/data/calJac1/bed/multiz9way/downloads/multiz9way
     ln -s `pwd`/* /usr/local/apache/htdocs/goldenPath/calJac1/multiz9way
     cd ../phastCons9way
     ln -s `pwd`/* /usr/local/apache/htdocs/goldenPath/calJac1/phastCons9way
     #	if your ln -s `pwd`/* made extra links to files you don't want there,
     #	check the goldenPath locations and remove those extra links
 
 #############################################################################
 # N-SCAN gene predictions (nscanGene) - (2008-04-03 markd)
 
     # obtained NSCAN predictions from michael brent's group
     # at WUSTL
     cd /cluster/data/calJac1/bed/nscan/
     wget http://mblab.wustl.edu/predictions/marmoset/calJac1/calJac1.gtf
     wget http://mblab.wustl.edu/predictions/marmoset/calJac1/calJac1.prot.fa
     wget http://mblab.wustl.edu/predictions/marmoset/calJac1/readme.html
     bzip2 calJac1.*
     chmod a-w *
 
     # load track
     gtfToGenePred -genePredExt calJac1.gtf.bz2 stdout | hgLoadGenePred -bin -genePredExt calJac1 nscanGene stdin
     hgPepPred calJac1 generic nscanPep  calJac1.prot.fa.bz2
     rm *.tab
 
     # update trackDb; need a calJac1-specific page to describe informants
     marmoset/calJac1/nscanGene.html   (copy from readme.html)
     marmoset/calJac1/trackDb.ra
     # set search regex to
         termRegex chr[0-9a-zA-Z_].*\.[0-9]+\.[0-9]
 
 #############################################################################
 ############################################################################
 # TRANSMAP vertebrate.2008-05-20 build  (2008-05-24 markd)
 
 vertebrate-wide transMap alignments were built.  Tracks are created and loaded
 by a single Makefile. This is available from:
    svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2008-05-20
 
 see doc/builds.txt for specific details.
 ############################################################################
 ############################################################################
 # TRANSMAP vertebrate.2008-06-07 build  (2008-06-30 markd)
 
 vertebrate-wide transMap alignments were built.  Tracks are created and loaded
 by a single Makefile. This is available from:
    svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2008-06-30
 
 see doc/builds.txt for specific details.
 ############################################################################
 
 ################################################
 # AUTOMATE UPSTREAM FILE CREATION (2008-10-15 markd)
 update genbank.conf:
 calJac1.upstreamGeneTbl = xenoRefGene
 calJac1.upstreamMaf = multiz9way /hive/data/genomes/calJac1/bed/multiz9way/species.list
 
 ############################################################################
 # QUALITY TRACK (DONE - 2008-11-25 - Hiram)
     mkdir /hive/data/genomes/calJac1/bed/qual
     cd /hive/data/genomes/calJac1/bed/qual
     #	the qac file was created by Rico during 28-way annotations
     qacToWig -fixed ../quality/calJac1.qac stdout \
 	| wigEncode stdin qual.wig qual.wib
     ln -s `pwd`/qual.wib /gbdb/calJac1/wib
     hgLoadWiggle -pathPrefix=/gbdb/calJac1/wib calJac1 quality qual.wig
 ############################################################################
 # TRANSMAP vertebrate.2009-07-01 build  (2009-07-21 markd)
 
 vertebrate-wide transMap alignments were built.  Tracks are created and loaded
 by a single Makefile. This is available from:
    svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2009-07-01
 
 see doc/builds.txt for specific details.
 ############################################################################
 ############################################################################
 # TRANSMAP vertebrate.2009-09-13 build  (2009-09-20 markd)
 
 vertebrate-wide transMap alignments were built.  Tracks are created and loaded
 by a single Makefile. This is available from:
    svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2009-09-13
 
 see doc/builds.txt for specific details.
+
 ############################################################################
+# LIFTOVER TO calJac3 (DONE - 2010-02-11 - Hiram )
+    mkdir /hive/data/genomes/calJac1/bed/blat.calJac3.2010-02-11
+    cd /hive/data/genomes/calJac1/bed/blat.calJac3.2010-02-11
+    # -debug run to create run dir, preview scripts...
+    doSameSpeciesLiftOver.pl -debug calJac1 calJac3
+    # Real run:
+    time nice -n +19 doSameSpeciesLiftOver.pl -verbose=2 \
+	-bigClusterHub=swarm -dbHost=hgwdev -workhorse=hgwdev \
+	 calJac1 calJac3 > do.log 2>&1
+    #	real    36m16.693s
+#############################################################################