src/hg/makeDb/doc/monDom5.txt 1.19

1.19 2010/02/16 04:47:42 hiram
completed calJac3 lastz alignment
Index: src/hg/makeDb/doc/monDom5.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/monDom5.txt,v
retrieving revision 1.18
retrieving revision 1.19
diff -b -B -U 1000000 -r1.18 -r1.19
--- src/hg/makeDb/doc/monDom5.txt	6 Feb 2010 00:17:33 -0000	1.18
+++ src/hg/makeDb/doc/monDom5.txt	16 Feb 2010 04:47:42 -0000	1.19
@@ -1,1537 +1,1584 @@
 # for emacs: -*- mode: sh; -*-
 
 
 #	Creating the assembly for Monodelphis domestica
 #	South American, Short-tailed Opossum
 #	http://www.genome.gov/11510687
 #	http://www.genome.gov/12512285
 
 #  NOTE:  this doc may have genePred loads that fail to include
 #  the bin column.  Please correct that for the next build by adding
 #  a bin column when you make any of these tables:
 #
 #  mysql> SELECT tableName, type FROM trackDb WHERE type LIKE "%Pred%";
 #  +-------------+---------------------------------+
 #  | tableName   | type                            |
 #  +-------------+---------------------------------+
 #  | refGene     | genePred refPep refMrna         |
 #  | xenoRefGene | genePred xenoRefPep xenoRefMrna |
 #  | ensGene     | genePred ensPep                 |
 #  | genscan     | genePred genscanPep             |
 #  +-------------+---------------------------------+
 
 #########################################################################
 # DOWNLOAD SEQUENCE (DONE - 2007-05-11 - Hiram)
     ssh kkstore04
     mkdir -p /cluster/store8/monDom5/broad
     ln -s /cluster/store8/monDom5 /cluster/data/monDom5
     cd /cluster/data/monDom4/broad.mit.edu
     time nice -n +19 wget --timestamping \
 	'ftp://broad.mit.edu/pub/assemblies/mammals/monodelphis/monDom5/*'
     #	real    52m48.859s
     #	user    0m2.122s
     #	sys     0m22.031s
     #	This was a symlink to ../monDom2/
 # lrwxrwxrwx  1 36 May 10 16:00 BACread_2_BACclone.txt.gz -> ../monDom2/BACread_2_BACclone.txt.gz
     #	so actually fetch the file here
     rm  BACread_2_BACclone.txt.gz
     time nice -n +19 wget --timestamping \
 'ftp://broad.mit.edu/pub/assemblies/mammals/monodelphis/monDom2/BACread_2_BACclone.txt.gz'
     #	it looks like they've been using our tools, their agp file has
     #	the extra gap at the end of chrUn:
 # chrUn   103240612       103241611       16995   N       1000    clone   no
     #	Their fasta file is in 5,000,000 chunks
 
     #	fixup the split fasta files
     time nice -n +19 gunzip Monodelphis5.0.agp.chromosome.qual.gz \
 	Monodelphis5.0.agp.chromosome.fasta.gz
     #	real    5m24.942s
     #	user    1m31.895s
     #	sys     0m51.726s
     mkdir splitFa
     time nice -n +19 faSplit -verbose=2 byname \
 	Monodelphis5.0.agp.chromosome.fasta splitFa/
     #	2m44s
 
     mkdir chrFa
     #	combine the Broad split files into single chrom fasta files
 time for C in 1 2 3 4 5 6 7 8 X Un
 do
     rm -f chrFa/chr${C}.fa
     echo ">chr${C}" > chrFa/chr${C}.fa
     echo -n "chrFa/chr${C}.fa working ... "
     ls splitFa/${C}.*-*.fa | sort -t"." -k2,2n | while read F
     do
         grep -v "^>" ${F} >> chrFa/chr${C}.fa
     done
     echo "done"
 done
 
     #	verify nothing was lost, should be the same totals here
     faSize chrFa/chr*.fa
     #	3605614649 bases (103971429 N's 3501643220 real 3501643220 upper 0
     #	lower) in 10 sequences in 10 files
     faSize Monodelphis5.0.agp.chromosome.fasta
     #	3605614649 bases (103971429 N's 3501643220 real 3501643220 upper 0
     #	lower) in 726 sequences in 1 files
 
     #	put them together into a single file:
     cat chrFa/chr*.fa > ucscChroms.fa
 
     #	create a lift file from the information in the fasta headers
     cat << '_EOF_' > liftBroadToChroms.pl
 #!/usr/bin/env perl
 
 use strict;
 use warnings;
 
 open (FH, 'grep "^>" Monodelphis5.0.agp.chromosome.fasta|') or
 	die "can not grep Monodelphis5.0.agp.chromosome.fasta";
 
 my %liftSpec;	# key is chrom_start, value is end
 my %chrSize;	# key is chrom, value is size
 my @liftLines;	# index is line number, value is line to output
 
 $chrSize{'1'} = 0;
 $chrSize{'2'} = 0;
 $chrSize{'3'} = 0;
 $chrSize{'4'} = 0;
 $chrSize{'5'} = 0;
 $chrSize{'6'} = 0;
 $chrSize{'7'} = 0;
 $chrSize{'8'} = 0;
 $chrSize{'X'} = 0;
 $chrSize{'Un'} = 0;
 
 my $lineCount = 0;
 my $prevChr = "";
 my $chrStart = 0;
 while (my $line = <FH>) {
     my ($chr_pos, $name) = split('\s+',$line);
     my ($chr, $range) = split('\.',$chr_pos);
     $chr =~ s/>//;
     if ($chr ne $prevChr) {
 	$chrStart = 0;
 	$prevChr = $chr;
 	print STDERR "chr$chr starting\n";
     }
     my ($start,$end) = split('-',$range);
     my $key = sprintf("%s_%d", $chr, $start);
     $liftSpec{$key} = $end;
     $chrSize{$chr} = $end if ($end > $chrSize{$chr});
     my $fragSize = $end - $start + 1;
     $liftLines[$lineCount++] = sprintf "%d\t%s.%d-%d\t%d\tchr%s",
 	$chrStart, $chr, $start, $end, $fragSize, $chr;
     $chrStart += $fragSize;
 }
 
 close (FH);
 
 for (my $i = 0; $i < $lineCount; ++$i) {
     my ($chrStart, $fragName, $fragSize, $chr) = split('\t',$liftLines[$i]);
     $chr =~ s/chr//;
     printf "%s\t%d\n", $liftLines[$i], $chrSize{$chr};
 }
 '_EOF_'
     # << happy emacs
     chmod +x liftBroadToChroms.pl
 
     ./liftBroadToChroms.pl liftBroad.lft
     #	split up the quality file to get it put together into a single
     #	chrom based file
     cat << '_EOF_' > splitQual.pl
 #!/usr/bin/env perl
 
 use strict;
 use warnings;
 
 open (FH, "<Monodelphis5.0.agp.chromosome.qual") or
         die "can not read Monodelphis5.0.agp.chromosome.qual";
 
 # open an initial output file to get OUT established, not a real chr name
 my $fileName = "splitQual/0.1.fa";
 open (OUT,">$fileName") or die "can not write to $fileName";
 
 while (my $line = <FH>) {
     if ($line =~ m/^>/) {
         close(OUT);
         $line =~ s/>//;
         $line =~ s/-.*//;
         $fileName = "splitQual/$line";
         open (OUT,">$fileName") or die "can not write to $fileName";
         printf STDERR "writing to $fileName";
     } else {
         print OUT $line;
     }
 }
 
 close (FH);
 close (OUT);
 '_EOF_'
     # << happy emacs
     chmod +x splitQual.pl
     mkdir splitQual
     ./splitQual.pl
     # put them back together in order as full chroms
 for C in 1 2 3 4 5 6 7 8 X Un
 do
     echo ">chr${C}" > chrQual/chr${C}.qual.fa
     ls splitQual/${C}.* | sort -t'.' -k2,2n | while read F
 do
     cat $F >> chrQual/chr${C}.qual.fa
 done
 done
     #	real    6m7.079s
     #	and as a single file
 for C in 1 2 3 4 5 6 7 8 X Un
 do
     cat chrQual/chr${C}.qual.fa
 done | gzip -c > ucscChroms.qual.fa.gz
     #	real    4m41.229s
     #	and turn it into a qac file
     qaToQac ucscChroms.qual.fa.gz ucscChroms.qac
     #	real    3m49.380s
 
 #########################################################################
 # create genome assembly database (DONE - 2008-11-25 - Hiram)
     cd /hive/data/genomes/monDom5/
     cat << '_EOF_' > monDom5.config.ra
 # Config parameters for makeGenomeDb.pl:
 db monDom5
 clade mammal
 scientificName Monodelphis domestica
 commonName Opossum
 assemblyDate Oct. 2006
 assemblyLabel Broad Institute monDom5 (NCBI project 12561, accession AAFR03000000)
 orderKey 354
 mitoAcc NC_006299
 fastaFiles /hive/data/genomes/monDom5/broad/ucscChroms.fa
 agpFiles /hive/data/genomes/monDom5/broad/Monodelphis5.0.agp
 qualFiles /hive/data/genomes/monDom5/broad/ucscChroms.qac
 dbDbSpeciesDir opossum
 '_EOF_'
     # << happy emacs
 
     time makeGenomeDb.pl -verbose=2 -stop=seq monDom5.config.ra > seq.log 2>&1
     #	real    3m3.414s
     time makeGenomeDb.pl -verbose=2 -continue=agp -stop=agp monDom5.config.ra \
 	> agp.log 2>&1
     #	real    0m36.723s
     time makeGenomeDb.pl -verbose=2 -continue=db -stop=db monDom5.config.ra \
 	> db.log 2>&1
     #	real    36m30.041
     time makeGenomeDb.pl -verbose=2 -continue=dbDb -stop=dbDb \
 	monDom5.config.ra > dbDb.log 2>&1
     #	real    0m1.074s
     time makeGenomeDb.pl -verbose=2 -continue=trackDb -stop=trackDb \
 	monDom5.config.ra > trackDb.log 2>&1
     #	check in the trackDb files and the browser should be up and running
 
 #########################################################################
 #  monDom5 - Opossum - Ensembl Genes version 50  (DONE - 2008-12-02 - hiram)
     ssh hgwdev
     cd /hive/data/genomes/monDom5
     cat << '_EOF_' > monDom5.ensGene.ra
 # required db variable
 db monDom5
 # optional nameTranslation, the sed command that will transform
 #       Ensemble names to UCSC names.  With quotes just to make sure.
 nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
 '_EOF_'
 #  << happy emacs
 
     doEnsGeneUpdate.pl -ensVersion=50 monDom5.ensGene.ra
     ssh hgwdev
     cd /hive/data/genomes/monDom5/bed/ensGene.50
     featureBits monDom5 ensGene
     # 32970520 bases of 3501660299 (0.942%) in intersection
 
  *** All done!  (through the 'makeDoc' step)
  *** Steps were performed in /hive/data/genomes/monDom5/bed/ensGene.50
 
 ############################################################################
 #  monDom5 - Opossum - Ensembl Genes version 51  (DONE - 2008-12-02 - hiram)
     ssh hgwdev
     cd /hive/data/genomes/monDom5
     cat << '_EOF_' > monDom5.ensGene.ra
 # required db variable
 db monDom5
 # optional nameTranslation, the sed command that will transform
 #       Ensemble names to UCSC names.  With quotes just to make sure.
 nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
 '_EOF_'
 #  << happy emacs
 
     doEnsGeneUpdate.pl -ensVersion=51 monDom5.ensGene.ra
     ssh hgwdev
     cd /hive/data/genomes/monDom5/bed/ensGene.51
     featureBits monDom5 ensGene
     # 32959755 bases of 3501660299 (0.941%) in intersection
 
  *** All done!  (through the 'makeDoc' step)
  *** Steps were performed in /hive/data/genomes/monDom5/bed/ensGene.51
 
 #########################################################################
 ## REPEATMASKER (DONE 2008-11-26 Andy)
     screen -S monDom5RepeatMasker	# to manage this several day job
     mkdir /hive/data/genomes/monDom5/bed/repeatMasker
     cd /hive/data/genomes/monDom5/bed/repeatMasker
     time $HOME/kent/src/hg/utils/automation/doRepeatMasker.pl -workhorse=hgwdev \
          -bigClusterHub=swarm -buildDir=`pwd` monDom5 > do.log
 ## (detach screen Ctrl+A+D)
 ## ... (reattach on hgwdev: screen -r monDom5RepeatMasker)
 #
 #Checking finished jobs
 #cat run.time
 #HgStepManager: executing step 'cat' Wed Nov 26 07:29:04 2008.
 #Use of uninitialized value in substitution (s///) at /cluster/home/aamp/kent/src/hg/utils/automation/HgAutomate.pm line 262.
 #Could not extract server from output of "df /hive/data/genomes/monDom5/bed/repeatMasker":
 #/dev/hivedev         171434397696 55883525376 115550872320  33% /hive
 #...
 #user    0m0.099s
 #sys     0m0.070s
 
 ## checking the problem, it's that I don't have a $HOST in my env.
 ## ... so, tcsh it is.
      tcsh
      time $HOME/kent/src/hg/utils/automation/doRepeatMasker.pl -continue=cat \
          -workhorse=hgwdev -bigClusterHub=swarm -buildDir=`pwd` \ 
          monDom5 > doAfterCat.log
 #0.389u 0.348s 1:12:48.52 0.0%   0+0k 0+0io 1pf+0w
 
 #########################################################################
 ## SIMPLE REPEATS TRF (DONE 2008-11-26 - Andy)
     screen
     mkdir /hive/data/genomes/monDom5/bed/simpleRepeat
     cd /hive/data/genomes/monDom5/bed/simpleRepeat
     time $HOME/kent/src/hg/utils/automation/doSimpleRepeat.pl \
         -buildDir=/cluster/data/monDom5/bed/simpleRepeat monDom5 > do.log
 #0.233u 0.153s 2:52:17.94 0.0%   0+0k 0+0io 0pf+0w
 
     cat fb.simpleRepeat
 # 83450133 bases of 3501660299 (2.383%) in intersection
 
     ##   after RM run is done, add this mask:
     cd /hive/data/genomes/monDom5
     rm monDom5.2bit
     twoBitMask monDom5.rmsk.2bit -add bed/simpleRepeat/trfMask.bed monDom5.2bit
     ##  can safely ignore warning about >=13 fields in bed file
     twoBitToFa monDom5.2bit stdout | faSize stdin > monDom5.2bit.faSize.txt
 # 3605631728 bases (103971429 N's 3501660299 real 1542619938 upper 1959040361 lower)
 # %54.33 masked total, %55.95 masked real
 
     ##   link to gbdb
     ln -s `pwd`/monDom5.2bit /gbdb/monDom5
 
 ###########################################################################
 # WINDOWMASKER (DONE 2008-12-02 Andy)
     ssh kolossus
     mkdir /hive/data/genomes/monDom5/bed/WindowMasker
     cd /hive/data/genomes/monDom5/bed/WindowMasker
     screen -S monDom5_WindowMasker
     tcsh
     ~/kent/src/hg/utils/automation/doWindowMasker.pl monDom5 -buildDir=`pwd` -workhorse=kolossus >& wm.log
     ## oops forgot to time it (or nice it).  It took less than 8 hrs though
     ## load result to prep for cleaning
     ssh hgwdev
     cd /hive/data/genomes/monDom5/bed/WindowMasker
     hgLoadBed monDom5 windowmaskerSdust windowmasker.sdust.bed.gz
     #	Loaded 1346187 elements of size 3
     featureBits monDom5 windowmaskerSdust
     # 1619987578 bases of 3501660299 (46.263%) in intersection
     #	eliminate the gaps from the masking (WM bug)
     featureBits monDom5 -not gap -bed=notGap.bed
     #	170473138 bases of 170473138 (100.000%) in intersection
     screen -S monDom5_WindowMasker
     time nice featureBits monDom5 windowmasker.sdust.bed.gz notGap.bed \
         -bed=stdout | gzip -c > cleanWMask.bed.gz
 #1516026449 bases of 3501660299 (43.295%) in intersection
 #
 #real    12m17.912s
 #user    3m56.801s
 #sys     0m13.704s
     ##	reload track to get it clean
     hgLoadBed monDom5 windowmaskerSdust cleanWMask.bed.gz
     ##	Loaded 22241063 elements of size 4
 
 ##########################################################################
 ## CPG ISLANDS (DONE 2008-12-03, Andy)
     ssh hgwdev
     screen -S monDom5_CpG
     ~/kent/src/hg/utils/automation/doCpgIslands.pl monDom5 
     ## [detach screen]
     ## took an hour or two
     featureBits monDom5 cpgIslandExt
 # 14553072 bases of 3501660299 (0.416%) in intersection
 
 ##########################################################################
 ## LIFTOVER CHAIN MONDOM4->MONDOM5 (DONE 2008-12-06, Andy)
     ssh hgwdev
     screen -S monDom5_liftOver
     tcsh
     ~/kent/src/hg/utils/automation/doSameSpeciesLiftOver.pl monDom4 monDom5
     ## [detach]
     ## ... 8 hippos
         ssh swarm
         cd /hive/data/genomes/monDom4/bed/blat.monDom5.2008-12-04/run.blat
         ## edit job.csh and add -maxIntron=1 -mask=lower 
         ## to blat command and up minScore to 5000 and minIdentity to 99
         para stop
         para push
     ## Took several hours after hippos were dealt with and -continue net
     ## had to be run as well after it crashed running out of tmp disk space
     cd /hive/data/genomes/monDom4/bed/blat.monDom5.2008-12-04
     rm -rf run.chain/ run.blat/
     mv monDom4ToMonDom5.over.chain.gz ../liftOver/
     cd /gbdb/monDom4/liftOver
 
 #######################################################################
 # MAKE 11.OOC FILE FOR BLAT (DONE 2008-12-11, Andy)
     ssh hgwdev
     screen -S monDom5_11.ooc
     cd /hive/data/genomes/monDom5
     ##	use repMatch of 1228 as this genome is ~ %20 larger than human
     ##	1024 + (1024 * 0.2) = 1228
     time nice blat monDom5.2bit \
 	/dev/null /dev/null -tileSize=11 -makeOoc=11.ooc -repMatch=1228
     #	Wrote 43992 overused 11-mers to 11.ooc
     #	real    3m24.793s
 
 #######################################################################
 # GENBANK AUTOUPDATE SETUP (DONE 2008-12-15 Andy)
     ssh hgwdev
     screen -S monDom5_genbank
     cd ~/kent/src/hg/makeDb/genbank/conf/
     ## edit genbank.conf: search monDom4, copy and paste that whole entry
     ## and replace monDom4 with monDom5:
     ## cvs ci genbank.conf
     make etc-update 
     ssh genbank
     cd /cluster/data/genbank
     time nice -n +19 bin/gbAlignStep -initial monDom5 &
     # logFile: var/build/logs/2008.12.11-16:37:37.monDom5.initalign.log 
 #real    633m58.880s
 ##user    96m10.868s
 #sys     35m42.207s
     exit 
     ## we're back on hgwdev 
     nice -n 19 ./bin/gbDbLoadStep -drop -initialLoad  monDom5 &
 #logFile: var/dbload/genbank/logs/2008.12.15-10:44:36.dbload.log    
     cd ~/kent/src/hg/makeDb/genbank
     cvsup
     ## add monDom5 to:
         etc/align.dbs
         etc/hgwdev.dbs
     cvs ci -m "Added monDom5" etc/align.dbs etc/hgwdev.dbs
     make etc-update
 
 ##########################################################################
 ## BLAT (DONE 2008-01-20 Andy)
 # e-mail cluster-admin and ask for a server and tell them about 
 # /gbdb/monDom5/monDom5.2bit
 # then
 ssh hgwdev
 hgsql -N hgcentraltest
 mysql> insert into blatServers (db, host, port, isTrans, canPcr) values ('monDom5', 'blatx', '17778', '1', '0');
 Query OK, 1 row affected (0.01 sec)
 mysql> insert into blatServers (db, host, port, isTrans, canPcr) values ('monDom5', 'blatx', '17779', '0', '1');
 Query OK, 1 row affected (0.00 sec)
 
 ###########################################################################
 # HUMAN (hg18) PROTEINS TRACK (DONE 2009-02-05 braney )
     # bash  if not using bash shell already
 
     cd /cluster/data/monDom5
     grep chrUn  monDom5.agp | /cluster/bin/scripts/agpToLift > jkStuff/Un.lft
     mkdir /cluster/data/monDom5/blastDb
 
     cat broad/Monodelphis5.0.agp |         awk '/^chrUn/ {if ($5 == "W")
       { 
 	printf "echo \\>%s\n",$6 ; 
 	printf "twoBitToFa monDom5.2bit:chrUn:%d-%d stdout  | tail -n +2\n", $2,$3 
       }}' > script
 
     sh script > chrUn.fa
      grep -v chrUn chrom.sizes | awk '{ printf "twoBitToFa monDom5.2bit:%s stdout \n", $1}' >  script
     
     sh script > chrNotUn.fa
 
     cat chrUn.fa chrNotUn.fa | faToTwoBit stdin monDom5.chrUnAsContigs.2bit
     twoBitInfo monDom5.chrUnAsContigs.2bit monDom5.chrUnAsContigs.sizes
 
     awk '{if ($2 > 1000000) print $1}' monDom5.chrUnAsContigs.sizes > 1meg.lst
     twoBitToFa -seqList=1meg.lst  monDom5.chrUnAsContigs.2bit temp.fa
     faSplit gap temp.fa 1000000 blastDb/x -lift=blastDb.lft
 # 4366 pieces of 4366 written
     rm temp.fa 1meg.lst
 
     awk '{if ($2 <= 1000000) print $1}' monDom5.chrUnAsContigs.sizes > less1meg.lst
     twoBitToFa -seqList=less1meg.lst  monDom5.chrUnAsContigs.2bit temp.fa
     faSplit about temp.fa 1000000 blastDb/y 
     ls blastDb/y* | wc -l
 # 88
     rm temp.fa less1meg.lst
 
     cd blastDb
     for i in *.fa
     do
 	/hive/data/outside/blast229/formatdb -i $i -p F
     done
     rm *.fa &
     ls *.nsq | wc -l
 # 4454
 
     mkdir -p /cluster/data/monDom5/bed/tblastn.hg18KG
     cd /cluster/data/monDom5/bed/tblastn.hg18KG
     echo  ../../blastDb/*.nsq | xargs ls -S | sed "s/\.nsq//"  > query.lst
     wc -l query.lst
 # 4454 query.lst
 
 
    # we want around 1000000 jobs 
    calc `wc /cluster/data/hg18/bed/blat.hg18KG/hg18KG.psl | awk '{print $1}'`/\(1000000/`wc query.lst | awk '{print $1}'`\)
 
 # 36727/(1000000/4454) = 163.582058
 
    mkdir -p kgfa
    split -l 164 /cluster/data/hg18/bed/blat.hg18KG/hg18KG.psl  kgfa/kg
    cd kgfa
    for i in *; do 
      nice pslxToFa $i $i.fa; 
      rm $i; 
    done
    cd ..
    ls -1S kgfa/*.fa > kg.lst
    mkdir -p blastOut
    for i in `cat kg.lst`; do  mkdir blastOut/`basename $i .fa`; done
    tcsh
    cd /cluster/data/monDom5/bed/tblastn.hg18KG
    cat << '_EOF_' > blastGsub
 #LOOP
 blastSome $(path1) {check in line $(path2)} {check out exists blastOut/$(root2)/q.$(root1).psl }
 #ENDLOOP
 '_EOF_'
 
    cat << '_EOF_' > blastSome
 #!/bin/sh
 BLASTMAT=/hive/data/outside/blast229/data
 export BLASTMAT
 g=`basename $2`
 f=/tmp/`basename $3`.$g
 for eVal in 0.01 0.001 0.0001 0.00001 0.000001 1E-09 1E-11
 do
 if /hive/data/outside/blast229/blastall -M BLOSUM80 -m 0 -F no -e $eVal -p tblastn -d $1 -i $2 -o $f.8
 then
         mv $f.8 $f.1
         break;
 fi
 done
 if test -f  $f.1
 then
     if /cluster/bin/i386/blastToPsl $f.1 $f.2
     then
 	liftUp -nosort -type=".psl" -nohead $f.3 /cluster/data/monDom5/blastDb.lft carry $f.2
         liftUp -nosort -type=".psl" -pslQ -nohead $3.tmp /cluster/data/hg18/bed/blat.hg18KG/protein.lft warn $f.3
         if pslCheck -prot $3.tmp
         then                  
             mv $3.tmp $3     
             rm -f $f.1 $f.2 $f.3 $f.4
         fi
         exit 0               
     fi                      
 fi                         
 rm -f $f.1 $f.2 $3.tmp $f.8 $f.3 $f.4
 exit 1
 '_EOF_'
     # << happy emacs
     chmod +x blastSome
     exit 
     
     ssh swarm
     cd /cluster/data/monDom5/bed/tblastn.hg18KG
     gensub2 query.lst kg.lst blastGsub blastSpec
     para create blastSpec
 #    para try, check, push, check etc.
 
     para time
 
 
 # Completed: 997696 of 997696 jobs
 # CPU time in finished jobs:   21259114s  354318.56m  5905.31h  246.05d  0.674 y
 # IO & Wait Time:               3461687s   57694.79m   961.58h   40.07d  0.110 y
 # Average job time:                  25s       0.41m     0.01h    0.00d
 # Longest finished job:              76s       1.27m     0.02h    0.00d
 # Submission to last job:         31794s     529.90m     8.83h    0.37d
 
     ssh swarm
     cd /cluster/data/monDom5/bed/tblastn.hg18KG
 
     cat << _EOF_ > splitOne
     (cd \$1; 
     grep -h -v contig q.*.psl | pslSplitOnTarget stdin chroms
     grep -h  contig q.*.psl > chroms/contigs.psl
     )
 _EOF_
     chmod +x splitOne
 
     for i in `pwd`/blastOut/kg??; do
 	echo ./splitOne $i
     done > split.jobs
 
     para create split.jobs
     para maxJob 10
     para shove
 
 # Completed: 224 of 224 jobs
 # CPU time in finished jobs:        910s      15.17m     0.25h    0.01d  0.000 y
 # IO & Wait Time:                128754s    2145.89m    35.76h    1.49d  0.004 y
 # Average job time:                 579s       9.65m     0.16h    0.01d
 # Longest finished job:             683s      11.38m     0.19h    0.01d
 # Submission to last job:         13166s     219.43m     3.66h    0.15d
 
     ssh swarm
     cd /cluster/data/monDom5/bed/tblastn.hg18KG
     mkdir chainRun
     cd chainRun
     tcsh
     cat << '_EOF_' > chainGsub
 #LOOP
 chainOne $(path1)
 #ENDLOOP
 '_EOF_'
 
     cat << '_EOF_' > chainOne
 ( 
 f=`basename $1`;
 d=`dirname $1`;
 if simpleChain -prot -outPsl -maxGap=150000 $1 $d/c.$f.tmp
 then
     mv $d/c.$f.tmp $d/c.$f
 else
     rm $d/c.$f.tmp
 fi
 )
 '_EOF_'
     chmod +x chainOne
     ls -1dS ../blastOut/kg??/chroms/*.psl > chain.lst
     gensub2 chain.lst single chainGsub chainSpec
     # do the cluster run for chaining
     para create chainSpec
     para try, check, push, check etc.
 
 # Completed: 2270 of 2270 jobs
 # CPU time in finished jobs:   10821559s  180359.32m  3005.99h  125.25d  0.343 y
 # IO & Wait Time:                249403s    4156.72m    69.28h    2.89d  0.008 y
 # Average job time:                4877s      81.28m     1.35h    0.06d
 # Longest finished job:          205913s    3431.88m    57.20h    2.38d
 # Submission to last job:        205932s    3432.20m    57.20h    2.38d
 
     cd /cluster/data/monDom5/bed/tblastn.hg18KG/blastOut
     for i in kg??
     do
        cat $i/chroms/c.*.psl | awk "(\$13 - \$12)/\$11 > 0.6 {print}" > c60.$i.psl
        sort -rn c60.$i.psl | pslUniq stdin u.$i.psl
        awk "((\$1 / \$11) ) > 0.60 { print   }" c60.$i.psl > m60.$i.psl
        echo $i
     done
     sort u.*.psl m60* | uniq | sort -T /tmp -k 14,14 -k 16,16n -k 17,17n > ../unLift.psl
     cd ..
     pslCheck unLift.psl
 # checked: 71490 failed: 0 errors: 0
 
     liftUp blastHg18KG.psl ../../jkStuff/Un.lft carry  unLift.psl
     pslCheck -prot blastHg18KG.psl
 # checked: 71490 failed: 0 errors: 0
 
     # load table 
     ssh hgwdev
     cd /cluster/data/monDom5/bed/tblastn.hg18KG
     hgLoadPsl monDom5 blastHg18KG.psl
 
     # check coverage
     featureBits monDom5 blastHg18KG 
 # 33631308 bases of 3501660299 (0.960%) in intersection
 
     featureBits monDom4 blastHg18KG 
 # 19424941 bases of 3501643220 (0.555%) in intersection
 
     featureBits monDom5 all_mrna blastHg18KG  -enrichment
 # all_mrna 0.006%, blastHg18KG 0.960%, both 0.003%, cover 43.79%, enrich 45.59x
 
     rm -rf blastOut
 #end tblastn
 
 #########################################################################
 ## NSCAN LIFT (FROM MONDOM4) (DONE 2008-12-08 Andy)
 ssh hgwdev
 mkdir /hive/data/genomes/monDom5/bed/nscanGene.lifted
 cd /hive/data/genomes/monDom5/bed/nscanGene.lifted
 echo "select * from nscanGene" | hgsql monDom4 | sed '1d' > nscanGene.monDom4.gp
 liftOver nscanGene.monDom4.gp /gbdb/monDom4/liftOver/monDom4ToMonDom5.over.chain.gz \
   | nscanGene.monDom5.gp unmapped.gp
 ldHgGene -predTab monDom5 nscanGene nscanGene.monDom5.gp
 
 ##########################################################################
 ## GENSCAN (DONE 2008-01-22 Andy)
 ssh hgwdev
 mkdir -p /hive/data/genomes/monDom5/bed/genscan/{fasta,gtf,pep,subopt}
 cd /hive/data/genomes/monDom5/bed/genscan
 twoBitToFa ../../monDom5.2bit stdout | maskOutFa stdin hard stdout \
    | faSplit -maxN=5000000 -lift=chunks.lft gap stdin 8000000 fasta/chunk
 cvs co hg3rdParty/genscanlinux
 cat << '_EOF_' > gsub
 #LOOP
 gsBig {check in line+ $(path1)} {check out line gtf/$(root1).gtf} -trans={check out line pep/$(root1).pep} -subopt={check out line subopt/$(root1).bed} -exe=hg3rdParty/genscanlinux/genscan - par=hg3rdParty/genscanlinux/HumanIso.smat -tmp=/tmp -window=2400000
 #ENDLOOP
 '_EOF_'
     # << emacs
 ls -1 fasta/* > chunk.lst
 ssh swarm
 cd /hive/data/genomes/monDom5/bed/genscan
 gensub2 chunk.lst single gsub spec
 para create spec
 para push
 para time
 #Completed: 421 of 421 jobs
 #CPU time in finished jobs:      60788s    1013.13m    16.89h    0.70d  0.002 y
 #IO & Wait Time:                  5097s      84.96m     1.42h    0.06d  0.000 y
 #Average job time:                 156s       2.61m     0.04h    0.00d
 #Longest finished job:             367s       6.12m     0.10h    0.00d
 #Submission to last job:           378s       6.30m     0.10h    0.00d
 catDir subopt | liftUp my.bed chunks.lft error stdin
 sed 's/chunk/opossum/' my.bed > genscan.subopt.bed
 catDir gtf | liftUp my.gtf chunks.lft error stdin
 sed 's/chunk/opossum/g' my.gtf > genscan.gtf
 catDir pep | sed 's/chunk/opossum/' > genscan.pep.fa
 ldHgGene -gtf monDom5 genscan genscan.gtf 
 hgPepPred monDom5 generic genscanPep genscan.pep.fa 
 hgLoadBed monDom5 genscanSubopt genscan.subopt.bed 
 
 #########################################################################
 #  BIGZIPS/DOWNLOADS (DONE, 2009-06-08 Andy)
     ssh hgwdev
     mkdir /hive/data/genomes/monDom5/downloads
     mkdir /hive/data/genomes/monDom5/downloads/bigZips
     mkdir /hive/data/genomes/monDom5/downloads/chromosomes
     cd /hive/data/genomes/monDom5/downloads/chromosomes
     cp /usr/local/apache/htdocs/goldenPath/monDom4/chromosomes/README.txt .
     #	edit that readme to provide correct references and details
     ln -s `pwd` /usr/local/apache/htdocs/goldenPath/monDom5/chromosomes
     cd /hive/data/genomes/monDom5/downloads/chromosomes
     for c in `cut -f1 ../../chrom.sizes`; do
       echo $c;
       twoBitToFa ../../monDom5.unmasked.2bit:$c stdout \
         | gzip -c > $c.fa.gz; 
     done
     md5sum *.fa.gz R*.txt > md5sum.txt
     cd ../../
     for c in `cut -f1 chrom.sizes`; do 
       echo ${c#chr};
       pushd ${c#chr};
         cp ../downloads/chromosomes/${c}.fa.gz .
         gunzip ${c}.fa.gz;
       popd;
     done
     tar cvzf downloads/bigZips/chromFa.tar.gz ?/chr*.fa Un/chrUn.fa
     tar cvzf downloads/bigZips/chromOut.tar.gz ?/chr*.fa.out Un/chrUn.fa.out
     for c in `cut -f1 chrom.sizes`; do echo $c; twoBitToFa monDom5.2bit:$c ${c#chr}/${c}.fa.masked; done   
     tar cvzf downloads/bigZips/chromFaMasked.tar.gz ?/chr*.fa.masked \
 	Un/chrUn.fa.masked 
     #real    17m19.439s
     cd bed/simpleRepeat/
     mkdir trfMask
     cd trfMask
     ln -s ../trfMaskChrom/* .
     cd ../
     tar -hcvzf chromTrf.tar.gz ./trfMask
     mv chromTrf.tar.gz ../../downloads/bigZips/
     cd ../../downloads/bigZips/
     cp ../../broad/Monodelphis5.0.agp .
     gzip Monodelphis5.0.agp
     cp /usr/local/apache/htdocs/goldenPath/monDom4/bigZips/README.txt .
     # [edit]
     ln -s /hive/data/genomes/monDom5/downloads/bigZips \
 	/usr/local/apache/htdocs/goldenPath/monDom5/bigZips
     cd /usr/local/apache/htdocs/goldenPath/monDom5/bigZips
     ln -s /hive/data/genomes/monDom5/monDom5.2bit .
     md5sum *.gz *.2bit README.txt > md5sum.txt
     cd ../chromosomes/
     md5sum *.gz R*.txt > md5sum.txt
     cd ../
     mkdir database
     cd database/
     cp ../../monDom4/database/README.txt .
     # [edit README.txt]
   
 #########################################################################
 #nscanGene (2009-06-23 markd)
    # nscanGene track from WUSTL
    cd /hive/data/genomes/monDom5/bed/nscan
 
 
    wget http://mblab.wustl.edu/predictions/possum/monDom5/README
    wget http://mblab.wustl.edu/predictions/possum/monDom5/monDom5.gtf
    wget -r -np  -e robots=off -l 1 http://mblab.wustl.edu/predictions/possum/monDom5/chr_ptx/
     nice bzip2 chr_ptx/* monDom5.gtf 
 
    # load track
    gtfToGenePred -genePredExt monDom5.gtf.bz2 stdout| hgLoadGenePred -genePredExt monDom5 nscanGene stdin
    bzcat chr_ptx/*.fa.bz2 | hgPepPred monDom5 generic nscanPep stdin
    rm *.tab
 
    # validate same number of transcripts and peptides are loaded
    hgsql -Ne 'select count(*) from nscanGene' monDom5
    hgsql -Ne 'select count(*) from nscanPep' monDom5
 
    # validate search expression
    hgc-sql -Ne 'select name from nscanGene' monDom5 | egrep -v -e '^chr[0-9a-zA-Z_]+\.[0-9]+\.[0-9]+(\.[0-9a-z]+)?$' |wc -l
 
 #########################################################################
 # lastz Horse equCab2 (DONE - 2009-06-29,07-02 - Hiram)
     mkdir /hive/data/genomes/monDom5/bed/lastzEquCab2.2009-06-29
     cd /hive/data/genomes/monDom5/bed/lastzEquCab2.2009-06-29
 
     cat << '_EOF_' > DEF
 # opossum vs. Horse
 
 # settings for more distant organism alignments
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=10000
 BLASTZ_K=2200
 BLASTZ_M=50
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Opossum (monDom5)
 SEQ1_DIR=/scratch/data/monDom5/monDom5.2bit
 SEQ1_LEN=/scratch/data/monDom5/chrom.sizes
 SEQ1_CHUNK=10000000
 SEQ1_LAP=10000
 
 # QUERY: Horse equCab2
 SEQ2_DIR=/scratch/data/equCab2/equCab2.2bit
 SEQ2_LEN=/scratch/data/equCab2/chrom.sizes
 SEQ2_CTGDIR=/hive/data/genomes/equCab2/equCab2.UnScaffolds.2bit
 SEQ2_CTGLEN=/hive/data/genomes/equCab2/equCab2.UnScaffolds.sizes
 SEQ2_LIFT=/hive/data/genomes/equCab2/jkStuff/equCab2.chrUn.lift
 SEQ2_CHUNK=20000000
 SEQ2_LIMIT=100
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/monDom5/bed/lastzEquCab2.2009-06-29
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time doBlastzChainNet.pl `pwd`/DEF \
 	-noLoadChainSplit -verbose=2 -bigClusterHub=swarm \
 	-workhorse=hgwdev \
 	-chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
     #	real    1353m4.161s
     #	had some trouble with the first kluster run due to various factors
     #	not the least of which was a major power outage
     time doBlastzChainNet.pl `pwd`/DEF \
 	-noLoadChainSplit -verbose=2 -bigClusterHub=swarm \
 	-continue=cat -workhorse=hgwdev \
 	-chainMinScore=5000 -chainLinearGap=loose > cat.log 2>&1 &
     #	real    957m25.164s
     cat fb.monDom5.chainEquCab2Link.txt 
     #	355004426 bases of 3501660299 (10.138%) in intersection
 
     mkdir /hive/data/genomes/equCab2/bed/blastz.monDom5.swap
     cd /hive/data/genomes/equCab2/bed/blastz.monDom5.swap
     time doBlastzChainNet.pl \
 	/hive/data/genomes/monDom5/bed/lastzEquCab2.2009-06-29/DEF \
 	-noLoadChainSplit -verbose=2 -bigClusterHub=swarm \
 	-swap -workhorse=hgwdev \
 	-chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
     #	real    320m3.573s
     cat fb.equCab2.chainMonDom5Link.txt 
     #	351787662 bases of 2428790173 (14.484%) in intersection
 
 #########################################################################
 # Set default position to OPN1LW gene 2009-07-20, Brooke
 hgsql -e \
 "update dbDb set defaultPos='chrX:14658820-14669558' where name='monDom5'" \
 hgcentraltest
 
 #########################################################################
 
 ############################################################################
 # TRANSMAP vertebrate.2009-07-01 build  (2009-07-21 markd)
 
 vertebrate-wide transMap alignments were built  Tracks are created and loaded
 by a single Makefile. This is available from:
    svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2009-07-01
 
 see doc/builds.txt for specific details.
 
 ###########################################################################
 # ALIGNMENTS/CHAINS/NETS (DONE Dec 2008, Andy)
 # 
 # To be honest I didn't really concentrate on getting the whole enchilada of 
 # make-notes into record, because the whole process is so robotic.  
 # 
 # I'll start with the DEF files.  These have varying parameters based on 
 # the query species.  So here's the various DEF parameters:
 #
      DB    H    Y     L    K   T   M   A_R*      Q  1CHUNK 1LAP 2CHUNK 2LAP 2LIMIT
 bosTau4    - 3400  6000 2200   -   -     -  HoxD55      1M  10K    20M    0    100
 canFam2 2000 3400 10000 2200   -  50     0  HoxD55      1M  10K    30M    0      -
 danRer5 2000 3400  6000 2200   -   -     -  HoxD55      5M  10K    10M    0      -
 galGal3 2000 3400 10000 2200   -   -     -  HoxD55      5M  10K    20M    0      -
    hg18 2000 3400 10000 2200   -  50     0  HoxD55      5M  10K    10M    0      -
 macEug1    - 3400     -    -   2  10     -       -     10M 320K   500K    0    100
    mm9     - 3400  6000 2200   -   -     -  HoxD55      5M  10K    20M    0      -
 ornAna1 2000 3400  6000 2200   -  50     -  HoxD55      5M  10K    20M    0    300
 panTro2 2000 3400 10000 2200   -  50     0  HoxD55      1M  10K    30M    0      -
 ponAbe2 2000 3400 10000 2200   -  50     0  HoxD55      1M  10K    30M    0      -
 rheMac2 2000 3400 10000 2200   -  50     0  HoxD55      1M  10K    30M    0      -
     rn4    - 3400  6000 2200   -   -     -  HoxD55      2M  10K    10M    0      -
     rn5    - 3400  6000 2200   -   -     -       -      5M  10K    20M    0      -
 xenTro2 2000 3400  8000 2200   -  50     -  HoxD55      5M  10K    20M    0    100
 
 * BLASTZ_ABRIDGE_REPEATS
 
 # In most of those cases I was looking to the DEF variables in the monDom4 version
 # of the alignment.  In the case of macEug1, the variable settings were given by
 # Webb Miller.
 # 
 # After all the DEF files are created, it's time to run doBlastzChainNet.pl
 # In the case of monDom5, they all have the profile of this:
 
 cd /hive/data/genomes/monDom5/bed
 mkdir blastz.otherDb
 cd blastz.otherDb
 screen -S otherDb_monDom5
 doBlastzChainNet.pl -bigClusterHub swarm -stop cat DEF >& doUntilCat.log
 #[detach screen, come back when it's done]
 
 # The reason I'd stop after the cat step is that I was having better results 
 # using swarm for chaining instead of memk.  At the time, memk only had a dozen
 # nodes, and usually only 8 would be online.  Because of the large chromosomes,
 # the chaining step would be bottlenecked by the hippo jobs on the big chroms.
 # It was going much more quickly when the cluster could accommodate more
 # concurrent jobs. On memk the jobs were allocated 8GB of RAM and they normally
 # only get 2GB on swarm, so in case that mattered, I did this
 
 #[re-attach screen]
 doBlastzChainNet.pl -bigClusterHub swarm -smallClusterHub swarm -minChainScore 3000 DEF >& doAfterCat.log
 #[detach screen]
 ssh swarm
 cd /hive/data/genomes/monDom5/bed/blastz.otherDb/axtChain/run
 para check
 # check to see the jobs have started, once they have:
 para stop; para resetCounts; para -ram=8g -cpu=4 create jobList; para push
 # check to see things are all fine, and log out of swarm
 
 # after that all should have completed fine. Stopping the cluster run manually
 # while the doBlastzChainNet.pl script is running doesn't typically confuse it.
 # I suppose it's possible that the script while check on the run in the small
 # period while it's stopped, but it didn't happen to me.
 #
 # In retrospect I think the memk trick is possibly avoidable now that there are
 # more memk nodes.  However the oppossum is a particularly difficult species to
 # chain, so I'm just mentioning it anyway.  Later when doing alignments on hg19,
 # memk was up to the task just fine.
 # 
 # Also of note is the inconsistency of DEF parameter settings.  When monDom6
 # happens, someone will probably look here to see what was done with monDom5.
 # If I have any advice it's to take an approach like the one with hg19 and
 # use consistent parameters as much as possible and set them according to
 # different tiers of evolutionary distance.  
 
 
 ####################################################################
 # RELOAD CHAINS/NETS AS NON-SPLIT (2009-06-09, Andy)
 
 for d in blastz.*; do 
    if [ $d != "blastz.bosTau4" ]; then 
       db=${d#blastz.};
       Db=`echo $db | sed 's/^./\u&/'`; 
       echo Loading $db chains into monDom5...;
       time nice -n +19 hgLoadChain -tIndex monDom5 chain$Db \
          blastz.$db/axtChain/monDom5.$db.all.chain.gz;
     fi; 
 done >& unsplit/chainReloads.log
 # problem with macEug1 
 
 cd blastz.macEug1/axtChain/
 time nice -n +19 hgLoadChain -tIndex monDom5 chainMacEug1 monDom5.macEug1.all.chain.gz
 #Loading 19668859 chains into monDom5.chainMacEug1
 #Can't start query:
 #load data local infile 'link.tab' into table chainMacEug1Link
 #
 #mySQL error 1114: The table 'chainMacEug1Link' is full
 #real    146m54.273s
 #
 wc -l link.tab
 #200440062 link.tab
 #19668859 chain.tab
 randomLines link.tab 10000000 stdout | awk '{print length($0)}' | sort | uniq -c
 randomLines chain.tab 1000000 stdout | awk '{print length($0)}' | sort | uniq -c
 # 92 chain, 42 link
 sed "s/hgLoadChain.*/hgsqldump monDom5 chainRn4Link --no-data --skip-comments\n\
  | sed \'s\/Rn4\/MacEug1\/; s\/TYPE=MyISAM\/ENGINE=MyISAM max_rows=201000000\n\
  avg_row_length=42 pack_keys=1 CHARSET=latin1\/\' | hgsql monDom5 \n\
 /" loadUp.csh > manualLoadUp.csh
 
 hgsqldump monDom5 chainRn4 --no-data --skip-comments | sed \'s\/Rn4\/MacEug1\/; s\/TYPE=MyISAM\/ENGINE=MyISAM max_rows=20000000 avg_row_length=92 pack_keys=1 CHARSET=latin1\/\' | hgsql monDom5 \n\
 hgsql monDom5 -e \"load data local infile \'chain.tab\' into table chainMacEug1\"\n\
 hgsql monDom5 -e \"load data local infile \'link.tab\' into table chainMacEug1Link\"\n\
 hgsql monDom5 -e \"INSERT into history (ix, startId, endId, who, what, modTime, errata) VALUES(NULL,0,0,\'aamp\',\'Loaded 19668859 chains into macEug1 chain table manually\', NOW(), NULL)\"\
 ############################################################################
 # TRANSMAP vertebrate.2009-09-13 build  (2009-09-20 markd)
 
 vertebrate-wide transMap alignments were built  Tracks are created and loaded
 by a single Makefile. This is available from:
    svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2009-09-13
 
 see doc/builds.txt for specific details.
 ############################################################################
 
 ssh hgwdev
 mkdir /hive/data/genomes/monDom5/bed/multiz9way
 cd /hive/data/genomes/monDom5/bed/multiz9way
 ## Build the tree by pruning out the 8 species from the recent 44way, 
 ## changing monDom4 to monDom5, and adding macEug1
 ## (Wallaby placement deduced from (Margulies et al.,PNAS 2004) ). 
 /cluster/bin/phast/tree_doctor \
    --prune-all-but=hg18,mm9,canFam2,ornAna1,galGal3,xenTro2,danRer5,monDom4 \
    /hive/data/genomes/hg18/bed/multiz44way/44way.nh | \
    sed 's/:0\.[[:digit:]]\+/ /g; s/,//g; s/;//; s/\ )/)/g; s/monDom4/(monDom5 macEug1)/' \
  > tree.nh
 sed 's/(//g; s/)//g' tree.nh > species.lst
 
 ## Make per-chrom maf links
 mkdir mafLinks
 cd mafLinks
 bash
 for db in hg18 mm9 canFam2 macEug1 ornAna1 galGal3 xenTro2 danRer5; do
    mkdir $db; pushd $db; 
    if [ -e /hive/data/genomes/monDom5/bed/blastz.${db}/mafSynNet ]; then
       ln -s /hive/data/genomes/monDom5/bed/blastz.${db}/mafSynNet/* .
    else
       ln -s /hive/data/genomes/monDom5/bed/blastz.${db}/mafNet/* .
    fi
    popd
 done
 cd ../
 
 ## Split mafs up
 mkdir mafSplit
 mafSplitPos -minGap=50000 monDom5 10 mafSplit.bed
 for db in `ls -1 mafLinks`; do
    mkdir mafSplit/$db
    pushd mafSplit/$db
    mafSplit ../../mafSplit.bed monDom5_ ../../mafLinks/${db}/chr*.maf.gz \ 
       -verbose=2
    popd
 done
 
 ## Make file list for cluster run
 cd mafSplit/canFam2/
 find . -type f | sort -u | sed -e "s#./##" > ../../9-way.split.lst
 ## Double-check the file sets are all the same in each database subdir
 
 ## Cluster run on split mafs
 ssh swarm
 cd /hive/data/genomes/monDom5/bed/multiz9way
 mkdir -p multizRun/run/penn multizRun/maf
 cd multizRun/run/
 cp -p /cluster/bin/penn/multiz.2008-11-25/{multiz,maf_project,autoMZ} penn/
 cat > autoMultiz.csh << '_EOF_'                                                                                                   
 #!/bin/csh -ef
 set db = monDom5
 set c = $1
 set result = $2
 set run = `pwd`
 set tmp = $run/tmp/$db/multiz.$c
 set pairs = /hive/data/genomes/monDom5/bed/multiz9way/mafSplit
 /bin/rm -fr $tmp
 /bin/mkdir -p $tmp
 /bin/cp -p ../../tree.nh ../../species.lst $tmp
 pushd $tmp
 foreach s (`sed -e "s/ $db//" species.lst`)
     set in = $pairs/$s/$c.maf
     set out = $db.$s.sing.maf
     if (-e $in.gz) then
         /bin/zcat $in.gz > $out
     else if (-e $in) then
         ln -s $in $out
     else
         echo "##maf version=1 scoring=autoMZ" > $out
     endif
 end
 set path = ($run/penn $path); rehash
 $run/penn/autoMZ + T=$tmp E=$db "`cat tree.nh`" $db.*.sing.maf $c.maf
 popd
 /bin/rm -f $result
 /bin/cp -p $tmp/$c.maf $result
 /bin/rm -fr $tmp
 /bin/rmdir --ignore-fail-on-non-empty $run/tmp/$db
 /bin/rmdir --ignore-fail-on-non-empty $run/tmp
 '_EOF_'
 # << emacs
 chmod +x autoMultiz.csh
 cat << '_EOF' > gsub
 #LOOP
 ./autoMultiz.csh $(root1) {check out line+ /hive/data/genomes/monDom5/bed/multiz9way/multizRun/maf/$(root1).maf}
 #ENDLOOP
 '_EOF_'
 # << emacs
 para -ram=4g -cpu=2 create jobList
 para push
 para time
 #Completed: 332 of 332 jobs
 #CPU time in finished jobs:      17639s     293.98m     4.90h    0.20d  0.001 y
 #IO & Wait Time:                 46958s     782.64m    13.04h    0.54d  0.001 y
 #Average job time:                 195s       3.24m     0.05h    0.00d
 #Longest finished job:            1693s      28.22m     0.47h    0.02d
 #Submission to last job:          1706s      28.43m     0.47h    0.02d
 
 ## Sew up the mafs into one file
 ssh hgwdev
 cd /hive/data/genomes/hg18/bed/multiz44way/multizRun
 mkdir ../maf
 ls -1 maf | sed 's/monDom5_//; s/\..*//' | sort -u | while read chrom; do
     head -q -n 1 maf/monDom5_${chrom}.*.maf | sort -u > ../maf/${chrom}.maf
     grep -h "^#" maf/monDom5_${chrom}.*.maf | egrep -v "maf version=1|eof maf" | sed -e "s#${chrom}.[0-9][0-9]*#${chrom}#g; s#_MZ_[^ ]* # #g;" | sort -u >> ../maf/${chrom}.maf
     grep -h -v "^#" `ls maf/monDom5_${chrom}.*.maf | sort -t. -k2,2n` >> ../maf/${chrom}.maf
     tail -q -n 1 maf/monDom5_${chrom}.*.maf | sort -u >> ../maf/${chrom}.maf
     echo done with $chrom
 done
 cd ../
 rm -rf multizRun/maf/ mafSplit/
 
 ## gap annotation
 mkdir -p anno/{maf,run}
 cd anno/
 for db in `cat ../species.lst`; do
    dbDir="/hive/data/genomes/${db}"
    if [ ! -f ${dbDir}/${db}.N.bed ]; then
        echo "creating ${db}.N.bed"
        twoBitInfo -nBed ${dbDir}/${db}.2bit ${dbDir}/${db}.N.bed
    else
        ls -og ${dbDir}/${db}.N.bed
    fi
 done
 cd run/
 for db in `sed -e "s/monDom5 //" ../../species.lst`; do
    echo "${db} "
    ln -s  /hive/data/genomes/${db}/${db}.N.bed ${db}.bed
    echo ${db}.bed  >> nBeds
    ln -s  /hive/data/genomes/${db}/chrom.sizes ${db}.len
    echo ${db}.len  >> sizes
 done
 ssh hgwdevnew
 cd /hive/data/genomes/monDom5/bed/multiz9way/run
 for c in `ls -1 ../../maf | sed -e "s/.maf//"`; do
    echo starting $c
    time nice mafAddIRows -nBeds=nBeds ../../maf/${c}.maf /hive/data/genomes/monDom5/monDom5.2bit ../maf/${c}.maf
 done
 exit # hgwdevnew
 cd ../../
 
 ## quality information (only available for 6 of 9 species including monDom5)
 mkdir -p quals/{maf,qa}
 cd quals/qa/
 ln -s /hive/data/genomes/monDom5/monDom5.{qac,qdx} .
 ln -s /hive/data/genomes/hg18/bed/multiz44way/quals/allInOnePlace/{galGal3,canFam2,ornAna1}.* .
 qaToQac /hive/data/genomes/rn5/baylor/Rnor4.chroms.fa.qual.gz \
    rn5.noIdx.qac
 qacAddGapIdx /hive/data/genomes/rn5/baylor/Rnor4.scaffold_chr.agp \
    rn5.noIdx.qac rn5.qac rn5.qdx
 rm rn5.noIdx.qac
 qaToQac /hive/data/genomes/macEug1/baylor/macEug1.contigs.onlyInAgp.qa.gz \
    macEug1.noIdx.qac
 qacAddGapIdx /hive/data/genomes/macEug1/baylor/macEug1.agp macEug1.noIdx.qac \
    macEug1.qac macEug1.qdx
 for db in canFam2 galGal3 macEug1 monDom5 ornAna1 rn5; do
     echo $db | awk 'BEGIN{OFS="\t"}{print $1, "/hive/data/genomes/monDom5/bed/multiz9way/quals/qa";}'; 
 done > quals.lst
 cat << '_EOF_' > gsub
 #LOOP
 mafAddQRows quals.lst $(path1) {check out line+ maf/$(file1) }
 #ENDLOOP
 '_EOF_'
 # << emacs
 ssh swarm
 cd /hive/data/genomes/monDom5/bed/multiz9way/quals
 mkdir inMaf
 cd inMaf/
 ln -s ../../anno/maf/* .
 cd ../
 ls -1 inMaf/* > in.lst
 gensub2 in.lst single gsub jobList
 para -ram=8g -cpu=4 create jobList
 para push
 para time
 #Completed: 11 of 11 jobs
 #CPU time in finished jobs:       5419s      90.32m     1.51h    0.06d  0.000 y
 #IO & Wait Time:                 19745s     329.08m     5.48h    0.23d  0.001 y
 #Average job time:                2288s      38.13m     0.64h    0.03d
 #Longest finished job:            5196s      86.60m     1.44h    0.06d
 #Submission to last job:          5206s      86.77m     1.45h    0.06d
 cd ../
 
 ## Gene frames
 mkdir -p genes/{genePreds,inMaf,chrFrames}
 cd genes/genePreds/
 ## human/mouse use known genes
 for db in hg18 mm9; do 
    hgsql -N -e "select name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds from knownGene" ${db} | \
      genePredSingleCover stdin stdout | gzip -c > ${db}.gp.gz
 done
 ## monDom5/canFam2/ornAna1/galGal3/xenTro2/danRer5 use ensGene
 for db in monDom5 canFam2 ornAna1 galGal3 xenTro2 danRer5; do
     hgsql -N -e "select name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds from ensGene" $db | \
       genePredSingleCover stdin stdout | gzip -c > ${db}.gp.gz
 done
 cd ../inMaf/
 ln -s ../../quals/maf/* .
 cd ../
 ls -1 inMaf/ | sed 's/.maf//' > chr.lst
 ls -1 genePreds/ | sed 's/.gp.gz//' > gp.lst
 cat << '_EOF_' > mafFrames.csh
 #!/bin/csh -fe
 set c = $1
 set g = $2
 cat inMaf/${c}.maf | genePredToMafFrames monDom5 stdin stdout \
         ${g} genePreds/${g}.gp.gz | gzip -c > chrFrames/${c}.${g}.mafFrames.gz
 '_EOF_'
 cat << '_EOF_' > gsub
 #LOOP
 ./mafFrames.csh $(root1) $(root2) {check out exists+ chrFrames/$(root1).$(root2).mafFrames.gz}
 #ENDLOOP
 '_EOF_'
 ssh swarm
 cd /hive/data/genomes/monDom5/bed/multiz9way/genes
 gensub2 chr.lst gp.lst gsub jobList
 para -ram=8g -cpu=4 create jobList
 para push
 para time
 #Completed: 88 of 88 jobs
 #CPU time in finished jobs:       1610s      26.83m     0.45h    0.02d  0.000 y
 ##IO & Wait Time:                  3101s      51.69m     0.86h    0.04d  0.000 y
 #Average job time:                  54s       0.89m     0.01h    0.00d
 #Longest finished job:              95s       1.58m     0.03h    0.00d
 #Submission to last job:           211s       3.52m     0.06h    0.00d
 exit # [exit swarm]
 find chrFrames -type f | while read frameFile; do
    zcat ${frameFile}
 done | sort -k1,1 -k2,2n > multiz9wayFrames.bed
 hgLoadMafFrames monDom5 multiz9wayFrames{,.bed}
 
 ## make downloads
 mkdir -p download/maf
 cd download/maf
 cp -p ../../quals/maf/chr*.maf .
 gzip --rsyncable *.maf
 md5sum *.gz > md5sum.txt
 mkdir -p /usr/local/apache/htdocs/goldenPath/monDom5/multiz9way/maf
 ln -s `pwd -P`/* /usr/local/apache/htdocs/goldenPath/monDom5/multiz9way/maf
 cd ../../
 
 ## load mafs into database
 cd anno/maf
 mkdir -p /gbdb/monDom5/multiz9way/maf
 ln -s `pwd -P`/*.maf /gbdb/monDom5/multiz9way/maf
 pushd /data/tmp
 nice -n +19 hgLoadMaf -pathPrefix=/gbdb/monDom5/multiz9way/maf monDom5 multiz9way
 nice -n +19 cat /gbdb/monDom5/multiz9way/maf/*.maf | hgLoadMafSummary monDom5 multiz9waySummary stdin
 popd
 cd ../../
 
 ## phastCons
 ##   1. collect all mafs with all nine species in components.  
 mkdir cons
 cd cons/
 for chrom in `ls -1 ../initialMafs/`; do 
    for db in `cat ../species.lst`; do
        maf=../initialMafs/$chrom
        if [ -e withAllNine.$chrom ]; then
            maf=withAllNine.$chrom
        fi
        mafFilter -needComp=$db $maf > tmp.maf
        mv tmp.maf withAllNine.$chrom
     done
     echo Done with withAllNine.$chrom
 done
 ##   2. split into 10 Mbase pieces and generate .ss files
 mkdir msa.split ss
 cd msa.split/
 ln -s ../../../../monDom5.2bit
 cat << '_EOF_' > doSplit.csh
 #!/bin/csh -ef
 set c = $1
 set maf = /hive/data/genomes/monDom5/bed/multiz9way/cons/maf/withAllNine.${c}.maf
 set chromDir = /hive/data/genomes/monDom5/bed/multiz9way/cons/ss/$c
 rm -fr $chromDir
 mkdir $chromDir
 pushd $chromDir > /dev/null
 twoBitToFa -seq=$c monDom5.2bit monDom5.${c}.fa
 msa_split $maf -i MAF \
     -M monDom5.${c}.fa -o SS -r $chromDir/$c -w 20000000,0 -I 100 -B 5000
 rm -f monDom5.${c}.fa
 popd > /dev/null
 date >> ${c}.done
 '_EOF_'
     # << happy emacs
     chmod +x doSplit.csh
     cat << '_EOF_' > template
 #LOOP
 ./doSplit.csh $(root1) {check out line+ $(root1).done}
 #ENDLOOP
 '_EOF_'
     # << happy emacs
 ssh swarm
 cd /hive/data/genomes/monDom5/bed/multiz9way/cons/msa.split
 ls -1 ../maf/ | sed 's/withAllNine.//; s/.maf//' > chr.lst
 gensub2 chr.lst single gsub jobList
 para -ram=8g -cpu=4 create jobList
 para push
 para time
 #Completed: 11 of 11 jobs
 #CPU time in finished jobs:        805s      13.42m     0.22h    0.01d  0.000 y
 #IO & Wait Time:                   367s       6.11m     0.10h    0.00d  0.000 y
 #Average job time:                 107s       1.78m     0.03h    0.00d
 #Longest finished job:             211s       3.52m     0.06h    0.00d
 #Submission to last job:           220s       3.67m     0.06h    0.00d
 cd ../
 ## another run, full chroms
 mkdir ss.chrom msa_view.run
 cd msa_view.run
 ln -s ../../../../monDom5.2bit
 cat << '_EOF_' > doChrom.csh
 #!/bin/csh -ef
 set c = $1
 twoBitToFa -seq=$c monDom5.2bit ${c}.fa
 msa_view ../maf/withAllNine.${c}.maf -i MAF -M ${c}.fa -o SS > ../ss.chrom/${c}.ss
 rm ${c}.fa
 '_EOF_'
    # << emacs
 cat << '_EOF_' > gsub
 #LOOP
 ./doChrom.csh $(root1) {check out line+ ../ss.chrom/$(root1).ss }
 #ENDLOOP
 '_EOF_'
 ls -1 ../maf/ | sed 's/withAllNine.//; s/.maf//' > chr.lst
 gensub2 chr.lst single gsub jobList
 para -ram=8g -cpu=4 create jobList
 para push
 para time
 #Completed: 11 of 11 jobs
 #CPU time in finished jobs:        769s      12.81m     0.21h    0.01d  0.000 y
 #IO & Wait Time:                   426s       7.10m     0.12h    0.00d  0.000 y
 #Average job time:                 109s       1.81m     0.03h    0.00d
 #Longest finished job:             211s       3.52m     0.06h    0.00d
 #Submission to last job:           217s       3.62m     0.06h    0.00d
 mkdir phyloFit.run
 cd phyloFit.run/
 ls -1 ../ss.chrom/* > chr.lst
 cat << '_EOF_' > gsub
 #LOOP
 /cluster/bin/phast/x86_64/phyloFit --msa-format SS --tree start.tree $(path1) { check out line+ ../trees/$(root1).mod }
 #ENDLOOP
 '_EOF_'
 gensub2 chr.lst single gsub jobList
 para -ram=8g -cpu=4 create jobList
 para push
 para time
 cd ../trees/
 phyloBoot --read-mods chr{1,2,3,4,5,6,7,8}.mod --output-average chroms1-8.mod
 
 ########  Let's back up a minute
 
 exit # back to hgwdev
 mkdir msa_split.chr1
 cd msa_split.chr1/
 twoBitToFa ../../../../monDom5.2bit:chr1 chr1.fa
 time msa_split ../maf/withAllNine.chr1.maf --refseq chr1.fa --in-format MAF \
 	--windows 100000000,1000 --out-format SS \
 	--between-blocks 5000 --out-root ssChr1
 ssh swarm
 cd /hive/data/genomes/monDom5/bed/multiz9way/cons/phastCons.estimateTrees.run
 ls -1 ../msa_split.chr1/* > ss.lst
 grep BACKGROUND ../trees/chroms1-8.mod | awk '{printf "%0.3f\n", $3 + $4;}'
 #0.383
 cat << '_EOF_' > gsub
 #LOOP
 /cluster/bin/phast/x86_64/phastCons --gc 0.383 --target-coverage 0.17 --expected-length 12 --estimate-trees --no-post-probs trees/$(root1) $(path1) ../trees/chroms1-8.mod
 #ENDLOOP
 '_EOF_'
 gensub2 ss.lst single gsub jobList
 para -ram=8g -cpu=4 create jobList
 para push
 para time
 #Completed: 8 of 8 jobs
 #CPU time in finished jobs:      51655s     860.91m    14.35h    0.60d  0.002 y
 #IO & Wait Time:                   950s      15.84m     0.26h    0.01d  0.000 y
 #Average job time:                6576s     109.59m     1.83h    0.08d
 #Longest finished job:            8998s     149.97m     2.50h    0.10d
 #Submission to last job:          9217s     153.62m     2.56h    0.11d
 
 ## OK now average these 
 cd ../
 ls trees/*.cons.mod > cons.txt
 phyloBoot --read-mods '*cons.txt' --output-average ave.cons.mod
 ls trees/*.noncons.mod > noncons.txt
 phyloBoot --read-mods '*noncons.txt' --output-average ave.noncons.mod
 
 ## Now break the big mafs into .ss files
 
 cd ../
 mkdir msa_split.bigMafs ss.splits
 cd msa_split.bigMafs/
 ls -1 ../../initialMafs/* > mafs.lst
 cat << '_EOF_' > msa.sh
 #!/bin/bash
 chr=`basename $1 .maf`
 mkdir -p $chr
 twoBitToFa ../../../../monDom5.2bit:$chr ${chr}.fa
 /cluster/bin/phast/x86_64/msa_split $1 -M ${chr}.fa -i MAF -o SS -w 10000000,1000 -B 5000 -r ${chr}/ss
 rm ${chr}.fa
 '_EOF_'
 chmod +x msa.sh
 cat << '_EOF_' > gsub
 #LOOP
 ./msa.sh $(path1) { check exists $(root1) }
 #ENDLOOP
 '_EOF_'
 ssh swarm 
 cd /hive/data/genomes/monDom5/bed/multiz9way/cons/msa_split.bigMafs
 gensub2 mafs.lst single gsub spec
 para create spec
 para push
 para time
 
 ## Run phastCons on the many .ss files
 
 mkdir ../phast.run
 cd ../phast.run/
 find ../msa_split.bigMafs/ -type f -name '*.ss' > ss.lst
 ln -s ../phastCons.estimateTrees.run/*.mod .
 cat << '_EOF_' > phast.sh
 #!/bin/bash
 ss=$1
 wig=$2
 bed=$3
 mkdir -p `dirname $wig`
 mkdir -p `dirname $bed`
 /cluster/bin/phast/x86_64/phastCons --target-coverage 0.17 --expected-length 12 --most-conserved $bed --score $ss ave.cons.mod,ave.noncons.mod > $wig
 '_EOF_'
 chmod +x phast.sh
 cat << '_EOF_' > gsub
 #LOOP
 ./phast.sh $(path1) wig/$(lastDir1)/$(root1).wig bed/$(lastDir1)/$(root1).bed
 #ENDLOOP
 '_EOF_'
 gensub2 ss.lst single gsub spec
 para create spec
 para push
 para time
 #Completed: 367 of 367 jobs
 #CPU time in finished jobs:       9131s     152.18m     2.54h    0.11d  0.000 y
 #IO & Wait Time:                  6169s     102.82m     1.71h    0.07d  0.000 y
 #Average job time:                  42s       0.69m     0.01h    0.00d
 #Longest finished job:              70s       1.17m     0.02h    0.00d
 #Submission to last job:           343s       5.72m     0.10h    0.00d
 
 ## Stitch up wig files, make .wibs, load in DB
 
 ############################################################################
 # susScr1 Pig BLASTZ/CHAIN/NET (DONE - 2010-01-21,22 - Hiram)
     screen # use a screen to manage this multi-day job
     mkdir /hive/data/genomes/monDom5/bed/lastzSusScr1.2010-01-21
     cd /hive/data/genomes/monDom5/bed/lastzSusScr1.2010-01-21
 
     cat << '_EOF_' > DEF
 # Pig vs. Opossum
 BLASTZ_M=50
 
 # TARGET: Opossum MonDom5
 SEQ1_DIR=/scratch/data/monDom5/monDom5.2bit
 SEQ1_LEN=/scratch/data/monDom5/chrom.sizes
 SEQ1_CHUNK=10000000
 SEQ1_LAP=10000
 
 # QUERY: Pig SusScr1
 SEQ2_DIR=/scratch/data/susScr1/susScr1.2bit
 SEQ2_LEN=/scratch/data/susScr1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/monDom5/bed/lastzSusScr1.2010-01-21
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << this line keeps emacs coloring happy
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-noLoadChainSplit -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=swarm \
 	-chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
     #	real    992m51.260s
     cat fb.monDom5.chainSusScr1Link.txt 
     #	179898391 bases of 3501660299 (5.138%) in intersection
 
     mkdir /hive/data/genomes/susScr1/bed/blastz.monDom5.swap
     cd /hive/data/genomes/susScr1/bed/blastz.monDom5.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/monDom5/bed/lastzSusScr1.2010-01-21/DEF \
 	-swap -noLoadChainSplit -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=swarm \
 	-chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
     #	real    97m35.156s
     cat fb.susScr1.chainMonDom5Link.txt 
     #	182834626 bases of 2231332019 (8.194%) in intersection
 
 #########################################################################
 # ailMel1 Panda BLASTZ/CHAIN/NET (DONE - 2010-02-04 - Hiram)
     screen # use a screen to manage this multi-day job
     mkdir /hive/data/genomes/monDom5/bed/lastzAilMel1.2010-02-04
     cd /hive/data/genomes/monDom5/bed/lastzAilMel1.2010-02-04
 
     cat << '_EOF_' > DEF
 # Panda vs. Opossum
 BLASTZ_M=50
 
 # TARGET: Opossum MonDom5
 SEQ1_DIR=/scratch/data/monDom5/monDom5.2bit
 SEQ1_LEN=/scratch/data/monDom5/chrom.sizes
 SEQ1_CHUNK=10000000
 SEQ1_LAP=10000
 
 # QUERY: Panda AilMel1
 SEQ2_DIR=/scratch/data/ailMel1/ailMel1.2bit
 SEQ2_LEN=/scratch/data/ailMel1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/monDom5/bed/lastzAilMel1.2010-02-04
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << this line keeps emacs coloring happy
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-noLoadChainSplit -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=swarm \
 	-chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
     #	real    492m43.088s
     cat fb.monDom5.chainAilMel1Link.txt 
     #	223510659 bases of 3501660299 (6.383%) in intersection
 
     mkdir /hive/data/genomes/ailMel1/bed/blastz.monDom5.swap
     cd /hive/data/genomes/ailMel1/bed/blastz.monDom5.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/monDom5/bed/lastzAilMel1.2010-02-04/DEF \
 	-swap -noLoadChainSplit -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=swarm \
 	-chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
     #	real    69m35.464s
     cat fb.ailMel1.chainMonDom5Link.txt 
     #	211209682 bases of 2245312831 (9.407%) in intersection
 
-#########################################################################
+##############################################################################
+# calJac3 Marmoset LASTZ/CHAIN/NET (DONE - 2010-02-12 - Hiram)
+    screen # use a screen to manage this multi-day job
+    mkdir /hive/data/genomes/monDom5/bed/lastzCalJac3.2010-02-12
+    cd /hive/data/genomes/monDom5/bed/lastzCalJac3.2010-02-12
+
+    cat << '_EOF_' > DEF
+# Marmoset vs. Opossum
+BLASTZ_M=50
+
+# TARGET: Opossum MonDom5
+SEQ1_DIR=/scratch/data/monDom5/monDom5.2bit
+SEQ1_LEN=/scratch/data/monDom5/chrom.sizes
+SEQ1_CHUNK=10000000
+SEQ1_LAP=10000
+
+# QUERY: Marmoset CalJac3
+SEQ2_DIR=/scratch/data/calJac3/calJac3.2bit
+SEQ2_LEN=/scratch/data/calJac3/chrom.sizes
+SEQ2_CHUNK=10000000
+SEQ2_LAP=0
+
+BASE=/hive/data/genomes/monDom5/bed/lastzCalJac3.2010-02-12
+TMPDIR=/scratch/tmp
+'_EOF_'
+    # << this line keeps emacs coloring happy
+
+    time nice -n +19 $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl \
+	-verbose=2 `pwd`/DEF \
+	-syntenicNet -workhorse=hgwdev -smallClusterHub=memk \
+	-bigClusterHub=swarm -chainMinScore=5000 -chainLinearGap=loose \
+	> do.log 2>&1 &
+    #	real    2772m44.886s
+    cat fb.monDom5.chainCalJac3Link.txt 
+    #	216197506 bases of 3501660299 (6.174%) in intersection
+
+    mkdir /hive/data/genomes/calJac3/bed/blastz.monDom5.swap
+    cd /hive/data/genomes/calJac3/bed/blastz.monDom5.swap
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+	/hive/data/genomes/monDom5/bed/lastzCalJac3.2010-02-12/DEF \
+	-swap -noLoadChainSplit -syntenicNet \
+	-workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=swarm \
+	-chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
+    #	real    110m13.435s
+    cat fb.calJac3.chainMonDom5Link.txt 
+    #	217614612 bases of 2752505800 (7.906%) in intersection
+
+##############################################################################