src/hg/makeDb/doc/ce6.txt 1.18

1.18 2010/01/19 20:14:21 hiram
adding algBinding tracks
Index: src/hg/makeDb/doc/ce6.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/ce6.txt,v
retrieving revision 1.17
retrieving revision 1.18
diff -b -B -U 1000000 -r1.17 -r1.18
--- src/hg/makeDb/doc/ce6.txt	28 Jul 2009 21:55:25 -0000	1.17
+++ src/hg/makeDb/doc/ce6.txt	19 Jan 2010 20:14:21 -0000	1.18
@@ -1,2746 +1,2808 @@
 # for emacs: -*- mode: sh; -*-
 
 # Caenorhabditis elegans
 # Washington University School of Medicine GSC and Sanger Institute WS190
 
 #  $Id$
 
 #########################################################################
 # DOWNLOAD SEQUENCE (DONE - 2008-05-30 - Hiram)
     ssh kkstore06
     mkdir /cluster/store3/worm/ce6
     ln -s /cluster/store3/worm/ce6 /cluster/data/ce6
     mkdir /cluster/data/ce6/downloads
     cd /cluster/data/ce6/downloads
     for C in I II III IV V X MtDNA
 do
     F=WS190/CHROMOSOMES/CHROMOSOME_${C}
     wget --timestamping \
       "ftp://ftp.sanger.ac.uk/pub/wormbase/${F}.agp"
     wget --timestamping \
       "ftp://ftp.sanger.ac.uk/pub/wormbase/${F}.dna.gz"
     wget --timestamping \
       "ftp://ftp.sanger.ac.uk/pub/wormbase/${F}.gff.gz"
 done
 
 #########################################################################
 # NORMALIZE SEQUENCE NAMES TO BEGIN WITH chr (DONE - 2008-05-30 - Hiram)
     ssh kkstore06
     cd /cluster/data/ce6/downloads
     # Fix fasta names:
     zcat CHR*.dna.gz \
     | sed -e '/^$/ d;  s/^>CHROMOSOME_MtDNA/>chrM/;  s/^>CHROMOSOME_/>chr/;' \
     | gzip -c > UCSC.fa.gz
     faSize -detailed UCSC.fa.gz
 # chrI    15072421
 # chrII   15279323
 # chrIII  13783681
 # chrIV   17493785
 # chrM    13794
 # chrV    20919568
 # chrX    17718854
 
     # Make sure we get the same sizes from this command:
     zcat CHR*.dna.gz | sed -e '/^$/ d;' | faSize -detailed stdin
 
     # Fix AGP names:
     sed -e 's/^/chr/' CHR*.agp > UCSC.agp
     # And add a fake mitochondrial AGP entry for the sake of downstream
     # tools (make sure the GenBank sequence is identical to given):
     echo -e "chrM\t1\t13794\t1\tF\tNC_001328.1\t1\t13794\t+" >> UCSC.agp
 
 #########################################################################
 # run the makeGenomeDb procedure to create the db and unmasked sequence
 #	(DONE - 2008-05-30 - Hiram)
     ssh kkstore06
     cd /cluster/data/ce6
     cat << '_EOF_' > ce6.config.ra
 # Config parameters for makeGenomeDb.pl:
 db ce6
 clade worm
 genomeCladePriority 10
 scientificName Caenorhabditis elegans
 commonName C. elegans
 assemblyDate May 2008
 assemblyLabel Washington University School of Medicine GSC and Sanger Institute WS190
 orderKey 826
 mitoAcc none
 fastaFiles /cluster/data/ce6/downloads/UCSC.fa.gz
 agpFiles   /cluster/data/ce6/downloads/UCSC.agp
 # qualFiles /dev/null
 dbDbSpeciesDir worm
 '_EOF_'
     # << emacs
 
     mkdir jkStuff
     #	run just to AGP to make sure things are sane first
     nice makeGenomeDb.pl ce6.config.ra -stop agp \
       > jkStuff/makeGenomeDb.agp 2>&1
     #	now, continuing to make the Db and all
     time nice -n +19 makeGenomeDb.pl ce6.config.ra -continue db \
       > jkStuff/makeGenomeDb.log 2>&1
     #	that takes 2m30s to run to completion
     #	take the trackDb business there and check it into the source tree
     #	fixup the description, gap and gold html page descriptions
 
 #########################################################################
 # REPEATMASKER (DONE - 2008-05-30 - Hiram)
     ssh kkstore06
     screen 	#	use screen to control the job
     mkdir /cluster/data/ce6/bed/repeatMasker
     cd /cluster/data/ce6/bed/repeatMasker
     time nice -n +19 doRepeatMasker.pl -bigClusterHub=pk \
 	-buildDir=/cluster/data/ce6/bed/repeatMasker ce6 > do.log 2>&1 &
     #	real    133m17.088s
 
     #	from the do.log:
     #	RepeatMasker version development-$Id:
     #	RepeatMasker,v 1.20 2008/01/16 18:15:45 angie Exp $
     #	CC   RELEASE 20071204;                                            *
 
 #########################################################################
 # SIMPLE REPEATS (DONE - 2008-05-30 - Hiram)
     ssh kkstore06
     screen 	#	use screen to control the job
     mkdir /cluster/data/ce6/bed/simpleRepeat
     cd /cluster/data/ce6/bed/simpleRepeat
     time nice -n +19 doSimpleRepeat.pl -smallClusterHub=kki \
 	-buildDir=/cluster/data/ce6/bed/simpleRepeat ce6 > do.log 2>&1 &
     #	about 30 minutes
 
 #########################################################################
 # MASK SEQUENCE WITH RM+TRF (DONE - 2008-05-30 - Hiram)
     # Since both doRepeatMasker.pl and doSimpleRepeats.pl have completed,
     # now it's time to combine the masking into the final ce6.2bit,
     # following the instructions at the end of doSimpleRepeat's output.
     ssh kolossus
     cd /cluster/data/ce6
     twoBitMask ce6.rmsk.2bit -add bed/simpleRepeat/trfMask.bed ce6.2bit
     # You can safely ignore the warning about extra BED columns
     twoBitToFa ce6.2bit stdout | faSize stdin
 # 100281426 bases (0 N's 100281426 real 86981803 upper 13299623 lower)
 #	in 7 sequences in 1 files
 # Total size: mean 14325918.0 sd 6728308.1 min 13794 (chrM)
 #	max 20919568 (chrV) median 15279323
 # %13.26 masked total, %13.26 masked real
     #	set the symlink on hgwdev to /gbdb/ce6
     cd /gbdb/ce6
     ln -s /cluster/data/ce6/ce6.2bit .
 
 #########################################################################
 # MAKE 11.OOC FILE FOR BLAT (DONE - 2008-05-30 - Hiram)
     # Use -repMatch=100 (based on size -- for human we use 1024, and 
     # worm size is ~3.4% of human judging by gapless ce4 vs. hg18 genome 
     # size from featureBits. So we would use 34, but that yields a very
     # high number of tiles to ignore, especially for a small more compact 
     # genome.  Bump that up a bit to be more conservative.
     ssh kolossus
     cd /cluster/data/ce6
     blat ce6.2bit /dev/null /dev/null -tileSize=11 \
       -makeOoc=jkStuff/11.ooc -repMatch=100
     #	Wrote 8523 overused 11-mers to jkStuff/11.ooc
     #	copy all of this stuff to the Iservers
     ssh kkr1u00
     mkdir /iscratch/i/worms/ce6
     cd /iscratch/i/worms/ce6
     cp -p /cluster/data/ce6/ce6.2bit .
     cp -p /cluster/data/ce6/jkStuff/11.ooc .
     cp -p /cluster/data/ce6/chrom.sizes .
 -
     cp -p jkStuff/11.ooc /san/sanvol1/scratch/worms/ce6
 
 #########################################################################
 # GENBANK AUTO UPDATE (DONE - 2008-05-30 - Hiram)
     # align with latest genbank process.
     ssh hgwdev
     cd ~/kent/src/hg/makeDb/genbank
     cvsup
     # edit etc/genbank.conf to add ce6 just before ce4
 
 # ce6 (C. elegans)
 ce6.serverGenome = /cluster/data/ce6/ce6.2bit
 ce6.clusterGenome = /iscratch/i/worms/ce6/ce6.2bit
 ce6.ooc = /iscratch/i/worms/ce6/11.ooc
 ce6.refseq.mrna.native.pslCDnaFilter  = ${finished.refseq.mrna.native.pslCDnaFilter}
 ce6.refseq.mrna.xeno.pslCDnaFilter    = ${finished.refseq.mrna.xeno.pslCDnaFilter}
 ce6.genbank.mrna.native.pslCDnaFilter = ${finished.genbank.mrna.native.pslCDnaFilter}
 ce6.genbank.mrna.xeno.pslCDnaFilter   = ${finished.genbank.mrna.xeno.pslCDnaFilter}
 ce6.genbank.est.native.pslCDnaFilter  = ${finished.genbank.est.native.pslCDnaFilter}
 ce6.maxIntron = 100000
 ce6.lift = no
 ce6.genbank.mrna.xeno.load = yes
 ce6.refseq.mrna.xeno.load = yes
 ce6.downloadDir = ce6
 
     cvs ci -m "Added ce6." etc/genbank.conf
     # update /cluster/data/genbank/:
     make etc-update
 
     ssh genbank
     screen		#	use a screen to manage this job
     cd /cluster/data/genbank
     time nice -n +19 bin/gbAlignStep -initial ce6 &
     #	logFile: var/build/logs/2008.05.30-15:04:49.ce6.initalign.log
     #	real    137m58.675s
 
     #	real    61m33.930s
 
     # load database when finished
     ssh hgwdev
     cd /cluster/data/genbank
     time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad ce6
     #	logFile: var/dbload/hgwdev/logs/2008.05.30-20:29:01.dbload.log
     #	real    23m8.651s
 
     # enable daily alignment and update of hgwdev
     cd ~/kent/src/hg/makeDb/genbank
     cvsup
     # add ce6 to:
         etc/align.dbs
         etc/hgwdev.dbs
     cvs ci -m "Added ce6 - C. elegans WS190" \
 	etc/align.dbs etc/hgwdev.dbs
     make etc-update
 
 ###############################################################################
 ## SANGER GENE TRACK (DONE - 2008-06-03 - Hiram)
 ##	There is a tremendous amount of extraneous notations in the gff
 ##	files.  Filter them down to a manageable set, change the chrom
 ##	names, eliminate duplicates, select only what will work in
 ##	ldHgGene
     ssh kkstore06
     mkdir /cluster/data/ce6/bed/sangerGene
     cd /cluster/data/ce6/bed/sangerGene
 for C in I II III IV V X
 do
     echo -n "${C} "
     zcat ../../downloads/CHROMOSOME_${C}.gff.gz | \
     sed -e "s/CHROMOSOME_III/chrIII/g; s/CHROMOSOME_II/chrII/g; \
         s/CHROMOSOME_IV/chrIV/g; s/CHROMOSOME_I/chrI/g; \
         s/CHROMOSOME_X/chrX/g; s/CHROMOSOME_V/chrV/g; \
         s/CHROMOSOME_M/chrM/g;" \
         -e 's/Sequence "\(.*\)"$/\1/' -e 's/Transcript "\(.*\)"$/\1/' \
         -e 's/CDS "//' -e 's/"//' \
                 > chr${C}.gff
 done
 C=M
 echo -n "${C} "
 zcat ../../downloads/CHROMOSOME_MtDNA.gff.gz | \
 sed -e "s/CHROMOSOME_III/chrIII/g; s/CHROMOSOME_II/chrII/g; \
     s/CHROMOSOME_IV/chrIV/g; s/CHROMOSOME_I/chrI/g; \
     s/CHROMOSOME_X/chrX/g; s/CHROMOSOME_V/chrV/g; \
     s/CHROMOSOME_M/chrM/g; s/chrMtDNA/chrM/g;" \
     -e 's/Sequence "\(.*\)"$/\1/' -e 's/Transcript "\(.*\)"$/\1/' \
     -e 's/CDS "//' -e 's/"//' \
             > chr${C}.gff
 for C in I II III IV V X M
 do
     echo "chr${C}.gff -> filtered.chr${C}.gff"
     grep -v "^#" chr${C}.gff | awk -F'\t' '
 BEGIN { IGNORECASE=1 }
 {
     if (match($2,"curated|Coding_transcript")) {
 	if (match($3,"intron|coding_exon|exon|cds|three_prime_UTR|five_prime_UTR")) {
 	    gsub("coding_exon","CDS",$3)
             gsub("Transcript ","",$9)
             gsub(" .*","",$9)
             gsub("three_prime_UTR","3utr",$3)
             gsub("five_prime_UTR","5utr",$3)
             for (i = 1; i < 9; ++i) {
                 printf "%s\t", $i
             }
             printf "%s\n", $9
         }
     }
 }
 ' | sort -u | sort -k4n > filtered.chr${C}.gff
 done
 
     ssh hgwdev
     cd /cluster/data/ce6/bed/sangerGene
     nice -n +19 ldHgGene ce6 sangerGene filtered.*.gff
     nice -n +19 ldHgGene -out=filteredGenePred.tab ce6 sangerGene filtered.*.gff
     #	Read 52512 transcripts in 997038 lines in 7 files
     #	52512 groups 7 seqs 3 sources 5 feature types
     #	30296 gene predictions
 
     ## this doesn't work with the bin column, so alter that table
     hgsql -e "alter table sangerGene drop column bin;" ce6
     ## found when trying to click through on sequence links, mRNA or
     ## protein when on a gene details page
 
     ## Create orfToGene table.  The tar file containing this information
     # was obtained from Anthony Rogers via Email 2008-06-03 ar2@sanger.ac.uk
     #	They had one duplicate line in their file:
     sort -u /cluster/data/ce6/downloads/tarFile/gene_name.txt \
 	| sed -e "s/  */\t/" > orfToGene.tab
     ## plus, they didn't include the self with self references, which are
     ## needed in the gene sorter so it can find its names.  So, fixing
     #	this table:
     hgsql -N -e "select name from sangerGene;" ce6 | sort > name.sangerGene.txt
     cat << '_EOF_' > fixupOrf.pl
 #!/usr/bin/env perl
 
 use strict;
 use warnings;
 
 my %sangerGeneName;
 
 open (FH, "<name.sangerGene.txt") or die "can not read name.sangerGene.txt";
 while (my $line = <FH>) {
     chomp $line;
     $sangerGeneName{$line} = 1;
 }
 close (FH);
 
 open (FH,"<orfToGene.tab") or die "can not read orfToGene.tab";
 
 my %alias;	# key is alias name, value is real name
 my %realNames;	# key is real name, value is 1 (unimportant)
 my %twoDot;	# key is two dot name, value is 1 (unimportant)
 
 while (my $line = <FH>) {
     chomp $line;
     my ($alias, $real) = split('\s+', $line);
     die "duplicate alias name found: $alias" if (exists($alias{$alias}));
     if (exists($sangerGeneName{$alias})) {
 	$alias{$alias} = $real;
 	$realNames{$real} = 1;
 	my ($a, $b, $c, $d) = split('\.',$alias);
 	die "four dot name found $line" if (defined($d));
 	my $twoDotName = sprintf "%s.%s", $a, $b;
 	if (exists($sangerGeneName{$twoDotName})) {
 	    $twoDot{$twoDotName} = $real;
 	}
     }
 }
 
 close (FH);
 
 my $count = 0;
 my $noReal = 0;
 my %needReal;
 foreach my $key (keys %alias) {
     ++$count;
     my $realName = $alias{$key};
     if (!exists($alias{$realName})) {
 	if (!exists($needReal{$realName}) &&
 		exists($sangerGeneName{$realName})) {
 	    ++$noReal;
 	    $needReal{$realName} = $key;
 	    my ($a, $b, $c, $d) = split('\.',$realName);
 	    die "four dot name found $realName" if (defined($d));
 	    my $twoDotName = sprintf "%s.%s", $a, $b;
 	    if (exists($sangerGeneName{$twoDotName})) {
 		$twoDot{$twoDotName} = $key;
 	    }
 	}
     }
 }
 print STDERR "found $count alias names\n";
 print STDERR "can not find $noReal real names in alias list\n";
 
 $count = 0;
 foreach my $key (keys %realNames) {
     ++$count;
 }
 print STDERR "found $count real names\n";
 
 $noReal = 0;
 foreach my $key (keys %needReal) {
     ++$noReal;
     print STDERR "$key $needReal{$key}\n" if ($noReal < 10);
 }
 print STDERR "need to add $noReal real names\n";
 
 open (FH,">fixedOrfToGene.tab") or die "can not write to fixedOrfToGene.tab";
 foreach my $key (keys %alias) {
     printf FH "%s\t%s\n", $key, $alias{$key};
 }
 
 foreach my $key (keys %needReal) {
     printf FH "%s\t%s\n", $key, $key;
 }
 
 foreach my $key (keys %twoDot) {
     if ( (!exists($alias{$key})) && (!exists($needReal{$key})) ) {
 	print STDERR "need name for $key $twoDot{$key}\n";
 	printf FH "%s\t%s\n", $key, $twoDot{$key};
     }
 }
 foreach my $key (keys %sangerGeneName) {
     if ( (!exists($alias{$key})) && (!exists($needReal{$key})) ) {
 	if ( !exists($twoDot{$key}) ) {
 	    print STDERR "last chance need name for $key $key\n";
 	    printf FH "%s\t%s\n", $key, $key;
 	}
     }
 }
 close (FH);
 '_EOF_'
     # << happy emacs
     chmod +x fixupOrf.pl
     ./fixupOrf.pl > fixupOrf.out 2>&1
 
     hgLoadSqlTab ce6 orfToGene ~/kent/src/hg/lib/orfToGene.sql \
 	fixedOrfToGene.tab
     ln -s /cluster/data/ce6/downloads/tarFile/paragraph.txt ./paragraph.tab
     ln -s /cluster/data/ce6/downloads/tarFile/uniprot.txt ./uniprot.tab
     cat << '_EOF_' > mkSangerLinks.pl
 #!/usr/bin/env perl
 
 use strict;
 use warnings;
 
 my %geneToOrf;  #       key is uc(gene name), value is orf
 my %orfToGene;  #       key is uc(orf), value is gene name
 
 my $orfCount = 0;
 open (FH, "<orfToGene.tab") or die "can not read orfToGene.tab";
 while (my $line = <FH>) {
     chomp $line;
     my ($orf, $gene) = split('\t',$line);
     die "gene name zero length" if (length($gene) < 1);
     die "orf name zero length" if (length($orf) < 1);
     $geneToOrf{uc($gene)} = $orf;
     $orfToGene{uc($orf)} = $gene;
     ++$orfCount;
 }
 
 close (FH);
 printf STDERR "read $orfCount orf designations from orfToGene.tab\n";
 
 open (LS,">sangerLinks.tab") or die "can not write to sangerLinks.tab";
 
 my $geneCount = 0;
 open (FH, "sort -u paragraph.tab|") or die "can not read paragraph.tab";
 while (my $line = <FH>) {
     chomp $line;
     my ($orf, $descr) = split('\t',$line);
     die "gene name zero length" if (length($orf) < 1);
     die "can not find orfToGene $orf" if (!exists($orfToGene{uc($orf)}));
     my $orfGene = $orfToGene{uc($orf)};
     die "orfGene name zero length" if (length($orfGene) < 1);
     printf LS "%s\t%s\t%s\n", $orf, $orfGene, $descr;
     ++$geneCount;
 }
 
 close (FH);
 printf STDERR "read $geneCount gene descriptions from paragraph.tab\n";
 close (LS);
 '_EOF_'
     # << happy emacs
     chmod +x mkSangerLinks.pl
     ./mkSangerLinks.pl
     #	read 33792 orf designations from orfToGene.tab
     #	read 33793 gene descriptions from paragraph.tab
     #	There is problem in paragraph.tab, it contains a duplicate entry:
 F57C7.4 F57C7.4 contains similarity to Interpro domain IPR008972 ()
 F57C7.4 F57C7.4 contains similarity to Plasmodium falciparum Hypothetical protein.\; TR:Q8I5U3
     #	manually fixup this one to elimiate the second definition
 
     hgLoadSqlTab ce6 sangerLinks ~/kent/src/hg/lib/sangerLinks.sql \
 	sangerLinks.tab
     # Add proteinID field to sangerGene table, used by Gene Sorter
     hgsql -e 'alter table sangerGene add proteinID varchar(40) NOT NULL;' ce6
     # To add index on this column
     hgsql -e 'alter table sangerGene add index(proteinID);' ce6
     # Use SwissProt IDs in sangerLinks table to fill proteinID 
     # column in the sangerGene table
     hgsql -e 'update sangerGene as g, sangerLinks as l \
       set g.proteinID = l.protName where g.name = l.orfName;' ce6
     # check there are rows with the protName field filled
     hgsql -N -e "select proteinId from sangerGene" ce6 | sort | uniq -c | wc -l
     #	20127
 
     hgLoadSqlTab ce6 sangerLinks ~/kent/src/hg/lib/sangerLinks.sql \
 	sangerLinks.tab
     wget --timestamping \
 	"ftp://ftp.sanger.ac.uk/pub/wormbase/WS190/wormpep190.tar.gz"
     wget --timestamping \
 	"ftp://ftp.sanger.ac.uk/pub/wormbase/WS190/wormrna190.tar.gz"
     tar xvzf wormrna190.tar.gz
     mv README README.wormrna190
     tar xvzf wormpep190.tar.gz
     #	creates files in directory wormpep190:
 # -rw-rw-r--  1 32316202 Apr  7 06:38 wormpep.dna190
 # -rw-rw-r--  1   980303 Apr  7 06:41 wormpep.history190
 # -rw-rw-r--  1     4826 Apr  7 06:41 wormpep.diff190
 # -rw-rw-r--  1 19139454 Apr  7 06:41 wormpep.fasta190
 # -rw-rw-r--  1   398315 Apr  7 06:41 wormpep.accession190
 # -rw-rw-r--  1 13244499 Apr 15 06:23 wormpep190
 # -rw-rw-r--  1  2608936 Apr 15 06:23 wormpep.table190
     hgPepPred ce6 generic sangerPep wormpep190/wormpep190
 
 ###############################################################################
 ######## MAKING GENE SORTER TABLES #### (DONE - 2008-06-04 - Hiram)
 # These are instructions for building the
 # Gene Sorter browser.  Don't start these until
 # there is a sangerGene table with a proteinID column containing Swiss-Prot
 # protein IDs, and also Kim lab expression data is required (in hgFixed).
 
     # Cluster together various alt-splicing isoforms.
     # Creates the sangerIsoforms and sangerCanonical tables
     ssh hgwdev
     hgClusterGenes ce6 sangerGene sangerIsoforms sangerCanonical
     #	Got 20051 clusters, from 30296 genes in 7 chromosomes
 
     featureBits ce6 sangerCanonical
     #	57412493 bases of 100281426 (57.251%) in intersection
     featureBits ce4 sangerCanonical
     #	57092312 bases of 100281244 (56.932%) in intersection
     featureBits ce2 sangerCanonical
     # 54156601 bases of 100291761 (53.999%) in intersection
     featureBits ce1 sangerCanonical 
     # 53654286 bases of 100277784 (53.506%) in intersection
 
     # Create Self blastp table - sangerBlastTab
     # Get sangerPep peptide sequences fasta file from sangerPep dir
     # and create a blast database out of them.
     mkdir -p /cluster/data/ce6/bed/blastp
     cd /cluster/data/ce6/bed/blastp
     cat << '_EOF_' > pepTabToFa.pl
 #!/usr/bin/env perl
 
 use strict;
 use warnings;
 
 open (FH,"<../sangerGene/sangerPep.tab") or
         die "can not open ../sangerGene/sangerPep.tab";
 while (my $line = <FH>) {
     chomp $line;
     my ($name,$seq) = split('\s+',$line);
     printf ">%s\n%s\n", $name, $seq;
 }
 close (FH);
 '_EOF_'
     # << happy emacs
     chmod +x ./pepTabToFa.pl
     ./pepTabToFa.pl > wormPep190.faa
     
     /cluster/bluearc/blast229/formatdb -i wormPep190.faa \
 	-t wormPep190 -n wormPep190
 
     # Copy database over to kluster storage
     mkdir -p /san/sanvol1/scratch/worms/ce6/blastp
     rsync -a ./ /san/sanvol1/scratch/worms/ce6/blastp/
 
     cd /cluster/data/ce6/bed/blastp
     mkdir split
     faSplit sequence wormPep190.faa 1000 split/wp
 
     # Make parasol run directory 
     ssh pk
     mkdir -p /cluster/data/ce6/bed/blastp/self
     cd /cluster/data/ce6/bed/blastp/self
     mkdir run
     cd run
     mkdir out
 
 # Make blast script
     cat  << '_EOF_' > blastSome.csh
 #!/bin/csh -ef
 setenv BLASTMAT /cluster/bluearc/blast229/data
 /cluster/bluearc/blast229/blastall -p blastp \
     -d /san/sanvol1/scratch/worms/ce6/blastp/wormPep190 \
 	-i $1 -o $2 -e 0.01 -m 8 -b 1000
 '_EOF_'
     # << happy emacs
     chmod a+x blastSome.csh
     # Make gensub2 file
     cat  << '_EOF_' > gsub
 #LOOP
 blastSome.csh {check in line+ $(path1)} {check out line out/$(root1).tab}
 #ENDLOOP
 '_EOF_'
      # << Create parasol batch
 
      ls ../../split/*.fa > split.lst
      gensub2 split.lst single gsub jobList
      para create jobList
      para try.... etc ...
 # Completed: 986 of 986 jobs
 # CPU time in finished jobs:      18187s     303.12m     5.05h    0.21d  0.001 y
 # IO & Wait Time:                  2713s      45.22m     0.75h    0.03d  0.000 y
 # Average job time:                  21s       0.35m     0.01h    0.00d
 # Longest finished job:              73s       1.22m     0.02h    0.00d
 # Submission to last job:           302s       5.03m     0.08h    0.00d
 
     # Load into database.  
     ssh hgwdev
     cd /cluster/data/ce6/bed/blastp/self/run/out
     hgLoadBlastTab ce6 sangerBlastTab *.tab
     #	Scanning through 986 files
     #	Loading database with 1255312 rows
 
     # CREATE EXPRESSION DISTANCE TABLES FOR  
     # KIM LAB EXPRESSION DATA  (this appears to be an expensive command,
     #	about 10 to 15 minutes or so ?)
     hgExpDistance ce6 hgFixed.kimWormLifeMedianRatio \
 	  hgFixed.kimWormLifeMedianExps kimExpDistance
     #	Have 19134 elements in hgFixed.kimWormLifeMedianRatio
     #	Got 19134 unique elements in hgFixed.kimWormLifeMedianRatio
 
     # CREATE TABLE TO MAP BETWEEN SANGERGENES AND REFSEQ GENES
     # sangerToRefSeq
     hgMapToGene ce6 refGene sangerGene sangerToRefSeq
 
     # SET dbDb TABLE ENTRY FOR GENE SORTER (DONE, 2004-05-25, hartera)
     # set hgNearOk to 1 on hgcentraltest to make Ce6 Gene Sorter viewable
 
     hgsql -e 'update dbDb set hgNearOk = 1 where name = "ce6";' \
          hgcentraltest
 
     #	Running joinerCheck to see what is sane:
     cd ~/kent/src/hg/makeDb/schema
     joinerCheck -identifier=wormBaseId -database=ce6 -times -keys ./all.joiner
 # ce6.orfToGene.name - hits 30296 of 30296 ok
 # ce6.sangerBlastTab.query - hits 1255312 of 1255312 ok
 # ce6.sangerBlastTab.target - hits 1255312 of 1255312 ok
 # ce6.sangerCanonical.transcript - hits 20051 of 20051 ok
 # ce6.sangerIsoforms.transcript - hits 30296 of 30296 ok
 # ce6.sangerLinks.orfName - hits 30115 of 30115 ok
 # ce6.sangerPep.name - hits 23771 of 23771 ok
 # ce6.sangerToRefSeq.name - hits 29995 of 29995 ok
 
     ## create a gene name to WBGene ID reference table
     ## to be used to construct URL
     mkdir /cluster/data/ce6/bed/WBGeneID
     cd /cluster/data/ce6/bed/WBGeneID
     #	There was one duplicate entry in the file, the sort -u eliminates it
     sed -e "s#http://wormbase.org/db/gene/gene?name=##; s#;class=Gene##" \
 	../../downloads/tarFile/url.txt | sort -u > sangerGeneToWBGeneID.tab
     #	add in the one-dot names for those names that are only two-dot names
     #	in this file:
     cat << '_EOF_' > addDotName.pl
 #!/usr/bin/env perl
 
 use strict;
 use warnings;
 
 my %sangerGeneName;
 
 open (FH, "sangerGene.name") or die "can not read sangerGene.name";
 while (my $line = <FH>) {
     chomp $line;
     $sangerGeneName{$line} = 1;
 }
 close (FH);
 
 open (FH,"<sangerGeneToWBGeneID.tab") or
 	die "can not read sangerGeneToWBGeneID.tab";
 
 my %twoDotNames;
 
 while (my $line = <FH>) {
     chomp $line;
     my ($name, $wbId) = split('\s+',$line);
     die "duplicate gene name $name $wbId" if (exists($twoDotNames{$name}));
     if (exists($sangerGeneName{$name})) {
 	$twoDotNames{$name} = $wbId;
     }
 }
 close (FH);
 
 my %neededNames;
 
 foreach my $key (keys %twoDotNames) {
     my ($a, $b, $c, $d) = split('\.',$key);
     my $wbId = $twoDotNames{$key};
     die "three dot name found $key $wbId" if (defined($d));
     if (defined($c)) {
 	my $oneDot = sprintf "%s.%s", $a, $b;
 	if (!exists($twoDotNames{$oneDot})) {
 	    if (!exists($neededNames{$oneDot})) {
 		$neededNames{$oneDot} = $wbId;
 	    }
 	}
     }
 }
 foreach my $key (keys %twoDotNames) {
     printf "%s\t%s\n", $key, $twoDotNames{$key};
 }
 foreach my $key (keys %neededNames) {
     printf "%s\t%s\n", $key, $neededNames{$key};
 }
 '_EOF_'
     #	<< happy emacs
     chmod +x addDotName.pl
 
     hgsql -N -e "select name from sangerGene;" ce6 | sort -u > sangerGene.name
     ./addDotName.pl  > fixed.sangerGeneToWBGeneID.tab
 
 
     hgLoadSqlTab ce6 sangerGeneToWBGeneID \
 	~/kent/src/hg/lib/sangerGeneToWBGeneID.sql \
 	fixed.sangerGeneToWBGeneID.tab
     cd ~/kent/src/hg/makeDb/schema
     joinerCheck -identifier=wormBaseId -database=ce6 -times -keys ./all.joiner
 # ce6.orfToGene.name - hits 30296 of 30296 ok
 # ce6.sangerBlastTab.query - hits 1255312 of 1255312 ok
 # ce6.sangerBlastTab.target - hits 1255312 of 1255312 ok
 # ce6.sangerCanonical.transcript - hits 20051 of 20051 ok
 # ce6.sangerIsoforms.transcript - hits 30296 of 30296 ok
 # ce6.sangerLinks.orfName - hits 30115 of 30115 ok
 # ce6.sangerPep.name - hits 23771 of 23771 ok
 # ce6.sangerToRefSeq.name - hits 29995 of 29995 ok
 # ce6.sangerGeneToWBGeneID.sangerGene - hits 30115 of 30115 ok
 
 ############################################################################
 ## fixup sangerLinks table (DONE - 2008-06-03 - Hiram)
     ## it is missing descriptions for the one dot names where only two dot
     ## names existed
     mkdir /cluster/data/ce6/bed/fixupSangerLinks
     cd /cluster/data/ce6/bed/fixupSangerLinks
     ln -s ../sangerGene/sangerLinks.tab .
     cat << '_EOF_' > slFixup.pl
 #!/usr/bin/env perl
 
 use strict;
 use warnings;
 
 open (FH,"<sangerLinks.tab") or die "can not read sangerLinks.tab";
 
 my %existing;   # key is full gene symbol name, one or two dots
                 # value is the orfName and description text, tab separated
 my %newNeeded;  # key is shortened one dot name, value is orfName and
                 # description text, tab separated
 my $newCount = 0;
 
 while (my $line = <FH>) {
     chomp $line;
     my ($name, $orf, $desc) = split('\t',$line);
     my ($a, $b, $c, $d) = split('\.', $name);
     die "ERROR: found three dot name at $line" if (defined($d));
     if (defined($c)) {
         my $oneDotName = sprintf "%s.%s", $a, $b;
         if (exists($newNeeded{$oneDotName})) {
             my $newString = sprintf "%s\t%s", $orf, $desc;
             die "ERROR: new one dot name, different descr $name"
                 if ($newNeeded{$oneDotName} ne $newString)
         } else {
         $newNeeded{$oneDotName} = sprintf "%s\t%s", $orf, $desc;
         ++$newCount;
         }
     }
     die "ERROR: duplicate name $name at $line" if (exists($existing{$name}));
     $existing{$name} = sprintf "%s\t%s", $orf, $desc;
 }
 
 close (FH);
 
 foreach my $key (keys %newNeeded) {
     die "ERROR: duplicate new name $key for $newNeeded{$key}"
         if (exists($existing{$key}));
     printf "%s\t%s\n", $key, $newNeeded{$key};
 }
 
 foreach my $key (keys %existing) {
     die "ERROR: duplicate existing name $key for $existing{$key}"
         if (exists($newNeeded{$key}));
     printf "%s\t%s\n", $key, $existing{$key};
 }
 '_EOF_'
     # << happy emacs
     chmod +x ./slFixup.pl
     ./slFixup.pl > fixedSangerLinks.tab
 
     hgLoadSqlTab ce6 sangerLinks ~/kent/src/hg/lib/sangerLinks.sql \
 	fixedSangerLinks.tab
     #	It still has a difficulty because it contains references to the RNA
     #	genes.  joinerCheck complains.  So, after loading sangerRnaGene
     #	below, remove the entries in sangerLinks that are in sangerRnaGene.
     #	I tried to figure out a delete statement that would do this, but
     #	to no avail.  So, brain dead list management:
     hgsql -N -e "select orfName from sangerLinks;" ce6 \
 	| sort -u > sangerLinks.orfName
     hgsql -N -e "select name from sangerRnaGene;" ce6 \
 	| sort -u > sangerRnaGene.name
     #	before delete:
     hgsql -e "select count(*) from sangerLinks;" ce6
     #	 36555 
     comm -12 sangerLinks.orfName sangerRnaGene.name | while read N
 do
     hgsql -e "delete from sangerLinks where orfName=\"$N\";" ce6
     echo $N
 done
     #	after delete:
     hgsql -e "select count(*) from sangerLinks;" ce6
     #	 30115 
     # 36555 - 30115 = 6440
     
 ###############################################################################
 ## RNA sangerGenes (DONE - 2008-06-03 - Hiram)
     ssh kkstore06
     cd /cluster/data/ce6/bed/sangerGene
 for C in I II III IV V X M
 do
     echo "chr${C}.gff -> rna.chr${C}.gff"
     grep -v "^#" chr${C}.gff | awk -F'\t' '
 BEGIN { IGNORECASE=1 }
 {
     if (match($2,"^miRNA|^ncRNA|^rRNA|^scRNA|^snRNA|^snlRNA|^snoRNA|^tRNA|^tRNAscan-SE-1.23")) {
         if (match($3,"exon")) {
             gsub("Transcript ","",$9)
             gsub(" .*","",$9)
             for (i = 1; i < 9; ++i) {
                 printf "%s\t", $i
             }
             printf "%s\n", $9
         }
     }
 }
 ' | sort -u | sort -k4n > rna.chr${C}.gff
 done
 
 for C in I II III IV V X M
 do
     echo "chr${C}.gff -> pseudoGene.chr${C}.gff"
     grep -v "^#" chr${C}.gff | awk -F'\t' '
 BEGIN { IGNORECASE=1 }
 {
     if (match($2,"^Pseudogene")) {
         if (match($3,"Pseudogene")) {
             gsub("Pseudogene ","",$9)
             gsub("Pseudogene","CDS",$3)
             gsub(" .*","",$9)
             gsub("\".*","",$9)
             for (i = 1; i < 9; ++i) {
                 printf "%s\t", $i
             }
             printf "%s\n", $9
         }
     }
 }
 ' | sort -u | sort -k4n > pseudoGene.chr${C}.gff
 done
 
     ssh hgwdev
     cd /cluster/data/ce6/bed/sangerGene
     time nice -n +19 ldHgGene ce6 sangerRnaGene rna.*.gff
     #	Read 6440 transcripts in 6474 lines in 7 files
     #	6440 groups 7 seqs 9 sources 1 feature types
     #	6440 gene predictions
 
     time nice -n +19 ldHgGene ce6 sangerPseudoGene pseudoGene.*.gff
     #	Read 1462 transcripts in 1462 lines in 7 files
     #	1462 groups 6 seqs 1 sources 1 feature types
     #	1462 gene predictions
 
 ############################################################################
 #  BLATSERVERS ENTRY (DONE - 2008-06-04 - Hiram)
 #	After getting a blat server assigned by the Blat Server Gods,
     ssh hgwdev
 
     hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
 	VALUES ("ce6", "blat9", "17788", "1", "0"); \
 	INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
 	VALUES ("ce6", "blat9", "17789", "0", "1");' \
 	    hgcentraltest
     #	test it with some sequence
 
 ############################################################################
 # reset default position, same as ce4 on the ZC101 / unc-52 locus
     ssh hgwdev
     hgsql -e 'update dbDb set defaultPos="chrII:14646344-14667746"
 	where name="ce6";' hgcentraltest
 
 ##########################################################################
 ## Blastz SELF (DONE - 2008-06-04 - Hiram
     ssh kkstore06
     #	screen
     mkdir /cluster/data/ce6/bed/blastzCe6.2008-06-04
     cd /cluster/data/ce6/bed
     ln -s blastzCe6.2008-06-04 blastzSelf
     cd blastzCe6.2008-06-04
     cat << '_EOF_' > DEF
 # ce6 vs ce6 - Self alignment
 BLASTZ_H=2000
 BLASTZ_M=200
 
 # TARGET: elegans Ce6
 SEQ1_DIR=/iscratch/i/worms/ce6/ce6.2bit
 SEQ1_LEN=/iscratch/i/worms/ce6/chrom.sizes
 SEQ1_CHUNK=1000000
 SEQ1_LAP=10000
 
 # QUERY: elegans Ce6
 SEQ2_DIR=/iscratch/i/worms/ce6/ce6.2bit
 SEQ2_LEN=/iscratch/i/worms/ce6/chrom.sizes
 SEQ2_CHUNK=1000000
 SEQ2_LAP=10000
 
 BASE=/cluster/data/ce6/bed/blastzCe6.2008-06-04
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl DEF -verbose=2 \
 	-chainMinScore=10000 -chainLinearGap=medium -bigClusterHub=kk \
 	-smallClusterHub=kki > do.log 2>&1 &
     #	real    79m26.707s
 XXX - running 2008-06-04 12:20
     #	broke down during loadUp because /iscratch/i/worms/ce6/chrom.sizes
     #	does not exist on hgwdev.  Finish the load manually by fixing
     #	the chrom.sizes reference in loadUp.csh.  Then:
     time nice -n +19 doBlastzChainNet.pl DEF -verbose=2 \
 	-chainMinScore=10000 -chainLinearGap=medium -bigClusterHub=kk \
 	-continue=download -smallClusterHub=kki > download.log 2>&1 &
 i
 
     #	real    37m22.066s
     ssh hgwdev
     cd /cluster/data/ce6/bed/blastz.ce6.2008-06-04
     nice -n +19 featureBits ce6 chainSelfLink > fb.ce6.chainSelfLink.txt 2>&1
     #	31186593 bases of 100281244 (31.099%) in intersection
 
 ############################################################################
 ## BLASTZ caePb2 (DONE - 2008-06-04,09 - Hiram)
     ssh kkstore06
     screen 	#	use screen to control the job
     mkdir /cluster/data/ce6/bed/blastzCaePb2.2008-06-04
     cd /cluster/data/ce6/bed/blastzCaePb2.2008-06-04
 
     cat << '_EOF_' > DEF
 # ce6 vs caePb2
 BLASTZ_H=2000
 BLASTZ_M=50
 
 # TARGET: elegans Ce6
 SEQ1_DIR=/scratch/data/ce6/ce6.2bit
 SEQ1_LEN=/scratch/data/ce6/chrom.sizes
 SEQ1_CHUNK=1000000
 SEQ1_LAP=10000
 
 # QUERY: C. PB2801 caePb2
 SEQ2_DIR=/scratch/data/caePb2/caePb2.2bit
 SEQ2_LEN=/scratch/data/caePb2/chrom.sizes
 SEQ2_CTGDIR=/scratch/data/caePb2/caePb2.supercontigs.2bit
 SEQ2_CTGLEN=/scratch/data/caePb2/caePb2.supercontigs.sizes
 SEQ2_LIFT=/scratch/data/caePb2/caePb2.supercontigs.lift
 SEQ2_CHUNK=1000000
 SEQ2_LAP=0
 SEQ2_LIMIT=50
 
 BASE=/cluster/data/ce6/bed/blastzCaePb2.2008-06-04
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF -verbose=2 -bigClusterHub=kk \
 	-smallClusterHub=kki > do.log 2>&1 &
     #	real    32m29.730s
     #	trouble with chaining due to no copies of genomes on /scratch/data/
     #	for the Iservers
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF -verbose=2 -bigClusterHub=kk \
 	-continue=chainMerge -smallClusterHub=kki > chainMerge.log 2>&1 &
     #	real    26m55.624s
     #	forgot the -qRepeats=windowmaskerSdust
     rm axtChain/ce6.caePb2.net
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF -verbose=2 -bigClusterHub=kk -qRepeats=windowmaskerSdust \
 	-continue=load -smallClusterHub=kki > load.log 2>&1 &
     #	real    1m18.270s
 
     cat fb.ce6.chainCaePb2Link.txt
     #	40805712 bases of 100281426 (40.691%) in intersection
 
     #	swap, this is also in caePb2.txt
     mkdir /cluster/data/caePb2/bed/blastz.ce6.swap
     cd /cluster/data/caePb2/bed/blastz.ce6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	-qRepeats=windowmaskerSdust \
 	/cluster/data/ce6/bed/blastzCaePb2.2008-06-04/DEF \
 	-bigClusterHub=kk -smallClusterHub=kki -swap > swap.log 2>&1 &
     #	a couple of minutes
     cat fb.caePb2.chainCe6Link.txt
     #	55092134 bases of 170473138 (32.317%) in intersection
 
 ############################################################################
 ## BLASTZ caeRem3 (DONE - 2008-06-04,09 - Hiram)
     ssh kkstore06
     screen 	#	use screen to control the job
     mkdir /cluster/data/ce6/bed/blastzCaeRem3.2008-06-04
     cd /cluster/data/ce6/bed/blastzCaeRem3.2008-06-04
 
     cat << '_EOF_' > DEF
 # ce6 vs caeRem3
 BLASTZ_H=2000
 BLASTZ_M=50
 
 # TARGET: elegans Ce6
 SEQ1_DIR=/scratch/data/ce6/ce6.2bit
 SEQ1_LEN=/scratch/data/ce6/chrom.sizes
 SEQ1_CHUNK=1000000
 SEQ1_LAP=10000
 
 # QUERY: C. PB2801 caeRem3
 SEQ2_DIR=/scratch/data/caeRem3/caeRem3.2bit
 SEQ2_LEN=/scratch/data/caeRem3/chrom.sizes
 SEQ2_CTGDIR=/scratch/data/caeRem3/caeRem3.supercontigs.2bit
 SEQ2_CTGLEN=/scratch/data/caeRem3/caeRem3.supercontigs.sizes
 SEQ2_LIFT=/scratch/data/caeRem3/caeRem3.chrUn.lift
 SEQ2_CHUNK=1000000
 SEQ2_LAP=0
 SEQ2_LIMIT=50
 
 BASE=/cluster/data/ce6/bed/blastzCaeRem3.2008-06-04
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF -verbose=2 -bigClusterHub=kk \
 	-smallClusterHub=kki > do.log 2>&1 &
     #	real    28m40.865s
     #	trouble with chaining due to no copies of genomes on /scratch/data/
     #	for the Iservers
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF -verbose=2 -bigClusterHub=kk \
 	-continue=chainMerge -smallClusterHub=kki > chainMerge.log 2>&1 &
     #	real    19m30.231s
     #	forgot the -qRepeats=windowmaskerSdust, went back and fixed that
     #	manually in loadUp.csh and reran the load of the net track
 
     cat fb.ce6.chainCaeRem3Link.txt
     #	41853863 bases of 100281426 (41.736%) in intersection
 
     #	swap, this is also in caeRem3.txt
     mkdir /cluster/data/caeRem3/bed/blastz.ce6.swap
     cd /cluster/data/caeRem3/bed/blastz.ce6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	-qRepeats=windowmaskerSdust \
 	/cluster/data/ce6/bed/blastzCaeRem3.2008-06-04/DEF \
 	-bigClusterHub=kk -smallClusterHub=kki -swap > swap.log 2>&1 &
     cat fb.caeRem3.chainCe6Link.txt
     #	46326709 bases of 138406388 (33.472%) in intersection
     #	something unusual happened with ssh during installDownloads.csh,
     #	continuing after finishing that manually:
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	-continue=cleanup -qRepeats=windowmaskerSdust \
 	/cluster/data/ce6/bed/blastzCaeRem3.2008-06-04/DEF \
 	-bigClusterHub=kk -smallClusterHub=kki -swap > cleanUp.log 2>&1 &
 
 ############################################################################
 ## BLASTZ cb3 (DONE - 2008-06-04,09 - Hiram)
     ssh kkstore06
     screen 	#	use screen to control the job
     mkdir /cluster/data/ce6/bed/blastzCb3.2008-06-04
     cd /cluster/data/ce6/bed/blastzCb3.2008-06-04
 
     cat << '_EOF_' > DEF
 # ce6 vs cb3
 BLASTZ_H=2000
 BLASTZ_M=50
 
 # TARGET: elegans Ce6
 SEQ1_DIR=/scratch/data/ce6/ce6.2bit
 SEQ1_LEN=/scratch/data/ce6/chrom.sizes
 SEQ1_CHUNK=1000000
 SEQ1_LAP=10000
 
 # QUERY: C. briggsae cb3
 SEQ2_DIR=/san/sanvol1/scratch/worms/cb3/cb3.rmskTrf.2bit
 SEQ2_LEN=/san/sanvol1/scratch/worms/cb3/chrom.sizes
 SEQ2_CHUNK=1000000
 SEQ2_LAP=0
 SEQ2_LIMIT=50
 
 BASE=/cluster/data/ce6/bed/blastzCb3.2008-06-04
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF -verbose=2 -bigClusterHub=kk \
 	-smallClusterHub=kki > do.log 2>&1 &
     #	real    33m24.103s
     #	42483532 bases of 100281426 (42.364%) in intersection
 
     #	swap, this is also in cb3.txt
     mkdir /cluster/data/cb3/bed/blastz.ce6.swap
     cd /cluster/data/cb3/bed/blastz.ce6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/cluster/data/ce6/bed/blastzCb3.2008-06-04/DEF \
 	-bigClusterHub=kk  -smallClusterHub=kki -swap > swap.log 2>&1 &
     #	real    3m20.338s
     #	real    22m40.205s
     cat fb.cb3.chainCe6Link.txt
     #	43180969 bases of 108433446 (39.823%) in intersection
 
 ############################################################################
 ## BLASTZ priPac1 (DONE - 2008-06-09 - Hiram)
     ssh kkstore06
     screen 	#	use screen to control the job
     mkdir /cluster/data/ce6/bed/blastzPriPac1.2008-06-09
     cd /cluster/data/ce6/bed/blastzPriPac1.2008-06-09
 
     cat << '_EOF_' > DEF
 # ce6 vs priPac1
 BLASTZ_H=2000
 BLASTZ_M=50
 
 # TARGET: elegans Ce6
 SEQ1_DIR=/scratch/data/ce6/ce6.2bit
 SEQ1_LEN=/scratch/data/ce6/chrom.sizes
 SEQ1_CHUNK=1000000
 SEQ1_LAP=10000
 
 # QUERY: C. briggsae priPac1
 SEQ2_DIR=/iscratch/i/worms/priPac1/priPac1.TrfWM.2bit
 SEQ2_LEN=/iscratch/i/worms/priPac1/chrom.sizes
 SEQ2_CTGDIR=/iscratch/i/worms/priPac1/priPac1.TrfWM.supers.2bit
 SEQ2_CTGLEN=/iscratch/i/worms/priPac1/priPac1.TrfWM.supers.sizes
 SEQ2_LIFT=/iscratch/i/worms/priPac1/priPac1.supercontigs.lift
 SEQ2_CHUNK=1000000
 SEQ2_LAP=0
 SEQ2_LIMIT=50
 
 BASE=/cluster/data/ce6/bed/blastzPriPac1.2008-06-09
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF -verbose=2 -bigClusterHub=kk \
 	-qRepeats=windowmaskerSdust -smallClusterHub=kki > do.log 2>&1 &
     #	after some kk parasol experiments, continuing:
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	-continue=cat `pwd`/DEF -verbose=2 -bigClusterHub=kk \
 	-qRepeats=windowmaskerSdust -smallClusterHub=kki > cat.log 2>&1 &
     #	real    9m46.300s
 
     cat fb.ce6.chainPriPac1Link.txt
     #	6256254 bases of 100281426 (6.239%) in intersection
 
     #	swap, this is also in priPac1.txt
     mkdir /cluster/data/priPac1/bed/blastz.ce6.swap
     cd /cluster/data/priPac1/bed/blastz.ce6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	-qRepeats=windowmaskerSdust \
 	/cluster/data/ce6/bed/blastzPriPac1.2008-06-09/DEF \
 	-bigClusterHub=kk -smallClusterHub=kki -swap > swap.log 2>&1 &
     #	real    0m33.836s
     #	failed during the loadUp.csh due to /iscratch/i/ missing from hgwdev
     #	fixup that manually in loadUp.csh and ran it on hgwdev
     cat fb.priPac1.chainCe6Link.txt
     #	6803322 bases of 145948246 (4.661%) in intersection
     #	and continuing:
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	-qRepeats=windowmaskerSdust \
 	-continue=download /cluster/data/ce6/bed/blastzPriPac1.2008-06-09/DEF \
 	-bigClusterHub=kk -smallClusterHub=kki -swap > download.log 2>&1 &
 
 ############################################################################
 ## BLASTZ caeJap1 (DONE - 2008-06-09 - Hiram)
     ssh kkstore06
     screen 	#	use screen to control the job
     mkdir /cluster/data/ce6/bed/blastzCaeJap1.2008-06-09
     cd /cluster/data/ce6/bed/blastzCaeJap1.2008-06-09
 
     cat << '_EOF_' > DEF
 # ce6 vs caeJap1
 BLASTZ_H=2000
 BLASTZ_M=50
 
 # TARGET: elegans Ce6
 SEQ1_DIR=/scratch/data/ce6/ce6.2bit
 SEQ1_LEN=/scratch/data/ce6/chrom.sizes
 SEQ1_CHUNK=1000000
 SEQ1_LAP=10000
 
 # QUERY: C. japonica caeJap1
 SEQ2_DIR=/cluster/bluearc/scratch/data/caeJap1/caeJap1.2bit
 SEQ2_LEN=/cluster/bluearc/scratch/data/caeJap1/chrom.sizes
 SEQ2_CTGDIR=/cluster/bluearc/scratch/data/caeJap1/caeJap1.TrfWM.supers.2bit
 SEQ2_CTGLEN=/cluster/bluearc/scratch/data/caeJap1/caeJap1.TrfWM.supers.sizes
 SEQ2_LIFT=/cluster/bluearc/scratch/data/caeJap1/caeJap1.chrUn.lift
 SEQ2_CHUNK=1000000
 SEQ2_LAP=0
 SEQ2_LIMIT=50
 
 BASE=/cluster/data/ce6/bed/blastzCaeJap1.2008-06-09
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF -verbose=2 -bigClusterHub=pk \
 	-qRepeats=windowmaskerSdust -smallClusterHub=kki > do.log 2>&1 &
     #	real    35m7.220s
 
     cat fb.ce6.chainCaeJap1Link.txt
     #	26888483 bases of 100281426 (26.813%) in intersection
 
     #	swap, this is also in caeJap1.txt
     mkdir /cluster/data/caeJap1/bed/blastz.ce6.swap
     cd /cluster/data/caeJap1/bed/blastz.ce6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	-qRepeats=windowmaskerSdust \
 	/cluster/data/ce6/bed/blastzCaeJap1.2008-06-09/DEF \
 	-bigClusterHub=pk -smallClusterHub=kki -swap > swap.log 2>&1 &
     #	real    1m10.794s
     #	failed to load because:
     #	loadUp: looks like previous stage was not
     #	successful
     #	(can't find /cluster/data/caeJap1/bed/blastz.ce6.swap/mafNet/).
 
     #	continuing:
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	-qRepeats=windowmaskerSdust \
 	-continue=load /cluster/data/ce6/bed/blastzCaeJap1.2008-06-09/DEF \
 	-bigClusterHub=pk -smallClusterHub=kki -swap > load.log 2>&1 &
     #	real    0m33.440s
     cat fb.caeJap1.chainCe6Link.txt
     #	26209442 bases of 129347181 (20.263%) in intersection
 
 ############################################################################
 ## 6-Way multiple alignment (DONE - 2008-06-09 - Hiram)
 
     mkdir /cluster/data/ce6/bed/multiz6way
     cd /cluster/data/ce6/bed/multiz6way
     #	An additional nematode tree:
     #	http://nematol.unh.edu/tree/tree_des.php
     #	Constructing a tree from the document:
     #	http://www.wormbook.org/chapters/www_phylogrhabditids/phylorhab.html
     #	http://www.wormbook.org/chapters/www_phylogrhabditids/phylofig6B.jpg
 
     #	purely artifical for the last one: :0.2,pristiochus_priPac1:0.8
     #	since it is not on the graph
 
     cat << '_EOF_' > nematodes.nh
     ((((((((((briggsae_cb3:0.005,remanei_caeRem3:0.003):0.004,
 	brenneri_caePb2:0.013):0.002,elegans_ce6:0.003):0.001,
 	    japonica_caeJap1:0.023):0.024,ps1010:0.027):0.014,
 		df5070:0.056):0.009, plicata:0.102):0.099,sb341:0.060):0.038,
 		(df5074:0.351,df5055:0.090):0.221):0.2,pristiochus_priPac1:0.8)
 '_EOF_'
     #	<< happy emacs
 
     #	filter that for the six species in use here
     /cluster/bin/phast/x86_64/tree_doctor \
 --prune-all-but briggsae_cb3,remanei_caeRem3,brenneri_caePb2,elegans_ce6,japonica_caeJap1,pristiochus_priPac1 \
 	nematodes.nh > 6way.nh
 
     #	rearrange to get Ce6 on top for the construction of the tree image
     #	in the 6-way description page.  This results in something like:
 
 (((C._elegans_ce6:0.003000,
     (C._brenneri_caePb2:0.013000,
 	(C._remanei_caeRem3:0.003000,C._briggsae_cb3:0.005000):0.004000)
 		:0.002000):0.001000,
 	    C._japonica_caeJap1:0.023000):0.384000,
 		P._pristiochus_priPac1:0.800000);
 
     /cluster/bin/phast/x86_64/all_dists 6way.nh > 6way.distances.txt
     grep -i ce6 6way.distances.txt | sort -k3,3n
     #	Use this output for reference, and use the calculated
     #	distances in the table below to order the organisms and check
     #	the button order on the browser.
     #	And if you can fill in the table below entirely, you have
     #	succeeded in finishing all the alignments required.
     #
 #                         featureBits chainLink measures
 #                                           chaince6Link   chain   linearGap
 #    distance                       on Ce6      on other   minScore
 #  1 0.0120  - remanei_caeRem3     (% 41.736)  (% 33.472)  1000     loose
 #  2 0.0140  - briggsae_cb3        (% 42.364)  (% 39.823)  1000     loose
 #  3 0.0180  - brenneri_caePb2     (% 40.691)  (% 32.317)  1000     loose
 #  3 0.0270  - japonica_caeJap1    (% 26.813)  (% 20.263)  1000     loose
 #  4 1.1880  - pristiochus_priPac1 (%  6.239)  (%  4.661)  1000     loose
 
     cd /cluster/data/ce6/bed/multiz6way
     #	bash shell syntax here ...
     export H=/cluster/data/ce6/bed
     mkdir mafLinks
     for G in caeRem3 cb3 caePb2 priPac1 caeJap1
     do
 	mkdir mafLinks/$G
 	if [ ! -d ${H}/blastz.${G}/mafNet ]; then
 	echo "missing directory blastz.${G}/mafNet"
 		exit 255
 	fi
 	ln -s ${H}/blastz.$G/mafNet/*.maf.gz ./mafLinks/$G
     done
 
     #	Copy MAFs to some appropriate NFS server for kluster run
     ssh kkstore06
     mkdir /san/sanvol1/scratch/worms/ce6/multiz6way
     cd /san/sanvol1/scratch/worms/ce6/multiz6way
     time rsync -a --copy-links --progress --stats \
 	/cluster/data/ce6/bed/multiz6way/mafLinks/ .
     #	a few seconds to copy 126 Mb
 
     #	these are x86_64 binaries
     mkdir penn
     # use latest penn utilities
     P=/cluster/bin/penn/multiz.v11.2007-03-19/multiz-tba
     cp -p $P/{autoMZ,multiz,maf_project} penn
 
     # the autoMultiz cluster run
     ssh memk
     cd /cluster/data/ce6/bed/multiz6way/
 
     # create species list and stripped down tree for autoMZ
     sed 's/[a-z][a-z0-9]*_//ig; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
 	6way.nh > tmp.nh
     echo `cat tmp.nh` > tree-commas.nh
     echo `cat tree-commas.nh` | sed 's/ //g; s/,/ /g' > tree.nh
     sed 's/[()]//g; s/,/ /g' tree.nh > species.lst
 
     mkdir maf run
     cd run
 
     #	NOTE: you need to set the db properly in this script
 
     cat > autoMultiz << '_EOF_'
 #!/bin/csh -ef
 set db = ce6
 set c = $1
 set maf = $2
 set binDir = /san/sanvol1/scratch/worms/$db/multiz6way/penn
 set tmp = /scratch/tmp/$db/multiz.$c
 set pairs = /san/sanvol1/scratch/worms/$db/multiz6way
 rm -fr $tmp
 mkdir -p $tmp
 cp ../{tree.nh,species.lst} $tmp
 pushd $tmp
 foreach s (`cat species.lst`)
     set in = $pairs/$s/$c.maf
     set out = $db.$s.sing.maf
     if ($s == $db) then
 	continue
     endif
     if (-e $in.gz) then
 	zcat $in.gz > $out
     else if (-e $in) then
 	cp $in $out
     else
 	echo "##maf version=1 scoring=autoMZ" > $out
     endif
 end
 set path = ($binDir $path); rehash
 $binDir/autoMZ + T=$tmp E=$db "`cat tree.nh`" $db.*.sing.maf $c.maf
 popd
 cp $tmp/$c.maf $maf
 rm -fr $tmp
 '_EOF_'
 # << happy emacs
     chmod +x autoMultiz
 
 cat  << '_EOF_' > template
 #LOOP
 autoMultiz $(root1) {check out line+ /cluster/data/ce6/bed/multiz6way/maf/$(root1).maf}
 #ENDLOOP
 '_EOF_'
 # << happy emacs
 
     awk '{print $1}' /cluster/data/ce6/chrom.sizes > chrom.lst
     gensub2 chrom.lst single template jobList
     para create jobList
     para -maxNode=1 push
     para check ... push ... etc ...
 # Completed: 7 of 7 jobs
 # CPU time in finished jobs:       2302s      38.37m     0.64h    0.03d  0.000 y
 # IO & Wait Time:                    38s       0.63m     0.01h    0.00d  0.000 y
 # Average job time:                 334s       5.57m     0.09h    0.00d
 # Longest running job:                0s       0.00m     0.00h    0.00d
 # Longest finished job:             478s       7.97m     0.13h    0.01d
 # Submission to last job:           954s      15.90m     0.27h    0.01d
 
     ssh kkstore06
     cd /cluster/data/ce6/bed/multiz6way
     time nice -n +19 catDir maf > multiz6way.maf
     # a few seconds to produce:
     #	-rw-rw-r--  1 343585668 Jun  9 16:25 multiz6way.maf
 
     #	before getting to the annotation, load this up so we can get
     #	a first view of the track.  This will be replaced with the annotated
     #	mafs
     ssh hgwdev
     cd /cluster/data/ce6/bed/multiz6way
     mkdir /gbdb/ce6/multiz6way
     ln -s /cluster/data/ce6/bed/multiz6way/multiz6way.maf /gbdb/ce6/multiz6way
 
     #	this load creates a large file, do that on local disk:
     cd /scratch/tmp
     time nice -n +19 hgLoadMaf ce6 multiz6way
     #	a few seconds to load:
     #	Loaded 367046 mafs in 1 files from /gbdb/ce6/multiz6way
 
     time nice -n +19 hgLoadMafSummary -minSize=10000 -mergeGap=500 \
 	-maxSize=50000 ce6 multiz6waySummary /gbdb/ce6/multiz6way/multiz6way.maf
     #	a few seconds to load:
     #	Created 125113 summary blocks from 1051000 components
     #	and 366975 mafs from /gbdb/ce6/multiz6way/multiz6way.maf
 
     #	remove the temporary .tab files:
     rm multiz6*.tab
 
 ############################################################################
 ####  Blat sangerGene proteins to determine exons
 #	(DONE - 2008-06-11 - hiram)
     ssh hgwdev
     cd /cluster/data/ce6/bed
     mkdir blat.ce6SG.2008-06-11
     ln -s  blat.ce6SG.2008-06-11 blat.ce6SG
     cd blat.ce6SG
     pepPredToFa ce6 sangerPep sangerPep.fa
 
     ssh kkstore06
     #	create san nib directory from goldenPath chromosomes fa files
     mkdir /san/sanvol1/scratch/worms/ce6/nib
     cd /san/sanvol1/scratch/worms/ce6/nib
     for C in I II III IV V X M
 do
     twoBitToFa -seq=chr${C} /cluster/data/ce6/ce6.2bit stdout \
 	| faToNib -softMask stdin chr${C}.nib
 done
 
     #	The kluster run
     ssh pk
     cd /cluster/data/ce6/bed/blat.ce6SG
     cat << '_EOF_' > blatSome
 #!/bin/csh -fe
 blat -t=dnax -q=prot -out=pslx /san/sanvol1/scratch/worms/ce6/nib/$1.nib sgfa/$2.fa $3
 '_EOF_'
     # << happy emacs
     chmod +x blatSome
     ls -1S /san/sanvol1/scratch/worms/ce6/nib > ce6.lst
     mkdir sgfa
     cd sgfa
     faSplit sequence ../sangerPep.fa 3000 sg
     ls -1S *.fa > ../sg.lst
     cd ..
     cat << '_EOF_' > template
 #LOOP
 blatSome $(root1) $(root2) {check out line psl/$(root1)/$(root2).psl}
 #ENDLOOP
 '_EOF_'
     # << happy emacs
     gensub2 ce6.lst sg.lst template jobList
     mkdir psl
     cd psl
     sed -e "s/.nib//" ../ce6.lst | xargs mkdir
     cd ..
     para create jobList
     para try ... check ... push ... etc
 # Completed: 20335 of 20335 jobs
 # CPU time in finished jobs:      69885s    1164.74m    19.41h    0.81d  0.002 y
 # IO & Wait Time:                 51640s     860.67m    14.34h    0.60d  0.002 y
 # Average job time:                   6s       0.10m     0.00h    0.00d
 # Longest finished job:             348s       5.80m     0.10h    0.00d
 # Submission to last job:           943s      15.72m     0.26h    0.01d
 
     ssh kkstore06
     cd /cluster/data/ce6/bed/blat.ce6SG.2008-06-11
     pslSort dirs raw.psl /tmp psl/*
     #	-rw-rw-r--  1 70627970 Jun 11 10:35 raw.psl
     pslReps -nohead -minCover=0.9 -minAli=0.9 raw.psl cooked.psl /dev/null
     #	Processed 155625 alignments
     #	-rw-rw-r--  1 26423327 Jun 11 10:35 cooked.psl
     pslUniq cooked.psl ce6SG.psl
     #	-rw-rw-r--  1 25332830 Jun 11 10:36 ce6SG.psl
 
     cut -f 10 ce6SG.psl > sgName.lst
     faSomeRecords sangerPep.fa sgName.lst ce6SG.fa
     #	-rw-rw-r--  1 10903181 Jun 11 10:11 sangerPep.fa
     #	-rw-rw-r--  1 10899914 Jun 11 10:37 ce6SG.fa
     grep "^>" ce6SG.fa | wc -l
     #	23741
     grep "^>" sangerPep.fa | wc -l
     #	23771
 
     pslxToFa ce6SG.psl ce6SG_ex.fa -liftTarget=genome.lft \
 	-liftQuery=protein.lft
     #	-rw-rw-r--  1  5288587 Jun 11 10:39 protein.lft
     #	-rw-rw-r--  1  6741328 Jun 11 10:39 genome.lft
     #	-rw-rw-r--  1 12743074 Jun 11 10:39 ce6SG_ex.fa
     wc -l *.psl *.lft *.fa sgName.lst
     #	 23741 ce6SG.psl
     #	 25926 cooked.psl
     #	155630 raw.psl
     #	151196 genome.lft
     #	151196 protein.lft
     #	244239 ce6SG.fa
     #	302392 ce6SG_ex.fa
     #	244343 sangerPep.fa
     #	 23741 sgName.lst
     #	1322404 total
 
     #	back on hgwdev
     ssh hgwdev
     cd /cluster/data/ce6/bed/blat.ce6SG
     #	After about an hour, it exited with this message:
     #	sqlFreeConnection called on cache (ce6) that doesn't contain
     #	the given connection
     #	This may be a lurking error in this program, because the
     #	resulting file seems to have the correct number of lines:
     #	Once upon a time, long long ago, this table was made by the kgName
     #	command, but this doesn't work on C. elegans because there is no
     #	kgXref table.  So, the following perl script was developed to do the
     #	same thing
     #	kgName ce6 ce6SG.psl blastSGRef01
 
     #	Need a reference table loaded, perl script makes up relationships
     cat << '_EOF_' >> mkRef.pl
 #!/usr/bin/env perl
 
 use warnings;
 use strict;
 
 my $argc = scalar(@ARGV);
 
 sub usage() {
     print STDERR "usage: ./mkRef.pl blastSGRef01.tab\n";
     print STDERR "reads ../sangerGene/genePred.tab and ",
         "../sangerGene/orfToGene.tab\n";
     print STDERR "writes references to the given input file, ",
         "e.g. blastSGRef01.tab\n";
 }
 
 if (1 != $argc) {
     usage;
     exit 255;
 }
 my $outFile = shift;
 
 my %orfToGene;  # key is all lowercase orf name, value is gene name
 
 my $inFile="../sangerGene/orfToGene.tab";
 open (FH,"<$inFile") or die "can not read $inFile";
 while (my $line=<FH>) {
     chomp $line;
     my ($orf, $gene) = split('\s+',$line);
     $orfToGene{lc($orf)} = $gene
 }
 close (FH);
 
 my %links;      # key is all lowercase orf name, value is SwissProt name
 
 $inFile="../sangerGene/sangerLinks.tab";
 open (FH,"<$inFile") or die "can not read $inFile";
 while (my $line=<FH>) {
     chomp $line;
     my ($orf, $SP, $comments) = split('\s+',$line);
     $links{lc($orf)} = $SP
 }
 close (FH);
 
 my %ce6Position;        # key is all lowercase orf name, value is ce6 position
 
 $inFile="../sangerGene/genePred.tab";
 open (FH,"<$inFile") or die "can not read $inFile";
 while (my $line=<FH>) {
     chomp $line;
     my ($bin, $orf, $chr, $strand, $start, $end, $other) = split('\s+',$line);
     $ce6Position{lc($orf)} = sprintf("%s:%d-%d", $chr, $start, $end);
 }
 close (FH);
 
 open (OF,">$outFile") or die "can not write to $outFile";
 
 $inFile="sgName.lst";
 open (FH,"<$inFile") or die "can not read $inFile";
 while (my $orf=<FH>) {
     chomp $orf;
     if (!exists($orfToGene{lc($orf)})) {
         $orfToGene{lc($orf)} = "";
     }
     if (!exists($links{lc($orf)})) {
         $links{lc($orf)} = "";
     }
     if (!exists($ce6Position{lc($orf)})) {
         $ce6Position{lc($orf)} = "";
     }
     printf OF "%s\t%s\t%s\t%s\t%s\n", $orf, $orfToGene{lc($orf)},
         $ce6Position{lc($orf)}, $links{lc($orf)}, "";
 }
 close (FH);
 close (OF);
 '_EOF_'
     # << happy emacs
     chmod +x ./mkRef.pl
     ./mkRef.pl blastSGRef01.tab
     wc -l blastSGRef01.tab 
     #	23741 blastSGRef.01.tab
 
     hgLoadSqlTab ce6 blastSGRef01 ~/kent/src/hg/lib/blastRef.sql \
 	blastSGRef01.tab
     wc -l sgName.lst blastSGRef01.tab ce6SG.psl
     #	23741 sgName.lst
     #	23741 blastSGRef01.tab
     #	23741 ce6SG.psl
 
     #	And load the protein sequences
     hgPepPred ce6 generic blastSGPep01 ce6SG.fa
 
 
 ############################################################################
 # CREATE CONSERVATION WIGGLE WITH PHASTCONS - 6-WAY
 #		(DONE - 2008-06-18 - Hiram)
 
 # Estimate phastCons parameters
     ssh kkstore06
     #	We need the fasta sequence for this business
     cd /cluster/data/ce6
     for C in `cut -f1 chrom.sizes | sed -e s/chr//`
 do
     echo "twoBitToFa -seq=chr${C} ce6.2bit ${C}/chr${C}.fa"
     twoBitToFa -seq=chr${C} ce6.2bit ${C}/chr${C}.fa
 done
 
     mkdir /cluster/data/ce6/bed/multiz6way/cons
     cd /cluster/data/ce6/bed/multiz6way/cons
 
     # Create a starting-tree.mod based on chrV (the largest one)
     #	Set -windows large enough so it only makes one piece
     faSize ../../../V/chrV.fa
     #	20919568 bases (0 N's 20919568 real 18018198 upper 2901370 lower)
     #	in 1 sequences in 1 files
     #	%13.87 masked total, %13.87 masked real
 
     time nice -n +19 /cluster/bin/phast/$MACHTYPE/msa_split ../maf/chrV.maf \
 	--refseq ../../../V/chrV.fa --in-format MAF \
 	--windows 30000000,1000 --out-format SS \
 	--between-blocks 5000 --out-root s1
     #	Creating partition 1 (column 1 to column 20919568)...
     #	Writing partition 1 to s1.1-20919568.ss...
     #	13 seconds
 
     #	the tree in use
     cat ../tree-commas.nh
     #	(((((cb3,caeRem3),caePb2),ce6),caeJap1),priPac1)
 
     #  this s1.*.ss should match only one item
     time nice -n +19 /cluster/bin/phast/$MACHTYPE/phyloFit -i SS s1.*.ss \
 --tree "(((((cb3,caeRem3),caePb2),ce6),caeJap1),priPac1)" \
     --out-root starting-tree
     #	real    0m14.408s
 
     rm s1.*.ss
     # add up the C and G:
     grep BACKGROUND starting-tree.mod | awk '{printf "%0.3f\n", $3 + $4;}'
     #	0.383
     #	This 0.383 is used in the --gc argument below
 
     # Create SS files on san filesystem
     #	Increasing their size this time from 1,000,000 to 4,000,000 to
     #	slow down the phastCons pk jobs
     ssh kkstore06
     mkdir -p  /san/sanvol1/scratch/worms/ce6/cons/ss
     cd  /san/sanvol1/scratch/worms/ce6/cons/ss
     time for C in `cut -f1 /cluster/data/ce6/chrom.sizes`
     do
       if [ -s /cluster/data/ce6/bed/multiz6way/maf/${C}.maf ]; then
 	mkdir ${C}
 	echo msa_split $C
 	chrN=${C/chr/}
 	chrN=${chrN/_random/}
 	/cluster/bin/phast/$MACHTYPE/msa_split \
 	    /cluster/data/ce6/bed/multiz6way/maf/${C}.maf \
 	    --refseq /cluster/data/ce6/${chrN}/${C}.fa \
 	    --in-format MAF --windows 4000000,0 --between-blocks 5000 \
 	    --out-format SS --out-root ${C}/${C}
       fi
     done
     #	real    1m7.685s
 
     # Create a random list of 50 1 mb regions  (do not use the _randoms)
     cd /san/sanvol1/scratch/worms/ce6/cons/ss
     ls -1l chr*/chr*.ss | grep -v random | \
 	awk '$5 > 4000000 {print $9;}' | randomLines stdin 50 ../randomSs.list
     #	Only 28 lines in stdin  (i.e. all are used)
 
     # Set up parasol directory to calculate trees on these regions
     ssh pk
     mkdir /san/sanvol1/scratch/worms/ce6/cons/treeRun2
     cd /san/sanvol1/scratch/worms/ce6/cons/treeRun2
     mkdir tree log
 
     #	Tuning this loop should come back to here to recalculate 
     # Create  script that calls phastCons with right arguments
     #	expected-lengths and target-coverage taken from ce4 history
     #	for treeRun2 --expected-lengths 15 --target-coverage 0.55
     #	for treeRun1 --expected-lengths 15 --target-coverage 0.60
     cat > makeTree.csh << '_EOF_'
 #!/bin/csh -fe
 set C=$1:h
 mkdir -p log/${C} tree/${C}
     /cluster/bin/phast/$MACHTYPE/phastCons ../ss/$1 \
 	/cluster/data/ce6/bed/multiz6way/cons/starting-tree.mod \
 	--gc 0.383 --nrates 1,1 --no-post-probs --ignore-missing \
 	--expected-lengths 15 --target-coverage 0.55 \
 	--quiet --log log/$1 --estimate-trees tree/$1
 '_EOF_'
     #	<< happy emacs
     chmod +x makeTree.csh
 
     cat > template << '_EOF_'
 #LOOP
 makeTree.csh $(path1)
 #ENDLOOP
 '_EOF_'
     #	<< happy emacs
 
     # Make cluster job and run it
     gensub2 ../randomSs.list single template jobList
     para create jobList
     para try/push/check/etc
 # Completed: 28 of 28 jobs
 # CPU time in finished jobs:       5941s      99.01m     1.65h    0.07d  0.000 y
 # IO & Wait Time:                    64s       1.07m     0.02h    0.00d  0.000 y
 # Average job time:                 214s       3.57m     0.06h    0.00d
 # Longest finished job:             309s       5.15m     0.09h    0.00d
 # Submission to last job:           326s       5.43m     0.09h    0.00d
 
     # Now combine parameter estimates.  We can average the .mod files
     # using phyloBoot.  This must be done separately for the conserved
     # and nonconserved models
     ssh kkstore06
     cd /san/sanvol1/scratch/worms/ce6/cons/treeRun2
     ls -1 tree/chr*/*.cons.mod > cons.list
     /cluster/bin/phast/$MACHTYPE/phyloBoot --read-mods '*cons.list' \
 	--output-average ../ave.cons.mod > cons_summary.txt 2>&1
     ls -1 tree/chr*/*.noncons.mod > noncons.list
     /cluster/bin/phast/$MACHTYPE/phyloBoot --read-mods '*noncons.list' \
 	--output-average ../ave.noncons.mod > noncons_summary.txt
     cd ..
     cp -p ave.*.mod /cluster/data/ce6/bed/multiz6way/cons
 
     #	measuring entropy
     #	consEntopy <target coverage> <expected lengths>
     #		 ave.cons.mod ave.noncons.mod --NH 9.78
     /cluster/bin/phast/$MACHTYPE/consEntropy --NH 9.78 .55 15 \
                         ave.cons.mod ave.noncons.mod
     ###	for treeRun2: --expected-lengths 15 --target-coverage 0.55
     #	( Solving for new omega: 15.000000 76.253180 42.469155 35.389619
     #		34.810297 34.805819 )
 
     #	Transition parameters: gamma=0.550000, omega=15.000000, mu=0.066667,
     #		nu=0.081481
     #	Relative entropy: H=1.172307 bits/site
     #	Expected min. length: L_min=6.108564 sites
     #	Expected max. length: L_max=5.516260 sites
     #	Phylogenetic information threshold: PIT=L_min*H=7.161112 bits
     # Recommended expected length: omega=34.805819 sites (for L_min*H=9.780000)
 
     ###	for treeRun1: --expected-lengths 15 --target-coverage 0.60
     #	( Solving for new omega: 15.000000 140.706073 61.140093 42.355284
     #		39.561113 39.474822 )
     #	Transition parameters: gamma=0.600000, omega=15.000000, mu=0.066667,
     #		nu=0.100000
     #	Relative entropy: H=1.248356 bits/site
     #	Expected min. length: L_min=5.363742 sites
     #	Expected max. length: L_max=5.097039 sites
     #	Phylogenetic information threshold: PIT=L_min*H=6.695858 bits
     # Recommended expected length: omega=39.474822 sites (for L_min*H=9.780000)
 
     ssh pk
     # Create cluster dir to do main phastCons run
     mkdir /san/sanvol1/scratch/worms/ce6/cons/consRun2
     cd /san/sanvol1/scratch/worms/ce6/cons/consRun2
     mkdir ppRaw bed
 
     #	using the ave.cons.mod and ave.noncons.mod from treeRun2 above:
 ALPHABET: A C G T 
 ORDER: 0
 SUBST_MOD: REV
 BACKGROUND: 0.308500 0.191500 0.191500 0.308500 
 RATE_MAT:
   -0.875904    0.175256    0.458101    0.242547 
    0.282331   -1.200805    0.180928    0.737546 
    0.737985    0.180928   -1.200354    0.281440 
    0.242547    0.457829    0.174703   -0.875079 
 TREE: (((((cb3:0.087550,caeRem3:0.073515):0.020860,caePb2:0.090766):0.029808,ce6:0.094770):0.038388,caeJap1:0.109779):0.126548,priPac1:0.126548);
 ALPHABET: A C G T 
 ORDER: 0
 SUBST_MOD: REV
 BACKGROUND: 0.308500 0.191500 0.191500 0.308500 
 RATE_MAT:
   -0.875904    0.175256    0.458101    0.242547 
    0.282331   -1.200805    0.180928    0.737546 
    0.737985    0.180928   -1.200354    0.281440 
    0.242547    0.457829    0.174703   -0.875079 
 TREE: (((((cb3:0.328951,caeRem3:0.273909):0.078262,caePb2:0.339349):0.111927,ce6:0.355503):0.144928,caeJap1:0.413391):0.478831,priPac1:0.478831);
 
 
     # Create script to run phastCons with right parameters
     #	This job is I/O intensive in its output files, thus it is all
     #	working over in /scratch/tmp/
     cat > doPhast << '_EOF_'
 #!/bin/csh -fe
 mkdir /scratch/tmp/${2}
 cp -p ../ss/${1}/${2}.ss ../ave.cons.mod ../ave.noncons.mod /scratch/tmp/${2}
 pushd /scratch/tmp/${2} > /dev/null
 /cluster/bin/phast/${MACHTYPE}/phastCons ${2}.ss ave.cons.mod,ave.noncons.mod \
    --expected-lengths 15 --target-coverage 0.55 --quiet \
 	--seqname ${1} --idpref ${1} --viterbi ${2}.bed --score > ${2}.pp
 popd > /dev/null
 mkdir -p ppRaw/${1}
 mkdir -p bed/${1}
 mv /scratch/tmp/${2}/${2}.pp ppRaw/${1}
 mv /scratch/tmp/${2}/${2}.bed bed/${1}
 rm /scratch/tmp/${2}/ave.{cons,noncons}.mod
 rm /scratch/tmp/${2}/${2}.ss
 rmdir /scratch/tmp/${2}
 '_EOF_'
     # << happy emacs
     chmod +x doPhast
 
     #	root1 == chrom name, file1 == ss file name without .ss suffix
     cat > template << '_EOF_'
 #LOOP
 doPhast $(root1) $(file1)
 #ENDLOOP
 '_EOF_'
     #	<< happy emacs
 
     # Create parasol batch and run it
     ls -1 ../ss/chr*/chr*.ss | sed 's/.ss$//' > in.list
 
     gensub2 in.list single template jobList
     para create jobList
     para try/check/push/etc.
     #	These jobs are very fast and very I/O intensive
 # Completed: 29 of 29 jobs
 # CPU time in finished jobs:        386s       6.43m     0.11h    0.00d  0.000 y
 # IO & Wait Time:                    85s       1.42m     0.02h    0.00d  0.000 y
 # Average job time:                  16s       0.27m     0.00h    0.00d
 # Longest finished job:              22s       0.37m     0.01h    0.00d
 # Submission to last job:            49s       0.82m     0.01h    0.00d
 
     # combine predictions and transform scores to be in 0-1000 interval
     #	it uses a lot of memory, so on kolossus:
     ssh kolossus
     cd /san/sanvol1/scratch/worms/ce6/cons/consRun2
     #	The sed's and the sort get the file names in chrom,start order
     #	You might like to verify it is correct by first looking at the
     #	list it produces:
     find ./bed -type f | sed -e "s#/# x #g; s#\.# y #g; s#-# z #g" \
 	| sort -k7,7 -k9,9n \
 	| sed -e "s# z #-#g; s# y #\.#g; s# x #/#g"
     #	if that looks right (== in chrom and numerical order), then let it run:
     find ./bed -type f | sed -e "s#/# x #g; s#\.# y #g; s#-# z #g" \
 	| sort -k7,7 -k9,9n \
 	| sed -e "s# z #-#g; s# y #\.#g; s# x #/#g" | xargs cat \
 	| awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", $1, $2, $3, $5, $5;}' \
 	| /cluster/bin/scripts/lodToBedScore /dev/stdin \
 	    > phastConsElements6way.bed
     #	way too many negative
     #		scores with --expected-length 15 --target-coverage 0.60 
     #	OK with--expected-length 15 --target-coverage 0.55 
     cp -p phastConsElements6way.bed /cluster/data/ce6/bed/multiz6way
 
     # Figure out how much is actually covered by the bed file as so:
     #	Get the non-n genome size from faSize on all chroms:
     ssh kkstore06
     cd /cluster/data/ce6
     faSize */chr*.fa
     #	100281426 bases (0 N's 100281426 real 86981803 upper
     #		13299623 lower) in 7 sequences in 7 files
     #	%13.26 masked total, %13.26 masked real
 
     #	100281244 bases (0 N's 100281244 real 87008909 upper
     #	13272335 lower) in 7 sequences in 7 files
     #	%13.24 masked total, %13.24 masked real
 
     cd /san/sanvol1/scratch/worms/ce6/cons/consRun2
     #	The 100281426 comes from the non-n genome as counted above.
     awk '
 {sum+=$3-$2}
 END{printf "%% %.2f = 100.0*%d/100281426\n",100.0*sum/100281426,sum}' \
 	phastConsElements6way.bed
     #	--expected-length 15 --target-coverage 0.55
     #	% 29.56 = 100.0*29639749/100281426
 
     #	Aiming for %70 coverage in
     #	the following featureBits measurement on CDS:
     # Beware of negative scores when too high.  The logToBedScore
     # will output an error on any negative scores.
 
     HGDB_CONF=~/.hg.conf.read-only time nice -n +19 featureBits ce6 \
 	-enrichment sangerGene:cds phastConsElements6way.bed
     #	--expected-length 15 --target-coverage 0.55
     #	sangerGene:cds 25.329%, phastConsElements6way.bed 29.557%,
     #	both 15.469%, cover 61.07%, enrich 2.07x
 
     #	in Ce4, this was:
     #	sangerGene:cds 25.286%, phastConsElements5way.bed 32.368%,
     #	both 15.733%, cover 62.22%, enrich 1.92x
 
     # Load most conserved track into database
     ssh hgwdev
     cd /cluster/data/ce6/bed/multiz6way
     #	the copy was already done above
     # cp -p /san/sanvol1/scratch/worms/ce6/cons/consRun2/phastConsElements6way.bed .
     time nice -n +19 hgLoadBed ce6 phastConsElements6way \
 	phastConsElements6way.bed
     #	Loaded 595094 elements of size 5
 
     #	should measure the same as above
     time nice -n +19 featureBits ce6 -enrichment sangerGene:cds \
 	phastConsElements6way
     #	--expected-length 15 --target-coverage 0.55
     #	sangerGene:cds 25.329%, phastConsElements6way 29.557%, both 15.469%,
     #	cover 61.07%, enrich 2.07x
 
     #	on Ce4, this was:
     time nice -n +19 featureBits ce4 -enrichment sangerGene:cds \
 	phastConsElements5way
     #	sangerGene:cds 25.286%, phastConsElements5way 32.368%, both 15.733%,
     #	cover 62.22%, enrich 1.92x
 
     # Create merged posterier probability file and wiggle track data files
     ssh kkstore06
     cd /san/sanvol1/scratch/worms/ce6/cons/consRun2
 
     #	prepare compressed copy of ascii data values for downloads
     ssh pk
     cd /san/sanvol1/scratch/worms/ce6/cons/consRun2
     cat << '_EOF_' > gzipAscii.sh
 #!/bin/sh
 
 TOP=`pwd`
 export TOP
 
 mkdir -p phastCons6wayScores
 
 for D in ppRaw/chr*
 do
     C=${D/ppRaw\/}
     out=phastCons6wayScores/${C}.data.gz
     echo "========================== ${C} ${D}"
     find ./${D} -type f | sed -e "s#/# x #g; s#\.# y #g; s#-# z #g" \
 	| sort -k7,7 -k9,9n \
 	| sed -e "s# z #-#g; s# y #\.#g; s# x #/#g" | xargs cat |
 	    gzip > ${out}
 done
 '_EOF_'
     #	<< happy emacs
     chmod +x gzipAscii.sh
     time nice -n +19 ./gzipAscii.sh
     #	real    1m14.178s
 
     #	use those file to encode the data for the track:
     for C in I II III IV V X M
     do
 	zcat phastCons6wayScores/chr${C}.data.gz
     done | wigEncode stdin phastCons6way.wig phastCons6way.wib
     #	Converted stdin, upper limit 1.00, lower limit 0.00
     #	-rw-rw-r--  1 56813758 Jun 18 12:05 phastCons6way.wib
     #	-rw-rw-r--  1  8880383 Jun 18 12:05 phastCons6way.wig
 
     nice -n +19 cp -p phastCons6way.wi? /cluster/data/ce6/bed/multiz6way/
 
     #	copy them for downloads
     ssh kkstore06
     mkdir /cluster/data/ce6/bed/multiz6way/phastCons6wayScores
     cd /cluster/data/ce6/bed/multiz6way/phastCons6wayScores
     cp -p  /san/sanvol1/scratch/worms/ce6/cons/consRun2/phastCons6wayScores/* .
 
     ssh hgwdev
     cd /usr/local/apache/htdocs/goldenPath/ce6
     ln -s /cluster/data/ce6/bed/multiz6way/phastCons6wayScores .
 
     # Load gbdb and database with wiggle.
     ssh hgwdev
     cd /cluster/data/ce6/bed/multiz6way
     ln -s `pwd`/phastCons6way.wib /gbdb/ce6/wib/phastCons6way.wib
     nice -n +19 hgLoadWiggle ce6 phastCons6way phastCons6way.wig
 
     #  Create histogram to get an overview of all the data
     ssh hgwdev
     cd /cluster/data/ce6/bed/multiz6way
     nice -n +19 hgWiggle -doHistogram \
 	-hBinSize=0.001 -hBinCount=1000 -hMinVal=0.0 -verbose=2 \
 	    -db=ce6 phastCons6way > histogram.data 2>&1
 
     #	create plot of histogram:
 
     cat << '_EOF_' | gnuplot > histo.png
 set terminal png small color \
         x000000 xffffff xc000ff x66ff66 xffff00 x00ffff xff0000
 set size 1.4, 0.8
 set key left box
 set grid noxtics
 set grid ytics
 set title " Worm Ce6 Histogram phastCons6way track"
 set xlabel " phastCons6way score"
 set ylabel " Relative Frequency"
 set y2label " Cumulative Relative Frequency (CRF)"
 set y2range [0:1]
 set y2tics
 set yrange [0:0.02]
 
 plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
         "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
 '_EOF_'
     #	<< happy emacs
 
     display histo.png &
 
     #	enable this line in the ce6/trackDb.ra multiz6way track definition:
 # wiggle phastCons6way
     #	to get the graph to display in the genome browser
 
 ############################################################################
 # ANNOTATE MULTIZ6WAY MAF AND LOAD TABLES (DONE - 2008-06-19 - Hiram)
     ssh kolossus
 
     mkdir /cluster/data/ce6/bed/multiz6way/anno
     cd /cluster/data/ce6/bed/multiz6way/anno
     #	these actually already all exist from previous multiple alignments
     #	if there are done you will see an ls of them
     #	or else the twoBitInfo command will be echoed, run it if you want
     for DB in `cat ../species.lst`
 do
     CDIR="/cluster/data/${DB}"
     if [ ! -f ${CDIR}/${DB}.N.bed ]; then
 	echo "creating ${DB}.N.bed"
 	echo twoBitInfo -nBed ${CDIR}/${DB}.2bit ${CDIR}/${DB}.N.bed
     else
 	ls -og ${CDIR}/${DB}.N.bed
     fi
 done
 
     mkdir maf run
     cd run
     rm -f sizes nBeds
     for DB in `sed -e "s/ce6 //" ../../species.lst`
 do
     echo "${DB} "
     ln -s  /cluster/data/${DB}/${DB}.N.bed ${DB}.bed
     echo ${DB}.bed  >> nBeds
     ln -s  /cluster/data/${DB}/chrom.sizes ${DB}.len
     echo ${DB}.len  >> sizes
 done
 
     cat << '_EOF_' > doAnno.csh
 #!/bin/csh -ef
 set dir = /cluster/data/ce6/bed/multiz6way
 set c = $1
 cat $dir/maf/${c}.maf | nice \
     mafAddIRows -nBeds=nBeds stdin /cluster/data/ce6/ce6.2bit $2
 '_EOF_'
     # << happy emacs
     chmod +x doAnno.csh
 
 
     cat << '_EOF_' > template
 #LOOP
 ./doAnno.csh $(root1) {check out line+ ../maf/$(root1).maf}
 #ENDLOOP
 '_EOF_'
     # << happy emacs
 
     cut -f1 /cluster/data/ce6/chrom.sizes > chrom.list
     gensub2 chrom.list single template jobList
 # Completed: 7 of 7 jobs
 # CPU time in finished jobs:        217s       3.61m     0.06h    0.00d  0.000 y
 # IO & Wait Time:                    60s       1.01m     0.02h    0.00d  0.000 y
 # Average job time:                  40s       0.66m     0.01h    0.00d
 # Longest finished job:              55s       0.92m     0.02h    0.00d
 # Submission to last job:            58s       0.97m     0.02h    0.00d
 
     # Load anno/maf
     ssh hgwdev
     cd /cluster/data/ce6/bed/multiz6way/anno/maf
     mkdir -p /gbdb/ce6/multiz6way/anno/maf
     ln -s /cluster/data/ce6/bed/multiz6way/anno/maf/*.maf \
       /gbdb/ce6/multiz6way/anno/maf
     time nice -n +19 /cluster/bin/x86_64/hgLoadMaf \
 	-pathPrefix=/gbdb/ce6/multiz6way/anno/maf ce6 multiz6way
 # XXX problem: Can't hSetDb(ce6) after an hAllocConn(hg18), sorry.
 
     #	Loaded 420764 mafs in 7 files from /gbdb/ce6/multiz6way/anno/maf
     #	real    0m9.567s
 
     # Do the computation-intensive part of hgLoadMafSummary on a workhorse 
     # machine and then load on hgwdev:
     ssh kolossus
     cd /cluster/data/ce6/bed/multiz6way/anno/maf
     time cat *.maf | \
     	nice -n +19 hgLoadMafSummary ce6 -minSize=30000 -mergeGap=1500 \
 	-maxSize=200000 -test multiz6waySummary stdin
     #	Created 45771 summary blocks from 1051000 components
     #	and 420693 mafs from stdin
     #	real    0m16.138s
 
     #	-rw-rw-r--  1  2111948 Jun 20 10:25 multiz6waySummary.tab
 
     ssh hgwdev
     cd /cluster/data/ce6/bed/multiz6way/anno/maf
     time nice -n +19 hgLoadSqlTab ce6 multiz6waySummary \
 	~/kent/src/hg/lib/mafSummary.sql multiz6waySummary.tab
     #	real    0m0.645
 
     #	Create per-chrom individual maf files for downloads
     ssh kkstore02
     cd /cluster/data/ce6/bed/multiz6way
     mkdir mafDownloads
     time for M in anno/maf/chr*.maf
     do
 	B=`basename $M`
 	nice -n +19 cp -p ${M} mafDownloads/${B}
 	nice -n +19 gzip mafDownloads/${B}
 	echo ${B} done
     done
 
     ssh hgwdev
     cd /cluster/data/ce6/bed/multiz6way
     #	redone 2007-12-21 to fix difficulty in mafFrags when ce6 was not the
     #	first species listed
     # upstream mafs 
     #	There isn't any refGene table, trying sangerGene instead
 #!/bin/sh
 for S in 1000 2000 5000
 do
     echo "making upstream${S}.maf"
     nice -n +19 $HOME/bin/$MACHTYPE/featureBits -verbose=2 ce6 \
         sangerGene:upstream:${S} -fa=/dev/null -bed=stdout \
         | perl -wpe 's/_up[^\t]+/\t0/' | sort -k1,1 -k2,2n \
         | $HOME/kent/src/hg/ratStuff/mafFrags/mafFrags ce6 multiz6way \
                 stdin stdout -orgs=species.lst \
         | gzip -c > mafDownloads/sangerGene.upstream${S}.maf.gz
     echo "done sangerGene.upstream${S}.maf.gz"
 done
 
     cd mafDownloads
     md5sum *.gz > md5sum.txt
 
     #	deliver to downloads
     ssh hgwdev
     mkdir /usr/local/apache/htdocs/goldenPath/ce6/multiz6way
     cd /usr/local/apache/htdocs/goldenPath/ce6/multiz6way
     ln -s /cluster/data/ce6/bed/multiz6way/mafDownloads/* .
 
 #######################################################################
 # MULTIZ5WAY MAF FRAMES (DONE - 2007-05-03 - Hiram)
     ssh hgwdev
     mkdir /cluster/data/ce6/bed/multiz6way/frames
     cd /cluster/data/ce6/bed/multiz6way/frames
     mkdir genes
     # The following is adapted from the ce4 5-way sequence
 
     #------------------------------------------------------------------------
     # get the genes for all genomes
     # using sangerGene for ce6
     # using blastCe6SG for cb3, caePb2, caeRem3, caeJap1 priPac1
     for qDB in ce6 cb3 caePb2 caeRem3 caeJap1 priPac1
     do
       if [ $qDB = "cb3" -o $qDB = "caePb2" -o $qDB = "caeRem3" -o $qDB = "caeJap1" -o $qDB = "priPac1" ]; then
 	geneTbl=blastCe6SG
 	  echo hgsql -N -e \"'select * from '$geneTbl\;\" ${qDB}
 	  hgsql -N -e "select * from $geneTbl" ${qDB} | cut -f 2-100 \
 		> /scratch/tmp/$qDB.tmp.psl
 	    mrnaToGene -allCds /scratch/tmp/$qDB.tmp.psl stdout \
 	  | genePredSingleCover stdin stdout | gzip -2c \
 	    > /scratch/tmp/$qDB.tmp.gz
 	  rm -f /scratch/tmp/$qDB.tmp.psl
 	  mv /scratch/tmp/$qDB.tmp.gz genes/$qDB.gp.gz
       elif [ $qDB = "ce6" ]; then
 	geneTbl=sangerGene
 	  echo hgsql -N -e \"'select * from '$geneTbl\;\" ${qDB}
 	  hgsql -N -e "select * from $geneTbl" ${qDB} | cut -f 1-10 \
 	  | genePredSingleCover stdin stdout | gzip -2c \
 	    > /scratch/tmp/$qDB.tmp.gz
 	  mv /scratch/tmp/$qDB.tmp.gz genes/$qDB.gp.gz
       fi
     done
 
     ssh kkstore06
     cd /cluster/data/ce6/bed/multiz6way/frames
     cat ../maf/*.maf | nice -n +19 genePredToMafFrames ce6 \
 	stdin stdout ce6 genes/ce6.gp.gz cb3 \
 	genes/cb3.gp.gz caePb2 genes/caePb2.gp.gz caeRem3 \
 	genes/caeRem3.gp.gz caeJap1 genes/caeJap1.gp.gz \
 	priPac1 genes/priPac1.gp.gz \
 	| gzip > multiz6way.mafFrames.gz
 
     ssh hgwdev
     cd /cluster/data/ce6/bed/multiz6way/frames
     nice -n +19 hgLoadMafFrames ce6 multiz6wayFrames multiz6way.mafFrames.gz
 
 ##########################################################################
 #  verify all.joiner entries (DONE - 2008-06-20 - Hiram)
     #	try to get joinerCheck tests clean output on these commands
     ssh hgwdev
     cd ~/kent/src/hg/makeDb/schema
     joinerCheck -database=ce6 -tableCoverage all.joiner
     joinerCheck -database=ce6 -keys all.joiner
     joinerCheck -database=ce6 -times all.joiner
 
 ##########################################################################
 #  make downloads (DONE - 2008-06-20 - Hiram)
     #	verify all tracks have html pages for their trackDb entries
     cd /cluster/data/ce6
     /cluster/bin/scripts/makeDownloads.pl ce6
     #	fix the README files
     #	re-build the upstream files:
     cd /cluster/data/ce6/goldenPath/bigZips
    for size in 1000 2000 5000
 do
   echo $size
   featureBits ce6 sangerGene:upstream:$size -fa=stdout \
   | gzip -c > sangerGene.upstream$size.fa.gz
 done
 
 ##########################################################################
 #  create pushQ entry (DONE - 2008-06-20 - Hiram)
     #	verify all tracks have html pages for their trackDb entries
     ssh hgwdev
     cd /cluster/data/ce6/jkStuff
     /cluster/bin/scripts/makePushQSql.pl ce6 > ce6.pushQ.sql
 ce6 does not have seq
 Could not tell (from trackDb, all.joiner and hardcoded lists of supporting
 and genbank tables) which tracks to assign these tables to:
   blastSGPep01
   blastSGRef01
   multiz6wayFrames
  
 #############################################################################
 #  Create 6-way downloads (DONE - 2008-06-23 - Hiram)
     ssh hgwdev
     mkdir /cluster/data/ce6/goldenPath/multiz6way
     mkdir /cluster/data/ce6/goldenPath/phastCons6way
     cd /cluster/data/ce6/goldenPath/phastCons6way
     ln -s ../../bed/multiz6way/phastCons6wayScores/*.gz .
     ln -s ../../bed/multiz6way/cons/ave.cons.mod 6way.conserved.mod
     ln -s ../../bed/multiz6way/cons/ave.noncons.mod 6way.non-conserved.mod
     md5sum *.gz *.mod > md5sum.txt
     #	copy a README and fix it up for here:
     cp /usr/local/apache/htdocs/goldenPath/calJac1/phastCons9way/README.txt .
 
     mkdir /cluster/data/ce6/goldenPath/multiz6way
     cd /cluster/data/ce6/goldenPath/multiz6way
     cp /usr/local/apache/htdocs/goldenPath/calJac1/multiz9way/README.txt .
     # edit that README.txt to be correct for this 6-way alignment
     ssh kkstore06
     cd /cluster/data/ce6/goldenPath/multiz6way
     ln -s ../../bed/multiz6way/6way.nh ./ce6.6way.nh
 
     cp -p ../../bed/multiz6way//anno/maf/*.maf .
     gzip *.maf
 
     ssh hgwdev
     cd /cluster/data/ce6/goldenPath/multiz6way
     #	creating upstream files from sangerGene, bash script:
     cat << '_EOF_' > mkUpstream.sh
 #!/bin/bash
 DB=ce6
 GENE=sangerGene
 NWAY=multiz6way
 export DB GENE
 
 for S in 1000 2000 5000
 do
     echo "making upstream${S}.maf"
     featureBits ${DB} ${GENE}:upstream:${S} -fa=/dev/null -bed=stdout \
         | perl -wpe 's/_up[^\t]+/\t0/' | sort -k1,1 -k2,2n \
         | $HOME/kent/src/hg/ratStuff/mafFrags/mafFrags ${DB} ${NWAY} \
                 stdin stdout \
                 -orgs=/cluster/data/${DB}/bed/${NWAY}/species.lst \
         | gzip -c > upstream${S}.maf.gz
     echo "done upstream${S}.maf.gz"
 done
 '_EOF_'
     # << happy emacs
     chmod +x ./mkUpstream.sh
     time nice -n +19 ./mkUpstream.sh
     #	real    119m5.562s
 # -rw-rw-r--  1   42975041 Mar 28 14:27 upstream1000.maf.gz
 # -rw-rw-r--  1   76363192 Mar 28 15:03 upstream2000.maf.gz
 # -rw-rw-r--  1  303870318 Mar 28 15:42 upstream5000.maf.gz
 
     #	check the names in these upstream files to ensure sanity:
     zcat upstream1000.maf.gz | grep "^s " | awk '{print $2}' \
 	| sort | uniq -c | sort -rn | less
     #	should be a list of the other 4 species with a high count,
     #	then sangerGene names, e.g.:
     #	10134 priPac1
     #	10134 cb3
     #	10134 caeRem3
     #	10134 caePb2
     #	10134 caeJap1
     #	    1 cTel55X.1a.1
     #	    1 ZK993.1
     #	    1 ZK973.9
     #	    1 ZK973.8
     #	    1 ZK973.3
     #	... etc ...
 
     ssh kkstore06
     cd /cluster/data/ce6/goldenPath/multiz6way
     md5sum *.gz *.nh > md5sum.txt
 
     ssh hgwdev
     cd /cluster/data/ce6/goldenPath/phastCons6way
     mkdir /usr/local/apache/htdocs/goldenPath/ce6/phastCons6way
     ln -s `pwd`/* /usr/local/apache/htdocs/goldenPath/ce6/phastCons6way
 
     cd /cluster/data/ce6/goldenPath/multiz6way
     mkdir /usr/local/apache/htdocs/goldenPath/ce6/multiz6way
 
     ln -s `pwd`/* /usr/local/apache/htdocs/goldenPath/ce6/multiz6way
     #	if your ln -s `pwd`/* made extra links to files you don't want there,
     #	check the goldenPath locations and remove those extra links
 
 ##########################################################################
 #  Make protein blast runs for Gene Sorter and Known Genes on Hg18
     ssh hgwdev
     mkdir /cluster/data/ce6/bed/hgNearBlastp
     cd /cluster/data/ce6/bed/hgNearBlastp
     # Make sure there are no joinerCheck errors at this point, because 
     # if left here they can spread via doHgNearBlastp:
     runJoiner.csh ce6 sangerPep
 #	ce6.kimExpDistance.query - hits 13356000 of 19134000 ok
 #	ce6.kimExpDistance.target - hits 12973822 of 19134000 ok
 #	ce6.orfToGene.name - hits 30296 of 30296 ok
 #	ce6.sangerBlastTab.query - hits 1255312 of 1255312 ok
 #	ce6.sangerBlastTab.target - hits 1255312 of 1255312 ok
 #	ce6.sangerCanonical.transcript - hits 20051 of 20051 ok
 #	ce6.sangerIsoforms.transcript - hits 30296 of 30296 ok
 #	ce6.sangerLinks.orfName - hits 30115 of 30115 ok
 #	ce6.sangerPep.name - hits 23771 of 23771 ok
 #	ce6.sangerToRefSeq.name - hits 29995 of 29995 ok
 #	ce6.sangerGeneToWBGeneID.sangerGene - hits 30115 of 30115 ok
 
     pepPredToFa ce6 sangerPep ce6.sangerPep.faa
     cat << '_EOF_' > config.ra
 # Latest human vs. this ce6 worm for Human Gene Sorter
 
 targetGenesetPrefix known
 targetDb hg18
 queryDbs ce6
 
 hg18Fa /cluster/data/hg18/bed/ucsc.10/ucscGenes.faa
 ce6Fa /cluster/data/ce6/bed/hgNearBlastp/ce6.sangerPep.faa
 
 buildDir /cluster/data/ce6/bed/hgNearBlastp
 scratchDir /cluster/data/ce6/bed/hgNearBlastp/tmpHgNearBlastp
 '_EOF_'
 
     time nice -n +19 /cluster/bin/scripts/doHgNearBlastp.pl -workhorse=hgwdev \
 	-noLoad -verbose=2 config.ra > do.log 2>&1 &
 
 ##########################################################################
 ## creating xxBlastTab tables (DONE - 2008-07-11 - Hiram)
     # Make sure there are no joinerCheck errors at this point, because 
     # if left here they can spread via doHgNearBlastp:
     ssh hgwdev
     runJoiner.csh ce6 sangerPep
 # ce6.kimExpDistance.query - hits 13356000 of 19134000 ok
 # ce6.kimExpDistance.target - hits 12973822 of 19134000 ok
 # ce6.orfToGene.name - hits 30296 of 30296 ok
 # ce6.sangerBlastTab.query - hits 1255312 of 1255312 ok
 # ce6.sangerBlastTab.target - hits 1255312 of 1255312 ok
 # ce6.sangerCanonical.transcript - hits 20051 of 20051 ok
 # ce6.sangerIsoforms.transcript - hits 30296 of 30296 ok
 # ce6.sangerLinks.orfName - hits 30115 of 30115 ok
 # ce6.sangerPep.name - hits 23771 of 23771 ok
 # ce6.sangerToRefSeq.name - hits 29995 of 29995 ok
 # ce6.sangerGeneToWBGeneID.sangerGene - hits 30115 of 30115 ok
 
     ssh hgwdev
     mkdir /cluster/data/ce6/bed/hgNearBlastp/080711
     cd /cluster/data/ce6/bed/hgNearBlastp/080711
     # Get the proteins used by the other hgNear organisms:
     pepPredToFa hg18 knownGenePep hg18.known.faa
     pepPredToFa mm9 knownGenePep mm9.known.faa
     pepPredToFa rn4 knownGenePep rn4.known.faa
     pepPredToFa danRer5 ensPep danRer5.ensPep.faa
     pepPredToFa dm3 flyBasePep dm3.flyBasePep.faa
     pepPredToFa ce6 sangerPep ce6.sangerPep.faa
     pepPredToFa sacCer1 sgdPep sacCer1.sgdPep.faa
     # sanity check, number of lines in each faa file
     wc -l *.faa
     #	244343 ce6.sangerPep.faa
     #	345537 danRer5.ensPep.faa
     #	268201 dm3.flyBasePep.faa
     #	526563 hg18.known.faa
     #	453565 mm9.known.faa
     #	86220 rn4.known.faa
     #	64956 sacCer1.sgdPep.faa
 
     # sanity check, count the number of protein sequences in each
     for F in *.faa
 do
     echo -n "${F} "
     grep "^>" ${F} | wc -l
 done
 # ce6.sangerPep.faa 23771
 # danRer5.ensPep.faa 31743
 # dm3.flyBasePep.faa 20736
 # hg18.known.faa 45480
 # mm9.known.faa 39489
 # rn4.known.faa 8160
 # sacCer1.sgdPep.faa 5766
 
     cd /cluster/data/ce6/bed/hgNearBlastp
     cat << '_EOF_' > config.ra
 # Latest worm vs. other Gene Sorter orgs:
 # human, mouse, rat, zebrafish, fly, yeast
 
 targetGenesetPrefix sanger
 targetDb ce6
 queryDbs hg18 mm9 rn4 danRer5 dm3 sacCer1
 recipBest hg18 mm9 rn4 danRer5 dm3 sacCer1
 
 ce6Fa     /cluster/data/ce6/bed/hgNearBlastp/080711/ce6.sangerPep.faa
 hg18Fa    /cluster/data/ce6/bed/hgNearBlastp/080711/hg18.known.faa
 mm9Fa     /cluster/data/ce6/bed/hgNearBlastp/080711/mm9.known.faa
 rn4Fa     /cluster/data/ce6/bed/hgNearBlastp/080711/rn4.known.faa
 danRer5Fa /cluster/data/ce6/bed/hgNearBlastp/080711/danRer5.ensPep.faa
 dm3Fa     /cluster/data/ce6/bed/hgNearBlastp/080711/dm3.flyBasePep.faa
 sacCer1Fa /cluster/data/ce6/bed/hgNearBlastp/080711/sacCer1.sgdPep.faa
 
 buildDir /cluster/data/ce6/bed/hgNearBlastp/080711
 scratchDir /san/sanvol1/scratch/ce6HgNearBlastp
 '_EOF_'
     # << happy emacs
 
     # Run this in a screen since it is going to take awhile
     # Run with -noLoad so we can eyeball files.
     # Later overload other databases' dmBlastTab tables.
     ssh kkstore06
     screen
     cd 
     time nice -n +19 doHgNearBlastp.pl -workhorse=hgwdev -clusterHub=pk \
 	-noLoad config.ra > do.log 2>&1 &
     # watch progress in do.log
 
     #	load the blast tabs into ce6
 
     ssh hgwdev
     cd /cluster/data/ce6/bed/hgNearBlastp/080711
     for D in danRer5 dm3 hg18 mm9 rn4 sacCer1
 do
     cd run.ce6.$D
     grep hgLoadBlastTab loadPairwise.csh
     ./loadPairwise.csh
     cd ..
 done
 
 #############################################################################
 # Lift nucleosome data from ce4 to ce6 (DONE - 2008-09-28,30 - Hiram)
     mkdir /hive/data/genomes/ce6/bed/monoNucleosomes
     cd /hive/data/genomes/ce6/bed/monoNucleosomes
     ln -s ../../../ce4/bed/monoNucleosomes/chrI-M.for.paired.bed.gz \
 	ce4.nucleosomeControl.fwd.bed.gz
     ln -s ../../../ce4/bed/monoNucleosomes/chrI-M.rev.paired.bed.gz \
 	ce4.nucleosomeControl.rev.bed.gz
     ln -s \
 /usr/local/apache/htdocs/goldenPath/ce4/liftOver/ce4ToCe6.over.chain.gz \
 	ce4ToCe6.over.chain.gz
     #	The name filed isn't being parsed correctly because of the blanks
     #	take out the blanks so liftOver can read them, put them back
     #	in after lift
     zcat ce4.nucleosomeControl.fwd.bed.gz | sed -e "s/n = /n=/" \
 	| liftOver stdin ce4ToCe6.over.chain.gz stdout \
 		ce6.nucleosomeControl.fwd.unMapped \
 	| sed -e "s/n=/n = /" | gzip > ce6.nucleosomeControl.fwd.bed.gz
     #	only 5 items do not map correctly
     zcat ce4.nucleosomeControl.rev.bed.gz | sed -e "s/n = /n=/" \
 	| liftOver stdin ce4ToCe6.over.chain.gz stdout \
 		ce6.nucleosomeControl.rev.unMapped \
 	| sed -e "s/n=/n = /" | gzip > ce6.nucleosomeControl.rev.bed.gz
     #	71 items do not map correctly
     LOAD="hgLoadBed -tab -noNameIx ce6"
     zcat ce6.nucleosomeControl.fwd.bed.gz ce6.nucleosomeControl.rev.bed.gz \
         | ${LOAD} nucleosomeControl stdin
     #	Loaded 28951421 elements of size 12
 
     #	Collapsed nucleosome track files by chromosome:
     ln -s ../../../ce4/bed/monoNucleosomes/chrI-M.for.clean.bed.gz \
 	./ce4.nucleosomeFragmentsSense.bed.gz
     ln -s ../../../ce4/bed/monoNucleosomes/chrI-M.rev.clean.bed.gz \
 	./ce4.nucleosomeFragmentsAntisense.bed.gz
     zcat ce4.nucleosomeFragmentsSense.bed.gz | sed -e "s/n = /n=/" \
 	| liftOver stdin ce4ToCe6.over.chain.gz stdout \
 		ce6.nucleosomeControlFragmentsSense.unMapped \
 	| sed -e "s/n=/n = /" | gzip > ce6.nucleosomeFragmentsSense.bed.gz
     #	four items do not map
     zcat ce4.nucleosomeFragmentsAntisense.bed.gz | sed -e "s/n = /n=/" \
 	| liftOver stdin ce4ToCe6.over.chain.gz stdout \
 		ce6.nucleosomeControlFragmentsAntisense.unMapped \
 	| sed -e "s/n=/n = /" | gzip > ce6.nucleosomeFragmentsAntisense.bed.gz
     #	34 items do not map
     zcat ce6.nucleosomeFragmentsSense.bed.gz \
         | ${LOAD} nucleosomeFragmentsSense stdin
     #	Loaded 11279598 elements of size 12
     zcat ce6.nucleosomeFragmentsAntisense.bed.gz \
         | ${LOAD} nucleosomeFragmentsAntisense stdin
     #	Loaded 11269358 elements of size 12
     #	these three tracks grouped into the composite: "nucleosome"
     #	labels:
     #   composite container: Nucleosome predictions from SOLidD core alignments
     #	Control - : mononucleosome control
     #	fragments + : mononucleosomal fragments, sense strand reads
     #	fragments - : mononucleosomal fragments, antisense strand reads
 
     #	Positioning stringency wig track
     ln -s \
 ../../../ce4/bed/monoNucleosomes/chrI-M_positioning_stringency.wig.gz \
 	./ce4.nucleosomeStringency.wigVariable.txt.gz
     zcat ce4.nucleosomeStringency.wigVariable.txt.gz \
 	| varStepToBedGraph.pl stdin | liftOver stdin ce4ToCe6.over.chain.gz \
 	    stdout ce6.nucleosomeStringency.unMapped \
 	| gzip > ce6.nucleosomeStringency.bedGraph.gz
     #	11 items do not lift
     cat << '_EOF_' > bedGraphToVarStep.sh
 #!/bin/sh
 
 SPAN=1
 SRC0=ce4.nucleosomeStringency.wigVariable.txt.gz
 SRC=ce6.nucleosomeStringency.bedGraph.gz
 R=ce6.nucleosomeStringency.wigVariable.txt
 rm -f ${R}
 zcat ${SRC0} | egrep "^browser|^track" > ${R}
 zcat ${SRC} | grep "^chr" | cut -f1 | sort -u > chr.list
 cat chr.list | while read C
 do
     echo "variableStep chrom=${C} span=${SPAN}" >> ${R}
     zcat "${SRC}" \
         | awk '{if (match($1,"^'"${C}"'$")) { print } }' | sort -k2n | awk '
 {
     printf "%d\t%g\n", $2+1, $4
 }
 ' >> ${R}
 done
 rm -f chr.list
 '_EOF_'
     # << happy emacs
     time ./bedGraphToVarStep.sh
     #	real    35m15.201s
     gzip ce6.nucleosomeStringency.wigVariable.txt
     zcat ce6.nucleosomeStringency.wigVariable.txt.gz \
 	| wigEncode stdin nucleosomeStringency.wig nucleosomeStringency.wib
     #	Converted stdin, upper limit 1.00, lower limit 0.00
     ln -s `pwd`/nucleosomeStringency.wib /gbdb/ce6/wib
     #	Positioning stringency wig track
     hgLoadWiggle ce6 nucleosomeStringency nucleosomeStringency.wig
     #	NSome Stringency
     #	Nucleosome Positioning Stringency from SOLiD core alignments
 
     #	 Adjusted nucleosome coverage wig track:
     ln -s \
 	../../../ce4/bed/monoNucleosomes/adjusted_coverage_chrI-M.wig.gz \
 	./ce4.nucleosomeAdjustedCoverage.wigVariable.txt.gz
     zcat ce4.nucleosomeAdjustedCoverage.wigVariable.txt.gz \
 	| varStepToBedGraph.pl stdin | liftOver stdin ce4ToCe6.over.chain.gz \
 	    stdout ce6.nucleosomeAdjustedCoverage.unMapped \
 	| gzip > ce6.nucleosomeAdjustedCoverage.bedGraph.gz
     time ./bedGraphToVarStep.sh \
 	ce4.nucleosomeAdjustedCoverage.wigVariable.txt.gz \
 	ce6.nucleosomeAdjustedCoverage.bedGraph.gz \
 	ce6.nucleosomeAdjustedCoverage.wigVariable.txt
     #	real    27m40.408s
     time gzip ce6.nucleosomeAdjustedCoverage.wigVariable.txt
     #	real    3m27.146s
 
     time wigEncode ce6.nucleosomeAdjustedCoverage.wigVariable.txt.gz \
 	nucleosomeAdjustedCoverage.wig nucleosomeAdjustedCoverage.wib
     #	real    1m25.770s
     #	Converted ce6.nucleosomeAdjustedCoverage.wigVariable.txt.gz
     #	upper limit 8.76, lower limit -11.93
     ln -s `pwd`/nucleosomeAdjustedCoverage.wib /gbdb/ce6/wib
     hgLoadWiggle ce6 nucleosomeAdjustedCoverage nucleosomeAdjustedCoverage.wig
     #	Adj NSome Covrg
     #	Adjusted nucleosome coverage (25bp)
 
     #	MNase Coverage
     #	Micrococcal nuclease control coverage
     #	Cntl MNase +Covg
     #	Micrococcal nuclease control coverage, sense strand reads
     #	Cntl MNase -Covg
     #	Micrococcal nuclease control coverage, antisense strand reads
     zcat ce6.nucleosomeControl.fwd.bed.gz | sort -k1,1 -k2,2n \
      | bedItemOverlapCount ce6 stdin \
      | wigEncode stdin nucleosomeControlSenseCoverage.wig \
         nucleosomeControlSenseCoverage.wib
     #	Converted stdin, upper limit 142.00, lower limit 1.00
     ln -s `pwd`/nucleosomeControlSenseCoverage.wib /gbdb/ce6/wib
 
     zcat ce6.nucleosomeControl.rev.bed.gz | sort -k1,1 -k2,2n \
      | bedItemOverlapCount ce6 stdin \
      | wigEncode stdin nucleosomeControlAntisenseCoverage.wig \
         nucleosomeControlAntisenseCoverage.wib
     #	Converted stdin, upper limit 145.00, lower limit 1.00
     ln -s `pwd`/nucleosomeControlAntisenseCoverage.wib /gbdb/ce6/wib
 
     hgLoadWiggle ce6 nucleosomeControlSenseCoverage \
         nucleosomeControlSenseCoverage.wig
     hgLoadWiggle ce6 nucleosomeControlAntisenseCoverage \
         nucleosomeControlAntisenseCoverage.wig
 
     mkdir ce4.monoNucleosomes
     cd ce4.monoNucleosomes
     ln -s ../../../../ce4/bed/monoNucleosomes/stanford/clean/* .
 
     zcat ce4.monoNucleosomes/chr*.forward.bed.gz \
 	| liftOver stdin ce4ToCe6.over.chain.gz \
 	    stdout ce6.monoNucleosomesSense.unMapped \
 	| gzip > ce6.monoNucleosomesSense.bed.gz
     zcat ce4.monoNucleosomes/chr*.reverse.bed.gz \
 	| liftOver stdin ce4ToCe6.over.chain.gz \
 	    stdout ce6.monoNucleosomesAntiSense.unMapped \
 	| gzip > ce6.monoNucleosomesAntiSense.bed.gz
     bedItemOverlapCount ce6 ce6.monoNucleosomesSense.bed.gz \
          | wigEncode stdin monoNucleosomesSense.wig monoNucleosomesSense.wib
     #	Converted stdin, upper limit 16195.00, lower limit 1.00
     bedItemOverlapCount ce6 ce6.monoNucleosomesAntiSense.bed.gz \
          | wigEncode stdin monoNucleosomesAntiSense.wig \
 		monoNucleosomesAntiSense.wib
     #	Converted stdin, upper limit 19137.00, lower limit 1.00
     ln -s `pwd`/monoNucleosomesSense.wib /gbdb/ce6/wib
     ln -s `pwd`/monoNucleosomesAntiSense.wib /gbdb/ce6/wib
     #	for downloads:
     bedItemOverlapCount ce6 ce6.monoNucleosomesSense.bed.gz \
 	| gzip --rsyncable > monoNucleosomesSense.wigVariable.gz
     bedItemOverlapCount ce6 ce6.monoNucleosomesAntiSense.bed.gz \
 	| gzip --rsyncable > monoNucleosomesAntiSense.wigVariable.gz
 
     
     #	NSome Coverage
     #	Coverage of nucleosome predictions from SOLiD core alignments
 
     #	Control
     #	mononucleosome control
     hgLoadWiggle ce6 monoNucleosomesSense monoNucleosomesSense.wig
     #	NSome Core +Covg
     #	Coverage of mononucleosomal fragments, sense strand reads
     hgLoadWiggle ce6 monoNucleosomesAntiSense monoNucleosomesAntiSense.wig
     #	NSome Core -Covg
     #	Coverage of mononucleosomal fragments, antisense strand reads
 
 #########################################################################
 #  lift 25mers repeat track from ce4 (DONE - 2008-09-30 - Hiram)
     mkdir /cluster/data/ce6/bed/25mersRepeats
     cd /cluster/data/ce6/bed/25mersRepeats
     ln -s /cluster/data/ce4/bed/25mersRepeats/withScore.bed \
 	./ce4.25mersRepeats.bed
     ln -s \
 /usr/local/apache/htdocs/goldenPath/ce4/liftOver/ce4ToCe6.over.chain.gz \
 	ce4ToCe6.over.chain.gz
     liftOver ce4.25mersRepeats.bed ce4ToCe6.over.chain.gz \
 	ce6.25mersRepeats.bed ce6.unMapped
     #	no elements failed the eliftOver
 
     hgLoadBed ce6 25mersRepeats ce6.25mersRepeats.bed
     #	Loaded 757742 elements of size 9
     #	clean up
     rm bed.tab
     gzip ce6.25mersRepeats.bed
 
 #########################################################################
 # hgPal downloads (DONE braney 2008-09-30)
     ssh hgwdev
     screen
     bash
     rm -rf /cluster/data/ce6/bed/multiz6way/pal
     mkdir /cluster/data/ce6/bed/multiz6way/pal
     cd /cluster/data/ce6/bed/multiz6way/pal
     mz=multiz6way
     gp=refGene
     db=ce6
     echo $db > order.lst
     echo "select settings from trackDb where tableName='$mz'" | \
 	hgsql $db | sed 's/\\n/\n/g'| grep speciesOrder | tr ' ' '\n' | \
 	tail -n +2 >> order.lst
 
     mkdir exonAA exonNuc ppredAA ppredNuc
     for j in `sort -nk 2 /cluster/data/$db/chrom.sizes | awk '{print $1}'`
     do
 	echo "date"
 	echo "mafGene -chrom=$j  $db $mz $gp order.lst stdout | \
 	    gzip -c > ppredAA/$j.ppredAA.fa.gz"
 	echo "mafGene -chrom=$j -noTrans $db $mz $gp order.lst stdout | \
 	    gzip -c > ppredNuc/$j.ppredNuc.fa.gz"
 	echo "mafGene -chrom=$j -exons -noTrans $db $mz $gp order.lst stdout | \
 	    gzip -c > exonNuc/$j.exonNuc.fa.gz"
 	echo "mafGene -chrom=$j -exons $db $mz $gp order.lst stdout | \
 	    gzip -c > exonAA/$j.exonAA.fa.gz"
     done > refGene.jobs
 
     time sh -x refGene.jobs > refGene.job.log 2>&1 & 
     sleep 1
     tail -f refGene.job.log
 
 # real    14m44.986s
 # user    2m26.113s
 # sys     1m41.837s
 
     zcat exonAA/*.gz | gzip -c > $gp.$mz.exonAA.fa.gz
     zcat exonNuc/*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz
     zcat ppredAA/*.gz | gzip -c > $gp.$mz.ppredAA.fa.gz
     zcat ppredNuc/*.gz | gzip -c > $gp.$mz.ppredNuc.fa.gz
 
     rm -rf exonAA exonNuc ppredAA ppredNuc
 
     # we're only distributing exons at the moment
     pd=/usr/local/apache/htdocs/goldenPath/$db/$mz
     rm -rf $pd/$gp.exonAA.fa.gz $pd/$gp.exonNuc.fa.gz
     ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
     ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
 
     mz=multiz6way
     gp=sangerGene
     db=ce6
     mkdir exonAA exonNuc ppredAA ppredNuc
     for j in `sort -nk 2 /cluster/data/$db/chrom.sizes | awk '{print $1}'`
     do
 	echo "date"
 	echo "mafGene -chrom=$j  $db $mz $gp order.lst stdout | \
 	    gzip -c > ppredAA/$j.ppredAA.fa.gz"
 	echo "mafGene -chrom=$j -noTrans $db $mz $gp order.lst stdout | \
 	    gzip -c > ppredNuc/$j.ppredNuc.fa.gz"
 	echo "mafGene -chrom=$j -exons -noTrans $db $mz $gp order.lst stdout | \
 	    gzip -c > exonNuc/$j.exonNuc.fa.gz"
 	echo "mafGene -chrom=$j -exons $db $mz $gp order.lst stdout | \
 	    gzip -c > exonAA/$j.exonAA.fa.gz"
     done > $gp.$mz.jobs
 
     time sh -x $gp.$mz.jobs > $gp.$mz.job.log 2>&1 & 
     sleep 1
     tail -f $gp.$mz.job.log
 
 # real    16m2.266s
 # user    2m43.764s
 # sys     2m7.173s
 
     zcat exonAA/c*.gz | gzip -c > $gp.$mz.exonAA.fa.gz
     zcat exonNuc/c*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz
     zcat ppredAA/c*.gz | gzip -c > $gp.$mz.ppredAA.fa.gz
     zcat ppredNuc/c*.gz | gzip -c > $gp.$mz.ppredNuc.fa.gz
 
     rm -rf exonAA exonNuc ppredAA ppredNuc
 
     # we're only distributing exons at the moment
     pd=/usr/local/apache/htdocs/goldenPath/ce6/multiz6way
     ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
     ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
 
 #########################################################################
 
 ################################################
 # AUTOMATE UPSTREAM FILE CREATION (2008-10-15 markd)
 update genbank.conf:
 ce6.upstreamGeneTbl = refGene
 ce6.upstreamMaf = multiz6way /hive/data/genomes/ce6/bed/multiz6way/species.lst
 
 ############################################################################
 # CE6->CE7 LIFTOVER (DONE - 2008-06-24 - Hiram)
     cd /hive/data/genomes/ce6
     #	test procedure with -debug first
     doSameSpeciesLiftOver.pl -bigClusterHub=swarm -workhorse=hgwdev \
 	-ooc /hive/data/genomes/ce6/jkStuff/11.ooc ce6 ce7 -debug
     cd bed/blat.ce7.2009-07-28
     time nice -n +19 doSameSpeciesLiftOver.pl -bigClusterHub=swarm \
 	-workhorse=hgwdev \
 	-ooc /hive/data/genomes/ce6/jkStuff/11.ooc ce6 ce7 > do.log 2>&1 &
     #	real    8m7.833s
 
 #########################################################################
+# ALG Binding sites (DONE - 2010-01-14 - Hiram)
+    #	From the Gene Yeo lab:
+    #	fetch files
+T="http://www.snl.salk.edu/~lovci/Amy/ce6/alltheexperiments/for_ucsc"
+
+one() {
+wget --timestamping \
+"${T}/$1" -O $1
+}
+
+one all_L3_L4.L3_L4.ce6.ingenes.both_strands.reannotated.BED.gz
+one all_L3_L4.L3_L4.ce6.ingenes.both_strands.reannotated.BED.gz
+one MT1.neg.off.WIG.gz
+one MT1.pos.off.WIG.gz
+one MT2.neg.off.WIG.gz
+one MT2.pos.off.WIG.gz
+one MT3.neg.off.WIG.gz
+one MT3.pos.off.WIG.gz
+one WT1.neg.off.WIG.gz
+one WT1.pos.off.WIG.gz
+one WT2.neg.off.WIG.gz
+one WT2.pos.off.WIG.gz
+one WT3.neg.off.WIG.gz
+one WT3.pos.off.WIG.gz
+one WT_region_specific_clusters.weight_normal.3.rp.bed.WT.good_clusters.25.gz
+
+    #	encode and load wiggle tracks
+
+one() {
+ID=$1
+NP=$2
+RF=$3
+UC=$4
+LC=`echo $ID | tr '[A-Z]' '[a-z]'`
+zcat $ID.$NP.off.WIG.gz \
+        | wigEncode stdin ${LC}${UC}.wig ${LC}${UC}.wib > ${LC}$UC.log 2>&1
+ln -s `pwd`/${LC}${UC}.wib /gbdb/ce6/wib
+hgLoadWiggle ce6 ${LC}${UC} ${LC}${UC}.wig
+}
+
+one MT1 neg reverse Reverse
+one MT1 pos forward Forward
+one MT2 neg reverse Reverse
+one MT2 pos forward Forward
+one MT3 neg reverse Reverse
+one MT3 pos forward Forward
+one WT1 neg reverse Reverse
+one WT1 pos forward Forward
+one WT2 neg reverse Reverse
+one WT2 pos forward Forward
+one WT3 neg reverse Reverse
+one WT3 pos forward Forward
+
+    #	load BED tables:
+zcat WT_region_specific_clusters.weight_normal.3.rp.bed.WT.good_clusters.25.gz \
+    | grep -v track \
+        | awk -F'\t' '{printf "%s %d %d %s 1 %s\n", $1, $2, $3, $4, $6}' \
+            | hgLoadBed -noNameIx ce6 algBindSites stdin
+zcat all_L3_L4.L3_L4.ce6.ingenes.both_strands.reannotated.BED.gz \
+    | grep -v track | awk -F'\t' '{printf "%s %d %d %s 0 %s\n", $1, $2, $3, $4, $6}' | hgLoadBed -noNameIx ce6 algBindGenic stdin
+
+#########################################################################