src/hg/makeDb/doc/felCat4.txt 1.1

1.1 2010/06/04 23:32:28 chinhli
BLASTZ/CHAIN/NET
Index: src/hg/makeDb/doc/felCat4.txt
===================================================================
RCS file: src/hg/makeDb/doc/felCat4.txt
diff -N src/hg/makeDb/doc/felCat4.txt
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/hg/makeDb/doc/felCat4.txt	4 Jun 2010 23:32:28 -0000	1.1
@@ -0,0 +1,1820 @@
+# for emacs: -*- mode: sh; -*-
+
+#       $Id$
+
+# Felis Catus (domestic cat) --  NHGRI/GTB 4/felCat4 (2009-05-25)
+  
+
+# file template copied from calJac3.txt
+
+
+# Felis catus (Project ID: 32759) by NHGRI/Genome Technology Branch [Draft
+#    assembly] sequence: 
+# ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/
+#       Felis_catus/catChr4
+#       Felis catus
+
+##########################################################################
+# Download sequence (DONE - 2010-05-23 - Chin)
+    mkdir /hive/data/genomes/felCat4
+    cd /hive/data/genomes/felCat4
+    mkdir genbank
+    cd genbank
+wget --timestamping -r --cut-dirs=6 --level=0 -nH -x \
+        --no-remove-listing -np \
+"ftp://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Felis_catus/catChr4/*"
+    # FINISHED --09:05:15--
+    # Downloaded: 151 files, 1.3G in 7m 42s (2.98 MB/s)
+    # Read ASSEMBLY_INFO 
+
+    mkdir ucscChr
+    # stay at genbank directory
+    #   fixup the accession names to become UCSC chrom names
+
+# use component agp file as source
+S=Primary_Assembly/assembled_chromosomes
+cut -f1 ${S}/chr2acc  | while read C
+do
+    ACC=`grep "${C}" ${S}/chr2acc | cut -f2`
+    echo "${ACC} -> chr${C}"
+    zcat ${S}/AGP/chr${C}.comp.agp.gz \
+        | sed -e "s/^${ACC}/chr${C}/" | gzip > ucscChr/chr${C}.agp.gz
+done
+
+
+S=Primary_Assembly/assembled_chromosomes
+cut -f1 ${S}/chr2acc  | while read C
+do
+    ACC=`grep "${C}" ${S}/chr2acc | cut -f2`
+    echo "${ACC} -> chr${C}"
+    echo ">chr${C}" > ucscChr/chr${C}.fa
+    zcat ${S}/FASTA/chr${C}.fa.gz | grep -v "^>" >> ucscChr/chr${C}.fa
+    gzip ucscChr/chr${C}.fa &
+done
+   # Check them with faSize 
+   faSize Primary_Assembly/assembled_chromosomes/FASTA/chr*.fa.gz
+   # 2872644707 bases (1165972091 N's 1706672616 real 1706672616 upper 0
+   #       lower) in 19 sequences in 19 files
+   faSize ucscChr/chr*.fa.gz
+   # 2872644707 bases (1165972091 N's 1706672616 real 1706672616 upper 0
+   #        lower) in 19 sequences in 19 files
+
+
+   # For unplaced scalfolds, named them as chrUn_xxxxxxxx
+   #   where xxxxxx is the original access id as: chrUn_ACBE01000381.1 
+   # and put it into chrUn.* files 
+   #   The ".1" at the end of field 1 and 6 need to be filter out since
+   #   MySQL does not allow "." as part of the table name and 
+   #   will casue problems in genbank task step later
+
+   # copy all the comment lines (start with #)
+S=Primary_Assembly/unplaced_scaffolds
+zcat ${S}/AGP/unplaced.scaf.agp.gz | grep "^#" > ucscChr/chrUn.agp
+   # append the gap records
+zcat ${S}/AGP/unplaced.scaf.agp.gz | grep -v "^#" \
+        | sed -e "s/^/chrUn_/"  -e "s/\.1//"  >> ucscChr/chrUn.agp
+gzip ucscChr/chrUn.agp &
+
+zcat ${S}/FASTA/unplaced.scaf.fa.gz \
+        | sed -e "s#^>.*|gb|#>chrUn_#; s#|.*##" \
+        | sed -e "s/\.1$//" \
+        | gzip > ucscChr/chrUn.fa.gz
+   # about 104034 sequences in the unplaced
+zcat ucscChr/chrUn.fa.gz | grep "^>" | wc
+   #  104034  104034 2275648
+
+   # Check them with faSize 
+   faSize Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz 
+   # 287642232 bases (3696852 N's 283945380 real 283945380 upper 0 
+   #       lower) in 104034 sequences in 1 files
+   faSize ucscChr/chrUn.fa.gz
+   # 287642232 bases (3696852 N's 283945380 real 283945380 upper 0
+   #        lower) in 104034 sequences in 1 files
+
+
+##########################################################################
+# Initial genome build (DONE - 2010-05-24 -Chin)
+
+# decide the right order by
+ hgsql -e "select orderKey,name from dbDb order by orderKey;" \
+ hgcentraltest | less
+    # 168     cavPor3
+    # 169     cavPor2
+    # 189     oryCun2
+    # 190     oryCun1
+    # 191     ochPri2
+    # 200     eriEur1
+    # 217     felCat3
+ 
+
+    cd /hive/data/genomes/felCat4
+
+    cat << '_EOF_' > felCat4.config.ra
+# Config parameters for makeGenomeDb.pl:
+db felCat4
+clade mammal
+genomeCladePriority 16
+scientificName Felis catus
+commonName Cat
+assemblyDate Dec. 2008
+assemblyLabel NHGRI/Genome Technology Branch (NCBI project 10703, accession ACBE0100000)
+assemblyShortLabel NHGRI/GTB 4
+orderKey 216
+mitoAcc NC_001700
+fastaFiles /hive/data/genomes/felCat4/genbank/ucscChr/chr*.fa.gz
+agpFiles /hive/data/genomes/felCat4/genbank/ucscChr/chr*.agp.gz
+# qualFiles none
+dbDbSpeciesDir cat
+taxId 9685
+'_EOF_'
+
+    time makeGenomeDb.pl -noGoldGapSplit -workhorse=hgwdev felCat4.config.ra \
+        > makeGenomeDb.log 2>&1
+    #   real    10m1.157s
+
+    # set up track
+    mkdir /cluster/home/chinhli/kent/src/hg/makeDb/trackDb/cat/felCat4
+    cp /cluster/data/felCat4/TemporaryTrackDbCheckout/kent/src/hg/makeDb/trackDb/cat/felCat4/* /cluster/home/chinhli/kent/src/hg/makeDb/trackDb/cat/felCat4/
+    # Search for '***' notes in each file in and make corrections
+    # cd ../.. (to trackDb/) and
+    # - edit makefile to add felCat4 to DBS.
+    # if necessary) cvs add cat
+    # - cvs add cat/felCat4
+    # - cvs add cat/felCat4/*.{ra,html}
+    #  - cvs ci -m "Added felCat4 to DBS." makefile
+    # - cvs ci -m "Initial descriptions for felCat4." cat/felCat4
+    # - (if necessary) cvs ci cat
+    # - Run make update DBS=felCat4 and make alpha when done.
+    # - (optional) Clean up /cluster/data/felCat4/TemporaryTrackDbCheckout
+    # - cvsup your ~/kent/src/hg/makeDb/trackDb and make future edits there.
+
+
+
+##########################################################################
+# running repeat masker (DONE 2010-05-26 - Chin)
+    mkdir /hive/data/genomes/felCat4/bed/repeatMasker
+    cd /hive/data/genomes/felCat4/bed/repeatMasker
+    time doRepeatMasker.pl -buildDir=`pwd` -noSplit \
+        -bigClusterHub=swarm -dbHost=hgwdev -workhorse=hgwdev \
+        -smallClusterHub=encodek felCat4 > do.log 2>&1 &
+    # real    262m59.644s
+    cat faSize.rmsk.txt
+    # 3160303948 bases (1169668943 N's 1990635005 real 1183631401 
+    # upper 807003604 lower) in 104054 sequences in 1 files
+    # %25.54 masked total, %40.54 masked real
+
+##########################################################################
+# running simple repeat (DONE - 2010-05-26 - Chin)
+    mkdir /hive/data/genomes/felCat4/bed/simpleRepeat
+    cd /hive/data/genomes/felCat4/bed/simpleRepeat
+    time doSimpleRepeat.pl -buildDir=`pwd` -bigClusterHub=swarm \
+        -dbHost=hgwdev -workhorse=hgwdev -smallClusterHub=encodek \
+        felCat4 > do.log 2>&1 &
+    #  real    40m36.157s
+    cat fb.simpleRepeat
+    #   49107705 bases of 1990645222 (2.467%) in intersection   
+    #   49107705 bases of 2690950627 (1.825%) in intersection (felCatV17e???)
+
+
+    cd /hive/data/genomes/felCat4
+    twoBitMask felCat4.rmsk.2bit \
+        -add bed/simpleRepeat/trfMask.bed felCat4.2bit
+    #   you can safely ignore the warning about fields >= 13
+
+    twoBitToFa felCat4.2bit stdout | faSize stdin > faSize.felCat4.2bit.txt
+    cat faSize.felCat4.2bit.txt
+    #  3160303948 bases (1169668943 N's 1990635005 real 1182766163 upper
+    #   807868842 lower) in 104054 sequences in 1 files
+    #   %25.56 masked total, %40.58 masked real
+
+    # double check with featureBits
+    featureBits -countGaps felCat4 gap
+    #  1169658726 bases of 3160303948 (37.011%) in intersection
+    #  1169668943 bases of 3160303948 (37.011%) in intersection (felCatV17e
+    #  ???)
+
+    rm /gbdb/felCat4/felCat4.2bit
+    ln -s `pwd`/felCat4.2bit /gbdb/felCat4/felCat4.2bit
+
+
+
+########################################################################
+# Marking *all* gaps - they are not all in the AGP file
+#       (DONE - 2010-05-27 - Chin)
+    mkdir /hive/data/genomes/felCat4/bed/allGaps
+    cd /hive/data/genomes/felCat4/bed/allGaps
+
+    time nice -n +19 findMotif -motif=gattaca -verbose=4 \
+        -strand=+ ../../felCat4.rmsk.2bit > findMotif.txt 2>&1
+    #   real    1m11.067s
+
+    grep "^#GAP " findMotif.txt | sed -e "s/^#GAP //" > allGaps.bed
+    featureBits felCat4 -not gap -bed=notGap.bed
+    #  1990645222 bases of 1990645222 (100.000%) in intersection
+
+    featureBits felCat4 allGaps.bed notGap.bed -bed=new.gaps.bed
+    #  10217 bases of 1990645222 (0.001%) in intersection
+    #    (real time 7+ hours)
+    #   what is the last index in the existing gap table:
+    hgsql -N -e "select ix from gap;" felCat4 | sort -n | tail -1
+    #   101050
+
+    cat << '_EOF_' > mkGap.pl
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+my $ix=`hgsql -N -e "select ix from gap;" felCat4 | sort -n | tail -1`;
+chomp $ix;
+
+open (FH,"<new.gaps.bed") or die "can not read new.gaps.bed";
+while (my $line = <FH>) {
+    my ($chrom, $chromStart, $chromEnd, $rest) = split('\s+', $line);
+    ++$ix;
+    printf "%s\t%d\t%d\t%d\tN\t%d\tother\tyes\n", $chrom, $chromStart,
+        $chromEnd, $ix, $chromEnd-$chromStart;
+}
+close (FH);
+'_EOF_'
+    # << happy emacs
+    chmod +x ./mkGap.pl
+    ./mkGap.pl > other.gap
+    hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/gap.sql \
+        -noLoad felCat4 otherGap other.gap
+    #   Reading other.gap
+    #   Loaded 1294 elements of size 8
+    #   Sorted
+    #   Saving bed.tab
+    #   No load option selected, see file: bed.tab
+    wc -l bed.tab
+    #   1294 bed.tab
+    #   starting with this many
+    hgsql -e "select count(*) from gap;" felCat4
+    #   500507
+    hgsql felCat4 -e 'load data local infile "bed.tab" into table gap;'
+    #   result count:
+    hgsql -e "select count(*) from gap;" felCat4
+    #   501801
+    # == 500507 + 1294
+
+#########################################################################
+# MAKE 11.OOC FILES FOR BLAT (DONE 2010-05-27 - Chin)
+    cd /hive/data/genomes/felCat4
+    cat faSize.felCat4.2bit.txt 
+    #  3160303948 bases (1169668943 N's 1990635005 real 1182766163 upper 
+    #       807868842 lower) in 104054 sequences in 1 files
+    #       %25.56 masked total, %40.58 masked real
+
+    # numerator is felCat4 gapless bases "real" as reported by faSize 
+    # denominator is hg17 gapless bases as reported by featureBits,
+    # 1024 is threshold used for human -repMatch:
+    calc \( 1990635005 /  2897310462 \) \* 1024
+    #  ( 1990635005 / 2897310462 ) * 1024 = 703.552578
+    # ==> use -repMatch=700 according to size scaled down from 1024 for human.
+    #   and rounded down to nearest 50
+    blat felCat4.2bit /dev/null /dev/null -tileSize=11 \
+      -makeOoc=jkStuff/felCat4.11.ooc -repMatch=700
+    #   Wrote 23033 overused 11-mers to jkStuff/felCat4.11.ooc
+
+    mkdir /hive/data/staging/data/felCat4
+    cp -p felCat4.2bit chrom.sizes jkStuff/felCat4.11.ooc \
+        /hive/data/staging/data/felCat4
+
+    gapToLift -bedFile=jkStuff/nonBridgedGaps.bed felCat4 \
+        jkStuff/felCat4.nonBridged.lft
+
+    # Ask the admin to copy the 
+    #  /hive/data/staging/data/felCat4 directory
+    #  to cluster nodes:   /scratch/data/felCat4
+
+##########################################################################
+#  BLATSERVERS ENTRY (DONE 2010-05-27 - Chin)
+#       After getting a blat server assigned by the Blat Server Gods,
+# 
+
+    hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
+        VALUES ("felCat4", "blat4", "17792", "1", "0"); \
+        INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
+        VALUES ("felCat4", "blat4", "17793", "0", "1");' \
+            hgcentraltest
+    #   test it with some sequence
+
+############################################################################
+# chrA2:64650765-64656835
+# reset position to RHO location as found from blat of hg19 RHO gene
+    hgsql -e \
+'update dbDb set defaultPos="chrA2:64650765-64656835" where 
+  name="felCat4";' \
+        hgcentraltest
+############################################################################
+# genbank run DONE - 2010-05-27 - Chin )
+    ssh hgwdev
+    cd $HOME/kent/src/hg/makeDb/genbank
+    # edit etc/genbank.conf to add this section just before calJac1:
+# felCat4
+felCat4.serverGenome = /hive/data/genomes/felCat4/felCat4.2bit
+felCat4.clusterGenome = /scratch/data/felCat4/felCat4.2bit
+felCat4.ooc = /scratch/data/felCat4/felCat4.11.ooc
+felCat4.lift = no
+felCat4.perChromTables = no
+felCat4.refseq.mrna.native.pslCDnaFilter  = ${ordered.refseq.mrna.native.pslCDnaFilter}
+felCat4.refseq.mrna.xeno.pslCDnaFilter    = ${ordered.refseq.mrna.xeno.pslCDnaFilter}
+felCat4.genbank.mrna.native.pslCDnaFilter = ${ordered.genbank.mrna.native.pslCDnaFilter}
+felCat4.genbank.mrna.xeno.pslCDnaFilter   = ${ordered.genbank.mrna.xeno.pslCDnaFilter}
+felCat4.genbank.est.native.pslCDnaFilter  = ${ordered.genbank.est.native.pslCDnaFilter}
+felCat4.genbank.est.xeno.pslCDnaFilter    = ${ordered.genbank.est.xeno.pslCDnaFilter}
+felCat4.downloadDir = felCat4
+felCat4.refseq.mrna.native.load  = yes
+felCat4.refseq.mrna.xeno.load = yes
+felCat4.refseq.mrna.xeno.loadDesc  = yes
+
+    cvs ci -m "adding felCat4" etc/genbank.conf
+    make etc-update
+
+    ssh genbank
+    screen  # control this business with a screen since it takes a while
+    cd /cluster/data/genbank
+    time nice -n +19 bin/gbAlignStep -initial  felCat4 &
+    # logFile: var/build/logs/2010.05.29-00:59:34.felCat4.initalign.log
+    # tail var/build/logs/2010.05.29-00:59:34.felCat4.initalign.log
+    #   real    1161m8.623s
+    #   user    288m16.175s
+    #   sys     117m18.519s
+
+    ssh hgwdev
+    cd /cluster/data/genbank
+    time ./bin/gbDbLoadStep -drop -initialLoad felCat4 &
+    #   logFile: var/dbload/hgwdev/logs/2010.06.01-08:22:58.dbload.log 
+    #     tail var/dbload/hgwdev/logs/2010.06.01-08:22:58.dbload.log
+    #  real    37m51.980s
+
+    # enable daily alignment and update of hgwdev
+    cd ~/kent/src/hg/makeDb/genbank
+    cvsup
+    # add felCat4 to:
+        etc/align.dbs
+        etc/hgwdev.dbs
+    cvs ci -m "Adding felCat4 - Cat - Felis catus" \
+        etc/align.dbs etc/hgwdev.dbs
+    make etc-update
+    #   DONE  - 2010-06-01 - Chin
+
+
+############################################################################
+# running cpgIsland business (working - 2010-06-01 - Chin)
+    mkdir /hive/data/genomes/felCat4/bed/cpgIsland
+    cd /hive/data/genomes/felCat4/bed/cpgIsland
+    cvs -d /projects/compbio/cvsroot checkout -P hg3rdParty/cpgIslands
+    cd hg3rdParty/cpgIslands
+    #   needed to fixup this source, adding include to readseq.c:
+#include "string.h"
+    #   and to cpg_lh.c:
+#include "unistd.h"
+#include "stdlib.h"
+    # and fixing a declaration in cpg_lh.c
+    sed -e "s#\(extern char\* malloc\)#// \1#" cpg_lh.c > tmp.c
+    mv tmp.c cpg_lh.c
+    make
+    cd ../../
+    ln -s hg3rdParty/cpgIslands/cpglh.exe
+    mkdir -p hardMaskedFa
+    cut -f1 ../../chrom.sizes | while read C
+do
+    echo ${C}
+    twoBitToFa ../../felCat4.2bit:$C stdout \
+        | maskOutFa stdin hard hardMaskedFa/${C}.fa
+done
+
+    ssh swarm
+    cd /hive/data/genomes/felCat4/bed/cpgIsland
+    mkdir results
+    cut -f1 ../../chrom.sizes > chr.list
+    cat << '_EOF_' > template
+#LOOP
+./runOne $(path1) {check out exists results/$(path1).cpg}
+#ENDLOOP
+'_EOF_'
+    # << happy emacs
+
+    #   the faCount business is to make sure there is enough sequence to
+    #   work with in the fasta.  cpglh.exe does not like files with too many
+    #   N's - it gets stuck
+    cat << '_EOF_' > runOne
+#!/bin/csh -fe
+set C = `faCount hardMaskedFa/$1.fa | grep ^chr | awk '{print  $2 - $7 }'`
+if ( $C > 200 ) then
+    ./cpglh.exe hardMaskedFa/$1.fa > /scratch/tmp/$1.$$
+    mv /scratch/tmp/$1.$$ $2
+else
+    touch $2
+endif
+'_EOF_'
+    # << happy emacs
+    chmod +x ./runOne 
+
+    gensub2 chr.list single template jobList
+    para create jobList
+    para try
+    para check ... etc
+    para push
+    para time
+
+    # in jobList:
+    # .....
+    # ./runOne chrD3 {check out exists results/chrD3.cpg}
+	# wc -l jobList 
+    # .....
+    # 104054 jobList
+    # Completed: 104054 of 104054 jobs
+    # CPU time in finished jobs:    209s       3.48m     0.06h    0.00d  0.000 y
+    # IO & Wait Time:           343233s    5720.56m    95.34h    3.97d  0.011 y
+    # Average job time:               3s       0.06m     0.00h    0.00d
+    # Longest finished job:              52s       0.87m     0.01h    0.00d
+    # Submission to last job:          3927s      65.45m     1.09h    0.05d
+
+    # Transform cpglh output to bed +
+    catDir results | awk '{
+$2 = $2 - 1;
+width = $3 - $2;
+printf("%s\t%d\t%s\t%s %s\t%s\t%s\t%0.0f\t%0.1f\t%s\t%s\n",
+       $1, $2, $3, $5,$6, width,
+       $6, width*$7*0.01, 100.0*2*$6/width, $7, $9);
+}' > cpgIsland.bed
+
+    wc -l cpgIsland.bed 
+    #  57950 cpgIsland.bed
+
+    cd /hive/data/genomes/felCat4/bed/cpgIsland
+    hgLoadBed felCat4 cpgIslandExt -tab \
+      -sqlTable=$HOME/kent/src/hg/lib/cpgIslandExt.sql cpgIsland.bed
+    # Reading cpgIsland.bed
+    # Loaded 57950 elements of size 10
+    # Sorted
+    # Creating table definition for cpgIslandExt
+    # Saving bed.tab
+    # Loading felCat4
+
+    # check it with 
+    featureBits felCat4 cpgIslandExt
+    #  36646772 bases of 1990635005 (1.841%) in intersection
+
+    #   cleanup
+    rm -fr hardMaskedFa
+
+#######################################################################
+# felCat4 Cat BLASTZ/CHAIN/NET (working  - 2010-06-01 - Chin)
+    screen # use a screen to manage this multi-day job
+    mkdir /hive/data/genomes/canFam2/bed/lastzFelCat4.2010-06-01
+    cd /hive/data/genomes/canFam2/bed/lastzFelCat4.2010-06-01
+
+    cat << '_EOF_' > DEF
+# dog vs. cat
+# maximum M allowed with lastz is only 254
+BLASTZ_M=254
+
+# TARGET: Dog canFan2
+SEQ1_DIR=/scratch/data/canFam2/canFam2.2bit
+SEQ1_LEN=/scratch/data/canFam2/chrom.sizes
+SEQ1_CHUNK=20000000
+SEQ1_LAP=10000
+SEQ1_LIMIT=5
+
+# QUERY: Cat (felCat4)
+SEQ2_DIR=/scratch/data/felCat4/felCat4.2bit
+SEQ2_LEN=/scratch/data/felCat4/chrom.sizes
+SEQ2_LIMIT=50
+SEQ2_CHUNK=10000000
+SEQ2_LAP=0
+
+BASE=/hive/data/genomes/canFam2/bed/lastzFelCat4.2010-06-01
+TMPDIR=/scratch/tmp
+'_EOF_'
+    # << this line keeps emacs coloring happy
+
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+        `pwd`/DEF \
+         -syntenicNet -noDbNameCheck \
+         -chainMinScore=3000 -chainLinearGap=medium \
+         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
+         > do.log 2>&1 &
+    #   real  389m24.326s first try got failuer in do.log
+    #   -- during chainRun after cat
+    # rerun chain.csf with failed item works. so rerun the 
+    # doBlastzChainNet from step chainRun after para stop, para freeBatch 
+    # rm chain and liftedChain in
+    #  /hive/data/genomes/canFam2/bed/lastzFelCat4.2010-06-01/axtChain/run
+    #   and use memk this time
+XXXX 06-04 PM screen -d -r 3477
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+        `pwd`/DEF \
+         -continue chainRun \
+         -syntenicNet -noDbNameCheck \
+         -chainMinScore=3000 -chainLinearGap=medium \
+         -workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=swarm \
+         > do_chainRun.log 2>&1 &
+ 
+
+
+    cat fb.canFam2.chainFelCat4Link.txt
+    #  1481040604 bases of 2384996543 (62.098%) in intersection
+
+    mkdir /hive/data/genomes/felCat4/bed/blastz.canFam2.swap
+    cd /hive/data/genomes/felCat4/bed/blastz.canFam2.swap
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+        /hive/data/genomes/canFam2/bed/lastzFelCat4.2010-06-01/DEF \
+        -swap -syntenicNet -noDbNameCheck \
+        -workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=swarm \
+        -chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
+    #   real    251m14.341s
+    # *** All done !  Elapsed time: 251m14s
+    # *** Make sure that goldenPath/felCat4/vsCanFam2/README.txt is
+    # accurate.
+    # *** Add {chain,net}CanFam2 tracks to trackDb.ra if necessary.
+    cat fb.felCat4.chainCanFam2Link.txt
+    # 1467506008 bases of 1990635005 (73.720%) in intersection
+
+
+
+#####################################################################
+# LASTZ Mouse Swap (DONE - 2010-06-01 - Chin)
+    cd /hive/data/genomes/mm9/bed/lastzFelCat4.2010-06-01
+    cat fb.mm9.chainFelCat4Link.txt
+    #   637007193 bases of 2620346127 (24.310%) in intersection
+    # make sure the link exists:
+    # cd /hive/data/genomes/mm9/bed
+    # ln -s /hive/data/genomes/mm9/bed/lastzFelCat4.2010-06-01 \
+    #      /lastz.felCat4
+
+    # swap
+    mkdir /hive/data/genomes/felCat4/bed/blastz.mm9.swap
+    cd /hive/data/genomes/felCat4/bed/blastz.mm9.swap
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+        /hive/data/genomes/mm9/bed/lastzFelCat4.2010-06-01/DEF \
+        -swap -syntenicNet -noDbNameCheck \
+        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
+        -chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
+    #   real   125m37.926s 
+    cat fb.felCat4.chainMm9Link.txt
+    #   616529959 bases of 1990635005 (30.972%) in intersection
+
+
+#####################################################################
+# LASTZ Human Swap (DONE - 2010-06-01 - Chin)
+    cd /hive/data/genomes/hg19/bed/lastzFelCat4.2010-06-01
+    cat fb.hg19.chainFelCat4Link.txt
+    #  1266003011 bases of 2897316137 (43.696%) in intersection
+
+    # Swap re-run 04-07
+    mkdir /hive/data/genomes/felCat4/bed/blastz.hg19.swap
+    cd /hive/data/genomes/felCat4/bed/blastz.hg19.swap
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+        /hive/data/genomes/hg19/bed/lastzFelCat4.2010-06-01/DEF \
+        -swap -syntenicNet -noDbNameCheck \
+        -workhorse=hgwdev -smallClusterHub=pk -bigClusterHub=swarm \
+        -chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
+    #   real   200m43.245s 
+    cat fb.felCat4.chainHg19Link.txt
+#   1211702270 bases of 1990635005 (60.870%) in intersection
+
+
+#####################################################################
+# LASTZ Panda Swap (DONE - 2010-06-01 - Chin)
+    cd /hive/data/genomes/ailMel1/bed/lastzFelCat4.2010-06-01
+    cat fb.felCat4.chainAilMel1Link.txt
+    # 1503647735 bases of 1990635005 (75.536%) in intersection
+    # link it
+    #  cd /hive/data/genomes/ailMel1/bed
+    #  ln -s /hive/data/genomes/ailMel1/bed/lastzFelCat4.2010-06-01
+    #  lastz.felCat4
+    # run rbest 2010-04-30
+    time doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` \
+        felCat4 ailMel1 > rbest.log 2>&1 &
+    #   real    234m49.556s
+
+    #   re-run swap 04-26
+    mkdir /hive/data/genomes/felCat4/bed/blastz.ailMel1.swap
+    cd /hive/data/genomes/felCat4/bed/blastz.ailMel1.swap
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+        /hive/data/genomes/ailMel1/bed/lastzFelCat4.2010-06-01/DEF \
+        -swap -syntenicNet -noDbNameCheck \
+        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
+        -chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
+    # real    155m57.566s
+    cd /hive/data/genomes/ailMel1/bed/blastz.felCat4.swap
+    cat fb.ailMel1.chainFelCat4Link.txt
+    # 1507273252 bases of 2245312831 (67.130%) in intersection
+
+    #   Due to the SEQ1 and SEQ2 spec'ed in
+    # /hive/data/genomes/ailMel1/bed/lastzFelCat4.2010-06-01/DEF
+    # need to copy the dDirectory from ailMel1 to felCatv17e
+    #  without this, 6way step will have problem
+    #  cd /hive/data/genomes/ailMel1/bed/blastz.felCat4.swap]
+    #  cp -pr * /hive/data/genomes/felCat4/bed/blastz.ailMel1.swap/ 
+    #  cd /hive/data/genomes/felCat4/bed
+    #  ln -s lastzAilMel1.2010-06-01 lastz.ailMel1
+
+
+    # mkdir /hive/data/genomes/felCat4/bed/lastzAilMel1.2010-06-01
+    # cd /hive/data/genomes/ailMel1/bed/lastzFelCat4.2010-06-01
+    # cp -rp * /hive/data/genomes/felCat4/bed/lastzAilMel1.2010-06-01/ 
+    # cd /hive/data/genomes/felCat4/bed
+    # ln -s /hive/data/genomes/felCat4/bed/lastzAilMel1.2010-06-01 lastz.ailMel1
+    # cd /hive/data/genomes/ailMel1/bed
+    # ln -s  blastz.felCat4.swap  lastz.felCat4
+
+
+
+
+#####################################################################
+# LASTZ Oppsom  Swap (DONE - 2010-06-01 - Chin)
+    cd /hive/data/genomes/monDom5/bed/lastzFelCat4.2010-06-01
+    cat fb.monDom5.chainFelCat4Link.txt
+    # 178616721 bases of 3501660299 (5.101%) in intersection
+    # make sure the link exist
+    # cd /hive/data/genomes/monDom5/bed
+    #  ln -s lastzFelCat4.2010-06-01 lastz.felCat4 
+
+    # Swap
+    mkdir /hive/data/genomes/felCat4/bed/blastz.monDom5.swap
+    cd /hive/data/genomes/felCat4/bed/blastz.monDom5.swap
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+        /hive/data/genomes/monDom5/bed/lastzFelCat4.2010-06-01/DEF \
+        -swap -syntenicNet -noDbNameCheck \
+        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
+        -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
+    # *** All done !  Elapsed time: 100m29s
+    # *** Make sure that goldenPath/felCat4/vsMonDom5/README.txt is
+    # accurate.
+    # *** Add {chain,net}MonDom5 tracks to trackDb.ra if necessary.
+
+    cat fb.felCat4.chainMonDom5Link.txt
+    #   166499264 bases of 1990635005 (8.364%) in intersection
+
+
+############################################################################
+# ctgPos2 track - showing clone sequence locations on chromosomes
+#       (DONE - 2010-03-30 - Chin)
+    mkdir /hive/data/genomes/felCat4/bed/ctgPos2
+    cd /hive/data/genomes/felCat4/bed/ctgPos2
+    cat << '_EOF_' > agpToCtgPos2.pl
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+my $argc = scalar(@ARGV);
+
+if ($argc != 1) {
+    printf STDERR "usage: zcat your.files.agp.gz | agpToCtgPos2.pl
+/dev/stdin > ctgPos2.tab\n";
+    exit 255;
+}
+
+my $agpFile = shift;
+
+open (FH, "<$agpFile") or die "can not read $agpFile";
+while (my $line = <FH>) {
+    next if ($line =~ m/^#/);
+    chomp $line;
+    my @a = split('\s+', $line);
+    next if ($a[4] =~ m/^N$/);
+    next if ($a[4] =~ m/^U$/);
+    my $chrSize = $a[2]-$a[1]+1;
+    my $ctgSize = $a[7]-$a[6]+1;
+    die "sizes differ $chrSize != $ctgSize\n$line\n" if ($chrSize !=
+$ctgSize);
+    printf "%s\t%d\t%s\t%d\t%d\t%s\n", $a[5], $chrSize, $a[0], $a[1]-1,
+$a[2], $a[4];
+}
+close (FH);
+'_EOF_'
+    # << happy emacs
+
+chmod 775 agpToCtgPos2.pl 
+
+export S=../../genbank/Primary_Assembly/assembled_chromosomes
+cut -f2 ${S}/chr2acc |  while read ACC
+do
+    C=`grep "${ACC}" ${S}/chr2acc | cut -f1`
+    zcat ${S}/AGP/chr${C}.agp.gz \
+        | sed -e "s/^${ACC}/chr${C}/"
+done | ./agpToCtgPos2.pl /dev/stdin > ctgPos2.tab
+
+    hgLoadSqlTab felCat4 ctgPos2 $HOME/kent/src/hg/lib/ctgPos2.sql \ 
+     ctgPos2.tab
+
+    # add the track ctgPos2 to src/hg/makeDb/trackDb/cat/felCat4/trackDb.ra
+    # at src/makeDb/trackdb do "make update" or/and "make alpha"
+
+#########################################################################
+# all.joiner update, downloads and in pushQ - (DONE - 2010-04-09 Chin)
+    cd $HOME/kent/src/hg/makeDb/schema
+    # fixup all.joiner until this is a clean output
+    joinerCheck -database=felCat4 -all all.joiner
+    
+    mkdir /hive/data/genomes/felCat4/goldenPath
+    cd /hive/data/genomes/felCat4/goldenPath
+    makeDownloads.pl felCat4 > do.log 2>&1
+    #  vi /hive/data/genomes/felCat4/goldenPath/database/README.txt
+    #  vi  /hive/data/genomes/felCat4/goldenPath/bigZips/README.txt
+
+
+    #   now ready for pushQ entry
+    mkdir /hive/data/genomes/felCat4/pushQ
+    cd /hive/data/genomes/felCat4/pushQ
+    makePushQSql.pl felCat4 > felCat4.pushQ.sql 2> stderr.out
+    #   check for errors in stderr.out, some are OK, e.g.:
+# WARNING: felCat4 does not have seq
+# WARNING: felCat4 does not have extFile
+# WARNING: felCat4 does not have tableDescriptions
+
+    #   copy it to hgwbeta
+    #   use hgcat in .hg.conf.beta
+    scp -p felCat4.pushQ.sql hgwbeta:/tmp
+    ssh hgwbeta
+    cd /tmp
+    hgsql qapushq < felCat4.pushQ.sql
+    #   in that pushQ entry walk through each entry and see if the
+    #   sizes will set properly
+
+    #   reset to hguser in .hg.conf.beta
+
+
+#####################################################################
+## 6-Way Multiz (DONE - 2010-05-07 - Chin)
+##  (Redo mafSplit and all steps followed working 2010-05-23 - Chin)
+ 
+# use /cluster/home/chinhli/kent/src/hg/utils/phyloTrees/49way.nh 
+    mkdir /hive/data/genomes/felCat4/bed/multiz6way
+    cd /hive/data/genomes/felCat4/bed/multiz6way
+
+    /cluster/bin/phast/tree_doctor \
+      --prune-all-but=felCat3,hg19,mm9,canFam2,monDom5,ailMel1 \
+      --rename="felCat3 -> felCat4  " \
+/cluster/home/chinhli/kent/src/hg/utils/phyloTrees/49way.nh > 6way.nh 
+
+    #   rearrange felCat4 to the top, get some help from tree_doctor:
+    /cluster/bin/phast/tree_doctor --name-ancestors --reroot felCat4 \
+           --with-branch 6way.nh
+    #   edit out the ancestors, and move felCat4 from the bottom to
+    #   the top, resulting in this tree:
+
+(((felCat4:0.098612,(canFam2:0.052458,ailMel1:0.050000):0.050000):0.093470,(hg19:0.144018,mm9:0.356483):0.020593):0.258392,monDom5:0.340786);
+
+    #   more rearranging after seeing what the distance table looks like
+    #   below to get them appearing as much as possible in their
+    #   distance order top to bottom: (TO_DO)
+
+    # use the phyloGif to get the tree image
+   /cluster/bin/x86_64/phyloGif -phyloGif_tree=6way.nh \
+   -phyloGif_width=260  -phyloGif_height=260 > 6way.gif
+
+    #   more rearranging after seeing what the distance table looks like
+    #   below to get them appearing as much as possible in their
+    #   distance order top to bottom: (TO_DO)
+
+
+
+    #   usee this specification in the phyloGif tool after changing the names:
+    /cluster/bin/phast/tree_doctor \
+--rename="felCat4 -> Cat ;  hg19 -> Human ; ailMel1 -> Panda ; canFam2 -> Dog ; mm9 -> Mouse ; monDom5 -> Opossum " 6way.nh
+
+    #   http://genome.ucsc.edu/cgi-bin/phyloGif
+    #   to obtain a gif image for htdocs/images/phylo/felCat4_6way.gif
+   /cluster/bin/x86_64/phyloGif -phyloGif_tree=6way.nh \
+   -phyloGif_width=260  -phyloGif_height=260 > felCat4_6way.gif
+
+    /cluster/bin/phast/all_dists 6way.nh > 6way.distances.txt
+    # make sure all symlinks lastz.DB -> lastzDb-date
+    #   exist here and at the swap locations, the perl script expects this
+    #   in order to find featureBits numbers.
+    cd /hive/data/genomes/felCat4/bed]
+    ln -s blastz.hg19.swap lastz.hg19
+    ln -s blastz.canFam2.swap lastz.canFam2
+    ln -s blastz.mm9.swap lastz.mm9
+    ln -s blastz.monDom5.swap lastz.monDom5
+    ln -s /hive/data/genomes/felCat4/bed/lastzAilMel1.2010-06-01 /
+            lastz.ailMel1
+
+
+    cd /hive/data/genomes/felCat4/bed/multiz6way
+    #   Use 6way.distances.txt to create the table below
+    #   with this perl script:
+    cat << '_EOF_' > sizeStats.pl
+#!/usr/bin/env perl
+use strict;
+use warnings;
+
+
+open (FH, "grep -y felCat4 6way.distances.txt | sort -k3,3n|") or
+        die "can not read 6way.distances.txt";
+
+my $count = 0;
+while (my $line = <FH>) {
+    chomp $line;
+    my ($felCat4, $D, $dist) = split('\s+', $line);
+    my $chain = "chain" . ucfirst($D);
+    my $B="/hive/data/genomes/felCat4/bed/lastz.$D/fb.felCat4." .
+        $chain . "Link.txt";
+    my $chainLinkMeasure =
+        `awk '{print \$5}' ${B} 2> /dev/null | sed -e "s/(//; s/)//"`;
+    chomp $chainLinkMeasure;
+    $chainLinkMeasure = 0.0 if (length($chainLinkMeasure) < 1);
+    $chainLinkMeasure =~ s/\%//;
+    my $swapFile="/hive/data/genomes/${D}/bed/lastz.felCat4/fb.${D}.chainFelCatV17eLink.txt";
+    my $swapMeasure = "N/A";
+    if ( -s $swapFile ) {
+        $swapMeasure =
+            `awk '{print \$5}' ${swapFile} 2> /dev/null | sed -e "s/(//; s/)//"`;
+        chomp $swapMeasure;
+        $swapMeasure = 0.0 if (length($swapMeasure) < 1);
+        $swapMeasure =~ s/\%//;
+    }
+    my $orgName=
+    `hgsql -N -e "select organism from dbDb where name='$D';" hgcentraltest`;
+    chomp $orgName;
+    if (length($orgName) < 1) {
+        $orgName="N/A";
+    }
+    ++$count;
+   ++$count;
+    if ($swapMeasure eq "N/A") {
+        printf "# %02d  %.4f - %s %s\t(%% %.3f) (%s)\n", $count, $dist,
+            $orgName, $D, $chainLinkMeasure, $swapMeasure
+    } else {
+        printf "# %02d  %.4f - %s %s\t(%% %.3f) (%% %.3f)\n", $count, $dist,
+            $orgName, $D, $chainLinkMeasure, $swapMeasure
+    }
+}
+close (FH);
+'_EOF_'
+    # << happy emacs
+    chmod +x ./sizeStats.pl
+    ./sizeStats.pl
+#
+#       If you can fill in all the numbers in this table, you are ready for
+#       the multiple alignment procedure
+#
+#                         featureBits chainLink measures
+#                                  chainFlCat4Link
+#    distance                on felCat4 on other
+# 02  0.1986 - Panda ailMel1    (% 75.536) (% 67.130)
+# 04  0.2011 - Dog canFam2      (% 73.720) (% 62.098)
+# 06  0.3567 - Human hg19       (% 60.870) (% 43.696)
+# 08  0.5692 - Mouse mm9        (% 30.972) (% 24.310)
+# 10  0.7913 - Opossum monDom5  (% 8.364) (% 5.101)
+
+    # create species list and stripped down tree for autoMZ
+    sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
+        6way.nh > tmp.nh
+    echo `cat tmp.nh` > tree-commas.nh
+    echo `cat tree-commas.nh` | sed 's/ //g; s/,/ /g' > tree.nh
+    sed 's/[()]//g; s/,/ /g' tree.nh > species.list
+
+    #   collect the single whole mafs into one place for splitting:
+    mkdir singleMaf
+    cd singleMafs
+    ln -s ../../lastz.hg19/axtChain/felCat4.hg19.synNet.maf.gz .
+    ln -s ../../lastz.mm9/axtChain/felCat4.mm9.synNet.maf.gz .
+    ln -s ../../lastz.canFam2/axtChain/felCat4.canFam2.synNet.maf.gz .
+    ln -s ../../lastz.monDom5/axtChain/felCat4.monDom5.synNet.maf.gz .
+    # N50 for ailMel1 is 1281781 (less than 10 ~ 20 MB) use rbest
+    ln -s ../../lastz.ailMel1/mafRBestNet/felCat4.ailMel1.rbest.maf.gz .
+
+XXXX 05-24 mafSplit new option mafSplit -byTarget -useFullSequenceName
+    # to use rbest or net, use n50.pl chrom.size to tell
+    mkdir /hive/data/genomes/felCat4/bed/multiz6way/splitMaf
+    cd /hive/data/genomes/felCat4/bed/multiz6way/splitMaf
+    for D in ailMel1
+do
+    mkdir ${D}
+    mafSplit -useFullSequenceName -byTarget /dev/null ${D}/ \
+        ../singleMafs/felCat4.${D}.rbest.maf.gz
+done
+
+for D in hg19 mm9 canFam2 monDom5
+do
+    mkdir ${D}
+    mafSplit -useFullSequenceName -byTarget /dev/null ${D}/ \
+        ../singleMafs/felCat4.${D}.synNet.maf.gz
+done
+
+
+    cd /hive/data/genomes/felCat4/bed/multiz6way
+    mkdir penn
+    cp -p /cluster/bin/penn/multiz.2008-11-25/multiz penn
+    cp -p /cluster/bin/penn/multiz.2008-11-25/maf_project penn
+    cp -p /cluster/bin/penn/multiz.2008-11-25/maf_project penn
+    cp -p /cluster/bin/penn/multiz.2008-11-25/autoMZ penn
+
+    ssh swarm
+    mkdir /hive/data/genomes/felCat4/bed/multiz6way/run
+    mkdir /hive/data/genomes/felCat4/bed/multiz6way/run/maf
+
+    cd /hive/data/genomes/felCat4/bed/multiz6way/run
+
+    #  set the db and pairs directories here
+    cat > autoMultiz.csh << '_EOF_'
+#!/bin/csh -ef
+set db = felCat4
+set topDir = /hive/data/genomes/$db/bed/multiz6way
+set c = $1
+set result = $2
+set pennBin = $topDir/penn
+set run = `/bin/pwd`
+set tmp = /scratch/tmp/$db/multiz.$c
+set pairs = $topDir/splitMaf
+/bin/rm -fr $tmp
+/bin/mkdir -p $tmp
+/bin/cp -p $topDir/tree.nh $topDir/species.list $tmp
+pushd $tmp > /dev/null
+foreach s (`/bin/sed -e "s/^$db //" species.list`)
+    set in = $pairs/$s/$c.maf
+    set out = $db.$s.sing.maf
+    if (-e $in.gz) then
+        /bin/zcat $in.gz > $out
+        if (! -s $out) then
+            echo "##maf version=1 scoring=autoMZ" > $out
+        endif
+    else if (-e $in) then
+        /bin/ln -s $in $out
+    else
+        echo "##maf version=1 scoring=autoMZ" > $out
+    endif
+end
+set path = ($pennBin $path); rehash
+$pennBin/autoMZ + T=$tmp E=$db "`cat tree.nh`" $db.*.sing.maf $c.maf \
+        > /dev/null
+popd > /dev/null
+/bin/rm -f $result
+/bin/cp -p $tmp/$c.maf $result
+/bin/rm -fr $tmp
+/bin/rmdir --ignore-fail-on-non-empty /scratch/tmp/$db
+'_EOF_'
+# << happy emacs
+    chmod +x autoMultiz.csh
+
+    cat  << '_EOF_' > template
+#LOOP
+./autoMultiz.csh $(root1) {check out line+ /hive/data/genomes/felCat4/bed/multiz6way/run/maf/$(root1).maf}
+#ENDLOOP
+'_EOF_'
+# << happy emacs
+
+    find ../splitMaf -type f | grep "/[0-9][0-9][0-9].maf" \
+        | xargs -L 1 basename | sort -u > chr.part.list
+    gensub2 chr.part.list single template jobList
+    para -ram=8g create jobList
+
+XXXX TODO: 05-14 Chin: a script to check the integrity of the maf file
+      (i.e. hearde, size existence etc
+
+    #   put the split mafs back together into a single result
+    head -q -n 1 maf/000.maf > felCat4.6way.maf
+    for F in maf/*.maf
+do
+    grep -h -v "^#" ${F} >> felCat4.6way.maf
+done
+    tail -q -n 1 maf/000.maf >> felCat4.6way.maf
+
+
+    # load tables for a look
+    mkdir -p /gbdb/felCat4/multiz6way/maf
+    # cd /hive/data/genomes/felCat4/bed/multiz6way/maf
+    cd /hive/data/genomes/felCat4/bed/multiz6way/run
+
+    ln -s `pwd`/felCat4.6way.maf \
+        /gbdb/felCat4/multiz6way/maf/multiz6way.maf
+
+    # this generates an immense multiz6way.tab file in the directory
+    #   where it is running.  Best to run this over in scratch.
+    cd /data/tmp
+    time nice -n +19 hgLoadMaf \
+        -pathPrefix=/gbdb/felCat4/multiz6way/maf felCat4 multiz6way
+    # Indexing and tabulating /gbdb/felCat4/multiz6way/maf/multiz6way.maf
+    # Loading multiz6way into database
+    # Loaded 5500990 mafs in 1 files from /gbdb/felCat4/multiz6way/maf
+    # 
+    # real    5m56.770s
+
+    # load summary table
+    time nice -n +19 cat /gbdb/felCat4/multiz6way/maf/*.maf \
+        | hgLoadMafSummary felCat4 -minSize=30000 -verbose=2 \
+                -mergeGap=1500 -maxSize=200000  multiz6waySummary stdin
+    # Created 1029626 summary blocks from 15177161 components and 
+    #      5500990 mafs from stdin
+    # real    9m46.074s
+
+    # Gap Annotation
+    # prepare bed files with gap info
+    mkdir /hive/data/genomes/felCat4/bed/multiz6way/anno
+    cd /hive/data/genomes/felCat4/bed/multiz6way/anno
+    mkdir maf run
+
+    #   most of these will already exist from previous multiple
+    #   alignments
+    #   remove the echo from in front of the twoBitInfo command to get
+    #   them
+    #   to run if this loop appears to be correct
+    for DB in `cat ../species.list`
+do
+    CDIR="/hive/data/genomes/${DB}"
+    if [ ! -f ${CDIR}/${DB}.N.bed ]; then
+        echo "creating ${DB}.N.bed"
+        echo twoBitInfo -nBed ${CDIR}/${DB}.2bit ${CDIR}/${DB}.N.bed
+    else
+        ls -og ${CDIR}/${DB}.N.bed
+    fi
+done
+
+    cd run
+    rm -f nBeds sizes
+    for DB in `sed -e "s/felCat4 //" ../../species.list`
+do
+    echo "${DB} "
+    ln -s  /hive/data/genomes/${DB}/${DB}.N.bed ${DB}.bed
+    echo ${DB}.bed  >> nBeds
+    ln -s  /hive/data/genomes/${DB}/chrom.sizes ${DB}.len
+    echo ${DB}.len  >> sizes
+done
+
+    #   the annotation step requires large memory, run on memk nodes
+
+    ssh memk
+    cd /hive/data/genomes/felCat4/bed/multiz6way/anno/run
+    ls ../../run/maf | sed -e "s/.maf//" > chr.list
+    cat << '_EOF_' > template
+#LOOP
+./anno.csh $(root1) {check out line+ ../maf/$(root1).maf}
+#ENDLOOP
+'_EOF_'
+    # << happy emacs
+
+    cat << '_EOF_' > anno.csh
+#!/bin/csh -fe
+
+set inMaf = ../../run/maf/$1.maf
+set outMaf = ../maf/$1.maf
+rm -f $outMaf
+mafAddIRows -nBeds=nBeds $inMaf /hive/data/genomes/felCat4/felCat4.2bit $outMaf
+'_EOF_'
+    # << happy emacs
+    chmod +x anno.csh
+
+    gensub2 chr.list single template jobList
+    para -ram=30g create jobList
+    #   specify lots of ram to get one job per node
+    para -ram=30g push
+    #   
+# para time
+# Completed: 256 of 256 jobs
+# CPU time in finished jobs:       3735s      62.25m     1.04h    0.04d  0.000 y
+# IO & Wait Time:                  2382s      39.70m     0.66h    0.03d  0.000 y
+# Average job time:                  24s       0.40m     0.01h    0.00d
+# Longest finished job:             600s      10.00m     0.17h    0.01d
+# Submission to last job:           787s      13.12m     0.22h    0.01d
+
+    ssh hgwdev
+    rm -fr /gbdb/felCat4/multiz6way/maf
+    mkdir /gbdb/felCat4/multiz6way/maf
+    cd /hive/data/genomes/felCat4/bed/multiz6way/anno/maf
+    ln -s `pwd`/*.maf /gbdb/felCat4/multiz6way/maf/
+    #   by loading this into the table multiz6way, it will replace the
+    #   previously loaded table with the unannotated mafs
+    #   huge temp files are made, do them on local disk
+    cd /data/tmp
+    time nice -n +19 hgLoadMaf \
+        -pathPrefix=/gbdb/felCat4/multiz6way/maf felCat4 multiz6way
+    #   real    93m33.812s
+    #   Loading multiz6way into database
+    #   Loaded 6501831 mafs in 256 files from /gbdb/felCat4/multiz6way/maf
+
+    time nice -n +19 cat /gbdb/felCat4/multiz6way/maf/*.maf \
+        | hgLoadMafSummary felCat4 -minSize=30000 -mergeGap=1500 \
+                 -maxSize=200000  multiz6waySummary stdin
+    #   Created 1029626 summary blocks from 15177161 components \
+    #   and 6501831 mafs from stdin
+    #   Loading into felCat4 table multiz6waySummary...
+    #   Loading complete
+    #   real    72m12.808s
+
+    #   by loading this into the table multiz6waySummary, it will
+    #   replace
+    #   the previously loaded table with the unannotated mafs
+    #   remove the multiz6way*.tab files in this /data/tmp directory
+# -rw-rw-r-- 1 338738590 May  6 15:49 multiz6way.tab
+# -rw-rw-r-- 1  49960609 May  6 17:04 multiz6waySummary.tab
+    wc -l multiz6way*.tab
+  #  6501831 multiz6way.tab
+  #  1029626 multiz6waySummary.tab
+  #  7531457 total
+    rm multiz6way*.tab
+
+    # create some downloads
+    mkdir -p /hive/data/genomes/felCat4/bed/multiz6way/downloads/maf
+    cd /hive/data/genomes/felCat4/bed/multiz6way/downloads/maf
+    # time cp -p ../../anno/maf/chr*.maf .
+    time cp -p ../../anno/maf/*.maf .
+    #   real    6m55.315s
+    time gzip --rsyncable *.maf
+    #   real    30m37.474s
+    time md5sum *.gz > md5sum.txt
+    #   real    1m36.814s
+    #   user    0m13.210s
+    #   sys     0m2.212s
+
+
+####################################################################
+# HUMAN (hg18) PROTEINS TRACK (DONE braney 2010-05-05)
+    # bash  if not using bash shell already
+
+    cd /cluster/data/felCat4
+    mkdir /cluster/data/felCat4/blastDb
+
+    awk '{if ($2 > 1000000) print $1}' chrom.sizes > 1meg.lst
+    twoBitToFa -seqList=1meg.lst  felCat4.2bit temp.fa
+    faSplit gap temp.fa 1000000 blastDb/x -lift=blastDb.lft
+    rm temp.fa 1meg.lst
+
+    awk '{if ($2 <= 1000000) print $1}' chrom.sizes > less1meg.lst
+    twoBitToFa -seqList=less1meg.lst  felCat4.2bit temp.fa
+    faSplit about temp.fa 1000000 blastDb/y 
+    rm temp.fa less1meg.lst
+
+    cd blastDb
+    for i in *.fa
+    do
+	/hive/data/outside/blast229/formatdb -i $i -p F
+    done
+    rm *.fa
+    ls *.nsq | wc -l
+# 2765
+
+    mkdir -p /cluster/data/felCat4/bed/tblastn.hg18KG
+    cd /cluster/data/felCat4/bed/tblastn.hg18KG
+    echo  ../../blastDb/*.nsq | xargs ls -S | sed "s/\.nsq//"  > query.lst
+    wc -l query.lst
+# 2765 query.lst
+
+   # we want around 350000 jobs
+   calc `wc /cluster/data/hg18/bed/blat.hg18KG/hg18KG.psl | awk '{print $1}'`/\(350000/`wc query.lst | awk '{print $1}'`\)
+
+# 36727/(350000/2765) = 290.143300
+
+   mkdir -p kgfa
+   split -l 290 /cluster/data/hg18/bed/blat.hg18KG/hg18KG.psl  kgfa/kg
+   cd kgfa
+   for i in *; do 
+     nice pslxToFa $i $i.fa; 
+     rm $i; 
+   done
+   cd ..
+   ls -1S kgfa/*.fa > kg.lst
+   wc kg.lst
+#  127  127 1651 kg.lst
+
+   mkdir -p blastOut
+   for i in `cat kg.lst`; do  mkdir blastOut/`basename $i .fa`; done
+   tcsh
+   cd /cluster/data/felCat4/bed/tblastn.hg18KG
+   cat << '_EOF_' > blastGsub
+#LOOP
+blastSome $(path1) {check in line $(path2)} {check out exists blastOut/$(root2)/q.$(root1).psl }
+#ENDLOOP
+'_EOF_'
+
+   cat << '_EOF_' > blastSome
+#!/bin/sh
+BLASTMAT=/hive/data/outside/blast229/data
+export BLASTMAT
+g=`basename $2`
+f=/tmp/`basename $3`.$g
+for eVal in 0.01 0.001 0.0001 0.00001 0.000001 1E-09 1E-11
+do
+if /hive/data/outside/blast229/blastall -M BLOSUM80 -m 0 -F no -e $eVal -p tblastn -d $1 -i $2 -o $f.8
+then
+        mv $f.8 $f.1
+        break;
+fi
+done
+if test -f  $f.1
+then
+    if /cluster/bin/i386/blastToPsl $f.1 $f.2
+    then
+	liftUp -nosort -type=".psl" -nohead $f.3 /cluster/data/felCat4/blastDb.lft carry $f.2
+        liftUp -nosort -type=".psl" -pslQ -nohead $3.tmp /cluster/data/hg18/bed/blat.hg18KG/protein.lft warn $f.3
+        if pslCheck -prot $3.tmp
+        then                  
+            mv $3.tmp $3     
+            rm -f $f.1 $f.2 $f.3 $f.4
+        fi
+        exit 0               
+    fi                      
+fi                         
+rm -f $f.1 $f.2 $3.tmp $f.8 $f.3 $f.4
+exit 1
+'_EOF_'
+    # << happy emacs
+    chmod +x blastSome
+    exit 
+    
+    ssh swarm
+    cd /cluster/data/felCat4/bed/tblastn.hg18KG
+    gensub2 query.lst kg.lst blastGsub blastSpec
+    para create blastSpec
+#    para try, check, push, check etc.
+
+    para time
+
+# Completed: 351155 of 351155 jobs
+# CPU time in finished jobs:   15303249s  255054.15m  4250.90h  177.12d  0.485 y
+# IO & Wait Time:               2843300s   47388.34m   789.81h   32.91d  0.090 y
+# Average job time:                  52s       0.86m     0.01h    0.00d
+# Longest finished job:             134s       2.23m     0.04h    0.00d
+# Submission to last job:         19084s     318.07m     5.30h    0.22d
+
+    ssh swarm
+    cd /cluster/data/felCat4/bed/tblastn.hg18KG
+    mkdir chainRun
+    cd chainRun
+    tcsh
+    cat << '_EOF_' > chainGsub
+#LOOP
+chainOne $(path1)
+#ENDLOOP
+'_EOF_'
+
+    cat << '_EOF_' > chainOne
+(cd $1; cat q.*.psl | simpleChain -prot -outPsl -maxGap=150000 stdin ../c.`basename $1`.psl)
+'_EOF_'
+    chmod +x chainOne
+    ls -1dS ../blastOut/kg?? > chain.lst
+    gensub2 chain.lst single chainGsub chainSpec
+    # do the cluster run for chaining
+    para create chainSpec
+    para try, check, push, check etc.
+
+# Completed: 127 of 127 jobs
+# CPU time in finished jobs:     544804s    9080.07m   151.33h    6.31d  0.017 y
+# IO & Wait Time:                 82233s    1370.55m    22.84h    0.95d  0.003 y
+# Average job time:                4937s      82.29m     1.37h    0.06d
+# Longest finished job:           24063s     401.05m     6.68h    0.28d
+# Submission to last job:         24074s     401.23m     6.69h    0.28d
+
+
+    cd /cluster/data/felCat4/bed/tblastn.hg18KG/blastOut
+    for i in kg??
+    do
+       cat c.$i.psl | awk "(\$13 - \$12)/\$11 > 0.6 {print}" > c60.$i.psl
+       sort -rn c60.$i.psl | pslUniq stdin u.$i.psl
+       awk "((\$1 / \$11) ) > 0.60 { print   }" c60.$i.psl > m60.$i.psl
+       echo $i
+    done
+    sort u.*.psl m60* | uniq | sort -T /tmp -k 14,14 -k 16,16n -k 17,17n > ../blastHg18KG.psl
+    cd ..
+    pslCheck blastHg18KG.psl
+# checked: 50782 failed: 0 errors: 0
+
+    # load table 
+    ssh hgwdev
+    cd /cluster/data/felCat4/bed/tblastn.hg18KG
+    hgLoadPsl felCat4 blastHg18KG.psl
+
+    # check coverage
+    featureBits felCat4 blastHg18KG 
+# 23826621 bases of 1990635005 (1.197%) in intersection
+
+    featureBits felCat4 blastHg18KG refGene  -enrichment
+# blastHg18KG 1.197%, refGene 0.021%, both 0.013%, cover 1.12%, enrich 52.73x
+
+    featureBits felCat4 blastHg18KG xenoRefGene  -enrichment
+# blastHg18KG 1.197%, xenoRefGene 2.066%, both 1.010%, cover 84.36%, enrich 40.83x
+    rm -rf blastOut
+#end tblastn
+
+
+############################################################################
+## Annotate 6-way multiple alignment with gene annotations
+##              (DONE 2010-05-07 - Chin)
+    # Gene frames
+    ## survey all genomes to see what type of gene track to use
+    ## Rules to decide which genes to use in order:
+    ##    1. ucscGene
+    ##    2. ensGene
+    ##    3. refSeq
+    ##    4. Other include mRNA
+    ## Based on this, 
+    #   hg19, mm9  ucscGene
+    #   monDom5, canFam2:  ensGene
+    #   ailMel1: mRNA
+
+    ssh hgwdev
+    mkdir /hive/data/genomes/felCat4/bed/multiz6way/frames
+    cd /hive/data/genomes/felCat4/bed/multiz6way/frames
+    #
+    #   survey all the genomes to find out what kinds of gene tracks
+    #   they have
+    cat << '_EOF_' > showGenes.csh
+#!/bin/csh -fe
+foreach db (`cat ../species.list`)
+    echo -n "${db}: "
+    set tables = `hgsql $db -N -e "show tables like '%Gene%'"`
+    foreach table ($tables) 
+        if ($table == "ensGene" || $table == "refGene" || $table == "mgcGenes" || \
+            $table == "knownGene" || $table == "xenoRefGene" ) then
+                set count = `hgsql $db -N -e "select count(*) from $table"`
+                echo -n "${table}: ${count}, "
+        endif
+    end
+    set orgName = `hgsql hgcentraltest -N -e \
+            "select scientificName from dbDb where name='$db'"`
+    set orgId = `hgsql felCat4 -N -e \
+            "select id from organism where name='$orgName'"`
+    if ($orgId == "") then
+        echo "Mrnas: 0"
+    else
+        set count = `hgsql felCat4 -N -e "select count(*) from gbCdnaInfo where organism=$orgId"`
+        echo "Mrnas: ${count}"
+    endif
+end
+'_EOF_'
+    # << happy emacs
+    chmod +x ./showGenes.csh
+    ./showGenes.csh
+
+    # felCat4: refGene: 351, xenoRefGene: 210753, Mrnas: 2163
+    # canFam2: ensGene: 30299, refGene: 1039, xenoRefGene: 210152, Mrnas: 3211
+    # ailMel1: xenoRefGene: 200505, Mrnas: 51
+    # hg19: ensGene: 143123, knownGene: 77614, mgcGenes: 31354, 
+    #       refGene: 35655, xenoRefGene: 126675, Mrnas: 287241
+    # mm9: ensGene: 83124, knownGene: 54042, mgcGenes: 26582, 
+    #      refGene: 27122, xenoRefGene: 122159, Mrnas: 258055
+    # monDom5: ensGene: 34745, refGene: 392, xenoRefGene: 213164, Mrnas: 1605
+    #
+    #   Based on this, use
+    #    1. knownGenes for hg19, mm9
+    #    2. ensGene for monDom5, canFam2
+    #    3, xenoRefGene for ailMel1, felCat4
+
+    mkdir genes
+    # knownGene
+
+    for DB in hg19 mm9
+do
+    hgsql -N -e "select name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds from knownGene" ${DB} \
+      | genePredSingleCover stdin stdout | gzip -2c \
+        > /scratch/tmp/${DB}.tmp.gz
+    mv /scratch/tmp/${DB}.tmp.gz genes/$DB.gp.gz
+    echo "${DB} done"
+done
+
+    echo "monDom5 canFam2" \
+    | sed -e "s/  */ /g" > ensGene.list
+
+    # ensGene 
+    for DB in monDom5 canFam2
+do  
+    hgsql -N -e "select name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds from ensGene" ${DB} \
+      | genePredSingleCover stdin stdout | gzip -2c \
+        > /scratch/tmp/${DB}.tmp.gz
+    mv /scratch/tmp/${DB}.tmp.gz genes/$DB.gp.gz
+    echo "${DB} done"
+done
+
+
+    # xenoRefGene
+    echo "ailMel1 felCat42" \
+            | sed -e "s/  */ /g" > xenoRef.list
+
+    for DB in ailMel1 felCat4
+do  
+    hgsql -N -e "select name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds from xenoRefGene" ${DB} \
+      | genePredSingleCover stdin stdout | gzip -2c \
+        > /scratch/tmp/${DB}.tmp.gz
+    mv /scratch/tmp/${DB}.tmp.gz genes/$DB.gp.gz
+    echo "${DB} done"
+done
+
+    #   the following single command doesn't work on any 32 Gb computer,
+    #   requires much more memory, turn it into a kluster job
+    ssh swarm
+    cd /hive/data/genomes/felCat4/bed/multiz6way/frames
+    cat << '_EOF_' > runOne
+#!/bin/csh -fe
+
+set C = $1
+set G = $2
+
+cat ../run/maf/${C}.maf | genePredToMafFrames felCat4 stdin stdout \
+        ${G} genes/${G}.gp.gz | gzip > parts/${C}.${G}.mafFrames.gz
+'_EOF_'
+    # << happy emacs
+    chmod +x runOne
+
+    ls ../run/maf | sed -e "s/.maf//" > chr.list
+    ls genes | sed -e "s/.gp.gz//" | grep -v felCat4 > gene.list
+
+    cat << '_EOF_' > template
+#LOOP
+runOne $(root1) $(root2) {check out exists+ parts/$(root1).$(root2).mafFrames.gz}
+#ENDLOOP
+'_EOF_'
+    # << happy emacs
+
+    mkdir parts
+    gensub2 chr.list gene.list template jobList
+    para -ram=8g create jobList
+    para try ... check ... push
+
+    para time
+# Completed: 1280 of 1280 jobs
+# CPU time in finished jobs:        653s      10.88m     0.18h    0.01d  0.000 y
+# IO & Wait Time:                 11158s     185.97m     3.10h    0.13d  0.000 y
+# Average job time:                   9s       0.15m     0.00h    0.00d
+# Longest finished job:              63s       1.05m     0.02h    0.00d
+# Submission to last job:           138s       2.30m     0.04h    0.00d
+# Estimated complete:                 0s       0.00m     0.00h    0.00d
+
+
+    # see what it looks like in terms of number of annotations per DB:
+    find ./parts -type f | while read F
+do
+    zcat ${F}
+done | cut -f4 | sort | uniq -c | sort -n > annotation.survey.txt
+
+   cat annotation.survey.txt
+   #   157202 monDom5
+   #   180207 mm9
+   #   188394 hg19
+   #   189876 canFam2
+   #   201403 ailMel1
+
+
+    #   load the resulting file
+    ssh hgwdev
+    cd /cluster/data/felCat4/bed/multiz6way/frames
+    find ./parts -type f | while read F
+do
+    zcat ${F}
+done | sort -k1,1 -k2,2n > multiz6wayFrames.bed
+
+    hgLoadMafFrames felCat4 multiz6wayFrames stdin
+
+    featureBits -countGaps felCat4 multiz6wayFrames.bed
+    #   30677403 bases of 3160303948 (0.971%) in intersection
+
+    #   enable the trackDb entries:
+# frames multiz6wayFrames
+# irows on
+    #   appears to work OK
+
+#############################################################################
+## create upstream refGene maf files
+    cd /hive/data/genomes/felCat4/bed/multiz6way/downloads/maf
+    # bash script
+#!/bin/sh
+for S in 1000 2000 5000
+do
+    echo "making upstream${S}.maf"
+    featureBits felCat4 refGene:upstream:${S} -fa=/dev/null -bed=stdout \
+        | perl -wpe 's/_up[^\t]+/\t0/' | sort -k1,1 -k2,2n \
+        | /cluster/bin/$MACHTYPE/mafFrags felCat4 multiz6way \
+                stdin stdout \
+                -orgs=/hive/data/genomes/felCat4/bed/multiz6way/species.list \
+        | gzip -c > upstream${S}.maf.gz
+    echo "done upstream${S}.maf.gz"
+done
+
+    mkdir -p /usr/local/apache/htdocs-hgdownload/goldenPath/felCat4/multiz6way/maf
+    cd /usr/local/apache/htdocs-hgdownload/goldenPath/felCat4/multiz6way/maf
+    ln -s /hive/data/genomes/felCat4/bed/multiz6way/downloads/maf/up*.gz .
+    md5sum up*.gz >> md5sum.txt
+
+
+#############################################################################
+# phastCons 6-way (working - 2010-05-20 - Chin)
+    #   was unable to split the full chrom MAF files, now working on the
+    #   maf files as they were split up during multiz
+    
+    # split 6way mafs into 10M chunks and generate sufficient
+    # statistics 
+    # files for # phastCons
+    ssh swarm
+    mkdir -p /hive/data/genomes/felCat4/bed/multiz6way/cons/msa.split
+    mkdir /hive/data/genomes/felCat4/bed/multiz6way/cons/ss
+    mkdir -p /hive/data/genomes/felCat4/bed/multiz6way/cons/msa.split/2010-05-21
+    cd /hive/data/genomes/felCat4/bed/multiz6way/cons/msa.split/2010-05-21
+    mkdir ss
+    
+    cat << '_EOF_' > doSplit.csh
+#!/bin/csh -ef
+set c = $1
+set MAF = /hive/data/genomes/felCat4/bed/multiz6way/anno/maf/*.maf
+set WINDOWS = /hive/data/genomes/felCat4/bed/multiz6way/cons/msa.split/2010-05-21/ss/$c
+set WC = `cat $MAF | wc -l`
+set NL = `grep "^#" $MAF | wc -l`
+if ( -s $2 ) then
+    exit 0
+endif
+if ( -s $2.running ) then
+    exit 0
+endif
+
+date >> $2.running
+
+rm -fr $WINDOWS
+mkdir $WINDOWS
+pushd $WINDOWS > /dev/null
+if ( $WC != $NL ) then
+/cluster/bin/phast.build/cornellCVS/phast.2009-10-19/bin/msa_split \
+    $MAF -i MAF -o SS -r $WINDOWS/$c -w 10000000,0 -I 1000 -B 5000
+endif
+popd > /dev/null
+date >> $2
+rm -f $2.running
+'_EOF_'
+    # << happy emacs
+    chmod +x doSplit.csh
+
+    cat << '_EOF_' > template
+#LOOP
+doSplit.csh $(root1) {check out line+ $(root1).done}
+#ENDLOOP
+'_EOF_'
+    # << happy emacs
+
+    #   do the easy ones first to see some immediate results
+    ls -1S -r ../../../anno/maf | sed -e "s/.maf//; s/felCat4_//" > maf.list
+
+    gensub2 maf.list single template jobList
+    para -ram=8g create jobList
+    para try ... check ... etc
+# Completed: 503 of 504 jobs
+# Crashed: 1 jobs
+# CPU time in finished jobs:      14171s     236.18m     3.94h    0.16d
+# 0.000 y
+# IO & Wait Time:                188193s    3136.55m    52.28h    2.18d
+# 0.006 y
+# Average job time:                 402s       6.71m     0.11h    0.00d
+# Longest finished job:            1597s      26.62m     0.44h    0.02d
+# Submission to last job:          2586s      43.10m     0.72h    0.03d
+    #   the one crashed job is felCat4_chr18_gl000207_random.00.maf
+
+    #   XXX - this did not work
+    #   this takes a really long time.  memk was down to 2 usable
+    #   machines - got it finished manually on a combination of
+    #   hgwdevnew CPUs
+    #   and other machines
+
+    # Estimate phastCons parameters
+    #   experimented with this as a parasol job on hgwdevnew to try a
+    #   number
+    #   of SS files.  With a command of:
+
+/cluster/bin/phast/x86_64/phyloFit -i SS ${SS} \
+--tree "(((((((((((((((((felCat4,panTro2),gorGor1),ponAbe2),rheMac2),calJac1),tarSyr1),(micMur1,otoGar1)),tupBel1),(((((mm9,rn4),dipOrd1),cavPor3),speTri1),(oryCun1,ochPri2))),(((vicPac1,(turTru1,bosTau4)),((equCab2,(felCat3,canFam2)),(myoLuc1,pteVam1))),(eriEur1,sorAra1))),(((loxAfr2,proCap1),echTel1),(dasNov2,choHof1))),monDom4),ornAna1),((galGal3,taeGut1),anoCar1)),xenTro2),(((tetNig1,fr2),(gasAcu1,oryLat2)),danRer5)),petMar1)" \
+--out-root=$OUT/starting_tree
+
+    #   running over the input files ../ss/*/*.ss results to
+#.../genomes/felCat4/bed/multiz6way/cons/startingTree/result/*/starting-tree.mod
+
+    # add up the C and G: 
+    find ./result -type f | xargs ls -rt | while read F
+do  
+    D=`dirname $F`
+    echo -n `basename $D`" - " 
+    grep BACKGROUND ${F} | awk '{printf "%0.3f\n", $3 + $4;}'
+done
+    #   counting number of species seen in the maf file:
+    find ./result -type f | xargs ls -rt | while read F
+do  
+    D=`dirname $F`
+    echo -n `basename $D`" - "
+    grep TREE $F | sed -e \
+"s/TREE: //; s/(//g; s/)//g; s/[0-9].[0-9][0-9][0-9][0-9][0-9][0-9]//g; s/://g"  | tr ',' '\n' | wc -l
+done
+
+
+    # Run phastCons
+    #   This job is I/O intensive in its output files, beware where this
+    #   takes place or do not run too many at once.
+    ssh swarm
+    mkdir -p /hive/data/genomes/felCat4/bed/multiz6way/cons/run.cons
+    cd /hive/data/genomes/felCat4/bed/multiz6way/cons/run.cons
+
+    #   there are going to be several different phastCons runs using
+    #   this same script.  They trigger off of the current working
+    #   directory
+    #   $cwd:t which is the "grp" in this script.  It is one of:
+    #   all primates placentals
+
+    cat << '_EOF_' > doPhast.csh
+#!/bin/csh -fe 
+set PHASTBIN = /cluster/bin/phast.build/cornellCVS/phast.2009-10-21/bin
+set c = $1
+set cX = $1:r
+set f = $2
+set len = $3
+set cov = $4
+set rho = $5
+set grp = $cwd:t
+set cons = /hive/data/genomes/felCat4/bed/multiz6way/cons
+set tmp = $cons/tmp/$f
+mkdir -p $tmp
+set ssSrc = $cons
+set useGrp = "$grp.mod"
+if ( $cX == "chrX" ) then
+    set useGrp = "$grp.chrX.mod"
+endif
+if (-s $cons/$grp/$grp.non-inf) then
+  ln -s $cons/$grp/$grp.mod $tmp 
+  ln -s $cons/$grp/$grp.chrX.mod $tmp
+  ln -s $cons/$grp/$grp.non-inf $tmp
+  ln -s $ssSrc/msa.split/2009-10-21/ss/$c/$f.ss $tmp
+else 
+  ln -s $ssSrc/msa.split/2009-10-21/ss/$c/$f.ss $tmp
+  ln -s $cons/$grp/$grp.mod $tmp 
+  ln -s $cons/$grp/$grp.chrX.mod $tmp
+endif 
+pushd $tmp > /dev/null
+if (-s $grp.non-inf) then
+  $PHASTBIN/phastCons $f.ss $useGrp \ 
+    --rho $rho --expected-length $len --target-coverage $cov --quiet \
+    --not-informative `cat $grp.non-inf` \
+    --seqname $c --idpref $c --most-conserved $f.bed --score > $f.pp
+else
+  $PHASTBIN/phastCons $f.ss $useGrp \ 
+    --rho $rho --expected-length $len --target-coverage $cov --quiet \
+    --seqname $c --idpref $c --most-conserved $f.bed --score > $f.pp
+endif
+popd > /dev/null
+mkdir -p pp/$c bed/$c
+sleep 4
+touch pp/$c bed/$c
+rm -f pp/$c/$f.pp
+rm -f bed/$c/$f.bed
+mv $tmp/$f.pp pp/$c
+mv $tmp/$f.bed bed/$c
+rm -fr $tmp
+'_EOF_'
+    # << happy emacs
+    chmod a+x doPhast.csh
+
+    #   this template will serve for all runs
+    #   root1 == chrom name, file1 == ss file name without .ss suffix
+    cat << '_EOF_' > template
+#LOOP
+../run.cons/doPhast.csh $(root1) $(file1) 45 0.3 0.3 {check out line+ pp/$(root1)/$(file1).pp}
+#ENDLOOP
+'_EOF_'
+    # << happy emacs
+
+    ls -1S ../msa.split/2009-10-21/ss/chr*/chr* | sed -e "s/.ss$//" > ss.list
+
+    # Create parasol batch and run it
+    # run for all species
+    cd /hive/data/genomes/felCat4/bed/multiz6way/cons
+    mkdir -p all
+    cd all
+    #   Using the two different .mod tree 
+    cp -p ../../4dNoX/phyloFit.NoChrX.mod ./all.mod
+    cp -p ../../4dX/phyloFit.chrX.mod ./all.chrX.mod
+
+    gensub2 ../run.cons/ss.list single ../run.cons/template jobList
+    para -ram=8g create jobList 
+    para try ... check ... push ... etc.
+# Completed: 581 of 581 jobs
+# CPU time in finished jobs:      41877s     697.95m    11.63h    0.48d
+# 0.001 y
+# IO & Wait Time:                 39172s     652.87m    10.88h    0.45d
+# 0.001 y
+# Average job time:                 139s       2.32m     0.04h    0.00d
+# Longest finished job:             329s       5.48m     0.09h    0.00d
+# Submission to last job:          2240s      37.33m     0.62h    0.03d
+
+    # create Most Conserved track
+    cd /hive/data/genomes/felCat4/bed/multiz6way/cons/all
+    cut -f1 ../../../../chrom.sizes | while read C
+do  
+    ls -d bed/${C}.[0-9][0-9] 2> /dev/null | while read D
+    do
+        cat ${D}/${C}*.bed
+    done | sort -k1,1 -k2,2n \
+    | awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", "'${C}'", $2, $3, $5, $5;}'
+done > tmpMostConserved.bed
+/cluster/bin/scripts/lodToBedScore tmpMostConserved.bed > mostConserved.bed
+
+
+    # load into database
+    ssh hgwdev
+    cd /hive/data/genomes/felCat4/bed/multiz6way/cons/all
+    time nice -n +19 hgLoadBed felCat4 phastConsElements6way mostConserved.bed
+    #   Loaded 5163775 elements of size 6
+    #   real     1m44.439s
+
+    # Try for 5% overall cov, and 70% CDS cov 
+    featureBits felCat4 -enrichment refGene:cds phastConsElements6way
+    #   --rho 0.3 --expected-length 45 --target-coverage 0.3
+    #   refGene:cds 1.187%, phastConsElements6way 5.065%,
+    #   both 0.884%, cover 74.46%, enrich 14.70x
+
+    # Create merged posterier probability file and wiggle track data
+    # files
+    cd /hive/data/genomes/felCat4/bed/multiz6way/cons/all
+    mkdir downloads
+    cat << '_EOF_' > phastCat.sh 
+#!/bin/sh
+
+mkdir -p downloads
+cut -f1 ../../../../chrom.sizes | while read C
+do
+    echo -n "${C} ... working ... "
+    ls -d pp/${C}.[0-9][0-9] 2> /dev/null | while read D
+    do
+        cat ${D}/${C}*.pp | sed -e "s/chrom=${C}.[0-9][0-9]/chrom=${C}/"
+    done | gzip > downloads/${C}.phastCons6way.wigFix.gz
+    echo "done"
+done
+'_EOF_'
+    #   << happy emacs
+    chmod +x phastCat.sh
+    time nice -n +19 ./phastCat.sh
+    #   real    30m2.623s
+    
+    #   encode those files into wiggle data
+    zcat downloads/*.wigFix.gz \
+        | wigEncode stdin phastCons6way.wig phastCons6way.wib
+    #   Converted stdin, upper limit 1.00, lower limit 0.00
+    #   real    18m37.881s
+    du -hsc *.wi?
+    #   2.7G    phastCons6way.wib
+    #   271M    phastCons6way.wig
+    #   3.0G    total
+    
+    #   encode into a bigWig file:
+    #   (warning wigToBigWig process grows to about 36 Gb)
+    #   in bash, to avoid the 32 Gb memory limit:
+sizeG=188743680
+export sizeG
+ulimit -d $sizeG
+ulimit -v $sizeG
+    zcat downloads/*.wigFix.gz \
+        | wigToBigWig stdin ../../../../chrom.sizes phastCons6way.bw
+    #   real    52m36.142s
+# -rw-rw-r--   1 21667535139 Oct 20 13:59 phastCons6way.bw
+    mkdir /gbdb/felCat4/bbi
+    ln -s `pwd`/phastCons6way.bw /gbdb/felCat4/bbi
+    #   if you wanted to use the bigWig file, loading bigWig table:
+    hgsql felCat4 -e 'drop table if exists phastCons6way; \
+            create table phastCons6way (fileName varchar(255) not null); \
+            insert into phastCons6way values
+        ("/gbdb/felCat4/bbi/phastCons6way.bw");'
+
+    # Load gbdb and database with wiggle.
+    ssh hgwdev
+    cd /hive/data/genomes/felCat4/bed/multiz6way/cons/all
+    ln -s `pwd`/phastCons6way.wib /gbdb/felCat4/multiz6way/phastCons6way.wib
+    time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/felCat4/multiz6way felCat4 \
+        phastCons6way phastCons6way.wig
+    #   real    1m45.381s
+
+    wigTableStats.sh felCat4 phastCons6way
+# db.table      min max mean count sumData
+# felCat4.phastCons6way     0 1 0.103653 2845303719 2.94924e+08
+#       stdDev viewLimits
+#       0.230184 viewLimits=0:1
+
+    #  Create histogram to get an overview of all the data
+    ssh hgwdev
+    cd /hive/data/genomes/felCat4/bed/multiz6way/cons/all
+    time nice -n +19 hgWiggle -doHistogram -db=felCat4 \
+        -hBinSize=0.001 -hBinCount=1000 -hMinVal=0.0 -verbose=2 \
+            phastCons6way > histogram.data 2>&1
+    #   real    7m37.212s
+
+    #   create plot of histogram:
+    cat << '_EOF_' | gnuplot > histo.png
+set terminal png small color x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
+set size 1.4, 0.8
+set key left box
+set grid noxtics
+set grid ytics
+set title " Human Hg19 Histogram phastCons6way track"
+set xlabel " phastCons6way score"
+set ylabel " Relative Frequency"
+set y2label " Cumulative Relative Frequency (CRF)"
+set y2range [0:1]
+set y2tics 
+set yrange [0:0.02]
+
+plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
+        "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
+'_EOF_'
+    #   << happy emacs
+
+    display histo.png &
+
+
+
+#############################################################################