src/hg/makeDb/doc/tetNig1.txt 1.21

1.21 2010/01/16 00:39:10 hiram
liftOver to tetNig2 done
Index: src/hg/makeDb/doc/tetNig1.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/tetNig1.txt,v
retrieving revision 1.20
retrieving revision 1.21
diff -b -B -U 1000000 -r1.20 -r1.21
--- src/hg/makeDb/doc/tetNig1.txt	20 Sep 2009 17:16:47 -0000	1.20
+++ src/hg/makeDb/doc/tetNig1.txt	16 Jan 2010 00:39:10 -0000	1.21
@@ -1,3164 +1,3176 @@
 # for emacs: -*- mode: sh; -*-
 
 
 # Tetraodon Nigirividis from Genoscope, version v6 (released 5/6/02)
 #    Project website:
 #    http://www.genoscope.cns.fr/externe/tetraodon/sequencing.html
 
 #  NOTE:  this doc may have genePred loads that fail to include
 #  the bin column.  Please correct that for the next build by adding
 #  a bin column when you make any of these tables:
 #
 #  mysql> SELECT tableName, type FROM trackDb WHERE type LIKE "%Pred%";
 #  +------------+--------------------+
 #  | tableName  | type               |
 #  +------------+--------------------+
 #  | gaze       | genePred           |
 #  | geneid     | genePred geneidPep |
 #  | genscan    | genePred           |
 #  | hoxGenes   | genePred           |
 #  | cytokines  | genePred           |
 #  | ecoresHg17 | genePred           |
 #  +------------+--------------------+
 
 
 # DOWNLOAD SEQUENCE 
 
     ssh kksilo
     mkdir /cluster/store7/tetNig1
     ln -s /cluster/store7/tetNig1 /cluster/data
     cd /cluster/data/tetNig1
     wget http://www.genoscope.cns.fr/externe/tetraodon/Data/Reads/tetraodon6.gz
 
 # DOWNLOAD SEQUENCE BY CHROMOSOME, GFF AND AGP FILES (DONE, 2004-08-13, hartera)
     # Genoscope provided the url, username and password  - this is V7
     # download agp, gff and accompanying FASTA files by chromosome
     ssh kksilo
     cd /cluster/data/tetNig1
     wget --timestamp --http-user=   --http-passwd= \
          http://www.genoscope.cns.fr/nda/tetraodon/GFF/tetraodon.all.gff.*
     wget --timestamp --http-user=   --http-passwd=  \
          http://www.genoscope.cns.fr/nda/tetraodon/GP/chr.agp
     wget --timestamp --http-user=   --http-passwd=   \
          http://www.genoscope.cns.fr/nda/tetraodon/GP/chr.agp.info
     wget --timestamp -r -l1 --http-user=   --http-passwd=   \
          http://www.genoscope.cns.fr/nda/tetraodon/GP/unmasked/
     mv ./www.genoscope.cns.fr/secure-nda/tetraodon/GP/unmasked/* .
     rm -r www.genoscope.cns.fr
     rm index*
     # unzip FASTA files
     bzip2 -d chr*.bz2
     bzip2 -d tetraodon.all.gff.bz2
     # no complete mitochondrial sequence in Genbank
 
 # Create list of chromosomes (DONE, 2004-08-13, hartera)
     ssh hgwdev
     cd /cluster/data/tetNig1
     foreach f (*.fa)
        set chr = `echo $f:h | sed -e 's/^chr//' | sed -e 's/\.fa$//'` 
       echo $chr >> chrom
     end
     sort -n chrom > chrom.lst
     rm chrom
 
 # SPLIT AGP FILES BY CHROMOSOME (DONE, 2004-08-16, hartera)
 
    ssh kksilo
    cd /cluster/data/tetNig1
    foreach c (`cat chrom.lst`)
       mkdir $c
       mv chr${c}.fa ./$c
       perl -we "while(<>){if (/^chr$c\t/) {print;}}" \
         ./chr.agp > $c/chr$c.agp
    end
    # agp has either gaps (N) or scaffolds (S), change S to W for WGS
    # agp files are not standard formats so need to reformat for
    # processing programs to run - missing field 4 which is just line number
    # and need to reformat gap line to:
    # chr chr_start chr_end line type length gap type linkage
    foreach c (`cat chrom.lst`)
       cp $c/chr${c}.agp $c/chr${c}.agp.bak
       echo "Processing chr${c}.agp ..."
       awk 'BEGIN {OFS="\t"} \
          { if ($4 == "S") print $1, $2, $3, NR, "W", $5, $6, $7, $8, $9; \
            if ($4 == "N" && $5 == "GAP_IS") \
               print $1, $2, $3, NR, $4, $9, "fragment", "yes"; \
            if ($4 == "N" && $5 == "GAP_UC") \
               print $1, $2, $3, NR, $4, $9, "contig", "yes"; \
            if ($4 == "N" && $5 == "GAP_UN") \
               print $1, $2, $3, NR, $4, $9, "contig", "no"; \
            if ($4 == "N" && $5 == "CENT") \
               print $1, $2, $3, NR, $4, $9, "centromere", "no" }' \
               $c/chr${c}.agp >> $c/chr${c}.new.agp
       mv $c/chr${c}.new.agp $c/chr{$c}.agp
    end
    # check agp files and then remove backup files
    foreach c (`cat chrom.lst`)
       rm $c/chr${c}.agp.bak
    end
 
 # CHECK FASTA AND AGP FILES (DONE, 2004-08-16, hartera)
    ssh kksilo
    cd /cluster/data/tetNig1
    foreach c (`cat chrom.lst`)
       foreach f ( $c/chr$c.agp) 
          set agpLen = `tail -1 $f | awk '{print $3;}'`
          set h = $f:r
          set g = $h:r
          echo "Getting size of $g.fa"
          set faLen = `faSize $g.fa | awk '{print $1;}'`
          if ($agpLen == $faLen) then
            echo "   OK: $f length = $g length = $faLen"
          else
            echo "ERROR:  $f length = $agpLen, but $g length = $faLen"
          endif
       end
    end
    # all are OK so FASTA files sizes are in agreement with agp files
 
 # BREAK UP SEQUENCE INTO 5MB CHUNKS AT CONTIGS/GAPS FOR CLUSTER RUNS
 # (DONE, 2004-08-16, hartera)
 
    ssh kksilo
    cd /cluster/data/tetNig1
     
    foreach c (`cat chrom.lst`)
       foreach agp ($c/chr$c.agp)
         if (-e $agp) then
           set fa = $c/chr$c.fa
           echo splitting $agp and $fa
           cp -p $agp $agp.bak
           cp -p $fa $fa.bak
           splitFaIntoContigs $agp $fa . -nSize=5000000
         endif
       end
    end
  
 # MAKE JKSTUFF AND BED DIRECTORIES (DONE, 2004-08-16, hartera)
     # This used to hold scripts -- better to keep them inline here
     # Now it should just hold lift file(s) and
     # temporary scripts made by copy-paste from this file.
     ssh hgwdev
     mkdir /cluster/data/tetNig1/jkStuff
     # This is where most tracks will be built:
     mkdir /cluster/data/tetNig1/bed
 
 # CREATING DATABASE (DONE, 2004-08-16, hartera)
     # Create the database.
     # next machine
     ssh hgwdev
     echo 'create database tetNig1' | hgsql ''
     # if you need to delete that database:  !!! WILL DELETE EVERYTHING !!!
     echo 'drop database tetNig1' | hgsql tetNig1
     # Use df to make sure there is at least 5 gig free on
     df -h /var/lib/mysql
 # Before loading data:
 # Filesystem            Size  Used Avail Use% Mounted on
 # /dev/sdc1             1.8T  525G  1.2T  32% /var/lib/mysql
 
 # CREATING GRP TABLE FOR TRACK GROUPING (DONE, 2004-08-16, hartera)
     # next machine
     ssh hgwdev
     #  the following command copies all the data from the table
     #  grp in the database galGal2 to our new database ce2
     echo "create table grp (PRIMARY KEY(NAME)) select * from galGal2.grp" \
       | hgsql tetNig1
     # if you need to delete that table:   !!! WILL DELETE ALL grp data !!!
     echo 'drop table grp;' | hgsql tetNig1
 
 # REPEAT MASKING - Run RepeatMasker on chroms (DONE, 2004-08-17, hartera)
 #- Split contigs into 500kb chunks, at gaps if possible:
     ssh kksilo
     cd /cluster/data/tetNig1
     foreach c (`cat chrom.lst`)
       foreach d ($c/chr${c}*_?{,?})
         cd $d
         echo "splitting $d"
         set contig = $d:t
         ~/bin/i386/faSplit gap $contig.fa 500000 ${contig}_ -lift=$contig.lft \
             -minGapSize=100
         cd ../..
       end
     end
 
 #- Make the run directory and job list:
     cd /cluster/data/tetNig1
     cat << '_EOF_' > jkStuff/RMTetraodon
 #!/bin/csh -fe
                                                                                 
 cd $1
 pushd .
 /bin/mkdir -p /tmp/tetNig1/$2
 /bin/cp $2 /tmp/tetNig1/$2/
 cd /tmp/tetNig1/$2
 /cluster/bluearc/RepeatMasker/RepeatMasker -ali -s $2
 popd
 /bin/cp /tmp/tetNig1/$2/$2.out ./
 if (-e /tmp/tetNig1/$2/$2.align) /bin/cp /tmp/tetNig1/$2/$2.align ./
 if (-e /tmp/tetNig1/$2/$2.tbl) /bin/cp /tmp/tetNig1/$2/$2.tbl ./
 if (-e /tmp/tetNig1/$2/$2.cat) /bin/cp /tmp/tetNig1/$2/$2.cat ./
 /bin/rm -fr /tmp/tetNig1/$2/*
 /bin/rmdir --ignore-fail-on-non-empty /tmp/tetNig1/$2
 /bin/rmdir --ignore-fail-on-non-empty /tmp/tetNig1
 '_EOF_'
     # << this line makes emacs coloring happy
     chmod +x jkStuff/RMTetraodon
     mkdir RMRun
     cp /dev/null RMRun/RMJobs
     foreach c (`cat chrom.lst`)
       foreach d ($c/chr${c}_?{,?})
           set ctg = $d:t
           foreach f ( $d/${ctg}_?{,?}.fa )
             set f = $f:t
             echo /cluster/data/tetNig1/jkStuff/RMTetraodon \
                  /cluster/data/tetNig1/$d $f \
                '{'check out line+ /cluster/data/tetNig1/$d/$f.out'}' \
               >> RMRun/RMJobs
           end
       end
     end
 
     #- Do the run
     ssh kk
     cd /cluster/data/tetNig1/RMRun
     para create RMJobs
     para try, para check, para check, para push, para check,...
 # para time
 # Completed: 940 of 940 jobs
 # CPU time in finished jobs:    6654614s  110910.24m  1848.50h   77.02d  0.211 y
 # IO & Wait Time:                 54843s     914.05m    15.23h    0.63d  0.002 y
 # Average job time:                7138s     118.96m     1.98h    0.08d
 # Longest job:                    11024s     183.73m     3.06h    0.13d
 # Submission to last job:         34509s     575.15m     9.59h    0.40d
 
     #- Lift up the 500KB chunk .out's to 5MB ("pseudo-contig") level
     ssh kksilo
     cd /cluster/data/tetNig1
     foreach d (*/chr*_?{,?})
       set contig = $d:t
       echo $contig
       liftUp $d/$contig.fa.out $d/$contig.lft warn $d/${contig}_*.fa.out \
         > /dev/null
     end
 
     #- Lift pseudo-contigs to chromosome level
     foreach c (`cat chrom.lst`)
       echo lifting $c
       cd $c
       if (-e lift/ordered.lft && ! -z lift/ordered.lft) then
         liftUp chr$c.fa.out lift/ordered.lft warn `cat lift/oOut.lst` \
         > /dev/null
       endif
       cd ..
     end
 
     #- Load the .out files into the database with:
     ssh hgwdev
     cd /cluster/data/tetNig1
     hgLoadOut tetNig1 */chr*.fa.out
 
 # MAKE LIFTALL.LFT (DONE, 2004-08-16, hartera)
     ssh kksilo
     cd /cluster/data/tetNig1
     cat */lift/ordered.lft > jkStuff/liftAll.lft
 
 # SIMPLE REPEATS TRACK (DONE, 2004-08-17, hartera)
     # TRF runs pretty quickly now... it takes a few hours total runtime,
     # so instead of binrsyncing and para-running, just do this on the
     # local fileserver
     ssh kksilo
     mkdir -p /cluster/data/tetNig1/bed/simpleRepeat
     cd /cluster/data/tetNig1/bed/simpleRepeat
     mkdir trf
     cp /dev/null jobs.csh
     foreach d (/cluster/data/tetNig1/*/chr*_?{,?})
       set ctg = $d:t
       foreach f ($d/${ctg}.fa)
         set fout = $f:t:r.bed
         echo $fout
         echo "/cluster/bin/i386/trfBig -trf=/cluster/bin/i386/trf $f /dev/null -bedAt=trf/$fout -tempDir=/tmp" \
         >> jobs.csh
       end
     end
 
     chmod a+x jobs.csh
     csh -ef jobs.csh >&! jobs.log &
     # check on this with
     tail -f jobs.log
     wc -l jobs.csh
     ls -1 trf | wc -l
     endsInLf trf/*
 
     liftUp simpleRepeat.bed /cluster/data/tetNig1/jkStuff/liftAll.lft warn \
            trf/*.bed
     
     # Load into the database:
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/simpleRepeat
     hgLoadBed tetNig1 simpleRepeat \
       /cluster/data/tetNig1/bed/simpleRepeat/simpleRepeat.bed \
       -sqlTable=$HOME/src/hg/lib/simpleRepeat.sql
 
 # PROCESS SIMPLE REPEATS INTO MASK (DONE, 2004-08-17, hartera)
     # After the simpleRepeats track has been built, make a filtered version
     # of the trf output: keep trf's with period <= 12:
     ssh kksilo
     cd /cluster/data/tetNig1/bed/simpleRepeat
     mkdir -p trfMask
     foreach f (trf/chr*.bed)
       awk '{if ($5 <= 12) print;}' $f > trfMask/$f:t
     end
     # Lift up filtered trf output to chrom coords as well:
     cd /cluster/data/tetNig1
     mkdir bed/simpleRepeat/trfMaskChrom
     foreach c (`cat chrom.lst`)
       if (-e $c/lift/ordered.lst) then
         perl -wpe 's@(\S+)@bed/simpleRepeat/trfMask/$1.bed@' \
           $c/lift/ordered.lst > $c/lift/oTrf.lst
         liftUp bed/simpleRepeat/trfMaskChrom/chr$c.bed \
           jkStuff/liftAll.lft warn `cat $c/lift/oTrf.lst`
       endif
       if (-e $c/lift/random.lst) then
         perl -wpe 's@(\S+)@bed/simpleRepeat/trfMask/$1.bed@' \
            $c/lift/random.lst > $c/lift/rTrf.lst
         liftUp bed/simpleRepeat/trfMaskChrom/chr${c}_random.bed \
           jkStuff/liftAll.lft warn `cat $c/lift/rTrf.lst`
       endif
     end
 
 # MASK SEQUENCE WITH REPEATMASKER AND SIMPLE REPEAT/TRF 
 # (DONE, 2004-08-17, hartera)
     ssh kksilo
     cd /cluster/data/tetNig1
     # Soft-mask (lower-case) the contig and chr .fa's,
     # then make hard-masked versions from the soft-masked.
     set trfCtg=bed/simpleRepeat/trfMask
     set trfChr=bed/simpleRepeat/trfMaskChrom
     foreach f (*/chr*.fa)
       echo "repeat- and trf-masking $f"
       maskOutFa -soft $f $f.out $f
       set chr = $f:t:r
       maskOutFa -softAdd $f $trfChr/$chr.bed $f
       echo "hard-masking $f"
       maskOutFa $f hard $f.masked
     end
 
     foreach c (`cat chrom.lst`)
       echo "repeat- and trf-masking contigs of chr$c"
       foreach d ($c/chr*_?{,?})
         set ctg=$d:t
         set f=$d/$ctg.fa
         maskOutFa -soft $f $f.out $f
         maskOutFa -softAdd $f $trfCtg/$ctg.bed $f
         maskOutFa $f hard $f.masked
       end
     end
 
     # Build nib files, using the soft masking in the fa
     mkdir nib
     foreach f (*/chr*.fa)
       faToNib -softMask $f nib/$f:t:r.nib
     end
 
 # STORING O+O SEQUENCE AND ASSEMBLY INFORMATION  (DONE, 2004-08-17, hartera)
     # Make symbolic links from /gbdb/tetNig1/nib to the real nibs.
     ssh hgwdev
     cd /cluster/data/tetNig1
     mkdir -p /gbdb/tetNig1/nib
     foreach f (/cluster/data/tetNig1/nib/chr*.nib)
       ln -s $f /gbdb/tetNig1/nib
     end
     # Load /gbdb/tetNig1/nib paths into database and save size info
     # hgNibSeq creates chromInfo table
     hgNibSeq -preMadeNib tetNig1 /gbdb/tetNig1/nib */chr*.fa
     echo "select chrom,size from chromInfo" | hgsql -N tetNig1 > chrom.sizes
     # take a look at chrom.sizes, should be 27 lines
     wc chrom.sizes
 
     # Make one big 2bit file as well, and make a link to it in
     # /gbdb/tetNig1/nib because hgBlat looks there:
     faToTwoBit */chr*.fa tetNig1.2bit
     ln -s /cluster/data/tetNig1/tetNig1.2bit /gbdb/tetNig1/nib/
 
 # MAKE GOLD AND GAP TRACKS (DONE, 2004-08-17, hartera)
     ssh hgwdev
     cd /cluster/data/tetNig1
     # the gold and gap tracks are created from the chrN.agp file 
     hgGoldGapGl -noGl -chromLst=chrom.lst tetNig1 /cluster/data/tetNig1 .
 
 # MAKE HGCENTRALTEST ENTRY AND TRACKDB TABLE FOR TETNIG1
 # (DONE, 2004-06-18, hartera)
 # UPDATED SOURCE AND ASSEMBLY DATE IN dbDb (DONE, 2004-09-09, hartera)
                                                                          
     # Make trackDb table so browser knows what tracks to expect:
     ssh hgwdev
     mkdir -p ~/kent/src/hg/makeDb/trackDb/tetraodon/tetNig1
                                                                                 
     cd $HOME/kent/src/hg/makeDb/trackDb
     cvs up -d -P
     # Edit that makefile to add tetNig1 in all the right places and do
     make update
     make alpha
     cvs commit -m "Added tetNig1." makefile
                                                                                 
     # Add dbDb and defaultDb entries:
     echo 'insert into dbDb (name, description, nibPath, organism,  \
           defaultPos, active, orderKey, genome, scientificName,  \
           htmlPath, hgNearOk, hgPbOk, sourceName)  \
           values("tetNig1", "May 2002", \
           "/gbdb/tetNig1/nib", "Tetraodon", "chr1:1000-4000", 1, \
           39, "Tetraodon", "Tetraodon nigroviridis", \
           "/gbdb/tetNig1/html/description.html", 0, 0, \
           "Genoscope and Whitehead Institute, V6");' \
     | hgsql -h genome-testdb hgcentraltest
     echo 'insert into defaultDb (genome, name) \
           values ("Tetraodon", "tetNig1");' \
           | hgsql -h genome-testdb hgcentraltest
     # add correct assembly date and source (2004-09-08, hartera)
     echo 'update dbDb set description = "Feb 2004" where name = "tetNig1";' \
           | hgsql -h genome-testdb hgcentraltest
     echo 'update dbDb set sourceName = "Genoscope and Broad Institute, V7" \
           where name = "tetNig1";' | hgsql -h genome-testdb hgcentraltest
     # correction to assembly date format (2004-09-09)
     echo 'update dbDb set description = "Feb. 2004" where name = "tetNig1";' \
           | hgsql -h genome-testdb hgcentraltest
 
 # MAKE DESCRIPTION/SAMPLE POSITION HTML PAGE (DONE, 2004-08-23, hartera)
     ssh hgwdev
     mkdir /cluster/data/tetNig1/html
     cd /cluster/data/tetNig1/html
   # make a symbolic link from /gbdb/tetNig1/html to /cluster/data/tetNig1/html
     mkdir /gbdb/tetNig1
     ln -s /cluster/data/tetNig1/html /gbdb/tetNig1/html
     # Add a description page for zebrafish
     cd /cluster/data/tetNig1/html
     cp /cluster/data/fr1/html/*.html .
     # Edit this for zebrafish
     # create a description.html page here
     mkdir -p ~/kent/src/hg/makeDb/trackDb/tetraodon/tetNig1
     cd ~/kent/src/hg/makeDb/trackDb/
     cvs add tetraodon
     cd tetraodon
     cvs add tetNig1
     # Add description page here too
     cp /cluster/data/tetNig1/html/description.html \
        $HOME/kent/src/hg/makeDb/trackDb/tetraodon/tetNig1/
     chmod a+r \
           $HOME/kent/src/hg/makeDb/trackDb/tetraodon/tetNig1/description.html
     cd $HOME/kent/src/hg/makeDb/trackDb/tetraodon/tetNig1/
     # Check it in and copy (ideally using "make alpha" in trackDb) to
     # /gbdb/tetNig1/html
     cvs add description.html
     cvs commit description.html
 
 # PUT MASKED SEQUENCE OUT FOR CLUSTER RUNS (DONE, 2004-08-18, hartera)
     ssh kkr1u00
     # Chrom-level mixed nibs that have been repeat- and trf-masked:
     rm -rf /iscratch/i/tetNig1/nib
     mkdir -p /iscratch/i/tetNig1/nib
     cp -p /cluster/data/tetNig1/nib/chr*.nib /iscratch/i/tetNig1/nib
     # Pseudo-contig fa that have been repeat- and trf-masked:
     rm -rf /iscratch/i/tetNig1/trfFa
     mkdir /iscratch/i/tetNig1/trfFa
     foreach d (/cluster/data/tetNig1/*/chr*_?{,?})
       cp $d/$d:t.fa /iscratch/i/tetNig1/trfFa
     end
     rm -rf /iscratch/i/tetNig1/rmsk
     mkdir -p /iscratch/i/tetNig1/rmsk
     cp -p /cluster/data/tetNig1/*/chr*.fa.out /iscratch/i/tetNig1/rmsk
     cp -p /cluster/data/tetNig1/tetNig1.2bit /iscratch/i/tetNig1/
     iSync
 
     # add to /cluster/bluearc too
     ssh kksilo
 
     # masked contigs
     rm -fr /cluster/bluearc/scratch/tetra/tetNig1/trfFa
     mkdir -p /cluster/bluearc/scratch/tetra/tetNig1/trfFa
     foreach d (/cluster/data/tetNig1/*/chr*_?{,?})
       cp $d/$d:t.fa /cluster/bluearc/scratch/tetra/tetNig1/trfFa
     end
 
     # masked chrom nibs
     rm -fr /cluster/bluearc/scratch/tetra/tetNig1/nib
     mkdir -p /cluster/bluearc/scratch/tetra/tetNig1/nib
     cp -p /cluster/data/tetNig1/nib/chr*.nib /cluster/bluearc/scratch/tetra/tetNig1/nib
 
     # fasta files  - CHECK FOR RANDOMS
     cd /cluster/data/tetNig1
     rm -fr /cluster/bluearc/scratch/tetra/tetNig1/fasta
     mkdir -p /cluster/bluearc/scratch/tetra/tetNig1/fasta
     cp -p */*.fa /cluster/bluearc/scratch/tetra/tetNig1/fasta
 
     # RepeatMasker *.out files - CHECK FOR RANDOMS
     rm -rf /cluster/bluearc/scratch/tetra/tetNig1/rmsk
     mkdir -p /cluster/bluearc/scratch/tetra/tetNig1/rmsk
     cp -p ./*/chr*.fa.out /cluster/bluearc/scratch/tetra/tetNig1/rmsk
 
 # CREATE gc5Base wiggle TRACK (DONE, 2004-08-19, hartera)
     ssh kki
     mkdir /cluster/data/tetNig1/bed/gc5Base
     cd /cluster/data/tetNig1/bed/gc5Base
     #   in the script below, the 'grep -w GC' selects the lines of
     #   output from hgGcPercent that are real data and not just some
     #   information from hgGcPercent.  The awk computes the number
     #   of bases that hgGcPercent claimed it measured, which is not
     #   necessarily always 5 if it ran into gaps, and then the division
     #   by 10.0 scales down the numbers from hgGcPercent to the range
     #   [0-100].  Two columns come out of the awk print statement:
     #   <position> and <value> which are fed into wigAsciiToBinary through
     #   the pipe.  It is set at a dataSpan of 5 because each value
     #   represents the measurement over five bases beginning with
     #   <position>.  The result files end up in ./wigData5.
     #   A new script is used (from makeHg17.doc) which gets around the
     #   problem that wigAsciiToBinary was calculating chromEnd to be
     #   beyond the real chromosome end
 
     mkdir wigData5 dataLimits5
     cat << '_EOF_' > kkRun.sh
 #!/bin/sh
 NIB=$1
                                                                                 
 chr=${NIB/.nib/}
 chrom=${chr#chr}
                                                                                 
 hgGcPercent -chr=${chr} -doGaps -file=stdout -win=5 tetNig1 \
         /iscratch/i/tetNig1/nib | \
     grep -w GC | \
     awk '{if (($3-$2) >= 5) {printf "%d\t%.1f\n", $2+1, $5/10.0} }' | \
     wigAsciiToBinary -dataSpan=5 -chrom=${chr} \
         -wibFile=wigData5/gc5Base_${chrom} \
             -name=${chrom} stdin 2> dataLimits5/${chr}
 '_EOF_'
     # << this line makes emacs coloring happy
     chmod +x kkRun.sh
                                                                                 
     ls /iscratch/i/tetNig1/nib  > nibList
     cat << '_EOF_' > gsub
 #LOOP
 ./kkRun.sh $(path1)
 #ENDLOOP
 '_EOF_'
     # << this line makes emacs coloring happy
 
     gensub2 nibList single gsub jobList
     para create jobList
     para try, check, push ... etc
 # para time
 # Completed: 27 of 27 jobs
 # CPU time in finished jobs:        587s       9.79m     0.16h    0.01d  0.000 y
 # IO & Wait Time:                    16s       0.26m     0.00h    0.00d  0.000 y
 # Average job time:                  22s       0.37m     0.01h    0.00d
 # Longest job:                      187s       3.12m     0.05h    0.00d
 # Submission to last job:           623s      10.38m     0.17h    0.01d
 
     # load the .wig files back on hgwdev:
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/gc5Base
     # up to here
     hgLoadWiggle -pathPrefix=/gbdb/tetNig1/wib/gc5Base \
                  tetNig1 gc5Base wigData5/*.wig
     # and symlink the .wib files into /gbdb
     mkdir -p /gbdb/tetNig1/wib/gc5Base
     ln -s `pwd`/wigData5/*.wib /gbdb/tetNig1/wib/gc5Base
 
     # And then the zoomed data view
     ssh kki
     cd /cluster/data/tetNig1/bed/gc5Base
     mkdir wigData5_1K dataLimits5_1K
                                                                                 
     cat << '_EOF_' > kkRunZoom.sh
 #!/bin/sh
 NIB=$1
                                                                                 
 chr=${NIB/.nib/}
 chrom=${chr#chr}
                                                                                 
 hgGcPercent -chr=${chr} -doGaps -file=stdout -win=5 tetNig1 \
         /iscratch/i/tetNig1/nib | \
     grep -w GC | \
     awk '{if (($3-$2) >= 5) {printf "%d\t%.1f\n", $2+1, $5/10.0} }' | \
     wigZoom -dataSpan=1000 stdin | wigAsciiToBinary -dataSpan=1000 \
         -chrom=${chr} -wibFile=wigData5_1K/gc5Base_${chrom}_1K \
             -name=${chrom} stdin 2> dataLimits5_1K/${chr}
 '_EOF_'
     # << this line makes emacs coloring happy
     chmod +x kkRunZoom.sh
                                                                                 
     cat << '_EOF_' > gsubZoom
 #LOOP
 ./kkRunZoom.sh $(path1)
 #ENDLOOP
 '_EOF_'
     # << this line makes emacs coloring happy
     gensub2 nibList single gsubZoom jobListZoom
     para create jobListZoom
     para try, check, push, ... etc.
 # para time
 # Completed: 27 of 27 jobs
 # CPU time in finished jobs:        571s       9.51m     0.16h    0.01d  0.000 y
 # IO & Wait Time:                    40s       0.67m     0.01h    0.00d  0.000 y
 # Average job time:                  23s       0.38m     0.01h    0.00d
 # Longest job:                      223s       3.72m     0.06h    0.00d
 # Submission to last job:           986s      16.43m     0.27h    0.01d
 
     #   Then load these .wig files into the same database as above
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/gc5Base
     hgLoadWiggle -pathPrefix=/gbdb/tetNig1/wib/gc5Base \
         -oldTable tetNig1 gc5Base wigData5_1K/*.wig
     # and symlink these .wib files into /gbdb
     mkdir -p /gbdb/tetNig1/wib/gc5Base
     ln -s `pwd`/wigData5_1K/*.wib /gbdb/tetNig1/wib/gc5Base
 
 # CREATE 10.OOC, 11.OOC FILES FOR BLAT (DONE, 2004-08-20, hartera)
     # Use -repMatch=130 (based on size - for human 1024 was used, Tetraodon 
     # is about 380 Mb in length and it is 12.7% the size of the human genome) 
     ssh kkr1u00
     mkdir /cluster/data/tetNig1/bed/ooc
     cd /cluster/data/tetNig1/bed/ooc
     mkdir -p /cluster/bluearc/tetNig1
     ls -1 /cluster/data/tetNig1/nib/chr*.nib > nib.lst
     blat nib.lst /dev/null /dev/null -tileSize=11 \
       -makeOoc=/cluster/bluearc/tetNig1/11.ooc -repMatch=130
     # Wrote 8776 overused 11-mers to /cluster/bluearc/tetNig1/11.ooc
     # For 10.ooc, repMatch = 4096 for human, so use 520
     blat nib.lst /dev/null /dev/null -tileSize=10 \
       -makeOoc=/cluster/bluearc/tetNig1/10.ooc -repMatch=520
     # Wrote 1976 overused 11-mers to /cluster/bluearc/tetNig1/11.ooc
     # keep copies of ooc files in this directory and copy to iscratch
     cp /cluster/bluearc/tetNig1/*.ooc .
     cp -p /cluster/bluearc/tetNig1/*.ooc /iscratch/i/tetNig1/
     iSync
                                                                        
 # AUTO UPDATE GENBANK MRNA AND EST RUN  (DONE, 2004-08-24, hartera and markd)
     ssh eieio
     cd /cluster/data/genbank
     # This is a new organism, edit the etc/genbank.conf file and add:
 # tetNig1 (Tetraodon)
 tetNig1.genome = /iscratch/i/tetNig1/nib/chr*.nib
 tetNig1.lift = /cluster/data/tetNig1/jkStuff/liftAll.lft
 tetNig1.downloadDir = tetNig1
 
     # Default includes native genbank mRNAs and ESTs,
     # genbank xeno mRNAs but no xenoESTs, native RefSeq mRNAs but
     # not xeno RefSeq
     cvs commit -m "Added Tetraodon." etc/genbank.conf
     # Edit src/align/gbBlat to add /iscratch/i/tetNig1/11.ooc
     cd ~/kent/src/hg/makeDb/genbank
     # Add line: TETNIG_OOC=/iscratch/i/tetNig1/11.ooc
     # Add line:  tetNig*) oocOpt="-ooc=${TETNIG_OOC}" ;;
     cvs diff src/align/gbBlat
     make
     cvs update src/align/gbBlat
     cvs commit -m "Added 11.ooc for tetNig1." src/align/gbBlat
     # Edit src/lib/gbGenome.c to add new genome
     # ADD: static char *tetNigNames[] = {"Tetraodon nigroviridis", NULL};
     # ADD: {"tetNig", tetNigNames, NULL},
     # to static struct dbToSpecies dbToSpeciesMap[]
     cvs diff src/lib/gbGenome.c
     make
     cvs update src/lib/gbGenome.c
     cvs commit -m "Added Tetraodon, tetNig." src/lib/gbGenome.c
     # Install to /cluster/data/genbank:
     make install-server
 
     ssh eieio
     cd /cluster/data/genbank
     # This is an -initial run, RefSeq mRNA only:
     nice bin/gbAlignStep -srcDb=refseq -type=mrna -verbose=1 -initial tetNig1
     # Load results for RefSeq mRNAs:
     ssh hgwdev
     cd /cluster/data/genbank
     # There are no RefSeq mRNAs for Tetraodon
 
     # -initial for GenBank mRNAs
     nice ./bin/gbAlignStep -initial -srcDb=genbank -type=mrna tetNig1
 
     # Load results for GenBank mRNAs
     ssh hgwdev
     cd /cluster/data/genbank
     nice bin/gbDbLoadStep -verbose=1 tetNig1
     cd /cluster/data/genbank/work
     mv initial.tetNig1 initial.tetNig1.genbank.mrna
 
     # -initial for ESTs
     ssh eieio
     cd /cluster/data/genbank
     nice bin/gbAlignStep -srcDb=genbank -type=est -verbose=1 -initial tetNig1
 
     # Load results for GenBank ESTs
     ssh hgwdev
     cd /cluster/data/genbank
     nice bin/gbDbLoadStep -verbose=1 tetNig1
     cd /cluster/data/genbank/work
     mv initial.tetNig1 initial.tetNig1.genbank.est
 
     # Everything loaded ok, so clean up
     rm -r /cluster/data/genbank/work/initial.tetNig1*
 
 # BLASTZ SWAP FOR hg17 vs tetNig1 BLASTZ TO CREATE tetNig1 vs hg17 BLASTZ
 # (DONE, 2004-08-26, hartera)
     ssh kolossus
     mkdir -p /cluster/data/tetNig1/bed/blastz.hg17.swap
     cd /cluster/data/tetNig1/bed/blastz.hg17.swap
     # use axtChrom from blastzTetNig1 on hg17
     set aliDir = /cluster/data/hg17/bed/blastz.tetNig1
     cp $aliDir/S1.len S2.len
     cp $aliDir/S2.len S1.len
     mkdir unsorted axtChrom
     cat $aliDir/axtChrom/chr*.axt \
     | axtSwap stdin $aliDir/S1.len $aliDir/S2.len stdout \
     | axtSplitByTarget stdin unsorted
     # Sort the shuffled .axt files.
     foreach f (unsorted/*.axt)
       echo sorting $f:t:r
       axtSort $f axtChrom/$f:t
     end
     du -sh $aliDir/axtChrom unsorted axtChrom
     # 709M    /cluster/data/hg17/bed/blastz.tetNig1/axtChrom
     # 710M    unsorted
     # 709M    axtChrom
     rm -r unsorted
     # translate sorted axt files into psl
     ssh kolossus
     cd /cluster/data/tetNig1/bed/blastz.hg17.swap
     mkdir -p pslChrom
     set tbl = "blastzHg17"
     foreach f (axtChrom/chr*.axt)
       set c=$f:t:r
       echo "Processing chr $c"
       /cluster/bin/i386/axtToPsl $f S1.len S2.len pslChrom/${c}_${tbl}.psl
     end
     # Load database tables
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/blastz.hg17.swap/pslChrom
     foreach f (./*.psl)
       /cluster/bin/i386/hgLoadPsl tetNig1 $f
       echo "$f Done"
     end
 
 # CHAIN HUMAN (hg17) BLASTZ (DONE, 2004-08-27, hartera)
     # Run axtChain on little cluster
     ssh kki
     cd /cluster/data/tetNig1/bed/blastz.hg17.swap
     mkdir -p axtChain/run1
     cd axtChain/run1
     mkdir out chain
     ls -1S /cluster/data/tetNig1/bed/blastz.hg17.swap/axtChrom/*.axt \
                     > input.lst
     cat << '_EOF_' > gsub
 #LOOP
 doChain {check in exists $(path1)} {check out line+ chain/$(root1).chain} {check out line+ out/$(root1).out}
 #ENDLOOP
 '_EOF_'
     # << this line makes emacs coloring happy
                                                                                 
     # Make our own linear gap file with reduced gap penalties,
     # in hopes of getting longer chains - works well for species at
     # chicken-human distance or greater
     cat << '_EOF_' > ../../chickenHumanTuned.gap
 tablesize^V     11
 smallSize^V     111
 position^V      1^V     2^V     3^V     11^V    111^V   2111^V  12111^V 32111^V
 72111^V 152111^V        252111
 qGap^V  325^V   360^V   400^V   450^V   600^V   1100^V  3600^V  7600^V  15600^V
 31600^V 56600
 tGap^V  325^V   360^V   400^V   450^V   600^V   1100^V  3600^V  7600^V  15600^V
 31600^V 56600
 bothGap^V       625^V   660^V   700^V   750^V   900^V   1400^V  4000^V  8000^V
 16000^V 32000^V 57000
 '_EOF_'
 # << this line makes emacs coloring happy
                                                                                 
 cat << '_EOF_' > doChain
 #!/bin/csh
 axtChain -linearGap=../../chickenHumanTuned.gap $1 \
     /iscratch/i/tetNig1/nib \
     /iscratch/i/gs.18/build35/bothMaskedNibs $2 >& $3 
 '_EOF_'
     # << this line makes emacs coloring happy
     chmod a+x doChain
     gensub2 input.lst single gsub jobList
     para create jobList
     para try, check, push, check...
 # para time
 # Completed: 27 of 27 jobs
 # CPU time in finished jobs:       2207s      36.78m     0.61h    0.03d  0.000 y
 # IO & Wait Time:                   124s       2.07m     0.03h    0.00d  0.000 y
 # Average job time:                  86s       1.44m     0.02h    0.00d
 # Longest job:                      160s       2.67m     0.04h    0.00d
 # Submission to last job:           334s       5.57m     0.09h    0.00d
 
    # now on the cluster server, sort chains
     ssh kksilo
     cd /cluster/data/tetNig1/bed/blastz.hg17.swap/axtChain
     chainMergeSort run1/chain/*.chain > all.chain
     chainSplit chain all.chain
                                                                                 
     # take a look at score distr's
     foreach f (chain/*.chain)
       grep chain $f | awk '{print $2;}' | sort -nr > /tmp/score.$f:t:r
       echo $f:t:r >> hist5000.out
       textHistogram -binSize=5000 /tmp/score.$f:t:r >> hist5000.out
       echo ""
     end
     # apart from chrUn_random, not too many chains with score < 5000
     # load chr 1 into table
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/blastz.hg17.swap/axtChain/chain
     hgLoadChain tetNig1 chr1_chainHg17 chr1.chain
 # no RefSeqs for Tetraodon so use mrna table
 # featureBits -chrom=chr1 tetNig1 all_mrna chainHg17Link -enrichment
 # all_mrna 2.637%, chainHg17Link 9.262%, both 0.772%, cover 29.28%, enrich 3.16x
     # try filtering with minScore < 5000
     ssh kksilo
     cd /cluster/data/tetNig1/bed/blastz.hg17.swap/axtChain
     mv all.chain all.chain.unfiltered
     chainFilter -minScore=5000 all.chain.unfiltered > all.chain
     chainSplit chainFilt5k all.chain
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/blastz.hg17.swap/axtChain/chainFilt5k
     hgLoadChain tetNig1 chr1_chainHg17Filt5k chr1.chain
 # featureBits -chrom=chr1 tetNig1 all_mrna chainHg17Filt5kLink -enrichment
 # all_mrna 2.637%, chainHg17Filt5kLink 8.746%, both 0.746%, cover 28.29%, 
 # enrich 3.23x
 
 # add all chains for minScore=5000 filtered chains
 # remove test chain tables for chr1
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/blastz.hg17.swap/axtChain
     hgsql -e "drop table chr1_chainHg17;" tetNig1
     hgsql -e "drop table chr1_chainHg17Link;" tetNig1
     hgsql -e "drop table chr1_chainHg17Filt5k;" tetNig1
     hgsql -e "drop table chr1_chainHg17Filt5kLink;" tetNig1
     mv chainFilt5k chain
 
     cd /cluster/data/tetNig1/bed/blastz.hg17.swap/axtChain/chain
     foreach i (*.chain)
         set c = $i:r
         hgLoadChain tetNig1 ${c}_chainHg17 $i
         echo done $c
     end
 
 # NET HUMAN (hg17) BLASTZ (DONE, 2004-08-27, hartera)
     ssh kksilo
     cd /cluster/data/tetNig1/bed/blastz.hg17.swap/axtChain
     mkdir preNet
     cd chain
     foreach i (*.chain)
        echo preNetting $i
        /cluster/bin/i386/chainPreNet $i ../../S1.len ../../S2.len \
                                      ../preNet/$i
     end
     cd ..
     mkdir n1
     cd preNet
     foreach i (*.chain)
       set n = $i:r.net
       echo primary netting $i
       /cluster/bin/i386/chainNet $i -minSpace=1 ../../S1.len ../../S2.len \
                                  ../n1/$n /dev/null
     end
     cd ..
     cat n1/*.net | /cluster/bin/i386/netSyntenic stdin noClass.net
     # memory usage 55447552, utime 259 s/100, stime 33
 
     # Add classification info using db tables:
     cd /cluster/data/tetNig1/bed/blastz.hg17.swap/axtChain
     # netClass looks for ancient repeats in one of the databases
     # hg17 has this table - hand-curated by Arian but this is for
     # human-rodent comparisons so do not use here, use -noAr option
     mkdir -p /cluster/bluearc/hg17/linSpecRep.notInTetraodon
     mkdir -p /cluster/bluearc/tetNig1/linSpecRep.notInHuman
     cp -p /iscratch/i/hg17/linSpecRep.notInTetraodon/* \
           /cluster/bluearc/hg17/linSpecRep.notInTetraodon
     cp -p /iscratch/i/tetNig1/linSpecRep.notInHuman/* \
           /cluster/bluearc/tetNig1/linSpecRep.notInHuman
 
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/blastz.hg17.swap/axtChain
     time netClass noClass.net tetNig1 hg17 hg17.net \
           -tNewR=/cluster/bluearc/tetNig1/linSpecRep.notInHuman \
           -qNewR=/cluster/bluearc/hg17/linSpecRep.notInTetraodon -noAr
     # 47.430u 31.160s 2:15.46 58.0%   0+0k 0+0io 197pf+0w
     netFilter -minGap=10 hg17.net |  hgLoadNet tetNig1 netHg17 stdin
     # featureBits tetNig1 all_mrna netHg17 -enrichment
     # all_mrna 3.204%, netHg17 42.716%, both 1.698%, cover 53.00%, enrich 1.24x
 
     # create symlinks to make consistent with newer builds 2006-04-09 markd
     ln -s all.chain.gz  /cluster/data/tetNig1/bed/blastz.hg17/axtChain/tetNig1.hg17.all.chain.gz
     ln -s hg17.net.gz  /cluster/data/tetNig1/bed/blastz.hg17/axtChain/tetNig1.hg17.net.gz
 
 # BLASTZ FOR FR1 (FUGU) (DONE, 2004-08-31, hartera)
 
     ssh kkr1u00
     # blastz requires lineage-specific repeats but there are none
     # available between these two fish species to make empty files
     mkdir -p /iscratch/i/tetNig1/linSpecRep.notInFugu
     mkdir -p /iscratch/i/fugu/linSpecRep.notInZebrafish
     cd /cluster/data/tetNig1
     # for Tetraodon
     foreach c (`cat chrom.lst`)
       cp /dev/null /iscratch/i/tetNig1/linSpecRep.notInFugu/chr${c}.out.spec
     end
     # for Fugu
     cp /dev/null /iscratch/i/fugu/linSpecRep.notInZebrafish/chrUn.out.spec
     iSync
 
     ssh kk
     mkdir -p /cluster/data/tetNig1/bed/blastz.fr1.2004-08-31
     ln -s /cluster/data/tetNig1/bed/blastz.fr1.2004-08-31 \
           /cluster/data/tetNig1/bed/blastz.fr1
     cd /cluster/data/tetNig1/bed/blastz.fr1
 
     cat << '_EOF_' > DEF
 # Tetraodon (tetNig1) vs. Fugu (fr1)
 export PATH=/usr/bin:/bin:/usr/local/bin:/cluster/bin/penn:/cluster/bin/i386:/cluster/home/angie/schwartzbin
                                                                                 
 ALIGN=blastz-run
 BLASTZ=blastz
 BLASTZ_L=8000
 BLASTZ_H=2200
 #BLASTZ_ABRIDGE_REPEATS=1 if SMSK is specified
 BLASTZ_ABRIDGE_REPEATS=0
                                                                                 
 # TARGET - Tetraodon (tetNig1)
 SEQ1_DIR=/iscratch/i/tetNig1/nib
 SEQ1_RMSK=
 # lineage-specific repeats
 # we don't have that information for these species so these files are empty
 SEQ1_SMSK=/iscratch/i/tetNig1/linSpecRep.notInFugu
 SEQ1_FLAG=
 SEQ1_IN_CONTIGS=0
 # 10 MB chunk for target
 SEQ1_CHUNK=10000000
 SEQ1_LAP=10000
                                                                                 
 # QUERY - Fugu (fr1)
 # soft-masked chrom nib
 SEQ2_DIR=/cluster/bluearc/fugu/fr1/chromNib
 SEQ2_RMSK=
 SEQ2_SMSK=/iscratch/i/fugu/linSpecRep.notInZebrafish
 SEQ2_FLAG=
 SEQ2_IN_CONTIGS=0
 # 10 Mbase for query
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
                                                                                 
 BASE=/cluster/data/tetNig1/bed/blastz.fr1
                                                                                 
 DEF=$BASE/DEF
 RAW=$BASE/raw
 CDBDIR=$BASE
 SEQ1_LEN=$BASE/S1.len
 SEQ2_LEN=$BASE/S2.len
                                                                                 
 #DEBUG=1
 '_EOF_'
     # << this line keeps emacs coloring happy
     # save the DEF file in the current standard place
     chmod +x DEF
     cp DEF ~angie/hummus/DEF.tetNig1-fr1.2004-08-31
     # setup cluster run
     # source the DEF file
     bash
     . ./DEF
     /cluster/data/tetNig1/jkStuff/BlastZ_run0.sh
     cd run.0
     # check batch looks ok then
     para try, check, push, check, ....
 # para time
 # Completed: 1995 of 1995 jobs
 # CPU time in finished jobs:     502856s    8380.93m   139.68h    5.82d  0.016 y
 # IO & Wait Time:               1190285s   19838.08m   330.63h   13.78d  0.038 y
 # Average job time:                 849s      14.14m     0.24h    0.01d
 # Longest job:                    19130s     318.83m     5.31h    0.22d
 # Submission to last job:         19415s     323.58m     5.39h    0.22d
 
     # second cluster run: lift raw alignments -> lav dir
     cd /cluster/data/tetNig1/bed/blastz.fr1
     bash # if a csh/tcsh user
     . ./DEF
     /cluster/data/tetNig1/jkStuff/BlastZ_run1.sh
     cd run.1
     para try, check, push, check etc.
 # para time
 # Completed: 57 of 57 jobs
 # IO & Wait Time:                   511s       8.52m     0.14h    0.01d  0.000 y
 # Average job time:                  29s       0.48m     0.01h    0.00d
 # Longest job:                       58s       0.97m     0.02h    0.00d
 # Submission to last job:           853s      14.22m     0.24h    0.01d
 
     # third run: lav -> axt
     ssh kki
     cd /cluster/data/tetNig1/bed/blastz.fr1
     mkdir axtChrom run.2
     cd run.2
     cat << '_EOF_' > do.csh
 #!/bin/csh -ef
 cd $1
 cat `ls -1 *.lav | sort -g` \
 | lavToAxt stdin /iscratch/i/tetNig1/nib \
 /cluster/bluearc/fugu/fr1/chromNib stdout \
 | axtSort stdin $2
 '_EOF_'
     # << this line makes emacs coloring happy
     chmod a+x do.csh
     cat << '_EOF_' > gsub
 #LOOP
 ./do.csh {check in exists $(path1)} {check out line+ /cluster/data/tetNig1/bed/blastz.fr1/axtChrom/$(root1).axt}
 #ENDLOOP
 '_EOF_'
     # << this line makes emacs coloring happy
     \ls -1Sd ../lav/chr* > chrom.list
     gensub2 chrom.list single gsub jobList
     wc -l jobList
     head jobList
     para create jobList
     para try, check, push, check,...
 # para time
 # Completed: 27 of 27 jobs
 # CPU time in finished jobs:         73s       1.22m     0.02h    0.00d  0.000 y
 # IO & Wait Time:                   495s       8.25m     0.14h    0.01d  0.000 y
 # Average job time:                  21s       0.35m     0.01h    0.00d
 # Longest job:                       77s       1.28m     0.02h    0.00d
 # Submission to last job:           318s       5.30m     0.09h    0.00d
 
    # translate sorted axt files into psl
     ssh kolossus
     cd /cluster/data/tetNig1/bed/blastz.fr1
     mkdir -p pslChrom
     set tbl = "blastzFr1"
     foreach f (axtChrom/chr*.axt)
       set c=$f:t:r
       echo "Processing chr $c"
       /cluster/bin/i386/axtToPsl $f S1.len S2.len pslChrom/${c}_${tbl}.psl
     end
 
     # Load database tables
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/blastz.fr1/pslChrom
     foreach f (./*.psl)
       /cluster/bin/i386/hgLoadPsl -noTNameIx tetNig1 $f
       echo "$f Done"
     end
 
 # default parameters and H=2000
 # featureBits -chrom=chr1 tetNig1 all_mrna blastzFr1 -enrichment
 # all_mrna 2.637%, blastzFr1 78.364%, both 2.361%, cover 89.53%, enrich 1.14x 
 # repeat with H=2000 and L=6000 - blastzL6k
 # featureBits -chrom=chr1 tetNig1 all_mrna blastzFr1L6k -enrichment
 # all_mrna 2.637%, blastzFr1L6k 77.955%, both 2.348%, cover 89.04%, enrich 1.14x# very little difference
 # featureBits -chrom=chr1 danRer1 all_mrna blastzFr1 -enrichment
 # all_mrna 0.981%, blastzFr1 5.542%, both 0.592%, cover 60.32%, enrich 10.88x
 # blastzFr1L6k
 # BLASTZ_H=2000
 # BLASTZ_Y=3400
 # BLASTZ_L=6000
 # BLASTZ_K=2200
 # BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 # featureBits -chrom=chr1 tetNig1 all_mrna blastzFr1L6k2 -enrichment
 # all_mrna 2.637%, blastzFr1L6k2 79.917%, both 2.403%, cover 91.12%, 
 # enrich 1.14x
 # H=2200, default parameters
 # featureBits -chrom=chr1 tetNig1 all_mrna blastzFr1H2200 -enrichment
 # all_mrna 2.637%, blastzFr1H2200 78.225%, both 2.359%, cover 89.46%, 
 # enrich 1.14x
 # rows in chr1_blastzFr1* tables:
 # blastzFr1	47079
 # blastzFr1L6k	26408
 # blastzFr1L6k2	52340
 # blastzFr1H2200 44240
 # blastzFr1H2200L8k 20502
 # blastzFr1H2200L10k 17355
 # try increasing L to 8000
 # H=2200, L=8000
 # featureBits -chrom=chr1 tetNig1 all_mrna blastzFr1H2200L8k -enrichment
 # all_mrna 2.637%, blastzFr1H2200L8k 77.714%, both 2.345%, cover 88.94%, 
 # enrich 1.14x
 # H=2200 L=10000
 #  featureBits -chrom=chr1 tetNig1 all_mrna blastzFr1H2200L10k -enrichment
 # all_mrna 2.637%, blastzFr1H2200L10k 77.568%, both 2.340%, cover 88.72%, 
 # enrich 1.14x
 # 6 jobs took a very long time and did not finish
 # looking at some alignments in these tracks with the mRNA track on there is not
 # much difference between most of them but blastzFr1L6k2 often has more
 # alignments and a different strucutre. Use blastzFr1H2200L8k as this has
 # fewer alingments with very little loss of coverage and more lower scoring 
 # alignments can be removed at the chaining step.
 
 # CHAIN FR1 BLASTZ (DONE, 2004-09-01, hartera)
     # Run axtChain on little cluster
     ssh kki 
     cd /cluster/data/tetNig1/bed/blastz.fr1
     mkdir -p axtChain/run1
     cd axtChain/run1
     mkdir out chain
     ls -1S /cluster/data/tetNig1/bed/blastz.fr1/axtChrom/*.axt \
         > input.lst
 cat << '_EOF_' > gsub
 #LOOP
 doChain {check in exists $(path1)} {check out line+ chain/$(root1).chain} {check out line+ out/$(root1).out}
 #ENDLOOP
 '_EOF_'
 
 cat << '_EOF_' > doChain
 #!/bin/csh
 axtChain $1 \
     /iscratch/i/tetNig1/nib \
     /cluster/bluearc/fugu/fr1/chromNib $2 >& $3
 '_EOF_'
     # << this line makes emacs coloring happy
     chmod a+x doChain
     gensub2 input.lst single gsub jobList
     para create jobList
     para try, check, push, check...
 # Completed: 27 of 27 jobs
 # CPU time in finished jobs:        414s       6.90m     0.11h    0.00d  0.000 y
 # IO & Wait Time:                   252s       4.20m     0.07h    0.00d  0.000 y
 # Average job time:                  25s       0.41m     0.01h    0.00d
 # Longest job:                      118s       1.97m     0.03h    0.00d
 # Submission to last job:           972s      16.20m     0.27h    0.01d
 
     # now on the cluster server, sort chains
     ssh kksilo
     cd /cluster/data/tetNig1/bed/blastz.fr1/axtChain
     chainMergeSort run1/chain/*.chain > all.chain
     chainSplit chain all.chain
     # take a look at score distr's,try also with larger bin size.
     foreach f (chain/*.chain)
       grep chain $f | awk '{print $2;}' | sort -nr > /tmp/score.$f:t:r
       echo $f:t:r >> hist5000.out
       textHistogram -binSize=5000 /tmp/score.$f:t:r >> hist5000.out
       echo ""
     end
 
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/blastz.fr1/axtChain/chain
     hgLoadChain tetNig1 chr1_chainFr1 chr1.chain
 # no RefSeqs for Tetraodon so use mrna table
 # featureBits -chrom=chr1 tetNig1 all_mrna chainFr1Link -enrichment
 # all_mrna 2.637%, chainFr1Link 76.099%, both 2.320%, cover 87.97%, enrich 1.16x
   
     # try filtering with minScore < 5000
     ssh kksilo
     cd /cluster/data/tetNig1/bed/blastz.fr1/axtChain
     mv all.chain all.chain.unfiltered
     chainFilter -minScore=5000 all.chain.unfiltered > all.chain
     chainSplit chainFilt5k all.chain
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/blastz.fr1/axtChain/chainFilt5k
     hgLoadChain tetNig1 chr1_chainFr1Filt5k chr1.chain
 
 # featureBits -chrom=chr1 tetNig1 all_mrna chainFr1Filt5kLink -enrichment
 # all_mrna 2.637%, chainFr1Filt5kLink 75.999%, both 2.319%, cover 87.95%, 
 # enrich 1.16x
 # very little difference in the coverage so use the filtered version
     ssh hgwdev
     rm -r chain
     mv chainFilt5k chain
     cd /cluster/data/tetNig1/bed/blastz.fr1/axtChain/chain
     foreach i (*.chain)
         set c = $i:r
         hgLoadChain tetNig1 ${c}_chainFr1 $i
         echo done $c
     end
 
 # NET FUGU (FR1) BLASTZ (DONE, 2004-09-01, hartera)
 # RELOADED NET WITH CORRECT TABLE NAME (2004-09-02, hartera)
     ssh kksilo
     cd /cluster/data/tetNig1/bed/blastz.fr1/axtChain
     mkdir preNet
     cd chain
     foreach i (*.chain)
        echo preNetting $i
        /cluster/bin/i386/chainPreNet $i ../../S1.len ../../S2.len \
                                      ../preNet/$i
     end
                                                                                 
     cd ..
     mkdir n1
     cd preNet
     foreach i (*.chain)
       set n = $i:r.net
       echo primary netting $i
       /cluster/bin/i386/chainNet $i -minSpace=1 ../../S1.len ../../S2.len \
                                  ../n1/$n /dev/null
     end
     cd ..
     cat n1/*.net | /cluster/bin/i386/netSyntenic stdin noClass.net
     # memory usage 300011520, utime 1360 s/100, stime 247
 
 # Add classification info using db tables:
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/blastz.fr1/axtChain
     # netClass looks for ancient repeats in one of the databases
     # hg17 has this table - hand-curated by Arian but this is for
     # human-rodent comparisons so do not use here, use -noAr option
     
     time netClass noClass.net tetNig1 fr1 fuguFr1.net -noAr
     netFilter -minGap=10 fuguFr1.net |  hgLoadNet tetNig1 netFr1 stdin
 
 # MAKE Human Proteins track
     ssh kksilo
     mkdir -p /cluster/data/tetNig1/blastDb
     cd /cluster/data/tetNig1
     cat */*/*.lft > jkStuff/subLift.lft
     for i in */*/*_*_*.fa; do ln -s `pwd`/$i blastDb; done  
     # remove *_random_#.fa (not listed)
     cd blastDb
     for i in *.fa; do formatdb -i $i -p F 2> /dev/null; done
     rm *.fa 
 
     ssh kkr1u00
     mkdir -p /iscratch/i/tetNig1/blastDb
     cd /cluster/data/tetNig1/blastDb
     cp * /iscratch/i/tetNig1/blastDb
     (~kent/bin/iSync) 2>&1 > sync.out
     
     mkdir -p /cluster/data/tetNig1/bed/tblastn.hg17KG
     cd /cluster/data/tetNig1/bed/tblastn.hg17KG
     ls -1S /iscratch/i/tetNig1/blastDb/*.nsq | sed "s/\.nsq//" > tetra.lst
     exit
 
     # back to kksilo
     cd /cluster/data/tetNig1/bed/tblastn.hg17KG
     mkdir kgfa
     # calculate a reasonable number of jobs
     calc `wc /cluster/data/hg17/bed/blat.hg17KG/hg17KG.psl | awk "{print \\\$1}"`/\(150000/`wc tetra.lst | awk "{print \\\$1}"`\)
     # 40262/(150000/979) = 262.776653
     split -l 263 /cluster/data/hg17/bed/blat.hg17KG/hg17KG.psl kgfa/kg
     cd kgfa
     for i in *; do pslxToFa $i $i.fa; rm $i; done
     cd ..
     ls -1S kgfa/*.fa > kg.lst
     mkdir blastOut
     for i in `cat kg.lst`; do  mkdir blastOut/`basename $i .fa`; done
     tcsh
     cat << '_EOF_' > blastGsub
 #LOOP
 blastSome $(path1) {check in line $(path2)} {check out exists blastOut/$(root2)/q.$(root1).psl } 
 #ENDLOOP
 '_EOF_'
     cat << '_EOF_' > blastSome
 #!/bin/sh
 BLASTMAT=/iscratch/i/blast/data
 export BLASTMAT
 g=`basename $2`
 f=/tmp/`basename $3`.$g
 for eVal in 0.1 0.01 0.001 0.0001 0.00001 0.000001 1E-09 1E-11
 do
 if /scratch/blast/blastall -M BLOSUM80 -m 0 -F no -e $eVal -p tblastn -d $1 -i $2 -o $f.8
 then
         mv $f.8 $f.1
         break;
 fi
 done
 if test -f  $f.1
 then
 if /cluster/bin/i386/blastToPsl $f.1 $f.2
 then
         liftUp -nosort -type=".psl" -nohead $f.3 /cluster/data/tetNig1/jkStuff/subLift.lft warn $f.2
         liftUp -nosort -type=".psl" -nohead $f.4 /cluster/data/tetNig1/jkStuff/liftAll.lft warn $f.3
         liftUp -nosort -type=".psl" -pslQ -nohead $3.tmp /cluster/data/hg17/bed/blat.hg17KG/protein.lft warn $f.4
         mv $3.tmp $3
         rm -f $f.1 $f.2 $f.3 $f.4
         exit 0
     fi
 fi
 rm -f $f.1 $f.2 $3.tmp $f.8 $f.3 $f.4
 exit 1
 '_EOF_'
 
     chmod +x blastSome
     gensub2 tetra.lst kg.lst blastGsub blastSpec
 
     # exit from tcsh
     exit 
     ssh kk
     cd /cluster/data/tetNig1/bed/tblastn.hg17KG
     para create blastSpec
     para try, push
 # Completed: 144760 of 144760 jobs
 # CPU time in finished jobs:   22218348s  370305.81m  6171.76h  257.16d  0.705 y
 # IO & Wait Time:               3507367s   58456.11m   974.27h   40.59d  0.111 y
 # Average job time:                 178s       2.96m     0.05h    0.00d
 # Longest job:                     2810s      46.83m     0.78h    0.03d
 # Submission to last job:         41709s     695.15m    11.59h    0.48d
 
     tcsh
     cat << '_EOF_' > chainGsub
 #LOOP
 chainSome $(path1)
 #ENDLOOP
 '_EOF_'
 
     cat << '_EOF_' > chainSome
 (cd $1; cat q.*.psl | simpleChain -prot -outPsl -maxGap=50000 stdin ../c.`basename $1`.psl)
 '_EOF_'
     chmod +x chainSome
 
     ls -1dS `pwd`/blastOut/kg?? > chain.lst
     gensub2 chain.lst single chainGsub chainSpec
 
     ssh kki
     cd /cluster/data/tetNig1/bed/tblastn.hg17KG
     para create chainSpec
     para push
 # Completed: 261 of 261 jobs
 # CPU time in finished jobs:        828s      13.81m     0.23h    0.01d  0.000 y
 # IO & Wait Time:                 19521s     325.34m     5.42h    0.23d  0.001 y
 # Average job time:                  78s       1.30m     0.02h    0.00d
 # Longest job:                      270s       4.50m     0.07h    0.00d
 # Submission to last job:          3878s      64.63m     1.08h    0.04d
 
     exit
     # back to kksilo
     cd /cluster/data/tetNig1/bed/tblastn.hg17KG/blastOut
     for i in kg??
     do 
 	awk "(\$13 - \$12)/\$11 > 0.6 {print}" c.$i.psl > c60.$i.psl
 	sort -rn c60.$i.psl | pslUniq stdin u.$i.psl
 	awk "((\$1 / \$11) ) > 0.60 { print   }" c60.$i.psl > m60.$i.psl
 	echo $i
     done
 
     cat u.*.psl m60* | liftUp -nosort -type=".psl" -nohead stdout /cluster/data/tetNig1/jkStuff/liftAll.lft warn stdin |  \
     	sort -T /tmp -k 14,14 -k 17,17n -k 17,17n  | uniq  > ../blastHg17KG.psl
     cd ..
 
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/tblastn.hg17KG
     hgLoadPsl tetNig1 blastHg17KG.psl
     exit
 
     # back to kksilo
     rm -rf blastOut
 
 # End tblastn
 
 # MAKE DOWNLOADABLE SEQUENCE FILES (DONE, 2004-09-08, hartera)
     ssh kksilo
     cd /cluster/data/tetNig1
     #- Build the .zip files
     cat << '_EOF_' > jkStuff/zipAll.csh
 rm -rf zip
 mkdir zip
 # chrom AGP's
 zip -j zip/chromAgp.zip [0-9A-Z]*/chr*.agp
 # chrom RepeatMasker out files
 zip -j zip/chromOut.zip [0-9A-Z]*/chr*.fa.out
 # soft masked chrom fasta
 zip -j zip/chromFa.zip [0-9A-Z]*/chr*.fa
 # hard masked chrom fasta
 zip -j zip/chromFaMasked.zip [0-9A-Z]*/chr*.fa.masked
 # chrom TRF output files
 cd bed/simpleRepeat
 zip ../../zip/chromTrf.zip trfMask/chr*.bed
 cd ../..
 # get GenBank native mRNAs
 cd /cluster/data/genbank
 ./bin/i386/gbGetSeqs -db=tetNig1 -native GenBank mrna \
         /cluster/data/tetNig1/zip/mrna.fa
 # get GenBank xeno mRNAs
 ./bin/i386/gbGetSeqs -db=tetNig1 -xeno GenBank mrna \
         /cluster/data/tetNig1/zip/xenoMrna.fa
 # get native GenBank ESTs
 ./bin/i386/gbGetSeqs -db=tetNig1 -native GenBank est \
 /cluster/data/tetNig1/zip/est.fa
 
 cd /cluster/data/tetNig1/zip
 # zip GenBank native and xeno mRNAs and native ESTs
 zip -j mrna.zip mrna.fa
 zip -j xenoMrna.zip xenoMrna.fa
 zip -j est.zip est.fa
 '_EOF_'
     # << this line makes emacs coloring happy
     nice csh ./jkStuff/zipAll.csh |& tee ./jkStuff/zipAll.log
     cd jkStuff
     #- Look at zipAll.log to make sure all file lists look reasonable.
     # Copy the .zip files to
     # hgwdev:/usr/local/apache/...
     cd /cluster/data/tetNig1/zip
     #- Check zip file integrity:
     foreach f (*.zip)
       unzip -t $f > $f.test
       tail -1 $f.test
     end
     wc -l *.zip.test
 # 29 chromAgp.zip.test
 # 29 chromFa.zip.test
 # 29 chromFaMasked.zip.test
 # 29 chromOut.zip.test
 # 87 chromTrf.zip.test
 #  3 est.zip.test
 #  3 mrna.zip.test
 #  3 xenoMrna.zip.test
 #  212 total
 
     ssh hgwdev
     /cluster/data/tetNig1/zip
     set gp = /usr/local/apache/htdocs/goldenPath/tetNig1
     mkdir -p $gp/bigZips
     cp -p *.zip $gp/bigZips
     mkdir -p $gp/chromosomes
     foreach f (../*/chr*.fa)
        zip -j $gp/chromosomes/$f:t.zip $f
     end
     cd $gp/bigZips
     md5sum *.zip > md5sum.txt
     cd $gp/chromosomes
     md5sum *.zip > md5sum.txt
     # Take a look at bigZips/* and chromosomes/*, update their README.txt's
 
 # MAKE VSHG17 DOWNLOADABLES (DONE, 2004-09-08, hartera)
     ssh kksilo
     # zip chains and nets
     cd /cluster/data/tetNig1/bed/blastz.hg17.swap/axtChain
     cp all.chain hg17.chain
     zip -j /cluster/data/tetNig1/zip/hg17.chain.zip hg17.chain
     rm hg17.chain
     zip -j /cluster/data/tetNig1/zip/hg17.net.zip hg17.net
 
     ssh hgwdev
     # copy chains and nets to downloads area
     set gp = /usr/local/apache/htdocs/goldenPath/tetNig1
     mkdir -p $gp/vsHg17
     cd $gp/vsHg17
     mv /cluster/data/tetNig1/zip/hg17*.zip .
     md5sum *.zip > md5sum.txt
 
     # move axt files to downloads area and zip
     cd /cluster/data/tetNig1/bed/blastz.hg17.swap/axtChrom
     mkdir -p $gp/vsHg17/axtChrom
     cp -p *.axt $gp/vsHg17/axtChrom
     cd $gp/vsHg17/axtChrom
     gzip *.axt
     md5sum *.gz > md5sum.txt
 
     # Copy over & edit README.txt w/pointers to chain, net formats.
 
 # MAKE VSFR1 DOWNLOADABLES (DONE, 2004-09-08, hartera)
     ssh kksilo
     # zip chains and nets
     cd /cluster/data/tetNig1/bed/blastz.fr1/axtChain
     cp all.chain fr1.chain
     zip -j /cluster/data/tetNig1/zip/fr1.chain.zip fr1.chain
     rm fr1.chain
     cp fuguFr1.net fr1.net
     zip -j /cluster/data/tetNig1/zip/fr1.net.zip fr1.net
     rm fr1.net
 
     ssh hgwdev
     # copy chains and nets to downloads area
     set gp = /usr/local/apache/htdocs/goldenPath/tetNig1
     mkdir -p $gp/vsFr1
     cd $gp/vsFr1
     mv /cluster/data/tetNig1/zip/fr1*.zip .
     md5sum *.zip > md5sum.txt
 
     # move axt files to downloads area and zip
     cd /cluster/data/tetNig1/bed/blastz.fr1/axtChrom
     mkdir -p $gp/vsFr1/axtChrom
     cp -p *.axt $gp/vsFr1/axtChrom
     cd $gp/vsFr1/axtChrom
     gzip *.axt
     md5sum *.gz > md5sum.txt
 
     # Copy over & edit README.txt w/pointers to chain, net formats.
 
 # BLASTZ SWAP FOR mm5 vs tetNig1 BLASTZ TO CREATE tetNig1 vs MOUSE mm5 BLASTZ
 # (DONE, 2004-09-08, hartera)
     ssh kksilo
     mkdir -p /cluster/store8/tetNig1/blastz.mm5.swap.2004-09-08
     ln -s /cluster/store8/tetNig1/blastz.mm5.swap.2004-09-08 \
           /cluster/data/tetNig1/bed
     ln -s /cluster/data/tetNig1/bed/blastz.mm5.swap.2004-09-08 \
           /cluster/data/tetNig1/bed/blastz.mm5.swap
     ssh kolossus
     cd /cluster/data/tetNig1/bed/blastz.mm5.swap
     # use axtChrom from blastzTetNig1 on mm5
     set aliDir = /cluster/data/mm5/bed/blastz.tetNig1
     cp $aliDir/S1.len S2.len
     cp $aliDir/S2.len S1.len
     mkdir unsorted axtChrom
 
     cat $aliDir/axtChrom/chr*.axt \
     | axtSwap stdin $aliDir/S1.len $aliDir/S2.len stdout \
     | axtSplitByTarget stdin unsorted
     # Sort the shuffled .axt files.
     foreach f (unsorted/*.axt)
       echo sorting $f:t:r
       axtSort $f axtChrom/$f:t
     end
     du -sh $aliDir/axtChrom unsorted axtChrom
     # 522M    /cluster/data/mm5/bed/blastz.tetNig1/axtChrom
     # 523M    unsorted
     # 522M    axtChrom
     rm -r unsorted
     # translate sorted axt files into psl
     ssh kolossus
     cd /cluster/data/tetNig1/bed/blastz.mm5.swap
     mkdir -p pslChrom
     set tbl = "blastzMm5"
     foreach f (axtChrom/chr*.axt)
       set c=$f:t:r
       echo "Processing chr $c"
       /cluster/bin/i386/axtToPsl $f S1.len S2.len pslChrom/${c}_${tbl}.psl
     end
     # Load database tables
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/blastz.mm5.swap/pslChrom
     foreach f (./*.psl)
       /cluster/bin/i386/hgLoadPsl tetNig1 $f
       echo "$f Done"
     end
 # featureBits -chrom=chr1 tetNig1 all_mrna blastzMm5 -enrichment
 # all_mrna 2.637%, blastzMm5 12.633%, both 0.852%, cover 32.30%, enrich 2.56x
 
 # CHAIN MOUSE (mm5) BLASTZ (DONE, 2004-09-09, hartera)
     # Run axtChain on little cluster
     ssh kki
     cd /cluster/data/tetNig1/bed/blastz.mm5.swap
     mkdir -p axtChain/run1
     cd axtChain/run1
     mkdir out chain
     ls -1S /cluster/data/tetNig1/bed/blastz.mm5.swap/axtChrom/*.axt \
                     > input.lst
     cat << '_EOF_' > gsub
 #LOOP
 doChain {check in exists $(path1)} {check out line+ chain/$(root1).chain} {check out line+ out/$(root1).out}
 #ENDLOOP
 '_EOF_'
     # << this line makes emacs coloring happy
                                                                                 
     # Make our own linear gap file with reduced gap penalties,
     # in hopes of getting longer chains - works well for species at
     # chicken-human distance or greater
     cat << '_EOF_' > ../../chickenHumanTuned.gap
 tablesize^V     11
 smallSize^V     111
 position^V      1^V     2^V     3^V     11^V    111^V   2111^V  12111^V 32111^V
 72111^V 152111^V        252111
 qGap^V  325^V   360^V   400^V   450^V   600^V   1100^V  3600^V  7600^V  15600^V
 31600^V 56600
 tGap^V  325^V   360^V   400^V   450^V   600^V   1100^V  3600^V  7600^V  15600^V
 31600^V 56600
 bothGap^V       625^V   660^V   700^V   750^V   900^V   1400^V  4000^V  8000^V
 16000^V 32000^V 57000
 '_EOF_'
 # << this line makes emacs coloring happy
                                                                                 
 cat << '_EOF_' > doChain
 #!/bin/csh
 axtChain -linearGap=../../chickenHumanTuned.gap $1 \
     /iscratch/i/tetNig1/nib \
     /iscratch/i/mus/mm5/softNib $2 >& $3 
 '_EOF_'
     # << this line makes emacs coloring happy
     chmod a+x doChain
     gensub2 input.lst single gsub jobList
     para create jobList
     para try, check, push, check...
 # para time
 # Completed: 27 of 27 jobs
 # CPU time in finished jobs:       2124s      35.40m     0.59h    0.02d  0.000 y
 # IO & Wait Time:                    59s       0.99m     0.02h    0.00d  0.000 y
 # Average job time:                  81s       1.35m     0.02h    0.00d
 # Longest job:                      123s       2.05m     0.03h    0.00d
 # Submission to last job:          3725s      62.08m     1.03h    0.04d
 
    # now on the cluster server, sort chains
     ssh kksilo
     cd /cluster/data/tetNig1/bed/blastz.mm5.swap/axtChain
     chainMergeSort run1/chain/*.chain > all.chain
     chainSplit chain all.chain
                                                                                 
     # take a look at score distr's
     foreach f (chain/*.chain)
       grep chain $f | awk '{print $2;}' | sort -nr > /tmp/score.$f:t:r
       echo $f:t:r >> hist5000.out
       textHistogram -binSize=5000 /tmp/score.$f:t:r >> hist5000.out
       echo ""
     end
     # apart from chrUn_random, not too many chains with score < 5000
     # load chr 1 into table
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/blastz.mm5.swap/axtChain/chain
     hgLoadChain tetNig1 chr1_chainMm5 chr1.chain
 # no RefSeqs for Tetraodon so use mrna table
 # featureBits -chrom=chr1 tetNig1 all_mrna chainMm5Link -enrichment
 # all_mrna 2.637%, chainMm5Link 11.552%, both 0.823%, cover 31.19%, enrich 2.70x
     # try filtering with minScore < 5000
     ssh kksilo
     cd /cluster/data/tetNig1/bed/blastz.mm5.swap/axtChain
     mv all.chain all.chain.unfiltered
     chainFilter -minScore=5000 all.chain.unfiltered > all.chain
     chainSplit chainFilt5k all.chain
     
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/blastz.mm5.swap/axtChain/chainFilt5k
     hgLoadChain tetNig1 chr1_chainMm5Filt5k chr1.chain
 # featureBits -chrom=chr1 tetNig1 all_mrna chainMm5Filt5kLink -enrichment
 # all_mrna 2.637%, chainMm5Filt5kLink 10.492%, both 0.780%, cover 29.58%, 
 # enrich 2.82x
 # use chains with minScore=5000 filtering
 # remove test chain tables for chr1
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/blastz.mm5.swap/axtChain
     hgsql -e "drop table chr1_chainMm5;" tetNig1
     hgsql -e "drop table chr1_chainMm5Link;" tetNig1
     hgsql -e "drop table chr1_chainMm5Filt5k;" tetNig1
     hgsql -e "drop table chr1_chainMm5Filt5kLink;" tetNig1
     rm -r chain
     mv chainFilt5k chain
 
     cd /cluster/data/tetNig1/bed/blastz.mm5.swap/axtChain/chain
     foreach i (*.chain)
         set c = $i:r
         hgLoadChain tetNig1 ${c}_chainMm5 $i
         echo done $c
     end
 
 # NET MOUSE (mm5) BLASTZ (DONE, 2004-09-09, hartera)
     ssh kksilo
     cd /cluster/data/tetNig1/bed/blastz.mm5.swap/axtChain
     mkdir preNet
     cd chain
     foreach i (*.chain)
        echo preNetting $i
        /cluster/bin/i386/chainPreNet $i ../../S1.len ../../S2.len \
                                      ../preNet/$i
     end
     cd ..
     mkdir n1
     cd preNet
     foreach i (*.chain)
       set n = $i:r.net
       echo primary netting $i
       /cluster/bin/i386/chainNet $i -minSpace=1 ../../S1.len ../../S2.len \
                                  ../n1/$n /dev/null
     end
     cd ..
     cat n1/*.net | /cluster/bin/i386/netSyntenic stdin noClass.net
     # memory usage 67969024, utime 336 s/100, stime 36
 
     # Add classification info using db tables:
     cd /cluster/data/tetNig1/bed/blastz.mm5.swap/axtChain
     # netClass looks for ancient repeats in one of the databases
     # hg17 has this table - hand-curated by Arian but this is for
     # human-rodent comparisons so do not use here, use -noAr option
     mkdir -p /cluster/bluearc/mm5/linSpecRep.notInTetraodon
     mkdir -p /cluster/bluearc/tetNig1/linSpecRep.notInMouse
     cp -p /iscratch/i/mm5/linSpecRep.notInTetraodon/* \
           /cluster/bluearc/mm5/linSpecRep.notInTetraodon
     cp -p /iscratch/i/tetNig1/linSpecRep.notInMouse/* \
           /cluster/bluearc/tetNig1/linSpecRep.notInMouse
 
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/blastz.mm5.swap/axtChain
     time netClass noClass.net tetNig1 mm5 mouseMm5.net \
           -tNewR=/cluster/bluearc/tetNig1/linSpecRep.notInMouse \
           -qNewR=/cluster/bluearc/mm5/linSpecRep.notInTetraodon -noAr
     # 50.960u 34.880s 2:25.82 58.8%   0+0k 0+0io 200pf+0w
     netFilter -minGap=10 mouseMm5.net |  hgLoadNet tetNig1 netMm5 stdin
     # featureBits tetNig1 all_mrna netHg17 -enrichment
     # all_mrna 3.204%, netHg17 42.716%, both 1.698%, cover 53.00%, enrich 1.24x
 
 # MAKE VSMM5 DOWNLOADABLES (DONE, 2004-09-10, hartera)
     ssh kksilo
     # zip chains and nets
     cd /cluster/data/tetNig1/bed/blastz.mm5.swap/axtChain
     cp all.chain mm5.chain
     zip -j /cluster/data/tetNig1/zip/mm5.chain.zip mm5.chain
     rm mm5.chain
     cp mouseMm5.net mm5.net
     zip -j /cluster/data/tetNig1/zip/mm5.net.zip mm5.net
     rm mm5.net
 
     ssh hgwdev
     # copy chains and nets to downloads area
     set gp = /usr/local/apache/htdocs/goldenPath/tetNig1
     mkdir -p $gp/vsMm5
     cd $gp/vsMm5
     mv /cluster/data/tetNig1/zip/mm5*.zip .
     md5sum *.zip > md5sum.txt
     # move axt files to downloads area and zip
     cd /cluster/data/tetNig1/bed/blastz.mm5.swap/axtChrom
     mkdir -p $gp/vsMm5/axtChrom
     cp -p *.axt $gp/vsMm5/axtChrom
     cd $gp/vsMm5/axtChrom
     gzip *.axt
     md5sum *.gz > md5sum.txt
 
     # Copy over & edit README.txt w/pointers to chain, net formats.
 
 # CLEANUP HUMAN (hg17) BLASTZ (DONE, 2004-09-10, hartera)
     ssh kksilo
     cd /cluster/data/tetNig1/bed/blastz.hg17.swap
     nice rm axtChain/run1/chain/* &
     nice gzip {axt,psl}Chrom/* axtChain/{all.chain,*.net} &
 
 # CLEANUP FUGU (fr1) BLASTZ (DONE, 2004-09-10, hartera)
     ssh kksilo
     cd /cluster/data/tetNig1/bed/blastz.fr1
     nice rm -rf raw &
     nice rm -rf lav &
     nice rm -rf test* &
     nice rm axtChain/run1/chain/* &
     nice gzip {axt,psl}Chrom/* axtChain/{all.chain,*.net} &
 
 # CLEANUP MOUSE (mm5) BLASTZ (DONE, 2004-09-10, hartera)
     ssh kksilo
     cd /cluster/data/tetNig1/bed/blastz.mm5
     nice rm -rf raw &
     nice rm -rf lav &
     nice rm -rf test* &
     nice rm axtChain/run1/chain/* &
     nice gzip {axt,psl}Chrom/* axtChain/{all.chain,*.net} &
 
 # GENSCAN GENE PREDICTION ANNOTATIONS (DONE, 2004-09-10, hartera)
 # Provided by Genoscope, Genscan was run on masked Tetraodon genomic DNA 
 # using parameters calibrated for Tetraodon genes.
 
     ssh kksilo
     mkdir -p /cluster/data/tetNig1/bed/Genoscope/genscan
     cd /cluster/data/tetNig1/bed/Genoscope/genscan
     awk '{if ($2 == "GSC") print;}'  /cluster/data/tetNig1/tetraodon.all.gff > genscan.gff 
     foreach c (`cat /cluster/data/tetNig1/chrom.lst`)
         echo "Processing chr$c ... "
         awk '{if ($1 == "'chr${c}'") print;}' genscan.gff >> chr${c}.gff
     end
 
     # need to remove CDS from name then load
     foreach f (./chr*.gff) 
        perl -pi.bak -e 's/CDS G/G/' $f 
     end
     # load database with gene predictions
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/Genoscope/genscan
     # Reloaded without -genePredExt 1/6/05:
     nice ldHgGene tetNig1 genscan chr*.gff
     # Read 56120 transcripts in 227294 lines in 27 files
     # 56120 groups 27 seqs 1 sources 2 feature types
     # 28060 gene predictions
 
 # GENOSCOPE GAZE ANNOTATIONS (DONE, 2004-09-10, hartera)
 # GAZE annotations have been automatically generated by the GAZE software 
 # (Howe et al.(2002). Genome Res., 12: 1418-27) using a custom designed gene 
 # model. GAZE integrates annotation information from Geneid, Genescan, 
 # Exofish (Human, Mouse, Fugu), Genewise (Human, Mouse) and cDNAs (Tetraodon).
 
     ssh kksilo
     mkdir -p /cluster/data/tetNig1/bed/Genoscope/gaze
     cd /cluster/data/tetNig1/bed/Genoscope/gaze
     awk '{if ($2 == "GSTEN") print;}' /cluster/data/tetNig1/tetraodon.all.gff > gaze.gff 
     # remove ; as separators between names of gene, protein, mRNA etc. 
 
     perl -pi.bak -e "s/\s\;\s/\t/g;" gaze.gff
     foreach c (`cat /cluster/data/tetNig1/chrom.lst`)
         echo "Processing chr$c ... "
         awk '{if ($1 == "'chr${c}'") print;}' gaze.gff >> chr${c}.gff
     end
     # need to remove CDS from name then load
     foreach f (./chr*.gff) 
        perl -pi.bak -e 's/CDS G/G/' $f 
     end
     # load database with gene predictions
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/Genoscope/gaze
     # loaded chr1 only into rahseq first to check then load all chroms
     # Reloaded without -genePredExt 1/6/05:
     nice ldHgGene tetNig1 gaze chr*.gff
 
     # GALT 09-15-04 index on name(10) going to name(17) as all have long prefix
     echo "drop index name on gaze ;" | hgsql tetNig1
     echo "create index name on gaze (name(17)) ;" | hgsql tetNig1
 	
 
 # Read 91905 transcripts in 265439 lines in 27 files
 # 91905 groups 27 seqs 1 sources 4 feature types
 # 27918 gene predictions
 
 # HOX GENES TRACK (DONE, 2004-09-10, hartera)
 # Provided by Genoscope: Tetraodon HOX genes were annotated by human experts 
 # using sequence similarities with human, mouse and zebrafish HOX protein and 
 # nucleic sequence.
 
     ssh kksilo
     mkdir -p /cluster/data/tetNig1/bed/Genoscope/hoxGenes
     cd /cluster/data/tetNig1/bed/Genoscope/hoxGenes
     awk '{if ($2 == "HOX") print;}' /cluster/data/tetNig1/tetraodon.all.gff > hoxGenes.gff 
     # remove ; as separators between names of gene, protein, mRNA etc. 
 
     perl -pi.bak -e "s/\s\;\s/\t/g;" hoxGenes.gff
     foreach c (`cat /cluster/data/tetNig1/chrom.lst`)
         echo "Processing chr$c ... "
         awk '{if ($1 == "'chr${c}'") print;}' hoxGenes.gff >> chr${c}.gff
     end
     # need to remove CDS from name then load
     foreach f (./chr*.gff) 
        perl -pi.bak -e 's/CDS H/H/' $f 
     end
     # load database with gene predictions
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/Genoscope/hoxGenes
     # loaded chr1 only into rahseq first to check then load all chroms
     # Reloaded without -genePredExt 1/6/05:
     nice ldHgGene tetNig1 hoxGenes chr*.gff
 # Read 96 transcripts in 142 lines in 27 files
 # 96 groups 6 seqs 1 sources 2 feature types
 # 48 gene predictions
 
 # CYTOKINE GENES TRACK (DONE, 2004-09-10, hartera)
 # Provided by Genoscope: Tetraodon Cytokine genes were annotated by human 
 # experts using sequence similarities with human, mouse and zebrafish 
 # Cytokine protein and nucleic sequence.
 
     ssh kksilo
     mkdir -p /cluster/data/tetNig1/bed/Genoscope/cytokines
     cd /cluster/data/tetNig1/bed/Genoscope/cytokines
     awk '{if ($2 == "CYT") print;}' /cluster/data/tetNig1/tetraodon.all.gff > cytokines.gff 
     # remove ; as separators between names of gene, protein, mRNA etc. 
 
     perl -pi.bak -e "s/\s\;\s/\t/g;" cytokines.gff
     foreach c (`cat /cluster/data/tetNig1/chrom.lst`)
         echo "Processing chr$c ... "
         awk '{if ($1 == "'chr${c}'") print;}' cytokines.gff >> chr${c}.gff
     end
     # need to remove CDS from name then load
     foreach f (./chr*.gff) 
        perl -pi.bak -e 's/CDS A/A/' $f 
     end
     # load database with gene predictions
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/Genoscope/cytokines
     # loaded chr1 only into rahseq first to check then load all chroms
     # Reloaded without -genePredExt 1/6/05:
     nice ldHgGene tetNig1 cytokines chr*.gff
 
 # GENEID GENES (DONE 9/22/04 angie)
     ssh kksilo
     mkdir /cluster/data/tetNig1/bed/geneid
     cd /cluster/data/tetNig1/bed/geneid
     foreach chr (`awk '{print $1;}' ../../chrom.sizes`)
       wget http://genome.imim.es/genepredictions/T.nigroviridis/tetNigFeb2004/geneid_v1.2/$chr.gtf
       wget http://genome.imim.es/genepredictions/T.nigroviridis/tetNigFeb2004/geneid_v1.2/$chr.prot
     end
     # Add ".1" suffix to each item in .prot's, to match transcript_id's in gtf
     cp /dev/null geneidPep.fa
     foreach f (chr*.prot)
       perl -wpe 's/^(>chr\S+)/$1.1/' $f >> geneidPep.fa
     end
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/geneid
     ldHgGene -gtf -genePredExt tetNig1 geneid chr*.gtf
     hgPepPred tetNig1 generic geneidPep geneidPep.fa
 
 # GENOSCOPE GENEWISE HUMAN IPI (DONE, 2004-10-14, hartera)
     ssh kksilo
     mkdir -p /cluster/data/tetNig1/bed/Genoscope/humanIPI
     cd /cluster/data/tetNig1/bed/Genoscope/humanIPI
     # Create README to describe data from Genoscope tetraodon.all.gff.readme
     awk '{if ($2 == "GWS_H") print;}' /cluster/data/tetNig1/tetraodon.all.gff \
         > humanIPI.gff
     # remove "exon" from name field 
     sed -e 's/exon GWS/GWS/g' humanIPI.gff > humanIPIformat.gff
 
     # load database with gene predictions
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/Genoscope/humanIPI
     nice ldHgGene -genePredExt tetNig1 humanIPI humanIPIformat.gff
     # Read 43384 transcripts in 185494 lines in 1 files
     # 43384 groups 27 seqs 1 sources 2 feature types
     # 21692 gene predictions
     # add to trackDb and write html page
 
 # GENOSCOPE HUMAN IPI ECOTIGS (DONE, 2004-10-18, hartera) 
     ssh kksilo
     mkdir -p /cluster/data/tetNig1/bed/Genoscope/humanIPIEcotigs
     cd /cluster/data/tetNig1/bed/Genoscope/humanIPIEcotigs
     # Create README to describe data from Genoscope tetraodon.all.gff.readme
     awk '{if ($2 == "EP3_H") print;}' /cluster/data/tetNig1/tetraodon.all.gff \
         > humanIPIEcotigs.gff
     # remove "exon" from name field 
     sed -e 's/exon EP3/EP3/g' humanIPIEcotigs.gff > humanIPIEcotigsformat.gff
 
     # load database with gene predictions - LOAD IN RAHSEQ FIRST TO CHECK
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/Genoscope/humanIPIEcotigs
     nice ldHgGene -genePredExt tetNig1 humanIPIEcotigs humanIPIEcotigsformat.gff
     # Read 55800 transcripts in 204355 lines in 1 files
     # 55800 groups 27 seqs 1 sources 2 feature types
     # 27900 gene predictions
 
 # GENOSCOPE FUGU (Takifugu) ECOTIGS (in progress, 2004-10-18, hartera) 
     ssh kksilo
     mkdir -p /cluster/data/tetNig1/bed/Genoscope/fuguEcotigs
     cd /cluster/data/tetNig1/bed/Genoscope/fuguEcotigs
     # Create README to describe data from Genoscope tetraodon.all.gff.readme
     awk '{if ($2 == "EG3_F") print;}' /cluster/data/tetNig1/tetraodon.all.gff \
         > fuguEcotigs.gff
  # remove "HSP" from name field, change HSP to "exon" and match to "transcript" 
     sed -e 's/HSP EG3/EG3/g' fuguEcotigs.gff | sed -e 's/HSP/exon/' \
            | sed -e 's/match/transcript/' > fuguEcotigsformat.gff
 
     # load database with gene predictions
     # load in when know which version of Fugu this is so can name appropriately
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/Genoscope/fuguEcotigs
     nice ldHgGene -genePredExt tetNig1 fuguEcotigs fuguEcotigsformat.gff
     # Read 35552 transcripts in 210128 lines in 1 files
     #   35552 groups 27 seqs 1 sources 2 feature types
     # 17776 gene predictions
 
 # Adding a photograph to the description
 #	Permission to use photos obtained from Hugues Roest Crollius:
 #	Email message attached below
     ssh hgwdev
     cd /cluster/data/tetNig1/html
     wget \
 	'http://www.genoscope.cns.fr/externe/Francais/Projets/Projet_C/img/tetra23.jpg' \
 	-O photo_tetraodon.jpg
     convert -crop 1320x600+280+40 -sharpen 0 -normalize \
 	-geometry 300x200 -quality 80 photo_tetraodon.jpg \
 	Tetraodon_nigroviridis.jpg
     cp -p Tetraodon_nigroviridis.jpg /usr/local/apache/htdocs/images
     #	add links to this image in the description.html page, request push
 
 # After checking with a couple of journals who have used our pictures for
 # their covers, it appears that none holds copyrights so you are free to use
 # any of the pictures on the followings pages:
 
 # http://www.genoscope.cns.fr/externe/English/Projets/Projet_C/album.html
 
 # For some of the images on the first page we can provide you with high
 # resolution versions if needed.
 
 # I hope this helps.
 
 # Best wishes,
 
 # Hugues Roest Crollius
 # -------------------------------------------------------------------------
 # Hugues Roest Crollius
 # Laboratoire Dynamique et Organisation des Genomes (Dyogen)
 # CNRS UMR8541
 # Departement de Biologie
 # Ecole Normale Superieure                        Tel: +33 (0)1 44 32 23 70
 # 46 rue d'Ulm, 75230 Paris Cedex 05              Email: hrc@ens.fr
 
 # SELF BLASTZ (DONE, 2005-01-12, hartera)
     ssh kksilo
     rm /cluster/data/tetNig1/bed/blastzSelf
     mkdir -p /cluster/store8/tetNig1/bed/blastzSelf.2005-01-11
     ln -s /cluster/store8/tetNig1/bed/blastzSelf.2005-01-11 \
           /cluster/data/tetNig1/bed/blastzSelf
     cd /cluster/data/tetNig1/bed/blastzSelf
     # try using contigs
     mkdir -p /cluster/data/tetNig1/bed/blastzSelf/contigSeqs
     cd /cluster/data/tetNig1/bed/blastzSelf/contigSeqs
     foreach c (`cat /cluster/data/tetNig1/chrom.lst`)
        foreach d (/cluster/data/tetNig1/$c/chr${c}*_?{,?})
       echo "splitting $d"
       set contig = $d:t
       echo "$contig"
       ~/bin/i386/faSplit gap $d/$contig.fa 500000 ${contig}_ -lift=$contig.lft \
             -minGapSize=100
       end
     end
     # 1025 files for chr*.fa and chr*.lft - 940 for chr*.fa, 85 for chr*.lft
     cat chr*.fa >> tetNig1Contigs.fa
     cat chr*.lft >> 500kbcontigs.lft
     rm chr*.fa chr*.lft
     
     ssh kkr1u00
     mkdir -p /iscratch/i/tetNig1/contigs
     mv /cluster/data/tetNig1/bed/blastzSelf/contigSeqs/tetNig1Contigs.fa \
        /iscratch/i/tetNig1/contigs
     iSync
 
     ssh kk
     cd /cluster/data/tetNig1/bed/blastzSelf
     # for this blastz run, M=50
     cat << '_EOF_' > DEF
 # tetraodon vs. tetraodon
 export PATH=/usr/bin:/bin:/usr/local/bin:/cluster/bin/penn:/cluster/home/angie/schwartzbin:/cluster/home/kent/bin/i386
 
 ALIGN=blastz-run1
 BLASTZ=blastz
 BLASTZ_L=5000
 BLASTZ_H=2500
 BLASTZ_ABRIDGE_REPEATS=0
 
 # TARGET
 # Tetraodon
 SEQ1_DIR=/iscratch/i/tetNig1/nib
 # not used
 SEQ1_RMSK=
 # not used
 SEQ1_FLAG=
 SEQ1_SMSK=
 SEQ1_IN_CONTIGS=0
 SEQ1_CHUNK=500000
 SEQ1_LAP=500
 
 # QUERY
 # Tetraodon
 SEQ2_DIR=/iscratch/i/tetNig1/contigs
 # not currently used
 SEQ2_RMSK=
 # not currently used
 SEQ2_FLAG=
 SEQ2_SMSK=
 SEQ2_IN_CONTIGS=1
 SEQ2_CHUNK=
 SEQ2_LAP=
 
 BASE=/cluster/data/tetNig1/bed/blastzSelf
 
 DEF=$BASE/DEF
 RAW=$BASE/raw
 CDBDIR=$BASE
 SEQ1_LEN=$BASE/S1.len
 SEQ2_LEN=$BASE/S2.len
 
 #DEBUG=1
 '_EOF_'
     # << this line makes emacs coloring happy                                                                                
     # Save the DEF file in the current standard place - TO BE DONE
     chmod +x DEF
     cp DEF ~angie/hummus/DEF.tetNig1-tetNig1.2005-01-11
     # prepare first cluster run
     bash # if a csh/tcsh user
     . ./DEF
     /cluster/data/tetNig1/jkStuff/BlastZ_run0.sh
     cd run.0
     # check batch looks ok then
     para try, check, push, check, ....
 # para time
 # Completed: 820 of 820 jobs
 # CPU time in finished jobs:    1611164s   26852.74m   447.55h   18.65d  0.051 y
 # IO & Wait Time:                 11448s     190.80m     3.18h    0.13d  0.000 y
 # Average job time:                1979s      32.98m     0.55h    0.02d
 # Longest job:                     4231s      70.52m     1.18h    0.05d
 # Submission to last job:         14753s     245.88m     4.10h    0.17d
 
     # second cluster run: lift raw alignments -> lav dir
     cd /cluster/data/tetNig1/bed/blastzSelf
     bash # if a csh/tcsh user
     . ./DEF
     /cluster/data/tetNig1/jkStuff/BlastZ_run1.sh
     cd run.1
     para try, check, push, check etc.
 # para time
 # Completed: 820 of 820 jobs
 # CPU time in finished jobs:       2741s      45.68m     0.76h    0.03d  0.000 y
 # IO & Wait Time:                 13346s     222.44m     3.71h    0.15d  0.000 y
 # Average job time:                  20s       0.33m     0.01h    0.00d
 # Longest job:                      106s       1.77m     0.03h    0.00d
 # Submission to last job:           926s      15.43m     0.26h    0.01d
 
     # third run: lav -> axt
     ssh kki
     cd /cluster/data/tetNig1/bed/blastzSelf
     mkdir axtChrom run.2
     cd run.2
     cat << '_EOF_' > do.csh
 #!/bin/csh -ef
 cd $1
 cat `ls -1 *.lav | sort -g` \
 | lavToAxt -fa stdin /iscratch/i/tetNig1/nib \
 /iscratch/i/tetNig1/contigs/tetNig1Contigs.fa stdout \
 | axtSort stdin $2
 '_EOF_'
     # << this line makes emacs coloring happy
     chmod a+x do.csh
     cat << '_EOF_' > gsub
 #LOOP
 ./do.csh {check in exists $(path1)} {check out line+ /cluster/data/tetNig1/bed/blastzSelf/axtChrom/$(root1).axt}
 #ENDLOOP
 '_EOF_'
     # << this line makes emacs coloring happy
     \ls -1Sd ../lav/chr* > chrom.list
     gensub2 chrom.list single gsub jobList
     wc -l jobList
     head jobList
     para create jobList
     para try, check, push, check,...
 # para time
 # Completed: 27 of 27 jobs
 # CPU time in finished jobs:        337s       5.61m     0.09h    0.00d  0.000 y
 # IO & Wait Time:                   324s       5.41m     0.09h    0.00d  0.000 y
 # Average job time:                  24s       0.41m     0.01h    0.00d
 # Longest job:                      273s       4.55m     0.08h    0.00d
 # Submission to last job:          1675s      27.92m     0.47h    0.02d
 
     # do liftup here
     #- Lift up the 500KB chunk .out's to 5MB ("pseudo-contig") level
     ssh kksilo
     cd /cluster/data/tetNig1/bed/blastzSelf
     
     foreach d (axtChrom/chr*.axt)
       echo $d
       liftUp -axtQ ${d}.out.axt ./contigSeqs/500kbcontigs.lft warn $d \
         > /dev/null
     end
 
     #- Lift pseudo-contigs to chromosome level
     # check this is working correctly
     set tetra = "/cluster/data/tetNig1"
     foreach c (`cat ${tetra}/chrom.lst`)
       echo lifting $c
       liftUp -axtQ ./axtChrom/chr${c}.out2.axt \
          $tetra/jkStuff/liftAll.lft warn \
         ./axtChrom/chr${c}.axt.out.axt > /dev/null
     end
     cd axtChrom
     # need to drop Self alignments after lifting up co-ordinates
     foreach c (`cat ${tetra}/chrom.lst`)
       echo "Processing $c..."
       axtDropSelf chr${c}.out2.axt chr${c}.dropped.axt
       mv chr${c}.axt chr${c}.axt.old
       mv chr${c}.dropped.axt chr${c}.axt
       rm chr${c}.axt.out.axt
     end
     # translate sorted axt files into psl
     ssh kolossus
     cd /cluster/data/tetNig1/bed/blastzSelf
     mkdir -p pslChrom
     set tbl = "blastzSelf"
     foreach f (axtChrom/chr*.axt)
       set c=$f:t:r
       echo "Processing chr $c"
       /cluster/bin/i386/axtToPsl $f S1.len S1.len pslChrom/${c}_${tbl}.psl
     end
     # Load database tables
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/blastzSelf/pslChrom
     foreach f (./*.psl)
       /cluster/bin/i386/hgLoadPsl -noTNameIx tetNig1 $f
       echo "$f Done"
     end
 # not in contigs: blastzSelf
 # featureBits -chrom=chr1 tetNig1 all_mrna blastzSelf -enrichment
 # all_mrna 2.601%, blastzSelf 16.728%, both 0.654%, cover 25.13%, enrich 1.50x
 # featureBits -chrom=chr1 tetNig1 blastzSelf
 # 2704737 bases of 16169091 (16.728%) in intersection
 
 # blastzSelfC, query is in contigs, M=50
 # featureBits -chrom=chr1 tetNig1 all_mrna blastzSelfC -enrichment
 # all_mrna 2.601%, blastzSelfC 16.208%, both 0.638%, cover 24.51%, enrich 1.51x
 # featureBits -chrom=chr1 tetNig1 blastzSelfC 
 # 2620707 bases of 16169091 (16.208%) in intersection
 
 # blastzSelfCM100, query is in contigs, M=100
 # featureBits -chrom=chr1 tetNig1 all_mrna blastzSelfCM100 -enrichment
 # all_mrna 2.601%, blastzSelfCM100 16.319%, both 0.639%, cover 24.56%, 
 # enrich 1.51x
 # featureBits -chrom=chr1 tetNig1 blastzSelfCM100
 # 2638606 bases of 16169091 (16.319%) in intersection
 # gain little by using M=100 except for a lot of extra alignments so use 
 # M=50 instead.
 # L=5000
 # featureBits -chrom=chr1 tetNig1 blastzSelfL5k
 # 2488953 bases of 16169091 (15.393%) in intersection
 # L=8000, L8k
 # featureBits -chrom=chr1 tetNig1 blastzSelfL8k
 # 2303191 bases of 16169091 (14.244%) in intersection 
 # rows in table
 # blastzSelf: 517242
 # blastzSelfC: 86073
 # blastzSelfCM100: 132429
 # blastzSelfCL5k: 62933
 # blastzSelfL8k: 43137
 
 # use blastz with contigs, M=50 and L=5000. Using L=5000 removes some of the 
 # short low scoring alignments but not too stringently. After chaining, 
 # further removal of low scoring alignments can be done.
 
 # CHAIN SELF BLASTZ (DONE, 2005-01-14, hartera)
     # Run axtChain on little cluster
     ssh kki 
     cd /cluster/data/tetNig1/bed/blastzSelf
     mkdir -p axtChain/run1
     cd axtChain/run1
     mkdir out chain
     rm axtChrom/chr*.out2.axt
     rm axtChrom/chr*.old
     ls -1S /cluster/data/tetNig1/bed/blastzSelf/axtChrom/*.axt \
         > input.lst
 cat << '_EOF_' > gsub
 #LOOP
 doChain {check in exists $(path1)} {check out line+ chain/$(root1).chain} {check out line+ out/$(root1).out}
 #ENDLOOP
 '_EOF_'
     # << this line makes emacs coloring happy
 # axtChain now using human/chicken gap scoring matrix as default
 cat << '_EOF_' > doChain
 #!/bin/csh
 axtChain $1 \
     /iscratch/i/tetNig1/nib \
     /iscratch/i/tetNig1/nib $2 >& $3
 '_EOF_'
     # << this line makes emacs coloring happy
     chmod a+x doChain
     gensub2 input.lst single gsub jobList
     para create jobList
     para try, check, push, check...
 # Completed: 27 of 27 jobs
 # CPU time in finished jobs:        864s      14.40m     0.24h    0.01d  0.000 y
 # IO & Wait Time:                   137s       2.28m     0.04h    0.00d  0.000 y
 # Average job time:                  37s       0.62m     0.01h    0.00d
 # Longest job:                      559s       9.32m     0.16h    0.01d
 # Submission to last job:          1758s      29.30m     0.49h    0.02d
 
     # now on the cluster server, sort chains
     ssh kksilo
     cd /cluster/data/tetNig1/bed/blastzSelf/axtChain
     chainMergeSort run1/chain/*.chain > all.chain
     chainSplit chainRaw all.chain
     # apply chainAntiRepeat to remove chains that are the result of repeats
     # and degenerate DNA 
     mkdir chain
     cd chainRaw
     foreach f (*.chain)
        set c = $f:r
        echo $c
        nice chainAntiRepeat /cluster/bluearc/tetNig1/nib \
                        /cluster/bluearc/tetNig1/nib $f \
                        ../chain/$c.chain
     end
     cd ..
     # take a look at score distr's,try also with larger bin size.
     foreach f (chain/*.chain)
       grep chain $f | awk '{print $2;}' | sort -nr > /tmp/score.$f:t:r
       echo $f:t:r >> hist10k.out
       textHistogram -binSize=10000 /tmp/score.$f:t:r >> hist10k.out
       echo ""
     end
     # trim to minScore=10000 to cut some of the fluff
     mkdir chainFilt
     foreach f (chain/*.chain)
       chainFilter -minScore=10000 $f > chainFilt/$f:t
     end
     # load tables    
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/blastzSelf/axtChain/chainFilt
     foreach i (*.chain)
         set c = $i:r
         hgLoadChain tetNig1 ${c}_chainSelf $i
         echo done $c
     end
 # filtering on minScore=5000
 # featureBits -chrom=chr1 tetNig1 chainSelf5kLink
 # 2377755 bases of 16169091 (14.706%) in intersection
 # rows = 362482
 # filtering on minScore=10000, use this
 # featureBits -chrom=chr1 tetNig1 chainSelfLink
 # 2199465 bases of 16169091 (13.603%) in intersection
 # rows = 233729
 
 # NET SELF BLASTZ (DONE, 2005-01-14, hartera)
     ssh kksilo
     cd /cluster/data/tetNig1/bed/blastzSelf/axtChain
     mkdir preNet
     cd chainFilt
     foreach i (*.chain)
        echo preNetting $i
        /cluster/bin/i386/chainPreNet $i ../../S1.len ../../S1.len \
                                      ../preNet/$i
     end
     cd ..
     mkdir n1
     cd preNet
     foreach i (*.chain)
       set n = $i:r.net
       echo primary netting $i
       /cluster/bin/i386/chainNet $i -minSpace=1 ../../S1.len ../../S1.len \
                                  ../n1/$n /dev/null
     end
     cd ..
     cat n1/*.net | /cluster/bin/i386/netSyntenic stdin noClass.net
     # memory usage 78983168, utime 485 s/100, stime 55
 
     # Add classification info using db tables:
     # netClass looks for ancient repeats in one of the databases
     # hg17 has this table - hand-curated by Arian but this is for
     # human-rodent comparisons so do not use here, use -noAr option
 
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/blastzSelf/axtChain
     time netClass noClass.net tetNig1 tetNig1 self.net -noAr
     # 14.530u 4.550s 0:31.26 61.0%    0+0k 0+0io 218pf+0w
 
     netFilter -minGap=10 self.net |  hgLoadNet tetNig1 netSelf stdin
     # featureBits tetNig1 netSelf
     # 184618536 bases of 342403326 (53.918%) in intersection
 
 # MAKE Human Proteins track (hg17 DONE 2005-1-15 braney)
     ssh kksilo
     mkdir -p /cluster/data/tetNig1/blastDb
     cd /cluster/data/tetNig1/blastDb
     for i in `cat ../chrom.lst`; do ls -1 ../$i/*/*.fa  ; done | sed "/Un_random/d" > list
     for i in `cat list`
     do
 	ln -s $i .
 	formatdb -i `basename $i` -p F
     done
     rm *.log *.fa list
     cd ..
     for i in `cat chrom.lst`; do cat  $i/chr*/*.lft  ; done > jkStuff/subChr.lft
 
     ssh kkr1u00
     mkdir -p /iscratch/i/tetNig1/blastDb
     cd /cluster/data/tetNig1/blastDb
     for i in nhr nin nsq; do cp *.$i /iscratch/i/tetNig1/blastDb     ; echo $i; done
 
     cd
     iSync > sync.out
 
     mkdir -p /cluster/data/tetNig1/bed/tblastn.hg17KG
     cd /cluster/data/tetNig1/bed/tblastn.hg17KG
     echo  /iscratch/i/tetNig1/blastDb/*.nsq  | xargs ls -S | sed "s/\.nsq//"  > query.lst  
     # back to kksilo
     exit
 
     cd /cluster/data/tetNig1/bed/tblastn.hg17KG
     calc `wc /cluster/data/hg17/bed/blat.hg17KG/hg17KG.psl | awk "{print \\\$1}"`/\(300000/`wc query.lst | awk "{print \\\$1}"`\)
     # 42156/(300000/593) = 83.328360
     mkdir -p /cluster/bluearc/tetNig1/bed/tblastn.hg17KG/kgfa
     split -l 83 /cluster/data/hg17/bed/blat.hg17KG/hg17KG.psl /cluster/bluearc/tetNig1/bed/tblastn.hg17KG/kgfa/kg
     ln -s /cluster/bluearc/tetNig1/bed/tblastn.hg17KG/kgfa kgfa
     cd kgfa
     for i in *; do pslxToFa $i $i.fa; rm $i; done
     cd ..
     ls -1S kgfa/*.fa > kg.lst
     mkdir -p /cluster/bluearc/tetNig1/bed/tblastn.hg17KG/blastOut
     ln -s /cluster/bluearc/tetNig1/bed/tblastn.hg17KG/blastOut
     for i in `cat kg.lst`; do  mkdir blastOut/`basename $i .fa`; done
     tcsh
     cat << '_EOF_' > blastGsub
 #LOOP
 blastSome $(path1) {check in line $(path2)} {check out exists blastOut/$(root2)/q.$(root1).psl } 
 #ENDLOOP
 '_EOF_'
     cat << '_EOF_' > blastSome
 #!/bin/sh
 BLASTMAT=/iscratch/i/blast/data
 export BLASTMAT
 g=`basename $2`
 f=/tmp/`basename $3`.$g
 for eVal in 0.01 0.001 0.0001 0.00001 0.000001 1E-09 1E-11
 do
 if /scratch/blast/blastall -M BLOSUM80 -m 0 -F no -e $eVal -p tblastn -d $1 -i $2 -o $f.8
 then
         mv $f.8 $f.1
         break;
 fi
 done
 if test -f  $f.1
 then
     if /cluster/bin/i386/blastToPsl $f.1 $f.2
     then
 	liftUp -nosort -type=".psl" -nohead $f.3 ../../jkStuff/subChr.lft carry $f.2       
         liftUp -nosort -type=".psl" -nohead $f.4 ../../jkStuff/liftAll.lft carry $f.3       
         liftUp -nosort -type=".psl" -pslQ -nohead $3.tmp /cluster/data/hg17/bed/blat.hg17KG/protein.lft warn $f.4       
         if pslCheck -prot $3.tmp                                                                          
         then                                                                                              
             mv $3.tmp $3                                                                                  
             rm -f $f.1 $f.2 $f.3 $f.4                                                                              
         fi
         exit 0                                                                                            
     fi                                                                                                    
 fi                                                                                                        
 rm -f $f.1 $f.2 $3.tmp $f.8 $f.3 $f.4      
 exit 1
 '_EOF_'
 
     chmod +x blastSome
     gensub2 query.lst kg.lst blastGsub blastSpec
     # I ended up doing the scaffolds separately from the chopped up chrom segments
     # but next time I wouldn't
     gensub2 scaffold.lst kg.lst blastGsub scaffoldBlastSpec
 
     ssh kk
     cd /cluster/data/tetNig1/bed/tblastn.hg17KG
     para create blastSpec
     para push
 # Completed: 301244 of 301244 jobs
 # CPU time in finished jobs:   25047824s  417463.74m  6957.73h  289.91d  0.794 y
 # IO & Wait Time:               2658184s   44303.06m   738.38h   30.77d  0.084 y
 # Average job time:                  92s       1.53m     0.03h    0.00d
 # Longest job:                   226899s    3781.65m    63.03h    2.63d
 # Submission to last job:        250711s    4178.52m    69.64h    2.90d
 
     cat << '_EOF_' > chainGsub
 #LOOP
 chainSome $(path1)
 #ENDLOOP
 '_EOF_'
 
     cat << '_EOF_' > chainSome
 (cd $1; cat q.*.psl | simpleChain -prot -outPsl -maxGap=50000 stdin ../c.`basename $1`.psl)
 '_EOF_'
     chmod +x chainSome
 
     ls -1dS `pwd`/blastOut/kg?? > chain.lst
     gensub2 chain.lst single chainGsub chainSpec
 
     ssh kki
     cd /cluster/data/tetNig1/bed/tblastn.hg17KG
     para create chainSpec
     para push
 # Completed: 1287 of 1287 jobs
 # CPU time in finished jobs:      72236s    1203.94m    20.07h    0.84d  0.002 y
 # IO & Wait Time:                 22886s     381.43m     6.36h    0.26d  0.001 y
 # Average job time:                  74s       1.23m     0.02h    0.00d
 # Longest job:                     3374s      56.23m     0.94h    0.04d
 # Submission to last job:          5982s      99.70m     1.66h    0.07d
 
     exit
     # back to kksilo
     cd /cluster/data/tetNig1/bed/tblastn.hg17KG/blastOut
     # again some weirdness because I did the NA and Un scaffolds separately
     for i in kg??
     do 
 	cat c.$i.psl | awk "(\$13 - \$12)/\$11 > 0.6 {print}" > cs60.$i.psl
 	sort -rn cs60.$i.psl | pslUniq stdin us.$i.psl
 	awk "((\$1 / \$11) ) > 0.60 { print   }" cs60.$i.psl > ms60.$i.psl
 	echo $i
     done
 
     for i in kg??
     do 
 	cat $i/c.*.psl | awk "(\$13 - \$12)/\$11 > 0.6 {print}" > c60.$i.psl
 	sort -rn c60.$i.psl | pslUniq stdin u.$i.psl
 	awk "((\$1 / \$11) ) > 0.60 { print   }" c60.$i.psl > m60.$i.psl
 	echo $i
     done
 
     cat us.*.psl ms60* | sort -T /tmp -k 14,14 -k 17,17n -k 17,17n  | uniq  > /cluster/data/tetNig1/bed/tblastn.hg17KG/blastHg17KG.psl
     cd ..
     # lift the chrUn and chrNA scaffolds 
 
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/tblastn.hg17KG
     hgLoadPsl tetNig1 blastHg17KG.psl
 
     # back to kksilo
     rm -rf blastOut
 # End tblastn
 
 # GENOSCOPE HUMAN (hg17) ECORES (DONE, 2005-02-07, hartera)
     ssh kksilo
     mkdir -p /cluster/data/tetNig1/bed/Genocscope/ecoresHg17
     cd /cluster/data/tetNig1/bed/Genoscope/ecoresHg17
     
     wget --timestamp \
          http://www.genoscope.cns.fr/externe//4ucsc/ExofishTnig1Hs35
     # this is in gff format 
     # remove "Ecotig" from name field 
     sed -e 's/Ecotig EG/EG/g' ExofishTnig1Hs35 > ExofishTnig1Hs35.gff
     # need to have tabs between fields not a space to load file into table
     sed -e 's/ /\t/g' ExofishTnig1Hs35.gff > Tnig1Hs35format.gff
     # if "ecore" is changed to "CDS" and "ecotig" to "transcript" this loads
     # correctly into the table.
     sed -e 's/ecore/CDS/' Tnig1Hs35format.gff | sed -e 's/ecotig/transcript/' \
            > tetNig1vsHg17.gff
   
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/Genoscope/ecoresHg17
     nice ldHgGene tetNig1 ecoresHg17 tetNig1vsHg17.gff
     # Read 36417 transcripts in 184981 lines in 1 files
     # 36417 groups 27 seqs 1 sources 2 feature types
     # 36417 gene predictions
 # added ecoresHg17 entry to trackDb.ra in trackDb/tetraodon
 # and created ecoresHg17.html.  Genoscope will not be maintaining this
 # newest data in their Exofish comparative browser display.
 
 
 # ACCESSIONS FOR CONTIGS (DONE 2005-03-17 kate)
 #  Used for Assembly track details, and also ENCODE AGP's
     cd /cluster/data/tetNig1/bed
     mkdir -p contigAcc
     cd contigAcc
     
     # extract WGS contig to accession mapping from Entrez Nucleotide summaries
     # To get the summary file, access the Genbank page for the project 
     # by searching:
     #       genus[ORGN] AND WGS[KYWD]
     # At the end of the page, click on the list of accessions for the contigs.
     # Select summary format, and send to file.
     # output to file <species>.acc
 
     grep SCAF tetra.acc | wc -l
     # 25773
 
     # edit hg/encode/regionAgp/contigAccession.pl to recognize tetra format
     # and install in /cluster/bin/scripts
     contigAccession tetra.acc > contigAcc.tab
     wc -l contigAcc.tab
     # 25773
     grep SCAF /cluster/data/tetNig1/chr.agp | wc -l
     # 25773
     hgsql tetNig1 < $HOME/kent/src/hg/lib/contigAcc.sql
     echo "LOAD DATA LOCAL INFILE 'contigAcc.tab' INTO TABLE contigAcc" | \
         hgsql tetNig1
     hgsql tetNig1 -e "SELECT COUNT(*) FROM contigAcc"
         # 25773
 
 #########################################################################
 # MOUSE NET/CHAINS MM6 - Info contained in makeMm6.doc (200503 Hiram)
 
 # MAKE 2BIT FILES FOR 500 kb SEQUENCE CONTIGS, CHROMS CONTIGS AND RANDOM
 # CHROMOSOME SCAFFOLDS 
 # (DONE, 2005-08-24, hartera)
      ssh hgwdev
      # 2bit file from 500 kb contigs file used for blastz alignments
      faToTwoBit /cluster/bluearc/tetNig1/contigs/tetNig1Contigs.fa \
               /cluster/bluearc/tetNig1/contigs/tetNig1Contigs.2bit
 
      # then make FASTA file and 2bit file for just the ordered chroms and
      # not the randoms.
      ssh kkr1u00
      mkdir -p /cluster/data/tetNig1/twoBit
      cd /cluster/data/tetNig1/twoBit
      grep '>' /iscratch/i/tetNig1/contigs/tetNig1Contigs.fa | sed -e 's/>//' \
         > tetNig1.contigs
      grep -v "random" tetNig1.contigs > tetNig1.contigs.norandoms
      wc -l tetNig1.contigs.norandoms
      # 507 tetNig1.contigs.norandoms
      # get these sequence without the randoms, run randoms as scaffolds
      faSomeRecords /iscratch/i/tetNig1/contigs/tetNig1Contigs.fa \
         tetNig1.contigs.norandoms tetNig1ContigNoRandoms.fa
      faToTwoBit tetNig1ContigsNoRandoms.fa tetNig1ContigsNoRandoms.2bit
      mv tetNig1ContigsNoRandoms.2bit /cluster/bluearc/tetNig1/contigs/
      cp /cluster/bluearc/tetNig1/contigs/tetNig1ContigsNoRandoms.2bit \
         /iscratch/i/tetNig1/contigs/
      # get list of scaffolds for randoms - need to make faFrag list to get 
      # sequences:
      cd /cluster/data/tetNig1/twoBit
      mkdir -p /cluster/bluearc/tetNig1/randomScaffolds
      mkdir -p /cluster/bluearc/tetNig1/randomFa
      mkdir run
      cd run
      grep 'random' /cluster/data/tetNig1/chrom.lst > chroms.random
      foreach c (`cat chroms.random`)
         set dir=/cluster/data/tetNig1
         echo $c
         cp $dir/${c}/chr{$c}.fa /cluster/bluearc/tetNig1/randomFa/
      end 
 cat > getSeqs.csh << 'EOF'
 #!/bin/csh -ef
 set inFa=$1
 set st=$2
 set end=$3
 set out=$4
 
 faFrag -mixed $inFa $st $end $out > ${out}.log
 'EOF'
      chmod +x getSeqs.csh
      awk '{if (($1 ~ /random/) && ($5 ~ /SCAF/)) print "/cluster/data/tetNig1/twoBit/run/getSeqs.csh","/cluster/bluearc/tetNig1/randomFa/"$1".fa",$2-1,$3,"/cluster/bluearc/tetNig1/randomScaffolds/"$5".fa"}' /cluster/data/tetNig1/chr.agp \
        > jobList
      para create jobList  
      awk '{if ($1 ~ /random/ && ($5 !~ /GAP/)) print $5;}' \
         /cluster/data/tetNig1/chr.agp > randoms.scaffolds     
      wc -l jobList
      # 24571 jobList
      wc -l randoms.scaffolds
      # 24571 randoms.scaffolds
      para try, check, push, check etc.
 # para time
 # Completed: 24571 of 24571 jobs
 # CPU time in finished jobs:     249907s    4165.12m    69.42h    2.89d  0.008 y
 # IO & Wait Time:                 93508s    1558.46m    25.97h    1.08d  0.003 y
 # Average job time:                  14s       0.23m     0.00h    0.00d
 # Longest running job:                0s       0.00m     0.00h    0.00d
 # Longest finished job:            3689s      61.48m     1.02h    0.04d
 # Submission to last job:          3952s      65.87m     1.10h    0.05d
 
      # once have all the sequences, cat together and make a 2bit file
      ssh kolossus
      cd /cluster/bluearc/tetNig1/randomScaffolds 
      # replace FASTA header with scaffold name and cat sequences together
      foreach f (SCAF*.fa)
         set s=$f:r
         perl -pi.bak -e "s/>chr[0-9Un]+_random:[0-9]+\-[0-9]+/>$s/" $f
         cat $f >> tetraRandomsScaffolds.fa
      end
      faToTwoBit tetraRandomsScaffolds.fa tetraRandomsScaffolds.2bit
      # then cat to the contigs file without randoms and make a 2bit file
      cat tetraRandomsScaffolds.fa \
        /cluster/data/tetNig1/twoBit/tetNig1ContigNoRandoms.fa \
        > tetNig1ChrContigsRandomScafs.fa
      faToTwoBit tetNig1ChrContigsRandomScafs.fa \
                 tetNig1ChrContigsRandomScafs.2bit
      ssh kkr1u00
      cd /cluster/bluearc/tetNig1/randomScaffolds
      # copy 2bit files to iservers
      cp *.2bit /iscratch/i/tetNig1/contigs
      cp *.2bit /cluster/bluearc/tetNig1/contigs
      # rsync to the iservers - this takes a long time these days
      # so do this with rsync
      foreach i (2 3 4 5 6 7 8)
         rsync -a --progress --delete --rsh=rsh \
            /iscratch/i/tetNig1/contigs/ kkr${i}u00:/iscratch/i/tetNig1/contigs/
      end
      # make a 2bit file with the full chroms and the random scaffolds
      ssh kolossus
      cd /cluster/data/tetNig1/twoBit
      grep -v "random" /cluster/data/tetNig1/chrom.lst > chromsNoRandoms.lst 
      foreach c (`cat chromsNoRandoms.lst`)
        cat /cluster/data/tetNig1/${c}/chr${c}.fa >> tetNig1chromsNoRandoms.fa
      end
      # then add scaffolds FASTAs
      cat tetNig1chromsNoRandoms.fa \
         /cluster/bluearc/tetNig1/randomScaffolds/tetraRandomsScaffolds.fa \
          > tetNig1ChromsRandomScafs.fa
      faToTwoBit tetNig1ChromsRandomScafs.fa \
                 tetNig1ChromsRandomScafs.2bit
      mkdir -p /cluster/bluearc/tetNig1/twoBit
      mv tetNig1ChromsRandomScafs* /cluster/bluearc/tetNig1/twoBit/
      ssh kkstore03
      mkdir /cluster/data/tetNig1/liftScaffoldsToChrom
      cd /cluster/data/tetNig1/liftScaffoldsToChrom
      foreach c (`cat chromsNoRandoms.lst`)
        awk 'BEGIN {FS="\t"} {OFS="\t"} {print $2-1,$6,$3,$1;}' \
           /cluster/data/tetNig1/${c}/${c}.agp > ${c}.lft
        awk '{if ($1 == "'${c}'") print $2;}' chrom.sizes
 
 #########################################################################
 # MOUSE NET/CHAINS MM7 - Info contained in makeMm7.doc (200503 Hiram)
 #########################################################################
 #	Prep for the Mm7 net/chains, need a lift file for the randoms
 #	(DONE - 2005-09-20 - Hiram)
     ssh kkstore003
     cd /cluster/data/tetNig1
     #	We need the lift information from the random.agp files
     cp -p /cluster/data/cb2/scripts/agpToLift.pl ./jkStuff
     mkdir liftScaffoldsToChrom
     for AGP in ?_random/*_random.agp ??_random/*_random.agp
 do
     CHR=`dirname ${AGP}`
     grep -v fragment "${AGP}" | ./jkStuff/agpToLift.pl /dev/stdin \
 	> liftScaffoldsToChrom/${CHR}.lft
 done
     cat liftScaffoldsToChrom/*.lft > jkStuff/chromsAndScafs.lft
     cp -p jkStuff/chromsAndScafs.lft \
 	/san/sanvol1/scratch/tetNig1/chromsAndScafs/chromsAndScafs.lft
 
 # BLASTZ SWAP FOR ZEBRAFISH (danRer3) (DONE, 2005-10-20, hartera)
 # CREATE CHAIN AND NET TRACKS, AXTNET, MAFNET, LIFTOVER AND ALIGNMENT DOWNLOADS.
                                                                                 
     # do swap of danRer3 vs. tetNig1 chain and net alignments to
     # create tetNig1 vs. danRer3. see makeDanRer3.doc for details.
     ssh hgwdev
     cd /cluster/data/danRer3/bed/blastz.tetNig1/chromsAndScafsRun
     nohup nice /cluster/bin/scripts/doBlastzChainNet.pl `pwd`/DEF \
        -bigClusterHub=pk \
        -swap -chainMinScore=5000 >& doSwap.log &
     # Start: Thu Oct 20 11:59  Finished: Oct 20 12:25
 # Parameters used for danRer3 vs tetNig1 BLASTZ:
 # BLASTZ_M=50
 # BLASTZ_H=2500
 # BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 # #BLASTZ_ABRIDGE_REPEATS=1 if SMSK is specified
 # BLASTZ_ABRIDGE_REPEATS=0
 
 # Add entries for danRer3 chains and net to trackDb.ra for monDom2 and
 # Modify track descriptions to describe the process using scaffolds for danRer3
 # chrNA and chrUn and the fact that dynamic masking was used for the Blastz
 # alignments - see makeDanRer3.doc for details.
 
 ############################################################################
 ##  BLASTZ swap from mm8 alignments (DONE - 2006-02-19 - Hiram)
     ssh kk
     cd /cluster/data/mm8/bed/blastzTetNig1.2006-02-19
     time /cluster/bin/scripts/doBlastzChainNet.pl -verbose=2 \
         -bigClusterHub=kk -chainMinScore=5000 -chainLinearGap=loose \
         -swap `pwd`/DEF > swap.out 2>&1 &
     time /cluster/bin/scripts/doBlastzChainNet.pl -verbose=2 \
         -bigClusterHub=kk -chainMinScore=5000 -chainLinearGap=loose \
         -swap -continue=net `pwd`/DEF > swap-net.out 2>&1 &
 
     time nice -n +19 featureBits tetNig1 chainMm8Link
     #   47024263 bases of 342403326 (13.734%) in intersection
 
 ###########################################################################
 # BLASTZ SWAP TO CREATE CHAIN AND NET ALIGNMENTS FOR ZEBRAFISH (danRer4) 
 # AND CREATE AXNET, MAFNET, LIFTOVER AND ALIGNMENT DOWNLOADS 
 # (DONE, 2006-04-30, hartera)
    ssh pk
    # Used these BLASTZ parameters - see also makeDanRer4.doc
    # BLASTZ_H=2500
    # BLASTZ_M=50
    # BLASTZ_Q=/cluster/data/blastz/HoxD55.q 
    # and no lineage-specific repeats.
    # swap directory for alignments is:
    # /cluster/data/tetNig1/bed/blastz.danRer4.swap
    cd /cluster/data/danRer4/bed/blastz.tetNig1.2006-04-29
    nice /cluster/bin/scripts/doBlastzChainNet.pl -verbose=2 \
         -bigClusterHub=pk -chainMinScore=5000 -chainLinearGap=loose \
         -swap `pwd`/DEF >& doSwap.log &
    # Took about 22 minutes.
    # Check coverage compared to danRer3:
    featureBits tetNig1 chainDanRer4Link
    # 82462283 bases of 342403326 (24.083%) in intersection  
    featureBits tetNig1 chainDanRer3Link
    # 76373873 bases of 342403326 (22.305%) in intersection
    # No refGene table for tetNig1 so use mrna: 
    featureBits -chrom=chr1 tetNig1 mrna chainDanRer4Link -enrichment
    # mrna 2.624%, chainDanRer4Link 22.894%, both 1.297%, cover 49.42%, 
    # enrich 2.16x
    featureBits -chrom=chr1 tetNig1 mrna chainDanRer3Link -enrichment
    # mrna 2.624%, chainDanRer3Link 21.321%, both 1.205%, cover 45.92%, 
    # enrich 2.15x
    featureBits -chrom=chr1 tetNig1 mrna netDanRer4 -enrichment
    # mrna 2.624%, netDanRer4 78.279%, both 2.278%, cover 86.81%, enrich 1.11x
    featureBits -chrom=chr1 tetNig1 mrna netDanRer3 -enrichment
    # mrna 2.624%, netDanRer3 95.070%, both 2.435%, cover 92.80%, enrich 0.98x
    # Similar coverage for chains and nets for danRer4 and danRer3. 
    # nets coverage is a little lower for danRer4 than for danRer3.
    # Try also Human Proteins (hg17):
    featureBits -chrom=chr1 tetNig1 blastHg17KG netDanRer4 -enrichment
    # blastHg17KG 5.728%, netDanRer4 78.279%, both 5.666%, cover 98.92%, 
    # enrich 1.26x
    featureBits -chrom=chr1 tetNig1 blastHg17KG netDanRer3 -enrichment
    # blastHg17KG 5.728%, netDanRer3 95.070%, both 5.687%, cover 99.29%, 
    # enrich 1.04x
    # Similar coverage here too. Looked at chain and net tracks for danRer4
    # on the tetNig1 Browser and they look ok.
 
 #########################################################################
 ##  Reorder Fish organisms (DONE - 2006-12-22 - Hiram)
     hgsql -h genome-testdb hgcentraltest \
 	-e "update dbDb set orderKey = 460 where name = 'tetNig1';"
 
 #########################################################################
 # BLASTZ/CHAIN/NET FR2 (DONE - 2007-01-29 - Hiram)
 ##  Align to fr2 scaffolds,
 ##	results lifted to fr2 chrUn coordinates
     ssh kkstore03
     mkdir /cluster/data/tetNig1/bed/blastz.fr2.2007-01-25
     cd /cluster/data/tetNig1/bed/blastz.fr2.2007-01-25
     cat << '_EOF_' > DEF
 # Tetraodon vs. Fugu
 
 # Try "human-fugu" (more distant, less repeat-killed than mammal) params
 # +M=50:
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_M=50
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET: Tetraodon tetNig1
 SEQ1_DIR=/san/sanvol1/scratch/tetNig1/tetNig1.sdTrf.2bit
 SEQ1_LEN=/san/sanvol1/scratch/tetNig1/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 SEQ1_LIMIT=1
 
 # QUERY: Fugu fr2
 #       Align to the scaffolds, results lifed up to chrUn.sdTrf coordinates
 SEQ2_DIR=/san/sanvol1/scratch/fr2/fr2.2bit
 SEQ2_LEN=/san/sanvol1/scratch/fr2/chrom.sizes
 SEQ2_CTGDIR=/san/sanvol1/scratch/fr2/fr2.scaffolds.2bit
 SEQ2_CTGLEN=/san/sanvol1/scratch/fr2/fr2.scaffolds.sizes
 SEQ2_LIFT=/san/sanvol1/scratch/fr2/liftAll.lft
 SEQ2_CHUNK=20000000
 SEQ2_LIMIT=30
 SEQ2_LAP=0
 
 BASE=/cluster/data/tetNig1/bed/blastz.fr2.2007-01-25
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << this line keeps emacs coloring happy
 
     time doBlastzChainNet.pl DEF -chainMinScore=2000 -chainLinearGap=loose \
 	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
 	-verbose=2 -bigClusterHub=pk \
 	-blastzOutRoot /cluster/bluearc/tetNig1Fr2 > do.log 2>&1 &
     #	real    259m39.548s
 
     ## Swap to fr2
     mkdir /cluster/data/fr2/bed/blastz.tetNig1.swap
     cd /cluster/data/fr2/bed/blastz.tetNig1.swap
     time doBlastzChainNet.pl -verbose=2 \
 	/cluster/data/tetNig1/bed/blastz.fr2.2007-01-25/DEF \
 	-chainMinScore=2000 -chainLinearGap=loose \
 	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
 	-bigClusterHub=pk -swap > swap.log 2>&1 &
     time doBlastzChainNet.pl -verbose=2 \
 	/cluster/data/tetNig1/bed/blastz.fr2.2007-01-25/DEF \
 	-chainMinScore=2000 -chainLinearGap=loose \
 	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
 	-continue=net -bigClusterHub=pk -swap > net_swap.log 2>&1 &
     #	real    40m40.471s
 
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/blastz.fr2.2007-01-25
     time nice -n +19 featureBits tetNig1 chainFr2Link \
 	> fb.tetNig1.chainFr2Link.txt 2>&1 
     #	246828605 bases of 342403326 (72.087%) in intersection
 
     cd /cluster/data/fr2/bed/blastz.tetNig1.swap
     time nice -n +19 featureBits fr2 chainTetNig1Link \
 	> fb.fr2.chainTetNig1.txt 2>&1
     #	247086553 bases of 393312790 (62.822%) in intersection
 
 #########################################################################
 ## SWAP oryLat1 to tetNig1 (DONE - 2007-01-18 - Hiram)
     ssh kkstore04
     cd /cluster/data/oryLat1/bed/blastz.tetNig1.2006-12-13
     cat fb.oryLat1.chainTetNig1Link.txt
     #	160388236 bases of 700386597 (22.900%) in intersection
 
     # And the swap
     ssh kkstore04
     cd /cluster/data/tetNig1/bed
     mv blastz.oryLat1.swap blastz.oryLat1.swap.2006-12-08
     mkdir blastz.oryLat1.swap
     cd /cluster/data/tetNig1/bed/blastz.oryLat1.swap
     time doBlastzChainNet.pl -verbose=2 \
 	/cluster/data/oryLat1/bed/blastz.tetNig1.2006-12-13/DEF \
 	-chainMinScore=2000 -chainLinearGap=loose \
 	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
 	-swap -bigClusterHub=kk > swap.log 2>&1 &
     time doBlastzChainNet.pl -verbose=2 \
 	/cluster/data/oryLat1/bed/blastz.tetNig1.2006-12-13/DEF \
 	-chainMinScore=2000 -chainLinearGap=loose \
 	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
 	-continue=net -swap -bigClusterHub=kk > net-swap.log 2>&1 &
 
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/blastz.oryLat1.swap
     nice -n 19 featureBits -noRandom tetNig1 chainOryLat1Link \
 	>fb.tetNig1.oryLat1.txt 2>&1
     #	130327582 bases of 342403326 (38.063%) in intersection
 
 ##########################################################################
 # N-SCAN gene predictions (nscanGene) - (2007-08-16 markd)
     cd /cluster/data/tetNig1/bed/nscan/
 
     # obtained NSCAN predictions from michael brent's group
     # at WUSTL
     wget http://mblab.wustl.edu/predictions/Tetraodon/tetNig1.gtf
     wget http://mblab.wustl.edu/predictions/Tetraodon/tetNig1.prot.fa
     bzip2 tetNig1.*
 
     # load tracks.  Note that these have *utr features, rather than
     # exon features.  currently ldHgGene creates separate genePred exons
     # for these.
     ldHgGene -bin -gtf -genePredExt tetNig1 nscanGene tetNig1.gtf.bz2
 
     # load protein
     hgPepPred tetNig1 generic nscanPep tetNig1.prot.fa.bz2
     rm *.tab
 
     # update trackDb; need a tetNig1-specific page to describe informants
     tetraodon/tetNig1/nscanGene.html   (copy from mm8 and edit)
     tetraodon/tetNig1/trackDb.ra
 
 ##############################################################################
 # SWAP mm9 alignments to tetNig1 (DONE - 2007-09-07 - Hiram)
     #	the original was done at:
     cd /cluster/data/mm9/bed/blastzTetNig1.2007-09-06
     cat fb.mm9.chainTetNig1Link.txt
     #	46206292 bases of 2620346127 (1.763%) in intersection
 
     mkdir /cluster/data/tetNig1/bed/blastz.mm9.swap
     cd /cluster/data/tetNig1/bed/blastz.mm9.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/cluster/data/mm9/bed/blastzTetNig1.2007-09-06/DEF \
 	-chainMinScore=5000 \
 	-qRepeats=windowmaskerSdust -chainLinearGap=loose \
 	-swap -bigClusterHub=kk > swap.log 2>&1 &
     #	real    19m58.885s
     cat fb.tetNig1.chainMm9Link.txt
     #	42256263 bases of 342403326 (12.341%) in intersection
 
 ##############################################################################
 # SWAP DANRER5 BLASTZ RESULT TO GET DANRER5 CHAINS, NETS,
 # AXTNET, MAFNET AND ALIGNMENT DOWNLOADS (DONE, 2007-09-17, hartera)
     
     ssh kkstore04
     mkdir /cluster/data/tetNig1/bed/blastz.swap.danRer5
     cd /cluster/data/tetNig1/bed/blastz.swap.danRer5
     # blastz parameters used to align tetNig1 as query to danRer5 as target:
     # BLASTZ_H=2500
     # BLASTZ_M=50
     # BLASTZ_Q=/cluster/data/blastz/HoxD55.q
     # Results for tetNig1 blastz on danRer5 are in:
     # /cluster/data/danRer5/bed/blastz.tetNig1.2007-09-16
     time nice /cluster/bin/scripts/doBlastzChainNet.pl -verbose=2 \
         -bigClusterHub=pk -chainMinScore=5000 -chainLinearGap=loose \
         -qRepeats=windowmaskerSdust -swap \ 
         /cluster/data/danRer5/bed/blastz.tetNig1.2007-09-16/DEF \
          >& swap.log &
     # 0.141u 0.097s 12:30.60 0.0%     0+0k 0+0io 7pf+0w 
     
     ssh hgwdev
     cat \ 
   /cluster/data/tetNig1/bed/blastz.danRer5.swap/fb.tetNig1.chainDanRer5Link.txt
     # 70448527 bases of 342403326 (20.575%) in intersection 
     # look at coverage of mrna, there is no native RefSeqs 
     # track for tetraodon, tetNig1.
     featureBits tetNig1 all_mrna chainDanRer5Link -enrichment
 # all_mrna 3.104%, chainDanRer5Link 20.575%, both 1.382%, cover 44.53%, enrich
 # 2.16x
     featureBits tetNig1 all_mrna chainDanRer4Link -enrichment
 # all_mrna 3.104%, chainDanRer4Link 24.083%, both 1.522%, cover 49.02%, enrich
 # 2.04x
     featureBits tetNig1 all_mrna netDanRer5 -enrichment
 # all_mrna 3.104%, netDanRer5 70.075%, both 2.400%, cover 77.31%, enrich 1.10x
     featureBits tetNig1 all_mrna netDanRer4 -enrichment
 # all_mrna 3.104%, netDanRer4 69.480%, both 2.445%, cover 78.78%, enrich 1.13x
 
     # clean up a little 
     ssh kkstore04
     cd /cluster/data/tetNig1/bed
     mv ./blastz.swap.danRer5/swap.log ./blastz.danRer5.swap
     rm -r blastz.swap.danRer5
     ln -s blastz.danRer5.swap blastz.danRer5
 ############################################################################
 # TRANSMAP vertebrate.2008-05-20 build  (2008-05-24 markd)
 
 vertebrate-wide transMap alignments were built  Tracks are created and loaded
 by a single Makefile. This is available from:
    svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2008-05-20
 
 see doc/builds.txt for specific details.
 ############################################################################
 ############################################################################
 # TRANSMAP vertebrate.2008-06-07 build  (2008-06-30 markd)
 
 vertebrate-wide transMap alignments were built  Tracks are created and loaded
 by a single Makefile. This is available from:
    svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2008-06-30
 
 see doc/builds.txt for specific details.
 
 ############################################################################
 # HUMAN (hg18) PROTEINS TRACK (DONE braney 2008-07-18)
     ssh kkstore03
     # bash  if not using bash shell already
 
     mkdir /cluster/data/tetNig1/blastDb
     cd /cluster/data/tetNig1
     cat noUn/chr*fa > temp.fa
     faSplit gap temp.fa 1000000 blastDb/x -lift=blastDb.lft
     rm temp.fa
     cat randomContigs/*.fa > temp.fa
     faSplit sequence temp.fa 150 blastDb/y
     rm temp.fa
     cd blastDb
     for i in *.fa
     do
 	/cluster/bluearc/blast229/formatdb -i $i -p F
     done
     rm *.fa
 
     mkdir -p /san/sanvol1/scratch/tetNig1/blastDb
     cd /cluster/data/tetNig1/blastDb
     for i in nhr nin nsq; 
     do 
 	echo $i
 	cp *.$i /san/sanvol1/scratch/tetNig1/blastDb
     done
 
     mkdir -p /cluster/data/tetNig1/bed/tblastn.hg18KG
     cd /cluster/data/tetNig1/bed/tblastn.hg18KG
     echo  /san/sanvol1/scratch/tetNig1/blastDb/*.nsq | xargs ls -S | sed "s/\.nsq//"  > query.lst
     wc -l query.lst
 # 386 query.lst
 
    # we want around 150000 jobs
    calc `wc /cluster/data/hg18/bed/blat.hg18KG/hg18KG.psl | awk "{print \\\$1}"`/\(150000/`wc query.lst | awk "{print \\\$1}"`\)
 
 # 36727/(150000/386) = 94.510813
 
    mkdir -p /cluster/bluearc/tetNig1/bed/tblastn.hg18KG/kgfa
    split -l 95 /cluster/data/hg18/bed/blat.hg18KG/hg18KG.psl  /cluster/bluearc/tetNig1/bed/tblastn.hg18KG/kgfa/kg
    ln -s /cluster/bluearc/tetNig1/bed/tblastn.hg18KG/kgfa kgfa
    cd kgfa
    for i in *; do 
      nice pslxToFa $i $i.fa; 
      rm $i; 
    done
    cd ..
    ls -1S kgfa/*.fa > kg.lst
    mkdir -p /cluster/bluearc/tetNig1/bed/tblastn.hg18KG/blastOut
    ln -s /cluster/bluearc/tetNig1/bed/tblastn.hg18KG/blastOut
    for i in `cat kg.lst`; do  mkdir blastOut/`basename $i .fa`; done
    cd /cluster/data/tetNig1/bed/tblastn.hg18KG
    tcsh
    cat << '_EOF_' > blastGsub
 #LOOP
 blastSome $(path1) {check in line $(path2)} {check out exists blastOut/$(root2)/q.$(root1).psl }
 #ENDLOOP
 '_EOF_'
 
    cat << '_EOF_' > blastSome
 #!/bin/sh
 BLASTMAT=/cluster/bluearc/blast229/data
 export BLASTMAT
 g=`basename $2`
 f=/tmp/`basename $3`.$g
 for eVal in 0.01 0.001 0.0001 0.00001 0.000001 1E-09 1E-11
 do
 if /cluster/bluearc/blast229/blastall -M BLOSUM80 -m 0 -F no -e $eVal -p tblastn -d $1 -i $2 -o $f.8
 then
         mv $f.8 $f.1
         break;
 fi
 done
 if test -f  $f.1
 then
     if /cluster/bin/i386/blastToPsl $f.1 $f.2
     then
 	liftUp -nosort -type=".psl" -nohead $f.3 /cluster/data/tetNig1/blastDb.lft carry $f.2
         liftUp -nosort -type=".psl" -pslQ -nohead $3.tmp /cluster/data/hg18/bed/blat.hg18KG/protein.lft warn $f.3
 
         if pslCheck -prot $3.tmp                              
         then  
             mv $3.tmp $3                                               
             rm -f $f.1 $f.2 $f.3 $f.4
         fi
         exit 0 
     fi 
 fi                                                                                
 rm -f $f.1 $f.2 $3.tmp $f.8 $f.3 $f.4
 exit 1
 '_EOF_'
     # << happy emacs
     chmod +x blastSome
     gensub2 query.lst kg.lst blastGsub blastSpec
     exit 
     
     ssh pk
     cd /cluster/data/tetNig1/bed/tblastn.hg18KG
     para create blastSpec
 #    para try, check, push, check etc.
 
     para time
 
 # Completed: 136074 of 136074 jobs
 
 # CPU time in finished jobs:    2945512s   49091.87m   818.20h   34.09d  0.093 y
 # IO & Wait Time:                569398s    9489.96m   158.17h    6.59d  0.018 y
 # Average job time:                  26s       0.43m     0.01h    0.00d
 # Longest finished job:             881s      14.68m     0.24h    0.01d
 # Submission to last job:        146804s    2446.73m    40.78h    1.70d
 
     ssh kkstore03
     cd /cluster/data/tetNig1/bed/tblastn.hg18KG
     mkdir chainRun
     cd chainRun
     tcsh
     cat << '_EOF_' > chainGsub
 #LOOP
 chainOne $(path1)
 #ENDLOOP
 '_EOF_'
 
     cat << '_EOF_' > chainOne
 (cd $1; cat q.*.psl | simpleChain -prot -outPsl -maxGap=100000 stdin /cluster/bluearc/tetNig1/bed/tblastn.hg18KG/blastOut/c.`basename $1`.psl)
 '_EOF_'
     chmod +x chainOne
     ls -1dS /cluster/bluearc/tetNig1/bed/tblastn.hg18KG/blastOut/kg?? > chain.lst
     gensub2 chain.lst single chainGsub chainSpec
     # do the cluster run for chaining
     ssh memk
     cd /cluster/data/tetNig1/bed/tblastn.hg18KG/chainRun
     para create chainSpec
     para try, check, push, check etc.
 
 # Completed: 387 of 387 jobs
 # CPU time in finished jobs:       1982s      33.04m     0.55h    0.02d  0.000 y
 # IO & Wait Time:                 25746s     429.10m     7.15h    0.30d  0.001 y
 # Average job time:                  72s       1.19m     0.02h    0.00d
 # Longest finished job:             165s       2.75m     0.05h    0.00d
 # Submission to last job:          2784s      46.40m     0.77h    0.03d
 
     ssh kkstore03
     cd /cluster/data/tetNig1/bed/tblastn.hg18KG/blastOut
     for i in kg??
     do
        cat c.$i.psl | awk "(\$13 - \$12)/\$11 > 0.6 {print}" > c60.$i.psl
        sort -rn c60.$i.psl | pslUniq stdin u.$i.psl
        awk "((\$1 / \$11) ) > 0.60 { print   }" c60.$i.psl > m60.$i.psl
        echo $i
     done
     sort -T /tmp -k 14,14 -k 16,16n -k 17,17n u.*.psl m60* | uniq > /cluster/data/tetNig1/bed/tblastn.hg18KG/unliftBlastHg18KG.psl
     cd ..
     pslCheck unliftBlastHg18KG.psl
     liftUp -nohead temp.psl ../../randomContigs/tetNig1.randomContigs.lift carry unliftBlastHg18KG.psl
     sort -T /tmp -k 14,14 -k 16,16n -k 17,17n temp.psl > blastHg18KG.psl
     rm temp.psl
 
     # load table 
     ssh hgwdev
     cd /cluster/data/tetNig1/bed/tblastn.hg18KG
     hgLoadPsl tetNig1 blastHg18KG.psl
 
     # check coverage
     featureBits tetNig1 blastHg18KG 
 # 19028060 bases of 342403326 (5.557%) in intersection
 
     featureBits tetNig1 ensGene:cds blastHg18KG  -enrichment
 # ensGene:cds 9.909%, blastHg18KG 5.557%, both 4.906%, cover 49.51%, enrich 8.91x
 
     featureBits tetNig1 nscanGene:cds blastHg18KG  -enrichment
 # nscanGene:cds 9.388%, blastHg18KG 5.557%, both 3.568%, cover 38.01%, enrich 6.84x
 
     ssh kkstore03
     rm -rf /cluster/data/tetNig1/bed/tblastn.hg18KG/blastOut
     rm -rf /cluster/bluearc/tetNig1/bed/tblastn.hg18KG/blastOut
 #end tblastn
 
 ##########################################################################
 # SWAP BLASTZ Medaka oryLat2 (DONE - 2008-08-27 - Hiram)
     ssh kkstore03	# not too important since everything moved to hive
     screen # use screen to control this job
     cd /cluster/data/oryLat2/bed/blastz.tetNig1.2008-08-26
     cat fb.oryLat2.chainTetNig1Link.txt
     #	163171121 bases of 700386597 (23.297%) in intersection
 
     #	And, for the swap:
     mkdir /cluster/data/tetNig1/bed/blastz.oryLat2.swap
     cd /cluster/data/tetNig1/bed/blastz.oryLat2.swap
     time doBlastzChainNet.pl -chainMinScore=3000 -chainLinearGap=medium \
 	/cluster/data/oryLat2/bed/blastz.tetNig1.2008-08-26/DEF \
 	-swap -tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
 	-verbose=2 -smallClusterHub=pk -bigClusterHub=pk > swap.log 2>&1 &
     #	real    22m48.350s
     cat fb.tetNig1.chainOryLat2Link.txt
     #	139059872 bases of 342403326 (40.613%) in intersection
 
 ##########################################################################
 # add NCBI identifiers to the dbDb (DONE - 2008-10-21 - Hiram)
     hgsql -e 'update dbDb set
 sourceName="Genoscope and Broad Institute V7 (NCBI project 12350, CAAE01000000)" where name="tetNig1";' hgcentraltest
 ###########################################################################
 ############################################################################
 # TRANSMAP vertebrate.2009-07-01 build  (2009-07-21 markd)
 
 vertebrate-wide transMap alignments were built  Tracks are created and loaded
 by a single Makefile. This is available from:
    svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2009-07-01
 
 see doc/builds.txt for specific details.
 ############################################################################
 ############################################################################
 # TRANSMAP vertebrate.2009-09-13 build  (2009-09-20 markd)
 
 vertebrate-wide transMap alignments were built  Tracks are created and loaded
 by a single Makefile. This is available from:
    svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2009-09-13
 
 see doc/builds.txt for specific details.
+
 ############################################################################
+# LIFTOVER TO TetNig2 (DONE - 2010-01-15 - Hiram )
+    mkdir /hive/data/genomes/tetNig1/bed/blat.tetNig2.2010-01-15
+    cd /hive/data/genomes/tetNig1/bed/blat.tetNig2.2010-01-15
+    # -debug run to create run dir, preview scripts...
+    doSameSpeciesLiftOver.pl -debug tetNig1 tetNig2
+    # Real run:
+    time nice -n +19 doSameSpeciesLiftOver.pl -verbose=2 \
+	-bigClusterHub=pk -dbHost=hgwdev -workhorse=hgwdev \
+	 tetNig1 tetNig2 > do.log 2>&1
+    #	real    36m16.693s
+#############################################################################