src/hg/makeDb/doc/mm10.txt 6c02988b8ef7077f418bd3185126666f612759b1

6c02988b8ef7077f418bd3185126666f612759b1
markd
  Thu Sep 26 10:28:41 2019 -0700
import of gencode V32 final

diff --git src/hg/makeDb/doc/mm10.txt src/hg/makeDb/doc/mm10.txt
index 7a9f9ac..7aba9a1 100644
--- src/hg/makeDb/doc/mm10.txt
+++ src/hg/makeDb/doc/mm10.txt
@@ -1,18122 +1,18124 @@
 # for emacs: -*- mode: sh; -*-
 
 # This file describes browser build for the mm10
 # Mus musculus (mouse)
 
 #	DATE:   07-Dec-2011
 #	ORGANISM:       Mus musculus
 #	TAXID:  10090
 #	ASSEMBLY LONG NAME:     Genome Reference Consortium Mouse Build 38
 #	ASSEMBLY SHORT NAME:    GRCm38
 #	ASSEMBLY SUBMITTER:     Genome Reference Consortium
 #	ASSEMBLY TYPE:  Haploid + alternate loci
 #	NUMBER OF ASSEMBLY-UNITS:       16
 #	ASSEMBLY ACCESSION:     GCA_000001635.2
 
 #	rsync://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Mus_musculus/GRCm38/
 
 #	Genome ID:
 #	http://www.ncbi.nlm.nih.gov/genome/52
 
 #	Taxonomy:
 #	http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090
 #	http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=39442
 
 #	GRC information
 #	http://www.ncbi.nlm.nih.gov/projects/genome/assembly/grc/mouse/
 
 #	Mitochondrial sequence:
 #	http://www.ncbi.nlm.nih.gov/bioproject/13767
 #	C57BL/6J sequence:
 #	http://www.ncbi.nlm.nih.gov/bioproject/51977
 #	Finishing project:
 #	http://www.ncbi.nlm.nih.gov/bioproject/20689
 
 #	Assembly ID: 327618
 #	http://www.ncbi.nlm.nih.gov/genome/assembly/327618/
 
 #	Celera Assembly
 # http://www.ncbi.nlm.nih.gov/Traces/wgs/?val=AAHY00
 
 #############################################################################
 # fetch sequence from genbank (DONE - 2012-01-30 - Hiram)
     mkdir -p /hive/data/genomes/mm10/genbank
     cd /hive/data/genomes/mm10/genbank
 
     rsync -a -P \
 rsync://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Mus_musculus/GRCm38/ ./
 
     # measure sequence to be used here
     faSize Primary_Assembly/assembled_chromosomes/FASTA/*.fa.gz \
 	Primary_Assembly/unplaced_scaffolds/FASTA/*.fa.gz \
 	Primary_Assembly/unlocalized_scaffolds/FASTA/*.fa.gz \
 	non-nuclear/assembled_chromosomes/FASTA/chrMT.fa.gz
     # 2730871774 bases (78088274 N's 2652783500 real 2652783500 upper 0 lower)
     # in 66 sequences in 29 files
     #	Total size: mean 41376845.1 sd 63617337.3 min 1976
     #	(gi|371559559|gb|JH584295.1|) max 195471971
     #	(gi|371561115|gb|CM000994.2|) median 184189
 
 #############################################################################
 # fixup names for UCSC standards (DONE - 2012-02-06 - Hiram)
     mkdir /hive/data/genomes/mm10/ucsc
     cd /hive/data/genomes/mm10/ucsc
 
     ########################  Assembled Chromosomes
     cat << '_EOF_' > toUcsc.pl
 #!/bin/env perl
 
 # toUcsc.pl - rename the GenBank assembled chromosomes to UCSC names.
 #
 # Reads the chrN <-> accession table from chr2acc, then for each chromosome
 # writes into the current directory:
 #   chrN.agp - the AGP with the accession at the start of each data line
 #              replaced by chrN (comment lines are copied unchanged)
 #   chrN.fa  - the FASTA with every header line replaced by >chrN
 # Prints "accession chrN" to stdout for each chromosome as it is processed.
 
 use strict;
 use warnings;
 
 my $asmDir = "../genbank/Primary_Assembly/assembled_chromosomes";
 my %accToChr;
 
 open (my $mapFh, "<", "$asmDir/chr2acc") or
         die "can not read Primary_Assembly/assembled_chromosomes/chr2acc";
 while (my $line = <$mapFh>) {
     next if ($line =~ m/^#/);
     chomp $line;
     my ($chrN, $acc) = split('\s+', $line);
     # skip blank or one-column lines instead of storing an undef key
     next unless (defined $acc);
     $accToChr{$acc} = $chrN;
 }
 close ($mapFh);
 
 foreach my $acc (keys %accToChr) {
     my $chrN =  $accToChr{$acc};
     print "$acc $chrN\n";
 
     # list-form pipe open: zcat is run directly, no shell parses the path
     open (my $agpIn, "-|", "zcat", "$asmDir/AGP/chr${chrN}.agp.gz")
         or die "can not read chr${chrN}.agp.gz";
     open (my $agpOut, ">", "chr${chrN}.agp")
         or die "can not write to chr${chrN}.agp";
     while (my $line = <$agpIn>) {
         # \Q..\E: the accession contains a '.', which must match literally
         $line =~ s/^\Q$acc\E/chr${chrN}/ if ($line !~ m/^#/);
         print $agpOut $line;
     }
     # opening the pipe succeeds even if zcat then fails (e.g. missing file);
     # the failure is only reported by close
     close ($agpIn) or die "can not read chr${chrN}.agp.gz";
     close ($agpOut) or die "can not write to chr${chrN}.agp";
 
     open (my $faIn, "-|", "zcat", "$asmDir/FASTA/chr${chrN}.fa.gz")
         or die "can not read chr${chrN}.fa.gz";
     open (my $faOut, ">", "chr${chrN}.fa")
         or die "can not write to chr${chrN}.fa";
     while (my $line = <$faIn>) {
         if ($line =~ m/^>/) {
             print $faOut ">chr${chrN}\n";
         } else {
             print $faOut $line;
         }
     }
     close ($faIn) or die "can not read chr${chrN}.fa.gz";
     close ($faOut) or die "can not write to chr${chrN}.fa";
 }
 '_EOF_'
     # << happy emacs
     chmod +x toUcsc.pl
     time ./toUcsc.pl
     #	real    0m53.256s
     faSize chr*.fa
     #	2725521370 bases (77999939 N's 2647521431 real 2647521431 upper 0
     #	lower) in 21 sequences in 21 files
     #	Total size: mean 129786731.9 sd 33408399.1 min 61431566 (chr19)
     #	max 195471971 (chr1) median 124902244
 
     ########################  Unplaced scaffolds
     cat << '_EOF_' > unplaced.pl
 #!/bin/env perl
 
 # unplaced.pl - rename the unplaced scaffolds to UCSC chrUn_<accession> names.
 #
 # Writes into the current directory:
 #   unplaced.agp - the AGP with the ".1" version removed from the scaffold
 #                  accession in column 1 and "chrUn_" prefixed
 #   unplaced.fa  - the FASTA with each header reduced to >chrUn_<accession>
 # Dies if a scaffold accession is not version .1, since the version is
 # dropped from the UCSC name.
 
 use strict;
 use warnings;
 
 my $agpFile =  "../genbank/Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz";
 my $fastaFile =  "../genbank/Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz";
 
 # list-form pipe open: zcat is run directly, no shell parses the path
 open (my $agpIn, "-|", "zcat", $agpFile) or die "can not read $agpFile";
 open (my $agpOut, ">", "unplaced.agp") or die "can not write to unplaced.agp";
 while (my $line = <$agpIn>) {
     if ($line =~ m/^#/) {
         print $agpOut $line;
     } else {
         # strip the version from column 1 only; an unanchored s/\.1// would
         # silently edit a later column when the accession is not version .1
         $line =~ s/^([^\t.]+)\.1\t/$1\t/
             or die "ERROR: acc not .1: $line";
         print $agpOut "chrUn_", $line;
     }
 }
 # opening the pipe succeeds even if zcat then fails; close reports it
 close ($agpIn) or die "can not read $agpFile";
 close ($agpOut) or die "can not write to unplaced.agp";
 
 open (my $faIn, "-|", "zcat", $fastaFile) or die "can not read $fastaFile";
 open (my $faOut, ">", "unplaced.fa") or die "can not write to unplaced.fa";
 while (my $line = <$faIn>) {
     if ($line =~ m/^>/) {
         chomp $line;
         # header carries the accession as ...|gb|ACCESSION.1|...
         $line =~ m/gb\|([^|.]+)\.1\|/
             or die "ERROR: acc not .1: $line";
         print $faOut ">chrUn_$1\n";
     } else {
         print $faOut $line;
     }
 }
 close ($faIn) or die "can not read $fastaFile";
 close ($faOut) or die "can not write to unplaced.fa";
 '_EOF_'
     # << happy emacs
     chmod +x unplaced.pl
     time ./unplaced.pl
     #	real    0m0.119s
     # make sure none of the names got to be over 31 characters long:
     grep -v "^#" unplaced.agp | cut -f1 | sort | uniq -c | sort -rn
     # not much in that sequence:
     faSize unplaced.fa
     #	803895 bases (62411 N's 741484 real 741484 upper 0 lower)
     #	in 22 sequences in 1 files
     #	Total size: mean 36540.7 sd 21518.0 min 20208 (chrUn_GL456368)
     #	max 114452 (chrUn_JH584304) median 28772
 
     ########## chrM
     zcat ../genbank/non-nuclear/assembled_chromosomes/FASTA/chrMT.fa.gz \
 	| sed -e "s/^>.*/>chrM/" > chrM.fa
     zcat ../genbank/non-nuclear/assembled_chromosomes/AGP/chrMT.comp.agp.gz \
 	| sed -e "s/^AY172335.1/chrM/" > chrM.agp
 
     ########################  Unlocalized scaffolds
     cat << '_EOF_' > unlocalized.pl
 #!/bin/env perl
 
 # unlocalized.pl - rename the unlocalized scaffolds to UCSC names of the
 # form chrN_<accession>_random.
 #
 # Reads the chrN <-> scaffold accession table from unlocalized.chr2scaf,
 # then for each chromosome listed there writes into the current directory:
 #   chrN_random.agp - the AGP with column 1 replaced by the UCSC name
 #   chrN_random.fa  - the FASTA with each header replaced by the UCSC name
 # Prints "chrN" to stdout after each chromosome's AGP has been written.
 
 use strict;
 use warnings;
 
 my $unlocDir = "../genbank/Primary_Assembly/unlocalized_scaffolds";
 my %accToChr;
 my %chrNames;
 
 # Return the UCSC name for versioned accession $acc found in the files of
 # chromosome $chrN.  Dies if the accession is not version .1 or if the
 # chr2scaf table does not place it on $chrN.
 sub ucscName {
     my ($acc, $chrN) = @_;
     my $accNo1 = $acc;
     # the dot is escaped: an unescaped s/.1$// removes ANY character
     # followed by a trailing 1, not just the ".1" version suffix
     die "ERROR: acc not .1: $acc"
         unless ($accNo1 =~ s/\.1$// && $accNo1 !~ m/\./);
     die "ERROR: chrN $chrN not correct for $acc"
         if (!defined($accToChr{$acc}) || $accToChr{$acc} ne $chrN);
     return "chr${chrN}_${accNo1}_random";
 }
 
 open (my $mapFh, "<", "$unlocDir/unlocalized.chr2scaf") or
         die "can not read Primary_Assembly/unlocalized_scaffolds/unlocalized.chr2scaf";
 while (my $line = <$mapFh>) {
     next if ($line =~ m/^#/);
     chomp $line;
     my ($chrN, $acc) = split('\s+', $line);
     # skip blank or one-column lines instead of storing an undef key
     next unless (defined $acc);
     $accToChr{$acc} = $chrN;
     $chrNames{$chrN} += 1;
 }
 close ($mapFh);
 
 foreach my $chrN (keys %chrNames) {
     my $agpFile =  "$unlocDir/AGP/chr$chrN.unlocalized.scaf.agp.gz";
     my $fastaFile =  "$unlocDir/FASTA/chr$chrN.unlocalized.scaf.fa.gz";
 
     # list-form pipe open: zcat is run directly, no shell parses the path
     open (my $agpIn, "-|", "zcat", $agpFile) or die "can not read $agpFile";
     open (my $agpOut, ">", "chr${chrN}_random.agp")
         or die "can not write to chr${chrN}_random.agp";
     while (my $line = <$agpIn>) {
         if ($line =~ m/^#/) {
             print $agpOut $line;
         } else {
             chomp $line;
             my (@a) = split('\t', $line);
             # an empty line has no column 1; let ucscName reject it cleanly
             my $acc = defined($a[0]) ? $a[0] : "";
             $a[0] = ucscName($acc, $chrN);
             print $agpOut join("\t", @a), "\n";
         }
     }
     # opening the pipe succeeds even if zcat then fails; close reports it
     close ($agpIn) or die "can not read $agpFile";
     close ($agpOut) or die "can not write to chr${chrN}_random.agp";
     printf "chr%s\n", $chrN;
 
     open (my $faIn, "-|", "zcat", $fastaFile) or die "can not read $fastaFile";
     open (my $faOut, ">", "chr${chrN}_random.fa")
         or die "can not write to chr${chrN}_random.fa";
     while (my $line = <$faIn>) {
         if ($line =~ m/^>/) {
             chomp $line;
             # header carries the accession as ...|gb|ACCESSION.1|...
             my $acc = $line;
             $acc =~ s/.*gb\|//;
             $acc =~ s/\|.*//;
             print $faOut ">", ucscName($acc, $chrN), "\n";
         } else {
             print $faOut $line;
         }
     }
     close ($faIn) or die "can not read $fastaFile";
     close ($faOut) or die "can not write to chr${chrN}_random.fa";
 }
 '_EOF_'
     # << happy emacs
     chmod +x unlocalized.pl
     time ./unlocalized.pl
     #	real    0m0.430s
     faSize chr*_random.fa
     #	4530210 bases (25924 N's 4504286 real 4504286 upper 0 lower)
     #	in 22 sequences in 6 files
     #	Total size: mean 205918.6 sd 184688.0 min 1976 (chr4_JH584295_random)
     #	max 953012 (chr5_JH584299_random) median 191905
     # verify none of the names are longer than 31 characters:
     grep -h -v "^#" chr*_random.agp | cut -f1 | sort | uniq -c | sort -nr
 
 
     # compress all these fasta and agp files:
     gzip *.fa *.agp
     #	verify all the sequence is still here after all this rigamarole:
     time faSize *.fa.gz
     #	2730871774 bases (78088274 N's 2652783500 real 2652783500 upper 0
     #	lower) in 66 sequences in 29 files
     #	Total size: mean 41376845.1 sd 63617337.3 min 1976
     #	(chr4_JH584295_random) max 195471971 (chr1) median 184189
 
 #############################################################################
 #   Initial browser build (DONE - 2012-01-06 - Hiram)
     cd /hive/data/genomes/mm10
     cat << '_EOF_' > mm10.config.ra
 # Config parameters for makeGenomeDb.pl:
 db mm10
 clade mammal
 genomeCladePriority 40
 scientificName Mus musculus
 commonName Mouse
 assemblyDate Dec. 2011
 assemblyLabel Genome Reference Consortium Mouse Build 38 (GCA_000001635.2)
 assemblyShortLabel GRCm38
 orderKey 1209
 mitoAcc none
 fastaFiles /hive/data/genomes/mm10/ucsc/*.fa.gz
 agpFiles /hive/data/genomes/mm10/ucsc/*.agp.gz
 dbDbSpeciesDir mouse
 taxId   10090
 ncbiAssemblyId   327618
 ncbiAssemblyName   GRCm38
 '_EOF_'
     # << happy emacs
 
     time makeGenomeDb.pl -stop=agp mm10.config.ra > agp.log 2>&1
     #	real    3m4.568s
     # check the end of agp.log to verify it is OK
     time makeGenomeDb.pl -workhorse=hgwdev -fileServer=hgwdev \
 	-continue=db mm10.config.ra > db.log 2>&1
     #	real    20m51.374s
     # verify the end of db.log indicates success
 
 
 #############################################################################
 # running repeat masker (DONE - 2012-02-06 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/repeatMasker
     cd /hive/data/genomes/mm10/bed/repeatMasker
     time doRepeatMasker.pl -buildDir=`pwd` -noSplit \
 	-bigClusterHub=swarm -dbHost=hgwdev -workhorse=hgwdev \
 	-smallClusterHub=encodek mm10 > do.log 2>&1 &
     #	real    609m48.767s
 
     cat faSize.rmsk.txt
     #	2730871774 bases (78088274 N's 2652783500 real 1456094545 upper
     #	1196688955 lower) in 66 sequences in 1 files
     #	Total size: mean 41376845.1 sd 63617337.3 min 1976
     #	(chr4_JH584295_random) max 195471971 (chr1) median 184189
     #	%43.82 masked total, %45.11 masked real
 
     grep -i versi do.log
 # RepeatMasker version development-$Id: RepeatMasker,v 1.26 2011/09/26 16:19:44 angie Exp $
 #    April 26 2011 (open-3-3-0) version of RepeatMasker
 
     time featureBits -countGaps mm10 rmsk
     #	1196694219 bases of 2730871774 (43.821%) in intersection
     #	real    0m30.460s
     # why is it different than the faSize above ?
     # because rmsk masks out some N's as well as bases, the count above
     #	separates out the N's from the bases, it doesn't show lower case N's
 
 ##########################################################################
 # running simple repeat (DONE - 2012-02-06 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/simpleRepeat
     cd /hive/data/genomes/mm10/bed/simpleRepeat
     time doSimpleRepeat.pl -buildDir=`pwd` -bigClusterHub=swarm \
 	-dbHost=hgwdev -workhorse=hgwdev -smallClusterHub=encodek \
 	mm10 > do.log 2>&1 &
     #	real    16m35.603s
 
     #	batch failed, one job failed:
     # ./TrfRun.csh /hive/data/genomes/mm10/TrfPart/062/062.lst.bed
     # which is the chrM sequence - it has no simple repeats
     # create an empty output file result:
     touch /hive/data/genomes/mm10/TrfPart/062/062.lst.bed
     # go to encodek and create the run.time file to signal this step is done
     cd /hive/data/genomes/mm10/bed/simpleRepeat/run.cluster
     para time > run.time
 # Completed: 70 of 71 jobs
 # Crashed: 1 jobs
 # CPU time in finished jobs:      13103s     218.38m     3.64h    0.15d  0.000 y
 # IO & Wait Time:                   163s       2.72m     0.05h    0.00d  0.000 y
 # Average job time:                 190s       3.16m     0.05h    0.00d
 # Longest finished job:             392s       6.53m     0.11h    0.00d
 # Submission to last job:           894s      14.90m     0.25h    0.01d
 
 
     # continue procedure:
     time doSimpleRepeat.pl -buildDir=`pwd` -bigClusterHub=swarm \
 	-dbHost=hgwdev -workhorse=hgwdev -smallClusterHub=encodek \
 	-continue=filter mm10 > filter.log 2>&1 &
     #	real    1m20.021s
 
     cat fb.simpleRepeat
     #	92161833 bases of 2652783500 (3.474%) in intersection
 
     # when RepeatMasker is done, add this mask to the sequence:
     cd /hive/data/genomes/mm10
     twoBitMask mm10.rmsk.2bit \
 	-add bed/simpleRepeat/trfMask.bed mm10.2bit
     #	you can safely ignore the warning about fields >= 13
 
     twoBitToFa mm10.2bit stdout | faSize stdin > faSize.mm10.2bit.txt
     cat faSize.mm10.2bit.txt
     #	2730871774 bases (78088274 N's 2652783500 real 1454267808 upper
     #	1198515692 lower) in 66 sequences in 1 files
     #	Total size: mean 41376845.1 sd 63617337.3 min 1976
     #	(chr4_JH584295_random) max 195471971 (chr1) median 184189
     #	%43.89 masked total, %45.18 masked real
 
     # set SymLink in gbdb to this masked sequence
     rm /gbdb/mm10/mm10.2bit
     ln -s `pwd`/mm10.2bit /gbdb/mm10/mm10.2bit
 
 #########################################################################
 # Verify all gaps are marked, add any N's not in gap as type 'other'
 #	(DONE - 2012-02-06 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/gap
     cd /hive/data/genomes/mm10/bed/gap
     time nice -n +19 findMotif -motif=gattaca -verbose=4 \
 	-strand=+ ../../mm10.unmasked.2bit > findMotif.txt 2>&1
     #	real    1m0.372s
     grep "^#GAP " findMotif.txt | sed -e "s/^#GAP //" > allGaps.bed
     time featureBits -countGaps mm10 -not gap -bed=notGap.bed
     #	2658879040 bases of 2730871774 (97.364%) in intersection
     #	real    0m13.067s
 
     time featureBits -countGaps mm10 allGaps.bed notGap.bed -bed=new.gaps.bed
     #	6095540 bases of 2730871774 (0.223%) in intersection
     #	real    0m15.177s
 
     #	what is the highest index in the existing gap table:
     hgsql -N -e "select ix from gap;" mm10 | sort -n | tail -1
     #	54
     cat << '_EOF_' > mkGap.pl
 #!/bin/env perl
 
 # mkGap.pl - turn new.gaps.bed into gap table rows on stdout.
 # Each row is typed 'other' with bridge 'yes', and is numbered onward from
 # the highest ix value currently found in the mm10 gap table.
 
 use strict;
 use warnings;
 
 # highest index already used in the gap table
 my $gapIx=`hgsql -N -e "select ix from gap;" mm10 | sort -n | tail -1`;
 chomp $gapIx;
 
 open (my $bedFh, "<", "new.gaps.bed") or die "can not read new.gaps.bed";
 while (my $bedLine = <$bedFh>) {
     # only the first three bed columns are used
     my ($chrom, $start, $end) = split('\s+', $bedLine);
     ++$gapIx;
     my $size = $end-$start;
     printf "%s\t%d\t%d\t%d\tN\t%d\tother\tyes\n",
         $chrom, $start, $end, $gapIx, $size;
 }
 close ($bedFh);
 '_EOF_'
     # << happy emacs
     chmod +x ./mkGap.pl
     ./mkGap.pl > other.bed
     wc -l other.bed
     #	384
     featureBits -countGaps mm10 other.bed
     #	6095540 bases of 2730871774 (0.223%) in intersection
     hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/gap.sql \
 	-noLoad mm10 otherGap other.bed
     # verify no overlap with gap table:
     time featureBits -countGaps mm10 gap other.bed
     #	0 bases of 2730871774 (0.000%) in intersection
     #	real    0m1.281s
 
     # verify no errors before adding to the table:
     time gapToLift -minGap=1 mm10 nonBridged.before.lift \
 	-bedFile=nonBridged.before.bed > before.gapToLift.txt 2>&1 &
     #	real    0m7.205s
     # check for warnings in before.gapToLift.txt, should be empty:
     #	-rw-rw-r-- 1     1633 Jan  6 15:20 before.gapToLift.txt
     # it indicates that there are telomeres adjacent to centromeres
     #	and heterochromatin
     #	starting with this many:
     hgsql -e "select count(*) from gap;" mm10
     #	302
     # bed.tab is the file left by the 'hgLoadBed -noLoad' command above
     hgsql mm10 -e 'load data local infile "bed.tab" into table gap;'
     #	result count:
     hgsql -e "select count(*) from gap;" mm10
     #	686
     # == 302 + 384
     # verify we aren't adding gaps where gaps already exist
     # this would output errors if that were true:
     gapToLift -minGap=1 mm10 nonBridged.lift -bedFile=nonBridged.bed
     #same set of warnings as before, telomere's centromere's and heterochromatin
     # there should be no errors or other output, checked bridged gaps:
     hgsql -N -e "select bridge from gap;" mm10 | sort | uniq -c
     #	191 no
     #	495 yes
 
 ##########################################################################
 ## WINDOWMASKER (DONE - 2012-02-06 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/windowMasker
     cd /hive/data/genomes/mm10/bed/windowMasker
     time nice -n +19 doWindowMasker.pl -buildDir=`pwd` -workhorse=hgwdev \
 	-dbHost=hgwdev mm10 > do.log 2>&1 &
     #	real    167m12.012s
 
     # Masking statistics
     twoBitToFa mm10.wmsk.2bit stdout | faSize stdin
     #	2730871774 bases (78088274 N's 2652783500 real 1686407708 upper
     #	966375792 lower) in 66 sequences in 1 files
     #	Total size: mean 41376845.1 sd 63617337.3 min 1976
     #	(chr4_JH584295_random) max 195471971 (chr1) median 184189
     #	%35.39 masked total, %36.43 masked real
 
 
     twoBitToFa mm10.wmsk.sdust.2bit stdout | faSize stdin
     #	2730871774 bases (78088274 N's 2652783500 real 1670424648 upper
     #	982358852 lower) in 66 sequences in 1 files
     #	Total size: mean 41376845.1 sd 63617337.3 min 1976
     #	(chr4_JH584295_random) max 195471971 (chr1) median 184189
     #	%35.97 masked total, %37.03 masked real
 
     hgLoadBed mm10 windowmaskerSdust windowmasker.sdust.bed.gz
     #	Loaded 12655947 elements of size 3
 
     featureBits -countGaps mm10 windowmaskerSdust
     #	1060447084 bases of 2730871774 (38.832%) in intersection
 
     #	eliminate the gaps from the masking
     featureBits mm10 -not gap -bed=notGap.bed
     #	2652783500 bases of 2652783500 (100.000%) in intersection
     time nice -n +19 featureBits mm10 windowmaskerSdust notGap.bed \
         -bed=stdout | gzip -c > cleanWMask.bed.gz
     #	982358852 bases of 2652783500 (37.031%) in intersection
     #	real    1m42.449s
 
     #	reload track to get it clean
     hgLoadBed mm10 windowmaskerSdust cleanWMask.bed.gz
     #	Loaded 12655987  elements of size 4
     time featureBits -countGaps mm10 windowmaskerSdust
     #	982358852 bases of 2730871774 (35.972%) in intersection
     #	real    1m13.889s
 
     #	do *not* need to mask with this clean result since RepeatMasker
     #	does a very good job here.  Using RM masking instead.
     zcat cleanWMask.bed.gz \
 	| twoBitMask ../../mm10.unmasked.2bit stdin \
 	    -type=.bed mm10.cleanWMSdust.2bit
     twoBitToFa mm10.cleanWMSdust.2bit stdout | faSize stdin \
         > mm10.cleanWMSdust.faSize.txt
     cat mm10.cleanWMSdust.faSize.txt
 
     # how much does this window masker and repeat masker overlap:
     time featureBits -countGaps mm10 rmsk windowmaskerSdust
     #	753614881 bases of 2730871774 (27.596%) in intersection
     #	real    1m42.691s
     # RM by itself:
     time featureBits -countGaps mm10 rmsk
     #	1196694219 bases of 2730871774 (43.821%) in intersection
     #	real    0m30.460s
 
 #############################################################################
 # PREPARE LINEAGE SPECIFIC REPEAT FILES FOR BLASTZ (DONE - 2012-02-07 - Hiram)
     ssh encodek
     mkdir /hive/data/genomes/mm10/bed/linSpecRep
     cd /hive/data/genomes/mm10/bed/linSpecRep
 
     # split the RM output by chromosome name into separate files
     mkdir rmsk dateRepeats
     head -3 ../repeatMasker/mm10.sorted.fa.out > rmsk.header.txt
     headRest 3 ../repeatMasker/mm10.sorted.fa.out \
 	| splitFileByColumn -ending=.out -col=5 -head=rmsk.header.txt stdin rmsk
 
     ls -1S rmsk/* > rmOut.list
 
     cat << '_EOF_' > mkLSR
 #!/bin/csh -fe
 # mkLSR <rmsk .out file>
 # Run DateRepeats on one RepeatMasker .out file (query mouse, compared to
 # human, rat, dog and cow) and move its result into dateRepeats/.
 #
 # Remove any stale result first.  The name must be the one the mv below
 # leaves in dateRepeats/: the basename of $1 plus the DateRepeats suffix.
 rm -f dateRepeats/${1:t}_homo-sapiens_rattus_canis-lupus-familiaris_bos-taurus
 /scratch/data/genomes/RepeatMasker/DateRepeats \
     $1 -query mouse -comp human -comp rat -comp dog -comp cow
 mv $1_homo-sapiens_rattus_canis-lupus-familiaris_bos-taurus dateRepeats
 '_EOF_'
     #	<< happy emacs
     chmod +x mkLSR
 
     cat << '_EOF_' > template
 #LOOP
 ./mkLSR $(path1) {check out line+ dateRepeats/$(file1)_homo-sapiens_rattus_canis-lupus-familiaris_bos-taurus}
 #ENDLOOP
 '_EOF_'
     #	<< happy emacs
 
     gensub2 rmOut.list single template jobList
     para create jobList
     para try ... check ... push ... etc...
     para time
 # Completed: 66 of 66 jobs
 # CPU time in finished jobs:       1743s      29.05m     0.48h    0.02d  0.000 y
 # IO & Wait Time:                   190s       3.16m     0.05h    0.00d  0.000 y
 # Average job time:                  29s       0.49m     0.01h    0.00d
 # Longest finished job:              65s       1.08m     0.02h    0.00d
 # Submission to last job:           160s       2.67m     0.04h    0.00d
 
     mkdir notInHuman notInRat notInDog notInCow
     for F in dateRepeats/chr*.out_homo-sapiens*
     do
 	B=`basename ${F}`
 	B=${B/.out*/}
 	echo $B
         /cluster/bin/scripts/extractRepeats 1 ${F} > \
 		notInHuman/${B}.out.spec
         /cluster/bin/scripts/extractRepeats 2 ${F} > \
 		notInRat/${B}.out.spec
         /cluster/bin/scripts/extractRepeats 3 ${F} > \
 		notInDog/${B}.out.spec
         /cluster/bin/scripts/extractRepeats 4 ${F} > \
 		notInCow/${B}.out.spec
     done
 
     #	notInDog, and notInCow ended up being identical.
     #	The notInRat and notInHuman are different
     #	To check identical
     find . -name "*.out.spec" | \
 	while read FN; do echo `cat ${FN} | sum -r` ${FN}; done \
 	| sort -k1,1n | sort -t"/" -k3,3 > check.same
     # this produces a count of 2 for the sums for Cow and Dog, all the same
     egrep "Cow|Dog" check.same | awk '{print $1}' | sort | uniq -c | sort -rn
     # this does not produce a count of 2 for the sums for Cow and Human
     egrep "Cow|Human" check.same | awk '{print $1}' | sort | uniq -c | sort -rn
     #	Copy to data/genomes staging for cluster replication
     mkdir /hive/data/genomes/staging/data/genomes/mm10
     rsync -a -P ./notInRat/ /hive/data/genomes/staging/data/genomes/mm10/notInRat/
     rsync -a -P ./notInHuman/ /hive/data/genomes/staging/data/genomes/mm10/notInHuman/
     rsync -a -P ./notInCow/ /hive/data/genomes/staging/data/genomes/mm10/notInOthers/
 
 
     # We also need the nibs for the lastz runs with lineage specific repeats
     mkdir /hive/data/genomes/mm10/nib
     cd /hive/data/genomes/mm10
     cut -f1 chrom.sizes | while read C
 do
     twoBitToFa -seq=${C} mm10.2bit stdout | faToNib -softMask stdin nib/${C}.nib
     ls -og nib/$C.nib
 done
     # verify one is properly masked:
     nibFrag -masked nib/chrM.nib 0 16299 + stdout | less
     # compare to the same sequence in the masked 2bit the nibs were made from:
     twoBitToFa -seq=chrM mm10.2bit stdout | less
 
     #	Copy to data/genomes staging for cluster replication
     rsync -a -P ./nib/ /hive/data/genomes/staging/data/genomes/mm10/nib/
 
 #########################################################################
 # MAKE 11.OOC FILE FOR BLAT/GENBANK (DONE - 2012-02-08 - Hiram)
     # Use -repMatch=1000, based on size -- for human we use 1024
     # use the "real" number from the faSize measurement,
     # hg19 is 2897316137, calculate the ratio factor for 1024:
     calc \( 2652783500 / 2897316137 \) \* 1024
     #	( 2652783500 / 2897316137 ) * 1024 = 937.574699
 
     # round up to 1000  (mm9 used 912)
 
     cd /hive/data/genomes/mm10
     time blat mm10.2bit /dev/null /dev/null -tileSize=11 \
       -makeOoc=jkStuff/mm10.11.ooc -repMatch=1000
     #	Wrote 27208 overused 11-mers to jkStuff/mm10.11.ooc
     #	real    2m9.568s
 
     #	at repMatch=900:
     #	Wrote 31822 overused 11-mers to jkStuff/mm10.11.ooc
 
     # there are non-bridged gaps, make lift file for genbank
     hgsql -N -e "select bridge from gap;" mm10 | sort | uniq -c
     #	191 no
     #	495 yes
     cd /hive/data/genomes/mm10/jkStuff
     gapToLift mm10 mm10.nonBridged.lift -bedFile=mm10.nonBridged.bed
     # largest non-bridged contig:
     awk '{print $3-$2,$0}' mm10.nonBridged.bed | sort -nr | head
     116378660 chr2  59120641        175499301       chr2.02
 
     #	copy all of this stuff to the klusters:
     cd /hive/data/genomes/mm10
     mkdir /hive/data/genomes/staging/data/genomes/mm10
     cp -p jkStuff/mm10.11.ooc jkStuff/mm10.nonBridged.lift chrom.sizes \
 	mm10.2bit /hive/data/genomes/staging/data/genomes/mm10
     # request rsync copy from cluster admin
 
 #########################################################################
 # AUTO UPDATE GENBANK (DONE - 2012-02-08 - Hiram)
     # examine the file:
     /cluster/data/genomes/genbank/data/genomes/organism.lst
     # for your species to see what counts it has for:
 # organism       mrnaCnt estCnt  refSeqCnt
 # Mus musculus    334577  4853663 26288
     # to decide which "native" mrna or ests you want to specify in genbank.conf
     # of course, mm10 has plenty of everything
 
     ssh hgwdev
     cd $HOME/kent/src/hg/makeDb/genbank
     git pull
     # edit etc/genbank.conf to add mm10 just after mm9 and commit to GIT
 # mm10
 mm10.serverGenome = /hive/data/genomes/mm10/mm10.2bit
 mm10.clusterGenome = /scratch/data/genomes/mm10/mm10.2bit
 mm10.ooc = /scratch/data/genomes/mm10/mm10.11.ooc
 mm10.align.unplacedChroms = chr*
 mm10.lift = /scratch/data/genomes/mm10/mm10.nonBridged.lift
 mm10.refseq.mrna.native.pslCDnaFilter  = ${finished.refseq.mrna.native.pslCDnaFilter}
 mm10.refseq.mrna.xeno.pslCDnaFilter    = ${finished.refseq.mrna.xeno.pslCDnaFilter}
 mm10.genbank.mrna.native.pslCDnaFilter = ${finished.genbank.mrna.native.pslCDnaFilter}
 mm10.genbank.mrna.xeno.pslCDnaFilter   = ${finished.genbank.mrna.xeno.pslCDnaFilter}
 mm10.genbank.est.native.pslCDnaFilter  = ${finished.genbank.est.native.pslCDnaFilter}
 mm10.downloadDir = mm10
 mm10.refseq.mrna.xeno.load  = yes
 mm10.refseq.mrna.xeno.loadDesc = yes
 mm10.mgc = yes
 mm10.genbank.mrna.blatTargetDb = yes
 # mm10.ccds.ncbiBuild = 37.2
 # mm10.upstreamGeneTbl = refGene
 # mm10.upstreamMaf = multiz30way
 # /hive/data/genomes/mm10/bed/multiz30way/species.list
 
     # end of section added to etc/genbank.conf
     git commit -m "adding mm10 definitions" genbank.conf
     git push
     make etc-update
 
     ssh hgwdev			# used to do this on "genbank" machine
     screen			# long running job managed in screen
     cd /cluster/data/genomes/genbank
     time nice -n +19 ./bin/gbAlignStep -initial mm10 &
     #	var/build/logs/2012.02.08-11:38:50.mm10.initalign.log
     #	real    795m52.388s
 
     # load database when finished
     ssh hgwdev
     cd /cluster/data/genomes/genbank
     time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad mm10 &
     #	logFile: var/dbload/hgwdev/logs/2012.02.09-10:05:25.dbload.log
     #	real    114m56.461s
 
     # enable daily alignment and update of hgwdev (DONE - 2012-02-09 - Hiram)
     cd ~/kent/src/hg/makeDb/genbank
     git pull
     # add mm10 to:
         etc/align.dbs
         etc/hgwdev.dbs
     git commit -m "Added mm10." etc/align.dbs etc/hgwdev.dbs
     git push
     make etc-update
 
 ############################################################################
 # running cpgIsland business (DONE - 2012-02-09 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/cpgIsland
     cd /hive/data/genomes/mm10/bed/cpgIsland
     # use a previous binary for this program
     ln -s ../../../mm9/bed/cpgIsland/hg3rdParty/cpgIslands/cpglh.exe .
 
     mkdir -p hardMaskedFa
     cut -f1 ../../chrom.sizes | while read C
 do
     echo ${C}
     twoBitToFa ../../mm10.2bit:$C stdout \
 	| maskOutFa stdin hard hardMaskedFa/${C}.fa
 done
 
     ssh swarm
     cd /hive/data/genomes/mm10/bed/cpgIsland
     mkdir results
     cut -f1 ../../chrom.sizes > chr.list
     cat << '_EOF_' > template
 #LOOP
 ./runOne $(root1) {check out exists results/$(root1).cpg}
 #ENDLOOP
 '_EOF_'
     # << happy emacs
 
     #	the faCount business is to make sure there is enough sequence to
     #	work with in the fasta.  cpglh.exe does not like files with too many
     #	N's - it gets stuck.
     cat << '_EOF_' > runOne
 #!/bin/csh -fe
 # runOne <seqName> <resultFile>
 # Run cpglh.exe on hardMaskedFa/<seqName>.fa and leave its output in
 # <resultFile>; when the sequence is too short an empty result is created.
 #
 # C = faCount column 2 minus column 7 for this sequence
 #     (presumably sequence length minus N count -- confirm against faCount)
 set C = `faCount hardMaskedFa/$1.fa | egrep -v "^#seq|^total" | awk '{print  $2 - $7 }'`
 if ( $C > 200 ) then
     # write to a scratch file named with this shell's pid, then move it
     # into place so the result file only appears once cpglh.exe is done
     ./cpglh.exe hardMaskedFa/$1.fa > /scratch/tmp/$1.$$
     mv /scratch/tmp/$1.$$ $2
 else
     # 200 or fewer such bases: skip cpglh.exe, just create the result file
     touch $2
 endif
 '_EOF_'
     # << happy emacs
     chmod +x runOne
 
     gensub2 chr.list single template jobList
     para create jobList
     para try
     para check ... etc
     para time
 # Completed: 66 of 66 jobs
 # CPU time in finished jobs:        191s       3.19m     0.05h    0.00d  0.000 y
 # IO & Wait Time:                   189s       3.14m     0.05h    0.00d  0.000 y
 # Average job time:                   6s       0.10m     0.00h    0.00d
 # Longest finished job:              19s       0.32m     0.01h    0.00d
 # Submission to last job:            51s       0.85m     0.01h    0.00d
 
     # Transform cpglh output to bed +
     catDir results | awk '{
 # shift the start (column 2) down by one
 # (presumably cpglh starts are 1-based and bed starts are 0-based)
 $2 = $2 - 1;
 # island width from the adjusted start and the end (column 3)
 width = $3 - $2;
 # one tab-separated bed+ row; input columns 5 and 6 are joined by a single
 # space into one output field, and column 6 is emitted again on its own
 printf("%s\t%d\t%s\t%s %s\t%s\t%s\t%0.0f\t%0.1f\t%s\t%s\n",
        $1, $2, $3, $5,$6, width,
        $6, width*$7*0.01, 100.0*2*$6/width, $7, $9);
 }' > cpgIsland.bed
 
     # verify longest unique chrom name:
     cut -f1 cpgIsland.bed | awk '{print length($0)}' | sort -rn | head -1
     #	20
     # update the length 14 in the template to be 20 (longest name found above):
     sed -e "s/14/20/" $HOME/kent/src/hg/lib/cpgIslandExt.sql > cpgIslandExt.sql
 
     cd /hive/data/genomes/mm10/bed/cpgIsland
     hgLoadBed mm10 cpgIslandExt -tab -sqlTable=cpgIslandExt.sql cpgIsland.bed
 # Loaded 16023 elements of size 10
 
     featureBits mm10 cpgIslandExt
     #	10495450 bases of 2652783500 (0.396%) in intersection
     # compare to previous:
     featureBits mm9 cpgIslandExt
     #	10496250 bases of 2620346127 (0.401%) in intersection
 
     # there should be no output from checkTableCoords:
     checkTableCoords -verboseBlocks -table=cpgIslandExt mm10
 
     #	cleanup, unless you want to move them to the genscan procedure below
     rm -fr hardMaskedFa
 
 #########################################################################
 # GENSCAN GENE PREDICTIONS (DONE - 2012-02-09,10 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/genscan
     cd /hive/data/genomes/mm10/bed/genscan
     # use a previously existing genscan binary
     ln -s ../../../mm9/bed/genscan/hg3rdParty .
 
     # create hard masked .fa files
     mkdir -p hardMaskedFa
     cut -f1 ../../chrom.sizes | while read C
 do
     echo ${C}
     twoBitToFa ../../mm10.2bit:$C stdout \
 	| maskOutFa stdin hard hardMaskedFa/${C}.fa
 done
 
     # Generate a list file, genome.list, of all the hard-masked contig chunks:
     find ./hardMaskedFa/ -type f | sed -e 's#^./##' > genome.list
 
     wc -l genome.list
     #	66 genome.list
 
     # Run on small cluster (more mem than big cluster).
     ssh encodek
     cd /hive/data/genomes/mm10/bed/genscan
     # Make 3 subdirectories for genscan to put their output files in
     mkdir gtf pep subopt
     # Create template file, template, for gensub2.  For example (3-line file):
     cat << '_EOF_' > template
 #LOOP
 /cluster/bin/x86_64/gsBig {check in exists+ $(path1)} {check out exists gtf/$(root1).gtf} -trans={check out exists pep/$(root1).pep} -subopt={check out exists subopt/$(root1).bed} -exe=hg3rdParty/genscanlinux/genscan -par=hg3rdParty/genscanlinux/HumanIso.smat -tmp=/tmp -window=2400000
 #ENDLOOP
 '_EOF_'
     # << emacs
     gensub2 genome.list single template jobList
     para create jobList
     para try
     para check ... etc...
     para time
 # Crashed: 2 jobs
 # CPU time in finished jobs:     171336s    2855.60m    47.59h    1.98d  0.005 y
 # IO & Wait Time:                   261s       4.35m     0.07h    0.00d  0.000 y
 # Average job time:                2640s      44.00m     0.73h    0.03d
 # Longest finished job:           22618s     376.97m     6.28h    0.26d
 # Submission to last job:         28682s     478.03m     7.97h    0.33d
 
     # one of the two crashed jobs was just a stray line in the jobList,
     # somehow a line with the string: '_EOF_' got in there.
 
     # as with mm9, chr7 did not work.  Break it up into pieces
     mkdir /hive/data/genomes/mm10/bed/genscan/chr7Split
     cd /hive/data/genomes/mm10/bed/genscan/chr7Split
     grep chr7 ../../../jkStuff/mm10.nonBridged.lift | grep -v random \
 	> chr7.nonBridged.lift
     faToTwoBit ../hardMaskedFa/chr7.fa chr7.2bit
     ~/kent/src/hg/utils/lft2BitToFa.pl chr7.2bit chr7.nonBridged.lift \
 	| sed -e "s/chr7./chr7_/" > chr7.nonBridged.fa
     faSplit sequence chr7.nonBridged.fa 100 split7/chr7_
 
     ln -s ../../../../mm9/bed/genscan/hg3rdParty .
     echo '#!/bin/sh' > cmdList.sh
     ls split7 | while read F
 do
 echo "/cluster/bin/x86_64/gsBig split7/${F} gtf/${F}.gtf -trans=pep/${F}.pep -subopt=subopt/${F}.bed -exe=hg3rdParty/genscanlinux/genscan -par=hg3rdParty/genscanlinux/HumanIso.smat -tmp=/tmp -window=2400000 &"
 done >> cmdList.sh
     echo "wait" >> cmdList.sh
     chmod +x cmdList.sh
     mkdir gtf pep subopt
     time ./cmdList.sh > run.log 2>&1
     # about 20 minutes
 
     # fix the names in the lift file
     cat chr7.nonBridged.lift | sed -e "s/chr7./chr7_/" > chr7.lift
 
     # the sed mangling will provide unique names for them all, but they
     #	will not be in the strict numerical order that genscan usually produces
     cat gtf/chr7_*.gtf | liftUp -type=.gtf stdout chr7.lift error stdin \
 	| sed -e "s/chr7_0\([0-4]\)\./chr7.\1/g" > chr7.gtf
     cat subopt/chr7_*.bed | liftUp -type=.bed stdout chr7.lift error stdin \
 	| sed -e "s/chr7_0\([0-4]\)\./chr7.\1/g" > chr7.subopt.bed
     cat pep/chr7_*.pep | sed -e "s/chr7_0\([0-4]\)\./chr7.\1/g" > chr7.pep
     cp -p chr7.pep ../pep
     cp -p chr7.gtf ../gtf
     cp -p chr7.subopt.bed ../subopt/chr7.bed
 
     find ./gtf -type f | xargs -n 256 endsInLf -zeroOk
 
     # Concatenate results:
     cd /hive/data/genomes/mm10/bed/genscan
     find ./gtf -type f | xargs cat > genscan.gtf
     find ./pep -type f | xargs cat > genscan.pep
     find ./subopt -type f | xargs cat > genscanSubopt.bed
 
     # Load into the database (without -genePredExt because no frame info):
     # Don't load the Pep anymore -- redundant since it's from genomic.
     ssh hgwdev
     cd /hive/data/genomes/mm10/bed/genscan
     # to construct a local file with the genePred business:
     gtfToGenePred genscan.gtf genscan.gp
     # this produces exactly the same thing and loads the table:
     ldHgGene -gtf mm10 genscan genscan.gtf
     #	Read 45012 transcripts in 323529 lines in 1 files
     #	45012 groups 59 seqs 1 sources 1 feature types
     #	45012 gene predictions
     hgLoadBed mm10 genscanSubopt genscanSubopt.bed
     #	Read 526572 elements of size 6 from genscanSubopt.bed
     featureBits mm10 genscan
     #	55743040 bases of 2652783500 (2.101%) in intersection
     # previously:
     featureBits mm9 genscan
     #	55293837 bases of 2620346127 (2.110%) in intersection
 
 #########################################################################
 # CREATE MICROSAT TRACK (DONE - 2012-02-09 - Hiram)
      ssh hgwdev
      mkdir /hive/data/genomes/mm10/bed/microsat
      cd /hive/data/genomes/mm10/bed/microsat
      awk '($5==2 || $5==3) && $6 >= 15 && $8 == 100 && $9 == 0 {printf("%s\t%s\t%s\t%dx%s\n", $1, $2, $3, $6, $16);}' \
 	../simpleRepeat/simpleRepeat.bed > microsat.bed
     hgLoadBed mm10 microsat microsat.bed
     #	Read 197237 elements of size 4 from microsat.bed
 
 #########################################################################
 #  BLATSERVERS ENTRY (DONE - 2012-02-09 - Hiram)
 #	After getting a blat server assigned by the Blat Server Gods,
     ssh hgwdev
 
     hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
 	VALUES ("mm10", "blat13", "17832", "1", "0"); \
 	INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
 	VALUES ("mm10", "blat13", "17833", "0", "1");' \
 	    hgcentraltest
     #	test it with some sequence
 
 ############################################################################
 # set default position to the same location as in mm9, found via blat
 #	(DONE - 2012-02-09 - Hiram)
     hgsql -e \
 'update dbDb set defaultPos="chr12:56694976-56714605" where name="mm10";' \
 	hgcentraltest
 
 ############################################################################
 # constructing downloads (DONE - 2012-02-09 - Hiram)
     cd /hive/data/genomes/mm10
     # some of the smaller bits are missing the simple repeat results
     time makeDownloads.pl -allowMissedTrfs -workhorse=hgwdev mm10
     #	real    41m42.408s
 
     # edit the README files in goldenPath/*/README.txt
 
 #########################################################################
 # create pushQ entry (DONE - 2012-02-09 - Hiram)
     # first make sure all.joiner is up to date and has this new organism
     # a keys check should be clean:
     cd ~/kent/src/hg/makeDb/schema
     joinerCheck -database=mm10 -keys all.joiner
 
     mkdir /hive/data/genomes/mm10/pushQ
     cd /hive/data/genomes/mm10/pushQ
     makePushQSql.pl mm10 > mm10.sql 2> stderr.out
     # check stderr.out for no significant problems, it is common to see:
 # WARNING: hgwdev does not have /gbdb/mm10/wib/gc5Base.wib
 # WARNING: hgwdev does not have /gbdb/mm10/wib/quality.wib
 # WARNING: hgwdev does not have /gbdb/mm10/bbi/quality.bw
 # WARNING: mm10 does not have seq
 # WARNING: mm10 does not have extFile
 # *** All done!
 
     # which are not real problems
     # if some tables are not identified:
 # WARNING: Could not tell (from trackDb, all.joiner and hardcoded lists of
 # supporting and genbank tables) which tracks to assign these tables to:
 #  list of tables will be in the output
 # put them in manually after loading the pushQ entry
     scp -p mm10.sql hgwbeta:/tmp
     ssh hgwbeta
     cd /tmp
     hgsql qapushq < mm10.sql
 
 #########################################################################
 # lifting ensGene track from mm9 (DONE - 2012-02-22 - Hiram)
     # no gene tracks yet on mm10.  liftOver mm9 ensGenes to mm10
     # history of mm9 ensGene indicates it is the same as v64 release
     #	with v65 being identical
     mkdir /hive/data/genomes/mm10/bed/ensGene
     cd /hive/data/genomes/mm10/bed/ensGene
     hgsql -N -e "select * from ensGene;" mm9 | cut -f2- > mm9.ensGene.gp
     liftOver -genePred mm9.ensGene.gp \
 	/gbdb/mm9/liftOver/mm9ToMm10.over.chain.gz \
 	mm10.lifted.ensGene.gp unmapped.ensGene.gp
     wc -l *.gp
     #	95651 mm10.lifted.ensGene.gp
     #	95883 mm9.ensGene.gp
     #	464 unmapped.ensGene.gp
 
     hgLoadGenePred -skipInvalid -genePredExt mm10 ensGene mm10.lifted.ensGene.gp
     #	Warning: skipping 118 invalid genePreds
 
     # make a list of what did get loaded:
     hgsql -N -e "select name from ensGene;" mm10 \
 	| sort -u > mm10.name.ensGene.txt
     wc -l mm10.name.ensGene.txt
     #	95533 mm10.name.ensGene.txt
 
     hgsql -N -e "select * from ensPep;" mm9 | sort > mm9.ensPep.tab
     hgsql -N -e "select * from ensGtp;" mm9 | sort -k2,2 > mm9.ensGtp.tab
     hgsql -N -e "select * from ensemblToGeneName;" mm9 | sort -k1,1 \
 	> mm9.ensemblToGeneName.tab
     hgsql -N -e "select * from ensemblSource;" mm9 | sort -k1,1 \
 	> mm9.ensemblSource.tab
 
     # select out ensGtp records that match with the names in mm10 ensGene:
     join -1 2 -2 1 -o "1.1,1.2,1.3" mm9.ensGtp.tab mm10.name.ensGene.txt \
 	| tr '[ ]' '[\t]' > mm10.ensGtp.tab
     wc -l *.ensGtp.tab
     #	95533 mm10.ensGtp.tab
     #	95883 mm9.ensGtp.tab
 
     # select out ensPep records that match with the names in mm10 ensGene:
     join -1 1 -2 2 -o "1.1,1.2" mm9.ensPep.tab mm10.ensGtp.tab \
 	| tr '[ ]' '[\t]' > mm10.ensPep.tab
 
     wc -l mm9.ensPep.tab mm10.ensPep.tab
     #	55798 mm9.ensPep.tab
     #	55485 mm10.ensPep.tab
 
     # select out ensemblSource records that match the mm10 ensGene names:
     join -1 1 -2 1 -o "1.1,1.2" mm9.ensemblSource.tab mm10.name.ensGene.txt \
 	| tr '[ ]' '[\t]' > mm10.ensemblSource.tab
     wc -l mm9.ensemblSource.tab mm10.ensemblSource.tab
     #	95883 mm9.ensemblSource.tab
     #	95533 mm10.ensemblSource.tab
 
     # select out ensemblToGeneName records that match the mm10 ensGene names:
     join -1 1 -2 1 -o "1.1,1.2" mm9.ensemblToGeneName.tab \
 	mm10.name.ensGene.txt | tr '[ ]' '[\t]' > mm10.ensemblToGeneName.tab
     wc -l mm9.ensemblToGeneName.tab mm10.ensemblToGeneName.tab
     #	95883 mm9.ensemblToGeneName.tab
     #	95533 mm10.ensemblToGeneName.tab
 
     hgPepPred mm10 tab ensPep mm10.ensPep.tab
     hgLoadSqlTab mm10 ensGtp ~/kent/src/hg/lib/ensGtp.sql mm10.ensGtp.tab
     sed -e "s/15/18/" ~/kent/src/hg/lib/ensemblSource.sql > ensemblSource.sql
     hgLoadSqlTab mm10 ensemblSource ensemblSource.sql mm10.ensemblSource.tab
 
     # find sizes for indexes
   NL=`awk '{print length($1)}' mm10.ensemblToGeneName.tab | sort -rn | head -1`
   VL=`awk '{print length($2)}' mm10.ensemblToGeneName.tab | sort -rn | head -1`
     # construct sql definition with appropriate index sizes
     sed -e "s/ knownTo / ensemblToGeneName /; s/known gene/ensGen/; s/INDEX(name(12)/PRIMARY KEY(name($NL)/; s/value(12)/value($VL)/" \
 	~/kent/src/hg/lib/knownTo.sql > ensemblToGeneName.sql
 
     hgLoadSqlTab mm10 ensemblToGeneName ensemblToGeneName.sql \
 	mm10.ensemblToGeneName.tab
 
 hgsql -e 'INSERT INTO trackVersion \
     (db, name, who, version, updateTime, comment, source, dateReference) \
     VALUES("mm10", "ensGene", "hiram", "65", now(), \
         "lifted from mm9 ensGene 65", \
         "lifted from mm9 ensGene 65", \
         "dec2011" );' hgFixed
 
 #########################################################################
 # Swap lastz Human hg19 (DONE - 2012-03-08 - Hiram)
     # original alignment to hg19
     cd /hive/data/genomes/hg19/bed/lastzMm10.2012-03-07
     cat fb.hg19.chainMm10Link.txt
     #	1021265143 bases of 2897316137 (35.249%) in intersection
 
     #	and the swap
     mkdir /hive/data/genomes/mm10/bed/blastz.hg19.swap
     cd /hive/data/genomes/mm10/bed/blastz.hg19.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/hg19/bed/lastzMm10.2012-03-07/DEF \
 	-swap -noLoadChainSplit -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    72m32.794s
     cat fb.mm10.chainHg19Link.txt
     #	1014045890 bases of 2652783500 (38.226%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s blastz.hg19.swap lastz.hg19
 
 #########################################################################
 # LASTZ RAT Rn4 (DONE - 2012-03-08 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzRn4.2012-03-08
     cd /hive/data/genomes/mm10/bed/lastzRn4.2012-03-08
 
     cat << '_EOF_' > DEF
 # mouse vs rat
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # Specially tuned blastz parameters from Webb Miller
 BLASTZ_O=600
 BLASTZ_E=150
 BLASTZ_Y=15000
 BLASTZ_T=2
 BLASTZ_K=4500
 BLASTZ_Q=/scratch/data/blastz/human_chimp.v2.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Rat Rn4
 SEQ2_DIR=/scratch/data/rn4/rn4.2bit
 SEQ2_LEN=/scratch/data/rn4/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzRn4.2012-03-08
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     #	establish a screen to control this job
     screen -S rn4Mm10
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-bigClusterHub=swarm -chainMinScore=5000 -chainLinearGap=medium \
 	-noLoadChainSplit -syntenicNet -workhorse=hgwdev \
 	-smallClusterHub=encodek > do.log 2>&1 &
     #	real    129m48.444s
     cat fb.mm10.chainRn4Link.txt
     #	1449612208 bases of 2652783500 (54.645%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzRn4.2012-03-08 lastz.rn4
 
     #	and the swap
     mkdir /hive/data/genomes/rn4/bed/blastz.mm10.swap
     cd /hive/data/genomes/rn4/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzRn4.2012-03-08/DEF \
 	-swap -bigClusterHub=swarm -chainMinScore=5000 -chainLinearGap=medium \
 	-noLoadChainSplit -syntenicNet -workhorse=hgwdev \
 	-smallClusterHub=encodek > swap.log 2>&1 &
     #	real    71m10.645s
     cat fb.rn4.chainMm10Link.txt
     #	1449012636 bases of 2571531505 (56.348%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/rn4/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 #########################################################################
 # LASTZ Gorilla gorGor3 (DONE - 2012-03-08 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzGorGor3.2012-03-08
     cd /hive/data/genomes/mm10/bed/lastzGorGor3.2012-03-08
 
     cat << '_EOF_' > DEF
 # gorilla vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Gorilla GorGor3
 SEQ2_DIR=/scratch/data/gorGor3/gorGor3.2bit
 SEQ2_LEN=/scratch/data/gorGor3/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=300
 
 BASE=/hive/data/genomes/mm10/bed/lastzGorGor3.2012-03-08
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     #	establish a screen to control this job
     screen -S mm10GorGor3
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    625m17.180s
     cat fb.mm10.chainGorGor3Link.txt
     #	901610588 bases of 2652783500 (33.987%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzGorGor3.2012-03-08 lastz.gorGor3
 
     mkdir /hive/data/genomes/gorGor3/bed/blastz.mm10.swap
     cd /hive/data/genomes/gorGor3/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzGorGor3.2012-03-08/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    91m3.616s
     cat fb.gorGor3.chainMm10Link.txt
     #	969595533 bases of 2822760080 (34.349%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/gorGor3/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ Gibbon nomLeu1 (DONE - 2012-03-08 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzNomLeu1.2012-03-08
     cd /hive/data/genomes/mm10/bed/lastzNomLeu1.2012-03-08
 
     cat << '_EOF_' > DEF
 # gibbon vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Gibbon NomLeu1
 SEQ2_DIR=/scratch/data/nomLeu1/nomLeu1.2bit
 SEQ2_LEN=/scratch/data/nomLeu1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=300
 
 BASE=/hive/data/genomes/mm10/bed/lastzNomLeu1.2012-03-08
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     #	establish a screen to control this job
     screen -S mm10NomLeu1
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    556m26.589s
     cat fb.mm10.chainNomLeu1Link.txt
     #	905455766 bases of 2652783500 (34.132%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzNomLeu1.2012-03-08 lastz.nomLeu1
 
     mkdir /hive/data/genomes/nomLeu1/bed/blastz.mm10.swap
     cd /hive/data/genomes/nomLeu1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzNomLeu1.2012-03-08/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    66m50.839s
     cat fb.nomLeu1.chainMm10Link.txt
     #	892362811 bases of 2756591777 (32.372%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/nomLeu1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ Rhesus rheMac3 (DONE - 2012-03-08 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzRheMac3.2012-03-08
     cd /hive/data/genomes/mm10/bed/lastzRheMac3.2012-03-08
 
     cat << '_EOF_' > DEF
 # rhesus vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Rhesus RheMac3
 SEQ2_DIR=/scratch/data/rheMac3/rheMac3.2bit
 SEQ2_LEN=/scratch/data/rheMac3/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=300
 
 BASE=/hive/data/genomes/mm10/bed/lastzRheMac3.2012-03-08
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     #	establish a screen to control this job
     screen -S mm10RheMac3
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    596m55.622s
     cat fb.mm10.chainRheMac3Link.txt
     #	900117108 bases of 2652783500 (33.931%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzRheMac3.2012-03-08 lastz.rheMac3
 
     mkdir /hive/data/genomes/rheMac3/bed/blastz.mm10.swap
     cd /hive/data/genomes/rheMac3/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzRheMac3.2012-03-08/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    69m5.839s
     cat fb.rheMac3.chainMm10Link.txt
     #	883164992 bases of 2639145830 (33.464%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/rheMac3/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ Baboon papHam1 (DONE - 2012-03-09 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzPapHam1.2012-03-09
     cd /hive/data/genomes/mm10/bed/lastzPapHam1.2012-03-09
 
     cat << '_EOF_' > DEF
 # baboon vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Baboon PapHam1
 SEQ2_DIR=/scratch/data/papHam1/papHam1.2bit
 SEQ2_LEN=/scratch/data/papHam1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=500
 
 BASE=/hive/data/genomes/mm10/bed/lastzPapHam1.2012-03-09
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     #	establish a screen to control this job
     screen -S mm10PapHam1
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    1138m52.716s
     cat fb.mm10.chainPapHam1Link.txt
     #	890718423 bases of 2652783500 (33.577%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzPapHam1.2012-03-09 lastz.papHam1
 
     # better to have reciprocal best for this one since it is low coverage:
     cd /hive/data/genomes/mm10/bed/lastzPapHam1.2012-03-09
     time doRecipBest.pl mm10 papHam1 -buildDir=`pwd` -workhorse=hgwdev \
 	> best.log 2>&1 &
     #	real    899m48.908s
 
     mkdir /hive/data/genomes/papHam1/bed/blastz.mm10.swap
     cd /hive/data/genomes/papHam1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzPapHam1.2012-03-09/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    548m15.438s
     cat fb.papHam1.chainMm10Link.txt
     #	878016290 bases of 2741867288 (32.023%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/papHam1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # Swap ponAbe2 lastz (DONE - 2012-03-09 - Hiram)
     # original alignment result:
     cd /hive/data/genomes/ponAbe2/bed/lastzMm10.2012-03-08
     cat fb.ponAbe2.chainMm10Link.txt
     #	946932454 bases of 3093572278 (30.610%) in intersection
 
     #	and the swap
     mkdir /hive/data/genomes/mm10/bed/blastz.ponAbe2.swap
     cd /hive/data/genomes/mm10/bed/blastz.ponAbe2.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/ponAbe2/bed/lastzMm10.2012-03-08/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    72m38.550s
     cat fb.mm10.chainPonAbe2Link.txt
     #	915093866 bases of 2652783500 (34.496%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s blastz.ponAbe2.swap lastz.ponAbe2
 
 ##############################################################################
 # LASTZ Squirrel monkey saiBol1 (DONE - 2012-03-09 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzSaiBol1.2012-03-09
     cd /hive/data/genomes/mm10/bed/lastzSaiBol1.2012-03-09
 
     cat << '_EOF_' > DEF
 # squirrel monkey vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Squirrel monkey SaiBol1
 SEQ2_DIR=/hive/data/genomes/saiBol1/saiBol1.2bit
 SEQ2_LEN=/hive/data/genomes/saiBol1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=50
 
 BASE=/hive/data/genomes/mm10/bed/lastzSaiBol1.2012-03-09
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     #	establish a screen to control this job
     screen -S mm10SaiBol1
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    538m42.643s
     cat fb.mm10.chainSaiBol1Link.txt
     #	857872391 bases of 2652783500 (32.339%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzSaiBol1.2012-03-09 lastz.saiBol1
 
     mkdir /hive/data/genomes/saiBol1/bed/blastz.mm10.swap
     cd /hive/data/genomes/saiBol1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzSaiBol1.2012-03-09/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    59m36.306s
     cat fb.saiBol1.chainMm10Link.txt
     #	838457857 bases of 2477131095 (33.848%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/saiBol1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ Marmoset calJac3 (DONE - 2012-03-09 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzCalJac3.2012-03-09
     cd /hive/data/genomes/mm10/bed/lastzCalJac3.2012-03-09
 
     cat << '_EOF_' > DEF
 # marmoset vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Marmoset monkey CalJac3
 SEQ2_DIR=/scratch/data/calJac3/calJac3.2bit
 SEQ2_LEN=/scratch/data/calJac3/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=100
 
 BASE=/hive/data/genomes/mm10/bed/lastzCalJac3.2012-03-09
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     #	establish a screen to control this job
     screen -S mm10CalJac3
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    529m39.657s
     cat fb.mm10.chainCalJac3Link.txt
     #	860830771 bases of 2652783500 (32.450%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzCalJac3.2012-03-09 lastz.calJac3
 
     mkdir /hive/data/genomes/calJac3/bed/blastz.mm10.swap
     cd /hive/data/genomes/calJac3/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzCalJac3.2012-03-09/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    67m21.635s
     cat fb.calJac3.chainMm10Link.txt
     #	861565545 bases of 2752505800 (31.301%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/calJac3/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ Chimp PanTro4 (DONE - 2012-03-09 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzPanTro4.2012-03-09
     cd /hive/data/genomes/mm10/bed/lastzPanTro4.2012-03-09
 
     cat << '_EOF_' > DEF
 # chimp vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=10000000
 SEQ1_LAP=10000
 
 # QUERY: Chimp PanTro4
 SEQ2_DIR=/hive/data/genomes/panTro4/panTro4.2bit
 SEQ2_LEN=/hive/data/genomes/panTro4/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LAP=0
 SEQ2_LIMIT=200
 
 BASE=/hive/data/genomes/mm10/bed/lastzPanTro4.2012-03-09
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     #	establish a screen to control this job
     screen -S mm10PanTro4
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet -noLoadChainSplit \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    682m53.046s
     cat fb.mm10.chainPanTro4Link.txt
     #	919836299 bases of 2652783500 (34.674%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzPanTro4.2012-03-09 lastz.panTro4
 
     mkdir /hive/data/genomes/panTro4/bed/blastz.mm10.swap
     cd /hive/data/genomes/panTro4/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzPanTro4.2012-03-09/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    73m23.855s
     cat fb.panTro4.chainMm10Link.txt
     #	926540065 bases of 2902338967 (31.924%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/panTro4/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ tarsier tarSyr1 (DONE - 2012-03-10 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzTarSyr1.2012-03-10
     cd /hive/data/genomes/mm10/bed/lastzTarSyr1.2012-03-10
 
     cat << '_EOF_' > DEF
 # tarsier vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: tarsier TarSyr1
 SEQ2_DIR=/scratch/data/tarSyr1/tarSyr1.2bit
 SEQ2_LEN=/scratch/data/tarSyr1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=800
 
 BASE=/hive/data/genomes/mm10/bed/lastzTarSyr1.2012-03-10
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     #	establish a screen to control this job
     screen -S mm10TarSyr1
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    2457m45.759s
     cat fb.mm10.chainTarSyr1Link.txt
     #	651517559 bases of 2652783500 (24.560%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzTarSyr1.2012-03-10 lastz.tarSyr1
 
     # better to have reciprocal best for this one since it is low coverage:
     cd /hive/data/genomes/mm10/bed/lastzTarSyr1.2012-03-10
     time doRecipBest.pl mm10 tarSyr1 -buildDir=`pwd` -workhorse=hgwdev \
 	> best.log 2>&1 &
     #	real    1176m19.336s
 
     mkdir /hive/data/genomes/tarSyr1/bed/blastz.mm10.swap
     cd /hive/data/genomes/tarSyr1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzTarSyr1.2012-03-10/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    746m30.852s
     cat fb.tarSyr1.chainMm10Link.txt
     #	691746721 bases of 2768536343 (24.986%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/tarSyr1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # Swap chimp panTro3 to Mm10 (DONE - 2012-03-12 - Hiram)
     # original alignment on panTro3
     cd /hive/data/genomes/panTro3/bed/lastzMm10.2012-03-08
     cat fb.panTro3.chainMm10Link.txt
     #	929073028 bases of 2900529764 (32.031%) in intersection
 
     # and this swap:
     mkdir /hive/data/genomes/mm10/bed/blastz.panTro3.swap
     cd /hive/data/genomes/mm10/bed/blastz.panTro3.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/panTro3/bed/lastzMm10.2012-03-08/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    68m46.408s
     cat fb.mm10.chainPanTro3Link.txt
     #	922491113 bases of 2652783500 (34.774%) in intersection
 
 ##############################################################################
 # LASTZ bushbaby otoGar3 (DONE - 2012-03-13 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzOtoGar3.2012-03-13
     cd /hive/data/genomes/mm10/bed/lastzOtoGar3.2012-03-13
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # bushbaby vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: bushbaby OtoGar3
 SEQ2_DIR=/hive/data/genomes/otoGar3/otoGar3.2bit
 SEQ2_LEN=/hive/data/genomes/otoGar3/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=30
 
 BASE=/hive/data/genomes/mm10/bed/lastzOtoGar3.2012-03-13
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     #	establish a screen to control this job
     screen -S mm10OtoGar3
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    757m32.438s
     cat fb.mm10.chainOtoGar3Link.txt
     #	790408953 bases of 2652783500 (29.795%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzOtoGar3.2012-03-13 lastz.otoGar3
 
     mkdir /hive/data/genomes/otoGar3/bed/blastz.mm10.swap
     cd /hive/data/genomes/otoGar3/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzOtoGar3.2012-03-13/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    61m18.952s
     cat fb.otoGar3.chainMm10Link.txt
     #	776907989 bases of 2359530453 (32.926%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/otoGar3/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ mouse lemur micMur1 (DONE - 2012-03-13 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzMicMur1.2012-03-13
     cd /hive/data/genomes/mm10/bed/lastzMicMur1.2012-03-13
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # mouse lemur vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: mouse lemur MicMur1
 SEQ2_DIR=/scratch/data/micMur1/micMur1.2bit
 SEQ2_LEN=/scratch/data/micMur1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=400
 
 BASE=/hive/data/genomes/mm10/bed/lastzMicMur1.2012-03-13
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     #	establish a screen to control this job
     screen -S mm10MicMur1
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    687m41.863s
     cat fb.mm10.chainMicMur1Link.txt
     #	706607444 bases of 2652783500 (26.636%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzMicMur1.2012-03-13 lastz.micMur1
 
     # better to have reciprocal best for this one since it is low coverage:
     cd /hive/data/genomes/mm10/bed/lastzMicMur1.2012-03-13
     time doRecipBest.pl mm10 micMur1 -buildDir=`pwd` -workhorse=hgwdev \
 	> best.log 2>&1 &
     #	real    877m18.105s
 
     mkdir /hive/data/genomes/micMur1/bed/blastz.mm10.swap
     cd /hive/data/genomes/micMur1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzMicMur1.2012-03-13/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    116m54.411s
     cat fb.micMur1.chainMm10Link.txt
     #	696025630 bases of 1852394361 (37.574%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/micMur1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ squirrel speTri2 (DONE - 2012-03-15 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzSpeTri2.2012-03-15
     cd /hive/data/genomes/mm10/bed/lastzSpeTri2.2012-03-15
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # squirrel vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: squirrel SpeTri2
 SEQ2_DIR=/hive/data/genomes/speTri2/speTri2.2bit
 SEQ2_LEN=/hive/data/genomes/speTri2/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=50
 
 BASE=/hive/data/genomes/mm10/bed/lastzSpeTri2.2012-03-15
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     #	establish a screen to control this job
     screen -S mm10SpeTri2
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    935m27.893s
     cat fb.mm10.chainSpeTri2Link.txt
     #	907715417 bases of 2652783500 (34.217%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzSpeTri2.2012-03-15 lastz.speTri2
 
     mkdir /hive/data/genomes/speTri2/bed/blastz.mm10.swap
     cd /hive/data/genomes/speTri2/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzSpeTri2.2012-03-15/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    74m41.819s
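     #	real    116m54.411s  (NOTE: identical to the micMur1 swap timing above; likely copy/paste residue - the 74m41.819s line is presumably the speTri2 swap time)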
     cat fb.speTri2.chainMm10Link.txt
     #	906956512 bases of 2311060300 (39.244%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/speTri2/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ kangaroo rat dipOrd1 (DONE - 2012-03-15 - Hiram)
     #	establish a screen to control this job
     screen -S mm10DipOrd1
     mkdir /hive/data/genomes/mm10/bed/lastzDipOrd1.2012-03-15
     cd /hive/data/genomes/mm10/bed/lastzDipOrd1.2012-03-15
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # kangaroo rat vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: kangaroo rat DipOrd1
 SEQ2_DIR=/scratch/data/dipOrd1/dipOrd1.2bit
 SEQ2_LEN=/scratch/data/dipOrd1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=400
 
 BASE=/hive/data/genomes/mm10/bed/lastzDipOrd1.2012-03-15
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    867m19.972s
     cat fb.mm10.chainDipOrd1Link.txt
     #	516232678 bases of 2652783500 (19.460%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzDipOrd1.2012-03-15 lastz.dipOrd1
 
     # better to have reciprocal best for this one since it is low coverage:
     cd /hive/data/genomes/mm10/bed/lastzDipOrd1.2012-03-15
     time doRecipBest.pl mm10 dipOrd1 -buildDir=`pwd` -workhorse=hgwdev \
 	> best.log 2>&1 &
     #	real    914m20.405s
 
     mkdir /hive/data/genomes/dipOrd1/bed/blastz.mm10.swap
     cd /hive/data/genomes/dipOrd1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzDipOrd1.2012-03-15/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    115m1.497s
     cat fb.dipOrd1.chainMm10Link.txt
     #	507580668 bases of 1844961421 (27.512%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/dipOrd1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ Naked mole-rat hetGla1 (DONE - 2012-03-15 - Hiram)
     #	establish a screen to control this job
     screen -S mm10HetGla1
     mkdir /hive/data/genomes/mm10/bed/lastzHetGla1.2012-03-15
     cd /hive/data/genomes/mm10/bed/lastzHetGla1.2012-03-15
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # Naked mole-rat vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Naked mole-rat HetGla1
 SEQ2_DIR=/scratch/data/hetGla1/hetGla1.2bit
 SEQ2_LEN=/scratch/data/hetGla1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=200
 
 BASE=/hive/data/genomes/mm10/bed/lastzHetGla1.2012-03-15
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    745m15.097s
     cat fb.mm10.chainHetGla1Link.txt
     #	853221843 bases of 2652783500 (32.163%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzHetGla1.2012-03-15 lastz.hetGla1
 
     mkdir /hive/data/genomes/hetGla1/bed/blastz.mm10.swap
     cd /hive/data/genomes/hetGla1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzHetGla1.2012-03-15/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    74m26.471s
     cat fb.hetGla1.chainMm10Link.txt
     #	885195861 bases of 2430064805 (36.427%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/hetGla1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ horse equCab2 (DONE - 2012-03-16 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10EquCab2
     mkdir /hive/data/genomes/mm10/bed/lastzEquCab2.2012-03-16
     cd /hive/data/genomes/mm10/bed/lastzEquCab2.2012-03-16
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # horse vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: horse EquCab2
 SEQ2_DIR=/scratch/data/equCab2/equCab2.2bit
 SEQ2_LEN=/scratch/data/equCab2/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=1
 
 BASE=/hive/data/genomes/mm10/bed/lastzEquCab2.2012-03-16
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    566m34.024s
     cat fb.mm10.chainEquCab2Link.txt
     #	912967841 bases of 2652783500 (34.415%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzEquCab2.2012-03-16 lastz.equCab2
 
     mkdir /hive/data/genomes/equCab2/bed/blastz.mm10.swap
     cd /hive/data/genomes/equCab2/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzEquCab2.2012-03-16/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    87m2.261s
     cat fb.equCab2.chainMm10Link.txt
     #	901995882 bases of 2428790173 (37.138%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/equCab2/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ guinea pig cavPor3 (DONE - 2012-03-16 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10CavPor3
     mkdir /hive/data/genomes/mm10/bed/lastzCavPor3.2012-03-16
     cd /hive/data/genomes/mm10/bed/lastzCavPor3.2012-03-16
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # guinea pig vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: guinea pig CavPor3
 SEQ2_DIR=/scratch/data/cavPor3/cavPor3.2bit
 SEQ2_LEN=/scratch/data/cavPor3/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=10
 
 BASE=/hive/data/genomes/mm10/bed/lastzCavPor3.2012-03-16
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    1523m35.729s
     cat fb.mm10.chainCavPor3Link.txt
     #	754642254 bases of 2652783500 (28.447%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzCavPor3.2012-03-16 lastz.cavPor3
 
     mkdir /hive/data/genomes/cavPor3/bed/blastz.mm10.swap
     cd /hive/data/genomes/cavPor3/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzCavPor3.2012-03-16/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    80m23.870s
     cat fb.cavPor3.chainMm10Link.txt
     #	775452752 bases of 2663369733 (29.115%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/cavPor3/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ alpaca vicPac1 (DONE - 2012-03-16 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10VicPac1
     mkdir /hive/data/genomes/mm10/bed/lastzVicPac1.2012-03-16
     cd /hive/data/genomes/mm10/bed/lastzVicPac1.2012-03-16
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # alpaca vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: alpaca VicPac1
 SEQ2_DIR=/scratch/data/vicPac1/vicPac1.2bit
 SEQ2_LEN=/scratch/data/vicPac1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=700
 
 BASE=/hive/data/genomes/mm10/bed/lastzVicPac1.2012-03-16
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    2049m38.674s
     cat fb.mm10.chainVicPac1Link.txt
     #	600477253 bases of 2652783500 (22.636%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzVicPac1.2012-03-16 lastz.vicPac1
 
     # better to have reciprocal best for this one since it is low coverage:
     cd /hive/data/genomes/mm10/bed/lastzVicPac1.2012-03-16
     time doRecipBest.pl mm10 vicPac1 -buildDir=`pwd` -workhorse=hgwdev \
 	> best.log 2>&1 &
     #	real    824m37.107s
 
     mkdir /hive/data/genomes/vicPac1/bed/blastz.mm10.swap
     cd /hive/data/genomes/vicPac1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzVicPac1.2012-03-16/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    159m21.952s
     cat fb.vicPac1.chainMm10Link.txt
     #	610885692 bases of 1922910435 (31.769%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/vicPac1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ dolphin turTru1 (DONE - 2012-03-16 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10TurTru1
     mkdir /hive/data/genomes/mm10/bed/lastzTurTru1.2012-03-16
     cd /hive/data/genomes/mm10/bed/lastzTurTru1.2012-03-16
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # dolphin vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: dolphin TurTru1
 SEQ2_DIR=/scratch/data/turTru1/turTru1.2bit
 SEQ2_LEN=/scratch/data/turTru1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=300
 
 BASE=/hive/data/genomes/mm10/bed/lastzTurTru1.2012-03-16
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    1484m14.609s
     cat fb.mm10.chainTurTru1Link.txt
     #	762961671 bases of 2652783500 (28.761%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzTurTru1.2012-03-16 lastz.turTru1
 
     # better to have reciprocal best for this one since it is low coverage:
     cd /hive/data/genomes/mm10/bed/lastzTurTru1.2012-03-16
     time doRecipBest.pl mm10 turTru1 -buildDir=`pwd` -workhorse=hgwdev \
 	> best.log 2>&1 &
     #	real    733m37.272s
 
     mkdir /hive/data/genomes/turTru1/bed/blastz.mm10.swap
     cd /hive/data/genomes/turTru1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzTurTru1.2012-03-16/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    79m38.703s
     cat fb.turTru1.chainMm10Link.txt
     #	744359707 bases of 2298444090 (32.385%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/turTru1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ tree shrew tupBel1 (DONE - 2012-03-16 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10TupBel1
     mkdir /hive/data/genomes/mm10/bed/lastzTupBel1.2012-03-16
     cd /hive/data/genomes/mm10/bed/lastzTupBel1.2012-03-16
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # tree shrew vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: tree shrew TupBel1
 SEQ2_DIR=/scratch/data/tupBel1/tupBel1.2bit
 SEQ2_LEN=/scratch/data/tupBel1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=400
 
 BASE=/hive/data/genomes/mm10/bed/lastzTupBel1.2012-03-16
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    1731m30.449s
     cat fb.mm10.chainTupBel1Link.txt
     #	524337666 bases of 2652783500 (19.766%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzTupBel1.2012-03-16 lastz.tupBel1
 
     # better to have reciprocal best for this one since it is low coverage:
     cd /hive/data/genomes/mm10/bed/lastzTupBel1.2012-03-16
     time doRecipBest.pl mm10 tupBel1 -buildDir=`pwd` -workhorse=hgwdev \
 	> best.log 2>&1 &
     #	real    1090m30.429s
 
     mkdir /hive/data/genomes/tupBel1/bed/blastz.mm10.swap
     cd /hive/data/genomes/tupBel1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzTupBel1.2012-03-16/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    136m7.163s
     cat fb.tupBel1.chainMm10Link.txt
     #	537379661 bases of 2137225476 (25.144%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/tupBel1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ pig susScr2 (DONE - 2012-03-16 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10SusScr2
     mkdir /hive/data/genomes/mm10/bed/lastzSusScr2.2012-03-16
     cd /hive/data/genomes/mm10/bed/lastzSusScr2.2012-03-16
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # pig vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: pig SusScr2
 SEQ2_DIR=/scratch/data/susScr2/susScr2.2bit
 SEQ2_LEN=/scratch/data/susScr2/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=1
 
 BASE=/hive/data/genomes/mm10/bed/lastzSusScr2.2012-03-16
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    1272m57.727s
     cat fb.mm10.chainSusScr2Link.txt
     #	616716602 bases of 2652783500 (23.248%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzSusScr2.2012-03-16 lastz.susScr2
 
     mkdir /hive/data/genomes/susScr2/bed/blastz.mm10.swap
     cd /hive/data/genomes/susScr2/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzSusScr2.2012-03-16/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    62m47.465s
     cat fb.susScr2.chainMm10Link.txt
     #	656498040 bases of 2231298548 (29.422%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/susScr2/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ rabbit oryCun2 (DONE - 2012-03-16 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10OryCun2
     mkdir /hive/data/genomes/mm10/bed/lastzOryCun2.2012-03-16
     cd /hive/data/genomes/mm10/bed/lastzOryCun2.2012-03-16
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # rabbit vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: rabbit OryCun2
 SEQ2_DIR=/scratch/data/oryCun2/oryCun2.2bit
 SEQ2_LEN=/scratch/data/oryCun2/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=20
 
 BASE=/hive/data/genomes/mm10/bed/lastzOryCun2.2012-03-16
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    1412m58.641s
     cat fb.mm10.chainOryCun2Link.txt
     #	669778489 bases of 2652783500 (25.248%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzOryCun2.2012-03-16 lastz.oryCun2
 
     mkdir /hive/data/genomes/oryCun2/bed/blastz.mm10.swap
     cd /hive/data/genomes/oryCun2/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzOryCun2.2012-03-16/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    64m40.959s
     cat fb.oryCun2.chainMm10Link.txt
     #	668643668 bases of 2604023284 (25.677%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/oryCun2/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ sloth choHof1 (DONE - 2012-03-19 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10ChoHof1
     mkdir /hive/data/genomes/mm10/bed/lastzChoHof1.2012-03-19
     cd /hive/data/genomes/mm10/bed/lastzChoHof1.2012-03-19
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # sloth vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: sloth ChoHof1
 SEQ2_DIR=/scratch/data/choHof1/choHof1.2bit
 SEQ2_LEN=/scratch/data/choHof1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=800
 
 BASE=/hive/data/genomes/mm10/bed/lastzChoHof1.2012-03-19
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     # rebooted hgwdev during first swarm run, continuing:
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-continue=cat -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > cat.log 2>&1 &
     #	Elapsed time: 65m26s
     cat fb.mm10.chainChoHof1Link.txt
     #	477994856 bases of 2652783500 (18.019%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzChoHof1.2012-03-19 lastz.choHof1
 
     # better to have reciprocal best for this one since it is low coverage:
     cd /hive/data/genomes/mm10/bed/lastzChoHof1.2012-03-19
     time doRecipBest.pl mm10 choHof1 -buildDir=`pwd` -workhorse=hgwdev \
 	> best.log 2>&1 &
     #	real    1171m56.481s
 
     mkdir /hive/data/genomes/choHof1/bed/blastz.mm10.swap
     cd /hive/data/genomes/choHof1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzChoHof1.2012-03-19/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    1613m3.348s
     cat fb.choHof1.chainMm10Link.txt
     #	488047499 bases of 2060419685 (23.687%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/choHof1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ megabat pteVam1 (DONE - 2012-03-19 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10PteVam1
     mkdir /hive/data/genomes/mm10/bed/lastzPteVam1.2012-03-19
     cd /hive/data/genomes/mm10/bed/lastzPteVam1.2012-03-19
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # megabat vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: megabat PteVam1
 SEQ2_DIR=/scratch/data/pteVam1/pteVam1.2bit
 SEQ2_LEN=/scratch/data/pteVam1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=200
 
 BASE=/hive/data/genomes/mm10/bed/lastzPteVam1.2012-03-19
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    1843m33.186s
     cat fb.mm10.chainPteVam1Link.txt
     #	725414059 bases of 2652783500 (27.345%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzPteVam1.2012-03-19 lastz.pteVam1
 
     # better to have reciprocal best for this one since it is low coverage:
     cd /hive/data/genomes/mm10/bed/lastzPteVam1.2012-03-19
     time doRecipBest.pl mm10 pteVam1 -buildDir=`pwd` -workhorse=hgwdev \
 	> best.log 2>&1 &
     #	real    743m57.901s
 
     mkdir /hive/data/genomes/pteVam1/bed/blastz.mm10.swap
     cd /hive/data/genomes/pteVam1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzPteVam1.2012-03-19/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	Elapsed time: 75m35s
     cat fb.pteVam1.chainMm10Link.txt
     #	710519911 bases of 1839436660 (38.627%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/pteVam1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ elephant loxAfr3 (DONE - 2012-03-19 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10LoxAfr3
     mkdir /hive/data/genomes/mm10/bed/lastzLoxAfr3.2012-03-19
     cd /hive/data/genomes/mm10/bed/lastzLoxAfr3.2012-03-19
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # elephant vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: elephant LoxAfr3
 SEQ2_DIR=/scratch/data/loxAfr3/loxAfr3.2bit
 SEQ2_LEN=/scratch/data/loxAfr3/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=50
 
 BASE=/hive/data/genomes/mm10/bed/lastzLoxAfr3.2012-03-19
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    1848m11.111s
     cat fb.mm10.chainLoxAfr3Link.txt
     #	685029753 bases of 2652783500 (25.823%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzLoxAfr3.2012-03-19 lastz.loxAfr3
 
     mkdir /hive/data/genomes/loxAfr3/bed/blastz.mm10.swap
     cd /hive/data/genomes/loxAfr3/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzLoxAfr3.2012-03-19/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	Elapsed time: 73m14s
     cat fb.loxAfr3.chainMm10Link.txt
     #	674108752 bases of 3118565340 (21.616%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/loxAfr3/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ cat felCat4 (DONE - 2012-03-19 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10FelCat4
     mkdir /hive/data/genomes/mm10/bed/lastzFelCat4.2012-03-19
     cd /hive/data/genomes/mm10/bed/lastzFelCat4.2012-03-19
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # cat vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: cat FelCat4
 SEQ2_DIR=/scratch/data/felCat4/felCat4.2bit
 SEQ2_LEN=/scratch/data/felCat4/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=300
 
 BASE=/hive/data/genomes/mm10/bed/lastzFelCat4.2012-03-19
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    2010m48.963s
     cat fb.mm10.chainFelCat4Link.txt
     #	637531191 bases of 2652783500 (24.033%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzFelCat4.2012-03-19 lastz.felCat4
 
     # better to have reciprocal best for this one since it is low coverage:
     cd /hive/data/genomes/mm10/bed/lastzFelCat4.2012-03-19
     time doRecipBest.pl mm10 felCat4 -buildDir=`pwd` -workhorse=hgwdev \
 	> best.log 2>&1 &
     #	real    1135m12.207s
 
     mkdir /hive/data/genomes/felCat4/bed/blastz.mm10.swap
     cd /hive/data/genomes/felCat4/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzFelCat4.2012-03-19/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	Elapsed time: 88m12s
     cat fb.felCat4.chainMm10Link.txt
     #	616167655 bases of 1990635005 (30.953%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/felCat4/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ panda ailMel1 (DONE - 2012-03-19 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10AilMel1
     mkdir /hive/data/genomes/mm10/bed/lastzAilMel1.2012-03-19
     cd /hive/data/genomes/mm10/bed/lastzAilMel1.2012-03-19
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # panda vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: panda AilMel1
 SEQ2_DIR=/scratch/data/ailMel1/ailMel1.2bit
 SEQ2_LEN=/scratch/data/ailMel1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=200
 
 BASE=/hive/data/genomes/mm10/bed/lastzAilMel1.2012-03-19
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     # forgot to redirect the output to do.log on this run
     #	(note the missing '> do.log 2>&1 &' on the command below)
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium
     #	real    1914m15.921s
     cat fb.mm10.chainAilMel1Link.txt
     #	821806974 bases of 2652783500 (30.979%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzAilMel1.2012-03-19 lastz.ailMel1
 
     mkdir /hive/data/genomes/ailMel1/bed/blastz.mm10.swap
     cd /hive/data/genomes/ailMel1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzAilMel1.2012-03-19/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	Elapsed time: 65m50s
     cat fb.ailMel1.chainMm10Link.txt
     #	798482731 bases of 2245312831 (35.562%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/ailMel1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ dog canFam3 (DONE - 2012-03-19 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10CanFam3
     mkdir /hive/data/genomes/mm10/bed/lastzCanFam3.2012-03-19
     cd /hive/data/genomes/mm10/bed/lastzCanFam3.2012-03-19
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # dog vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: dog CanFam3
 SEQ2_DIR=/hive/data/genomes/canFam3/canFam3.2bit
 SEQ2_LEN=/hive/data/genomes/canFam3/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=20
 
 BASE=/hive/data/genomes/mm10/bed/lastzCanFam3.2012-03-19
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
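     # forgot to copy to the log
     #	(NOTE: the command below does redirect to do.log; this remark may
     #	have been carried over from the ailMel1 section - verify)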
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    1883m21.850s
     cat fb.mm10.chainCanFam3Link.txt
     #	773114990 bases of 2652783500 (29.144%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzCanFam3.2012-03-19 lastz.canFam3
 
     mkdir /hive/data/genomes/canFam3/bed/blastz.mm10.swap
     cd /hive/data/genomes/canFam3/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzCanFam3.2012-03-19/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	Elapsed time: 63m22s
     cat fb.canFam3.chainMm10Link.txt
     #	756678903 bases of 2392715236 (31.624%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/canFam3/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ armadillo dasNov2 (DONE - 2012-03-21 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10DasNov2
     mkdir /hive/data/genomes/mm10/bed/lastzDasNov2.2012-03-21
     cd /hive/data/genomes/mm10/bed/lastzDasNov2.2012-03-21
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # armadillo vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: armadillo DasNov2
 SEQ2_DIR=/scratch/data/dasNov2/dasNov2.2bit
 SEQ2_LEN=/scratch/data/dasNov2/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=800
 
 BASE=/hive/data/genomes/mm10/bed/lastzDasNov2.2012-03-21
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    2655m49.904s
     cat fb.mm10.chainDasNov2Link.txt
     #	451070039 bases of 2652783500 (17.004%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzDasNov2.2012-03-21 lastz.dasNov2
 
     # better to have reciprocal best for this one since it is low coverage:
     cd /hive/data/genomes/mm10/bed/lastzDasNov2.2012-03-21
     time doRecipBest.pl mm10 dasNov2 -buildDir=`pwd` -workhorse=hgwdev \
 	> best.log 2>&1 &
     #	real    1163m1.023s
 
     mkdir /hive/data/genomes/dasNov2/bed/blastz.mm10.swap
     cd /hive/data/genomes/dasNov2/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzDasNov2.2012-03-21/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    201m9.701s
     cat fb.dasNov2.chainMm10Link.txt
     #	461142417 bases of 2371493872 (19.445%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/dasNov2/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ microbat myoLuc2 (DONE - 2012-03-21 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10MyoLuc2
     mkdir /hive/data/genomes/mm10/bed/lastzMyoLuc2.2012-03-21
     cd /hive/data/genomes/mm10/bed/lastzMyoLuc2.2012-03-21
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # microbat vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: microbat MyoLuc2
 SEQ2_DIR=/scratch/data/myoLuc2/myoLuc2.2bit
 SEQ2_LEN=/scratch/data/myoLuc2/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=50
 
 BASE=/hive/data/genomes/mm10/bed/lastzMyoLuc2.2012-03-21
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    1033m38.184s
     cat fb.mm10.chainMyoLuc2Link.txt
     #	646292112 bases of 2652783500 (24.363%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzMyoLuc2.2012-03-21 lastz.myoLuc2
 
     # better to have reciprocal best for this one since it is low coverage:
     cd /hive/data/genomes/mm10/bed/lastzMyoLuc2.2012-03-21
     time doRecipBest.pl mm10 myoLuc2 -buildDir=`pwd` -workhorse=hgwdev \
 	> best.log 2>&1 &
     #   real    29m16.249s
 
     mkdir /hive/data/genomes/myoLuc2/bed/blastz.mm10.swap
     cd /hive/data/genomes/myoLuc2/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzMyoLuc2.2012-03-21/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    54m5.607s
     cat fb.myoLuc2.chainMm10Link.txt
     #	661704053 bases of 1966419868 (33.650%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/myoLuc2/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ cow bosTau7 (DONE - 2012-03-21 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10BosTau7
     mkdir /hive/data/genomes/mm10/bed/lastzBosTau7.2012-03-21
     cd /hive/data/genomes/mm10/bed/lastzBosTau7.2012-03-21
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # cow vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: cow BosTau7
 SEQ2_DIR=/scratch/data/bosTau7/bosTau7.2bit
 SEQ2_LEN=/scratch/data/bosTau7/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=50
 
 BASE=/hive/data/genomes/mm10/bed/lastzBosTau7.2012-03-21
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    1151m20.445s
     cat fb.mm10.chainBosTau7Link.txt
     #	696498363 bases of 2652783500 (26.255%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzBosTau7.2012-03-21 lastz.bosTau7
 
     mkdir /hive/data/genomes/bosTau7/bed/blastz.mm10.swap
     cd /hive/data/genomes/bosTau7/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzBosTau7.2012-03-21/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    77m58.759s
     cat fb.bosTau7.chainMm10Link.txt
     #	711923052 bases of 2804673174 (25.383%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/bosTau7/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ sheep oviAri1 (DONE - 2012-03-21 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10OviAri1
     mkdir /hive/data/genomes/mm10/bed/lastzOviAri1.2012-03-21
     cd /hive/data/genomes/mm10/bed/lastzOviAri1.2012-03-21
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # sheep vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: sheep OviAri1
 SEQ2_DIR=/scratch/data/oviAri1/oviAri1.2bit
 SEQ2_LEN=/scratch/data/oviAri1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=50
 
 BASE=/hive/data/genomes/mm10/bed/lastzOviAri1.2012-03-21
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    892m33.068s
     cat fb.mm10.chainOviAri1Link.txt
     #	406955832 bases of 2652783500 (15.341%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzOviAri1.2012-03-21 lastz.oviAri1
 
     # better to have reciprocal best for this one since it is low coverage:
     cd /hive/data/genomes/mm10/bed/lastzOviAri1.2012-03-21
     time doRecipBest.pl mm10 oviAri1 -buildDir=`pwd` -workhorse=hgwdev \
 	> best.log 2>&1 &
     #	real    1183m43.488s
 
     mkdir /hive/data/genomes/oviAri1/bed/blastz.mm10.swap
     cd /hive/data/genomes/oviAri1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzOviAri1.2012-03-21/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    30m5.554s
     cat fb.oviAri1.chainMm10Link.txt
     #	383499897 bases of 1201271277 (31.925%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/oviAri1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ rock hyrax proCap1 (DONE - 2012-03-21 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10ProCap1
     mkdir /hive/data/genomes/mm10/bed/lastzProCap1.2012-03-21
     cd /hive/data/genomes/mm10/bed/lastzProCap1.2012-03-21
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # rock hyrax vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: rock hyrax ProCap1
 SEQ2_DIR=/scratch/data/proCap1/proCap1.2bit
 SEQ2_LEN=/scratch/data/proCap1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=600
 
 BASE=/hive/data/genomes/mm10/bed/lastzProCap1.2012-03-21
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    2859m51.317s
     cat fb.mm10.chainProCap1Link.txt
     #	401804601 bases of 2652783500 (15.147%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzProCap1.2012-03-21 lastz.proCap1
 
     # better to have reciprocal best for this one since it is low coverage:
     cd /hive/data/genomes/mm10/bed/lastzProCap1.2012-03-21
     time doRecipBest.pl mm10 proCap1 -buildDir=`pwd` -workhorse=hgwdev \
 	> best.log 2>&1 &
     #	real    1083m57.139s
 
     mkdir /hive/data/genomes/proCap1/bed/blastz.mm10.swap
     cd /hive/data/genomes/proCap1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzProCap1.2012-03-21/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    165m10.285s
     cat fb.proCap1.chainMm10Link.txt
     #	390409777 bases of 2407847681 (16.214%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/proCap1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ pika ochPri2 (DONE - 2012-03-22 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10OchPri2
     mkdir /hive/data/genomes/mm10/bed/lastzOchPri2.2012-03-22
     cd /hive/data/genomes/mm10/bed/lastzOchPri2.2012-03-22
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # pika vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: pika OchPri2
 SEQ2_DIR=/scratch/data/ochPri2/ochPri2.2bit
 SEQ2_LEN=/scratch/data/ochPri2/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=500
 
 BASE=/hive/data/genomes/mm10/bed/lastzOchPri2.2012-03-22
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    2578m43.648s
     cat fb.mm10.chainOchPri2Link.txt
     #	385766335 bases of 2652783500 (14.542%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzOchPri2.2012-03-22 lastz.ochPri2
 
     # better to have reciprocal best for this one since it is low coverage:
     cd /hive/data/genomes/mm10/bed/lastzOchPri2.2012-03-22
     time doRecipBest.pl mm10 ochPri2 -buildDir=`pwd` -workhorse=hgwdev \
 	> best.log 2>&1 &
     #	real    1036m29.080s
 
     mkdir /hive/data/genomes/ochPri2/bed/blastz.mm10.swap
     cd /hive/data/genomes/ochPri2/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzOchPri2.2012-03-22/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    103m34.369s
     cat fb.ochPri2.chainMm10Link.txt
     #	382959642 bases of 1923624051 (19.908%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/ochPri2/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ hedgehog eriEur1 (DONE - 2012-03-22 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10EriEur1
     mkdir /hive/data/genomes/mm10/bed/lastzEriEur1.2012-03-22
     cd /hive/data/genomes/mm10/bed/lastzEriEur1.2012-03-22
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # hedgehog vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: hedgehog EriEur1
 SEQ2_DIR=/scratch/data/eriEur1/eriEur1.2bit
 SEQ2_LEN=/scratch/data/eriEur1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=700
 
 BASE=/hive/data/genomes/mm10/bed/lastzEriEur1.2012-03-22
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    3006m41.470s
     cat fb.mm10.chainEriEur1Link.txt
     #	261447061 bases of 2652783500 (9.856%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzEriEur1.2012-03-22 lastz.eriEur1
 
     # better to have reciprocal best for this one since it is low coverage:
     cd /hive/data/genomes/mm10/bed/lastzEriEur1.2012-03-22
     time doRecipBest.pl mm10 eriEur1 -buildDir=`pwd` -workhorse=hgwdev \
 	> best.log 2>&1 &
     #	real    1171m41.349s
 
     mkdir /hive/data/genomes/eriEur1/bed/blastz.mm10.swap
     cd /hive/data/genomes/eriEur1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzEriEur1.2012-03-22/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    241m24.183s
     cat fb.eriEur1.chainMm10Link.txt
     #	261605017 bases of 2133134836 (12.264%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/eriEur1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ tenrec echTel1 (DONE - 2012-03-22 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10EchTel1
     mkdir /hive/data/genomes/mm10/bed/lastzEchTel1.2012-03-22
     cd /hive/data/genomes/mm10/bed/lastzEchTel1.2012-03-22
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # tenrec vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: tenrec EchTel1
 SEQ2_DIR=/scratch/data/echTel1/echTel1.2bit
 SEQ2_LEN=/scratch/data/echTel1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=700
 
 BASE=/hive/data/genomes/mm10/bed/lastzEchTel1.2012-03-22
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    3047m28.723s
     cat fb.mm10.chainEchTel1Link.txt
     #	290413150 bases of 2652783500 (10.947%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzEchTel1.2012-03-22 lastz.echTel1
 
     # better to have reciprocal best for this one since it is low coverage:
     cd /hive/data/genomes/mm10/bed/lastzEchTel1.2012-03-22
     time doRecipBest.pl mm10 echTel1 -buildDir=`pwd` -workhorse=hgwdev \
 	> best.log 2>&1 &
     #	real    1201m39.275s
 
     mkdir /hive/data/genomes/echTel1/bed/blastz.mm10.swap
     cd /hive/data/genomes/echTel1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzEchTel1.2012-03-22/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    269m52.619s
     cat fb.echTel1.chainMm10Link.txt
     #	298082139 bases of 2111581369 (14.117%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/echTel1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ shrew sorAra1 (DONE - 2012-03-22 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10SorAra1
     mkdir /hive/data/genomes/mm10/bed/lastzSorAra1.2012-03-22
     cd /hive/data/genomes/mm10/bed/lastzSorAra1.2012-03-22
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # shrew vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: shrew SorAra1
 SEQ2_DIR=/scratch/data/sorAra1/sorAra1.2bit
 SEQ2_LEN=/scratch/data/sorAra1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=500
 
 BASE=/hive/data/genomes/mm10/bed/lastzSorAra1.2012-03-22
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    2600m22.528s
     cat fb.mm10.chainSorAra1Link.txt
     #	248874412 bases of 2652783500 (9.382%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzSorAra1.2012-03-22 lastz.sorAra1
 
     # better to have reciprocal best for this one since it is low coverage:
     cd /hive/data/genomes/mm10/bed/lastzSorAra1.2012-03-22
     time doRecipBest.pl mm10 sorAra1 -buildDir=`pwd` -workhorse=hgwdev \
 	> best.log 2>&1 &
     #	real    1074m22.651s
 
     mkdir /hive/data/genomes/sorAra1/bed/blastz.mm10.swap
     cd /hive/data/genomes/sorAra1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzSorAra1.2012-03-22/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    141m38.806s
     cat fb.sorAra1.chainMm10Link.txt
     #	248692550 bases of 1832864697 (13.569%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/sorAra1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ wallaby macEug2 (DONE - 2012-03-22 - Hiram)
     # Summary: lastz/chain/net of wallaby macEug2 (query, SEQ2) against mouse
     # mm10 (target, SEQ1) via doBlastzChainNet.pl, followed by a reciprocal
     # best run and a -swap run under the macEug2 tree.
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10MacEug2
     mkdir /hive/data/genomes/mm10/bed/lastzMacEug2.2012-03-22
     cd /hive/data/genomes/mm10/bed/lastzMacEug2.2012-03-22
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # The quoted '_EOF_' delimiter writes the DEF body verbatim (no shell
     # expansion inside the here-document).
     cat << '_EOF_' > DEF
 # wallaby vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: wallaby MacEug2
 SEQ2_DIR=/scratch/data/macEug2/macEug2.2bit
 SEQ2_LEN=/scratch/data/macEug2/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=500
 
 BASE=/hive/data/genomes/mm10/bed/lastzMacEug2.2012-03-22
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     # Runs in the background; stdout and stderr both go to do.log.
     # The "real" comment after the command is the recorded elapsed time.
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    2893m50.341s
     # the comment after the cat is the recorded content of that file
     cat fb.mm10.chainMacEug2Link.txt
     #	115481931 bases of 2652783500 (4.353%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzMacEug2.2012-03-22 lastz.macEug2
 
     # better to have reciprocal best for this one since it is low coverage:
     # (backgrounded as well; output is captured in best.log)
     cd /hive/data/genomes/mm10/bed/lastzMacEug2.2012-03-22
     time doRecipBest.pl mm10 macEug2 -buildDir=`pwd` -workhorse=hgwdev \
 	> best.log 2>&1 &
     #	real    1032m58.798s
 
     # swap: run from the macEug2 tree with -swap, reusing the DEF file written
     # in the mm10 build directory above; output is captured in swap.log
     mkdir /hive/data/genomes/macEug2/bed/blastz.mm10.swap
     cd /hive/data/genomes/macEug2/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzMacEug2.2012-03-22/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    130m7.404s
     cat fb.macEug2.chainMm10Link.txt
     #	112811810 bases of 2536076957 (4.448%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/macEug2/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ RAT Rn5 (DONE - 2012-03-23 - Hiram)
     # Summary: lastz/chain/net of rat rn5 (query, SEQ2) against mouse mm10
     # (target, SEQ1) via doBlastzChainNet.pl with tuned BLASTZ_* parameters,
     # followed by a -swap run under the rn5 tree.
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10Rn5
     mkdir /hive/data/genomes/mm10/bed/lastzRn5.2012-03-23
     cd /hive/data/genomes/mm10/bed/lastzRn5.2012-03-23
 
     # The quoted '_EOF_' delimiter writes the DEF body verbatim (no shell
     # expansion inside the here-document).
     # NOTE(review): the DEF below references mouse_rat_2.q in this build
     # directory and the notInRat / notInMouse SMSK directories; the steps
     # that created them are not shown in this section.
     cat << '_EOF_' > DEF
 # mouse vs rat
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # From tuning experiment between mouse chr12:15000000-25000000 and
 #       rat chr6:38000000-48000000
 BLASTZ_O=600
 BLASTZ_E=55
 BLASTZ_Y=5000
 BLASTZ_T=2
 BLASTZ_K=3000
 BLASTZ_L=3000
 BLASTZ_Q=/hive/data/genomes/mm10/bed/lastzRn5.2012-03-23/mouse_rat_2.q
 
 BLASTZ_ABRIDGE_REPEATS=1
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_SMSK=/scratch/data/mm10/notInRat
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Rat Rn5
 SEQ2_DIR=/hive/data/genomes/rn5/rn5.2bit
 SEQ2_LEN=/hive/data/genomes/rn5/chrom.sizes
 SEQ2_SMSK=/hive/data/genomes/rn5/bed/linSpecRep/notInMouse
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=10
 
 BASE=/hive/data/genomes/mm10/bed/lastzRn5.2012-03-23
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     #	establish a screen to control this job
     # NOTE(review): this starts a second session name (rn5Mm10) after
     # mm10Rn5 above - confirm which one was actually used.
     screen -S rn5Mm10
     # Runs in the background; stdout and stderr both go to do.log.
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-bigClusterHub=swarm -chainMinScore=5000 -chainLinearGap=medium \
 	-noLoadChainSplit -syntenicNet -workhorse=hgwdev \
 	-smallClusterHub=encodek > do.log 2>&1 &
     # broken lastz run when SMSK files did not exist for some of the
     #	Rn5 contigs - made empty files for those and completed, continuing:
     # (rerun with -continue=cat, presumably resuming at the "cat" step -
     # confirm step order in the doBlastzChainNet.pl usage; logs to cat.log)
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	-continue=cat `pwd`/DEF \
 	-bigClusterHub=swarm -chainMinScore=5000 -chainLinearGap=medium \
 	-noLoadChainSplit -syntenicNet -workhorse=hgwdev \
 	-smallClusterHub=encodek > cat.log 2>&1 &
     #	real    285m28.458s
     # the comment after the cat is the recorded content of that file
     cat fb.mm10.chainRn5Link.txt
     #	1786721927 bases of 2652783500 (67.353%) in intersection
     # FYI: rn4 was:
     #	1449612208 bases of 2652783500 (54.645%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzRn5.2012-03-23 lastz.rn5
 
     #	and the swap
     # (run from the rn5 tree with -swap, reusing the DEF file written in the
     # mm10 build directory above; output is captured in swap.log)
     mkdir /hive/data/genomes/rn5/bed/blastz.mm10.swap
     cd /hive/data/genomes/rn5/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzRn5.2012-03-23/DEF \
 	-swap -bigClusterHub=swarm -chainMinScore=5000 -chainLinearGap=medium \
 	-noLoadChainSplit -syntenicNet -workhorse=hgwdev \
 	-smallClusterHub=encodek > swap.log 2>&1 &
     #	real    121m21.029s
     cat fb.rn5.chainMm10Link.txt
     #	1808154679 bases of 2572853723 (70.278%) in intersection
     # FYI, rn4 was:
     #	1449012636 bases of 2571531505 (56.348%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/rn5/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 #########################################################################
 # LASTZ Manatee triMan1 (DONE - 2012-03-29 - Hiram)
     # Summary: lastz/chain/net of manatee triMan1 (query, SEQ2) against mouse
     # mm10 (target, SEQ1) via doBlastzChainNet.pl, followed by a -swap run
     # under the triMan1 tree.
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10TriMan1
     mkdir /hive/data/genomes/mm10/bed/lastzTriMan1.2012-03-29
     cd /hive/data/genomes/mm10/bed/lastzTriMan1.2012-03-29
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # The quoted '_EOF_' delimiter writes the DEF body verbatim (no shell
     # expansion inside the here-document).
     cat << '_EOF_' > DEF
 # manatee vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: manatee TriMan1
 SEQ2_DIR=/hive/data/genomes/triMan1/triMan1.2bit
 SEQ2_LEN=/hive/data/genomes/triMan1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=30
 
 BASE=/hive/data/genomes/mm10/bed/lastzTriMan1.2012-03-29
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     # Runs in the background; stdout and stderr both go to do.log.
     # The "real" comment after the command is the recorded elapsed time.
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    1455m24.772s
     # the comment after the cat is the recorded content of that file
     cat fb.mm10.chainTriMan1Link.txt
     #	704207702 bases of 2652783500 (26.546%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzTriMan1.2012-03-29 lastz.triMan1
 
     # swap: run from the triMan1 tree with -swap, reusing the DEF file written
     # in the mm10 build directory above; output is captured in swap.log
     mkdir /hive/data/genomes/triMan1/bed/blastz.mm10.swap
     cd /hive/data/genomes/triMan1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzTriMan1.2012-03-29/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    62m33.530s
     cat fb.triMan1.chainMm10Link.txt
     #	682557025 bases of 2769099677 (24.649%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/triMan1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 #########################################################################
 # lastz Opossum monDom5 (DONE - 2012-03-29 - Hiram)
     # Summary: lastz/chain/net of opossum monDom5 (query, SEQ2) against mouse
     # mm10 (target, SEQ1) via doBlastzChainNet.pl, followed by a -swap run
     # under the monDom5 tree.
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10MonDom5
     mkdir /hive/data/genomes/mm10/bed/lastzMonDom5.2012-03-29
     cd /hive/data/genomes/mm10/bed/lastzMonDom5.2012-03-29
 
     # The quoted '_EOF_' delimiter writes the DEF body verbatim (no shell
     # expansion inside the here-document).
     cat << '_EOF_' > DEF
 # Mouse vs. opossum
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Opossum monDom5
 SEQ2_DIR=/scratch/data/monDom5/monDom5.2bit
 SEQ2_LEN=/scratch/data/monDom5/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzMonDom5.2012-03-29
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     #	Not done here: this cannot be tuned when the query has only a single
     #	small set of chroms, so the DEF above sets no SEQ2_LIMIT
     # Runs in the background; stdout and stderr both go to do.log.
      time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
     #	real    1792m40.071s
 
     # the comment after the cat is the recorded content of that file
     cat fb.mm10.chainMonDom5Link.txt
     #	254245903 bases of 2652783500 (9.584%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzMonDom5.2012-03-29 lastz.monDom5
 
     #	and for the swap
     # (reuses the DEF file written in the mm10 build directory above;
     # NOTE: this swap run logs to do.log in the swap directory, not swap.log)
     mkdir /hive/data/genomes/monDom5/bed/blastz.mm10.swap
     cd /hive/data/genomes/monDom5/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzMonDom5.2012-03-29/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -swap -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
     #	real    73m49.230s
     cat  fb.monDom5.chainMm10Link.txt
     #	252291401 bases of 3501660299 (7.205%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/monDom5/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 #########################################################################
 # lastz Tasmanian Devil sarHar1 (DONE - 2012-03-29 - Hiram)
     # Summary: lastz/chain/net of tasmanian devil sarHar1 (query, SEQ2)
     # against mouse mm10 (target, SEQ1) via doBlastzChainNet.pl, followed by
     # a -swap run under the sarHar1 tree.
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10SarHar1
     mkdir /hive/data/genomes/mm10/bed/lastzSarHar1.2012-03-29
     cd /hive/data/genomes/mm10/bed/lastzSarHar1.2012-03-29
 
     # The quoted '_EOF_' delimiter writes the DEF body verbatim (no shell
     # expansion inside the here-document).
     cat << '_EOF_' > DEF
 # Mouse vs. tasmanian devil
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: tasmanian devil sarHar1
 SEQ2_DIR=/scratch/data/sarHar1/sarHar1.2bit
 SEQ2_LEN=/scratch/data/sarHar1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzSarHar1.2012-03-29
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # when not present, SEQ2_LIMIT is a default 100
     # (the DEF above sets no SEQ2_LIMIT)
     # Runs in the background; stdout and stderr both go to do.log.
      time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
     #	real    1208m55.866s
 
     # the comment after the cat is the recorded content of that file
     cat fb.mm10.chainSarHar1Link.txt
     #	224935746 bases of 2652783500 (8.479%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzSarHar1.2012-03-29 lastz.sarHar1
 
     #	and for the swap
     # (reuses the DEF file written in the mm10 build directory above;
     # NOTE: this swap run logs to do.log in the swap directory, not swap.log)
     mkdir /hive/data/genomes/sarHar1/bed/blastz.mm10.swap
     cd /hive/data/genomes/sarHar1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzSarHar1.2012-03-29/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -swap -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
     #	real    45m53.015s
     cat  fb.sarHar1.chainMm10Link.txt
     #	231249436 bases of 2931539702 (7.888%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/sarHar1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 #########################################################################
 # lastz budgerigar melUnd1 (DONE - 2012-03-29 - Hiram)
     # Summary: lastz/chain/net of budgerigar melUnd1 (query, SEQ2) against
     # mouse mm10 (target, SEQ1) via doBlastzChainNet.pl, followed by a -swap
     # run under the melUnd1 tree.
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10MelUnd1
     mkdir /hive/data/genomes/mm10/bed/lastzMelUnd1.2012-03-29
     cd /hive/data/genomes/mm10/bed/lastzMelUnd1.2012-03-29
 
     # The quoted '_EOF_' delimiter writes the DEF body verbatim (no shell
     # expansion inside the here-document).
     cat << '_EOF_' > DEF
 # Mouse vs. budgerigar
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: budgerigar melUnd1
 SEQ2_DIR=/hive/data/genomes/melUnd1/melUnd1.2bit
 SEQ2_LEN=/hive/data/genomes/melUnd1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=50
 
 BASE=/hive/data/genomes/mm10/bed/lastzMelUnd1.2012-03-29
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # when not present, SEQ2_LIMIT is a default 100
     # Runs in the background; stdout and stderr both go to do.log.
      time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
     #	real    883m58.198s
 
     # the comment after the cat is the recorded content of that file
     cat fb.mm10.chainMelUnd1Link.txt
     #	95217653 bases of 2652783500 (3.589%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzMelUnd1.2012-03-29 lastz.melUnd1
 
     #	and for the swap
     # (reuses the DEF file written in the mm10 build directory above;
     # NOTE: this swap run logs to do.log in the swap directory, not swap.log)
     mkdir /hive/data/genomes/melUnd1/bed/blastz.mm10.swap
     cd /hive/data/genomes/melUnd1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzMelUnd1.2012-03-29/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -swap -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
     #	real    9m9.260s
     cat  fb.melUnd1.chainMm10Link.txt
     #	79867911 bases of 1086614815 (7.350%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/melUnd1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 #########################################################################
 # lastz platypus ornAna1 (DONE - 2012-03-29 - Hiram)
     # Summary: lastz/chain/net of platypus ornAna1 (query, SEQ2) against
     # mouse mm10 (target, SEQ1) via doBlastzChainNet.pl, followed by a -swap
     # run under the ornAna1 tree.
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10OrnAna1
     mkdir /hive/data/genomes/mm10/bed/lastzOrnAna1.2012-03-29
     cd /hive/data/genomes/mm10/bed/lastzOrnAna1.2012-03-29
 
     # The quoted '_EOF_' delimiter writes the DEF body verbatim (no shell
     # expansion inside the here-document).
     cat << '_EOF_' > DEF
 # Mouse vs. platypus
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: platypus ornAna1
 SEQ2_DIR=/scratch/data/ornAna1/ornAna1.2bit
 SEQ2_LEN=/scratch/data/ornAna1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=400
 
 BASE=/hive/data/genomes/mm10/bed/lastzOrnAna1.2012-03-29
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # when not present, SEQ2_LIMIT is a default 100
     # Runs in the background; stdout and stderr both go to do.log.
      time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
     #	real    1264m1.056s
 
     # the comment after the cat is the recorded content of that file
     cat fb.mm10.chainOrnAna1Link.txt
     #	141873792 bases of 2652783500 (5.348%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzOrnAna1.2012-03-29 lastz.ornAna1
 
     #	and for the swap
     # (reuses the DEF file written in the mm10 build directory above;
     # NOTE: this swap run logs to do.log in the swap directory, not swap.log)
     mkdir /hive/data/genomes/ornAna1/bed/blastz.mm10.swap
     cd /hive/data/genomes/ornAna1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzOrnAna1.2012-03-29/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -swap -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
     #	real    49m45.308s
     cat  fb.ornAna1.chainMm10Link.txt
     #	135101083 bases of 1842236818 (7.334%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/ornAna1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 #########################################################################
 # lastz turtle chrPic1 (DONE - 2012-03-29 - Hiram)
     # Summary: lastz/chain/net of turtle chrPic1 (query, SEQ2) against mouse
     # mm10 (target, SEQ1) via doBlastzChainNet.pl, followed by a -swap run
     # under the chrPic1 tree.
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10ChrPic1
     mkdir /hive/data/genomes/mm10/bed/lastzChrPic1.2012-03-29
     cd /hive/data/genomes/mm10/bed/lastzChrPic1.2012-03-29
 
     # The quoted '_EOF_' delimiter writes the DEF body verbatim (no shell
     # expansion inside the here-document).
     cat << '_EOF_' > DEF
 # Mouse vs. turtle
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: turtle chrPic1
 SEQ2_DIR=/hive/data/genomes/chrPic1/chrPic1.2bit
 SEQ2_LEN=/hive/data/genomes/chrPic1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=200
 
 BASE=/hive/data/genomes/mm10/bed/lastzChrPic1.2012-03-29
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # when not present, SEQ2_LIMIT is a default 100
     # Runs in the background; stdout and stderr both go to do.log.
      time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
     #	real    1243m2.518s
     # the comment after the cat is the recorded content of that file
     cat fb.mm10.chainChrPic1Link.txt
     #	125499965 bases of 2652783500 (4.731%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzChrPic1.2012-03-29 lastz.chrPic1
 
     #	and for the swap
     # (reuses the DEF file written in the mm10 build directory above;
     # output is captured in swap.log)
     mkdir /hive/data/genomes/chrPic1/bed/blastz.mm10.swap
     cd /hive/data/genomes/chrPic1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzChrPic1.2012-03-29/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
     #	real    19m26.835s
     cat  fb.chrPic1.chainMm10Link.txt
     #	118436838 bases of 2158289746 (5.488%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/chrPic1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 #########################################################################
 # lastz chicken galGal4 (DONE - 2012-04-02 - Hiram)
     # Summary: lastz/chain/net of chicken galGal4 (query, SEQ2) against mouse
     # mm10 (target, SEQ1) via doBlastzChainNet.pl (restarted once with
     # -continue=cat), followed by a -swap run under the galGal4 tree.
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10GalGal4
     mkdir /hive/data/genomes/mm10/bed/lastzGalGal4.2012-04-02
     cd /hive/data/genomes/mm10/bed/lastzGalGal4.2012-04-02
 
     # The quoted '_EOF_' delimiter writes the DEF body verbatim (no shell
     # expansion inside the here-document).
     cat << '_EOF_' > DEF
 # Mouse vs. chicken
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: chicken galGal4
 SEQ2_DIR=/hive/data/genomes/galGal4/galGal4.2bit
 SEQ2_LEN=/hive/data/genomes/galGal4/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=30
 
 BASE=/hive/data/genomes/mm10/bed/lastzGalGal4.2012-04-02
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # when not present, SEQ2_LIMIT is a default 100
     # Runs in the background; stdout and stderr both go to do.log.
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
     #	real    109m21.068s
     #	broken swarm cluster, continuing:
     # (rerun with -continue=cat, presumably resuming at the "cat" step -
     # confirm step order in the doBlastzChainNet.pl usage; logs to cat.log)
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         -continue=cat `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > cat.log 2>&1 &
     #	real    57m24.155s
     # the comment after the cat is the recorded content of that file
     cat fb.mm10.chainGalGal4Link.txt
     #	97510773 bases of 2652783500 (3.676%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzGalGal4.2012-04-02 lastz.galGal4
 
     #	and for the swap
     # (reuses the DEF file written in the mm10 build directory above;
     # output is captured in swap.log)
     mkdir /hive/data/genomes/galGal4/bed/blastz.mm10.swap
     cd /hive/data/genomes/galGal4/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzGalGal4.2012-04-02/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
     #	real    95m50.996s
     cat  fb.galGal4.chainMm10Link.txt
     #	83660034 bases of 1032854810 (8.100%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/galGal4/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 #########################################################################
 # lastz zebra finch taeGut1 (DONE - 2012-04-02 - Hiram)
     # Summary: lastz/chain/net of zebra finch taeGut1 (query, SEQ2) against
     # mouse mm10 (target, SEQ1) via doBlastzChainNet.pl (restarted once with
     # -continue=cat), followed by a -swap run under the taeGut1 tree.
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10TaeGut1
     mkdir /hive/data/genomes/mm10/bed/lastzTaeGut1.2012-04-02
     cd /hive/data/genomes/mm10/bed/lastzTaeGut1.2012-04-02
 
     # The quoted '_EOF_' delimiter writes the DEF body verbatim (no shell
     # expansion inside the here-document).
     cat << '_EOF_' > DEF
 # Mouse vs. zebra finch
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: zebra finch taeGut1
 SEQ2_DIR=/scratch/data/taeGut1/taeGut1.2bit
 SEQ2_LEN=/scratch/data/taeGut1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=5
 
 BASE=/hive/data/genomes/mm10/bed/lastzTaeGut1.2012-04-02
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # when not present, SEQ2_LIMIT is a default 100
     # Runs in the background; stdout and stderr both go to do.log.
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
     #	real    106m11.612s
     # broken swarm, continuing:
     # (rerun with -continue=cat, presumably resuming at the "cat" step -
     # confirm step order in the doBlastzChainNet.pl usage; logs to cat.log)
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         -continue=cat `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > cat.log 2>&1 &
     #	real    29m11.090s
     # the comment after the cat is the recorded content of that file
     cat fb.mm10.chainTaeGut1Link.txt
     #	95469341 bases of 2652783500 (3.599%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzTaeGut1.2012-04-02 lastz.taeGut1
 
     #	and for the swap
     # (reuses the DEF file written in the mm10 build directory above;
     # output is captured in swap.log)
     mkdir /hive/data/genomes/taeGut1/bed/blastz.mm10.swap
     cd /hive/data/genomes/taeGut1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzTaeGut1.2012-04-02/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
     #	real    37m17.483s
     cat  fb.taeGut1.chainMm10Link.txt
     #	89312133 bases of 1222864691 (7.304%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/taeGut1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 #########################################################################
 # lastz lizard anoCar2 (DONE - 2012-04-02 - Hiram)
     # Summary: lastz/chain/net of lizard anoCar2 (query, SEQ2) against mouse
     # mm10 (target, SEQ1) via doBlastzChainNet.pl (restarted once with
     # -continue=cat), followed by a -swap run under the anoCar2 tree.
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10AnoCar2
     mkdir /hive/data/genomes/mm10/bed/lastzAnoCar2.2012-04-02
     cd /hive/data/genomes/mm10/bed/lastzAnoCar2.2012-04-02
 
     # The quoted '_EOF_' delimiter writes the DEF body verbatim (no shell
     # expansion inside the here-document).
     cat << '_EOF_' > DEF
 # Mouse vs. lizard
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: lizard anoCar2
 SEQ2_DIR=/scratch/data/anoCar2/anoCar2.2bit
 SEQ2_LEN=/scratch/data/anoCar2/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=15
 
 BASE=/hive/data/genomes/mm10/bed/lastzAnoCar2.2012-04-02
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # when not present, SEQ2_LIMIT is a default 100
     # Runs in the background; stdout and stderr both go to do.log.
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
     #	real    103m17.133s
     # broken swarm, continuing:
     # (rerun with -continue=cat, presumably resuming at the "cat" step -
     # confirm step order in the doBlastzChainNet.pl usage; logs to cat.log)
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         -continue=cat `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > cat.log 2>&1 &
     #	real    43m2.183s
     # the comment after the cat is the recorded content of that file
     cat fb.mm10.chainAnoCar2Link.txt
     #	88356459 bases of 2652783500 (3.331%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzAnoCar2.2012-04-02 lastz.anoCar2
 
     #	and for the swap
     # (reuses the DEF file written in the mm10 build directory above;
     # output is captured in swap.log)
     mkdir /hive/data/genomes/anoCar2/bed/blastz.mm10.swap
     cd /hive/data/genomes/anoCar2/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzAnoCar2.2012-04-02/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
     #	real    97m50.599s
     cat  fb.anoCar2.chainMm10Link.txt
     #	84865552 bases of 1701353770 (4.988%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/anoCar2/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 #########################################################################
 # lastz turkey melGal1 (DONE - 2012-04-02 - Hiram)
     # Summary: lastz/chain/net of turkey melGal1 (query, SEQ2) against mouse
     # mm10 (target, SEQ1) via doBlastzChainNet.pl (restarted once with
     # -continue=cat), followed by a -swap run under the melGal1 tree.
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10MelGal1
     mkdir /hive/data/genomes/mm10/bed/lastzMelGal1.2012-04-02
     cd /hive/data/genomes/mm10/bed/lastzMelGal1.2012-04-02
 
     # The quoted '_EOF_' delimiter writes the DEF body verbatim (no shell
     # expansion inside the here-document).
     cat << '_EOF_' > DEF
 # Mouse vs. turkey
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: turkey melGal1
 SEQ2_DIR=/scratch/data/melGal1/melGal1.2bit
 SEQ2_LEN=/scratch/data/melGal1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=15
 
 BASE=/hive/data/genomes/mm10/bed/lastzMelGal1.2012-04-02
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # when not present, SEQ2_LIMIT is a default 100
     # Runs in the background; stdout and stderr both go to do.log.
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
     #	real    101m17.902s
     # broken swarm, continuing:
     # (rerun with -continue=cat, presumably resuming at the "cat" step -
     # confirm step order in the doBlastzChainNet.pl usage; logs to cat.log)
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         -continue=cat `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > cat.log 2>&1 &
     #	real    20m47.771s
     # the comment after the cat is the recorded content of that file
     cat fb.mm10.chainMelGal1Link.txt
     #	93132953 bases of 2652783500 (3.511%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzMelGal1.2012-04-02 lastz.melGal1
 
     #	and for the swap
     # (reuses the DEF file written in the mm10 build directory above;
     # output is captured in swap.log)
     mkdir /hive/data/genomes/melGal1/bed/blastz.mm10.swap
     cd /hive/data/genomes/melGal1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzMelGal1.2012-04-02/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
     #	real    88m39.591s
     cat  fb.melGal1.chainMm10Link.txt
     #	76848161 bases of 935922386 (8.211%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/melGal1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 #########################################################################
 # lastz frog xenTro3 (DONE - 2012-04-02 - Hiram)
     # Summary: lastz/chain/net of frog xenTro3 (query, SEQ2) against mouse
     # mm10 (target, SEQ1) via doBlastzChainNet.pl (restarted once with
     # -continue=cat), followed by a -swap run under the xenTro3 tree.
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10XenTro3
     mkdir /hive/data/genomes/mm10/bed/lastzXenTro3.2012-04-02
     cd /hive/data/genomes/mm10/bed/lastzXenTro3.2012-04-02
 
     # The quoted '_EOF_' delimiter writes the DEF body verbatim (no shell
     # expansion inside the here-document).
     cat << '_EOF_' > DEF
 # Mouse vs. frog
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: frog xenTro3
 SEQ2_DIR=/scratch/data/xenTro3/xenTro3.2bit
 SEQ2_LEN=/scratch/data/xenTro3/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=40
 
 BASE=/hive/data/genomes/mm10/bed/lastzXenTro3.2012-04-02
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # when not present, SEQ2_LIMIT is a default 100
     # Runs in the background; stdout and stderr both go to do.log.
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
     #	real    99m10.611s
     # broken swarm, continuing:
     # (rerun with -continue=cat, presumably resuming at the "cat" step -
     # confirm step order in the doBlastzChainNet.pl usage; logs to cat.log)
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         -continue=cat `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > cat.log 2>&1 &
     #	real    37m52.678s
     # the comment after the cat is the recorded content of that file
     cat fb.mm10.chainXenTro3Link.txt
     #	82900338 bases of 2652783500 (3.125%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzXenTro3.2012-04-02 lastz.xenTro3
 
     #	and for the swap
     # (reuses the DEF file written in the mm10 build directory above;
     # output is captured in swap.log)
     mkdir /hive/data/genomes/xenTro3/bed/blastz.mm10.swap
     cd /hive/data/genomes/xenTro3/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzXenTro3.2012-04-02/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
     #	real    53m19.485s
     cat  fb.xenTro3.chainMm10Link.txt
     #	90345130 bases of 1358334882 (6.651%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/xenTro3/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 #########################################################################
 # lastz coelacanth latCha1 (DONE - 2012-04-02 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10LatCha1
     mkdir /hive/data/genomes/mm10/bed/lastzLatCha1.2012-04-02
     cd /hive/data/genomes/mm10/bed/lastzLatCha1.2012-04-02
 
     cat << '_EOF_' > DEF
 # Mouse vs. coelacanth
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: coelacanth latCha1
 SEQ2_DIR=/hive/data/genomes/latCha1/latCha1.2bit
 SEQ2_LEN=/hive/data/genomes/latCha1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=50
 
 BASE=/hive/data/genomes/mm10/bed/lastzLatCha1.2012-04-02
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # when not present, SEQ2_LIMIT is a default 100
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
     #	real    95m34.477s
     #	broken swarm, continuing:
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         -continue=cat `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > cat.log 2>&1 &
     #	real    214m7.324s
     cat fb.mm10.chainLatCha1Link.txt
     #	72036116 bases of 2652783500 (2.715%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzLatCha1.2012-04-02 lastz.latCha1
 
     #	and for the swap
     mkdir /hive/data/genomes/latCha1/bed/blastz.mm10.swap
     cd /hive/data/genomes/latCha1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzLatCha1.2012-04-02/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
     #	real    14m44.600s
     cat  fb.latCha1.chainMm10Link.txt
     #	73798131 bases of 2183592768 (3.380%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/latCha1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 #########################################################################
 # lastz atlantic cod gadMor1 (DONE - 2012-04-02 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10GadMor1
     mkdir /hive/data/genomes/mm10/bed/lastzGadMor1.2012-04-02
     cd /hive/data/genomes/mm10/bed/lastzGadMor1.2012-04-02
 
     cat << '_EOF_' > DEF
 # Mouse vs. atlantic cod
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: atlantic cod gadMor1
 SEQ2_DIR=/hive/data/genomes/gadMor1/gadMor1.2bit
 SEQ2_LEN=/hive/data/genomes/gadMor1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=700
 
 BASE=/hive/data/genomes/mm10/bed/lastzGadMor1.2012-04-02
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # when not present, SEQ2_LIMIT is a default 100
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
     #	real    91m23.642s
     # broken swarm, continuing:
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         -continue=cat `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > cat.log 2>&1 &
     #	real    39m41.194s
     cat fb.mm10.chainGadMor1Link.txt
     #	45795692 bases of 2652783500 (1.726%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzGadMor1.2012-04-02 lastz.gadMor1
 
     #	and for the swap
     mkdir /hive/data/genomes/gadMor1/bed/blastz.mm10.swap
     cd /hive/data/genomes/gadMor1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzGadMor1.2012-04-02/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
     #	real    62m58.963s
     cat  fb.gadMor1.chainMm10Link.txt
     #	41406507 bases of 608038597 (6.810%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/gadMor1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 #########################################################################
 # lastz nile tilapia oreNil1 (DONE - 2012-04-02 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10OreNil1
     mkdir /hive/data/genomes/mm10/bed/lastzOreNil1.2012-04-02
     cd /hive/data/genomes/mm10/bed/lastzOreNil1.2012-04-02
 
     cat << '_EOF_' > DEF
 # Mouse vs. nile tilapia
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: nile tilapia oreNil1
 SEQ2_DIR=/scratch/data/oreNil1/oreNil1.2bit
 SEQ2_LEN=/scratch/data/oreNil1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=10
 
 BASE=/hive/data/genomes/mm10/bed/lastzOreNil1.2012-04-02
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # when not present, SEQ2_LIMIT is a default 100
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
     #	real    89m6.727s
     #	broken swarm, continuing:
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         -continue=cat `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > cat.log 2>&1 &
     #	real    24m3.960s
     cat fb.mm10.chainOreNil1Link.txt
     #	51915568 bases of 2652783500 (1.957%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzOreNil1.2012-04-02 lastz.oreNil1
 
     #	and for the swap
     mkdir /hive/data/genomes/oreNil1/bed/blastz.mm10.swap
     cd /hive/data/genomes/oreNil1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzOreNil1.2012-04-02/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
     #	real    90m55.298s
     cat  fb.oreNil1.chainMm10Link.txt
     #	49709461 bases of 816084674 (6.091%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/oreNil1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 #########################################################################
 # lastz stickleback gasAcu1 (DONE - 2012-04-02 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10GasAcu1
     mkdir /hive/data/genomes/mm10/bed/lastzGasAcu1.2012-04-02
     cd /hive/data/genomes/mm10/bed/lastzGasAcu1.2012-04-02
 
     cat << '_EOF_' > DEF
 # Mouse vs. stickleback
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: stickleback gasAcu1
 SEQ2_DIR=/scratch/data/gasAcu1/gasAcu1.2bit
 SEQ2_LEN=/scratch/data/gasAcu1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=1
 
 BASE=/hive/data/genomes/mm10/bed/lastzGasAcu1.2012-04-02
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # when not present, SEQ2_LIMIT is a default 100
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
     #	real    87m5.963s
     # broken swarm, continuing:
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         -continue=cat `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > cat.log 2>&1 &
     #	real    9m49.199s
     cat fb.mm10.chainGasAcu1Link.txt
     #	53469711 bases of 2652783500 (2.016%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzGasAcu1.2012-04-02 lastz.gasAcu1
 
     #	and for the swap
     mkdir /hive/data/genomes/gasAcu1/bed/blastz.mm10.swap
     cd /hive/data/genomes/gasAcu1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzGasAcu1.2012-04-02/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
     #	real    12m58.072s
     cat  fb.gasAcu1.chainMm10Link.txt
     #	48802831 bases of 446627861 (10.927%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/gasAcu1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 #########################################################################
 # lastz fugu fr3 (DONE - 2012-04-02 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10Fr3
     mkdir /hive/data/genomes/mm10/bed/lastzFr3.2012-04-02
     cd /hive/data/genomes/mm10/bed/lastzFr3.2012-04-02
 
     cat << '_EOF_' > DEF
 # Mouse vs. fugu
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: fugu fr3
 SEQ2_DIR=/scratch/data/fr3/fr3.2bit
 SEQ2_LEN=/scratch/data/fr3/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=30
 
 BASE=/hive/data/genomes/mm10/bed/lastzFr3.2012-04-02
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # when not present, SEQ2_LIMIT is a default 100
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
     #	real    84m37.070s
     # broken swarm, continuing:
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         -continue=cat `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > cat.log 2>&1 &
     #	real    171m16.627s
     cat fb.mm10.chainFr3Link.txt
     #	47460021 bases of 2652783500 (1.789%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzFr3.2012-04-02 lastz.fr3
 
     #	and for the swap
     mkdir /hive/data/genomes/fr3/bed/blastz.mm10.swap
     cd /hive/data/genomes/fr3/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzFr3.2012-04-02/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
     #	real    7m13.151s
     cat  fb.fr3.chainMm10Link.txt
     #	42586058 bases of 350961831 (12.134%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/fr3/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 #########################################################################
 # lastz tetraodon tetNig2 (DONE - 2012-04-02 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10TetNig2
     mkdir /hive/data/genomes/mm10/bed/lastzTetNig2.2012-04-02
     cd /hive/data/genomes/mm10/bed/lastzTetNig2.2012-04-02
 
     cat << '_EOF_' > DEF
 # Mouse vs. tetraodon
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: tetraodon tetNig2
 SEQ2_DIR=/scratch/data/tetNig2/tetNig2.2bit
 SEQ2_LEN=/scratch/data/tetNig2/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=30
 
 BASE=/hive/data/genomes/mm10/bed/lastzTetNig2.2012-04-02
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # when not present, SEQ2_LIMIT is a default 100
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         -continue=cat `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > cat.log 2>&1 &
     #	real    13m21.638s
     cat fb.mm10.chainTetNig2Link.txt
     #	46035322 bases of 2652783500 (1.735%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzTetNig2.2012-04-02 lastz.tetNig2
 
     #	and for the swap
     mkdir /hive/data/genomes/tetNig2/bed/blastz.mm10.swap
     cd /hive/data/genomes/tetNig2/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzTetNig2.2012-04-02/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
     #	real    7m24.115s
     cat  fb.tetNig2.chainMm10Link.txt
     #	41242926 bases of 302314788 (13.642%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/tetNig2/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 #########################################################################
 # lastz zebrafish danRer7 (DONE - 2012-04-02 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10DanRer7
     mkdir /hive/data/genomes/mm10/bed/lastzDanRer7.2012-04-02
     cd /hive/data/genomes/mm10/bed/lastzDanRer7.2012-04-02
 
     cat << '_EOF_' > DEF
 # Mouse vs. zebrafish
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: zebrafish danRer7
 SEQ2_DIR=/scratch/data/danRer7/danRer7.2bit
 SEQ2_LEN=/scratch/data/danRer7/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=50
 
 BASE=/hive/data/genomes/mm10/bed/lastzDanRer7.2012-04-02
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # when not present, SEQ2_LIMIT is a default 100
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
     #	real    80m32.118s
     # broken swarm, continuing:
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         -continue=cat `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > cat.log 2>&1 &
     #	real    40m27.762s
     cat fb.mm10.chainDanRer7Link.txt
     #	69028912 bases of 2652783500 (2.602%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzDanRer7.2012-04-02 lastz.danRer7
 
     #	and for the swap
     mkdir /hive/data/genomes/danRer7/bed/blastz.mm10.swap
     cd /hive/data/genomes/danRer7/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzDanRer7.2012-04-02/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
     #	real    109m49.939s
     cat  fb.danRer7.chainMm10Link.txt
     #	72001768 bases of 1409770109 (5.107%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/danRer7/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 #########################################################################
 # lastz medaka oryLat2 (DONE - 2012-04-02 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10OryLat2
     mkdir /hive/data/genomes/mm10/bed/lastzOryLat2.2012-04-02
     cd /hive/data/genomes/mm10/bed/lastzOryLat2.2012-04-02
 
     cat << '_EOF_' > DEF
 # Mouse vs. medaka
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: medaka oryLat2
 SEQ2_DIR=/scratch/data/oryLat2/oryLat2.2bit
 SEQ2_LEN=/scratch/data/oryLat2/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=50
 
 BASE=/hive/data/genomes/mm10/bed/lastzOryLat2.2012-04-02
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # when not present, SEQ2_LIMIT is a default 100
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
     #	real    78m53.408s
     #	broken swarm, continuing:
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         -continue=cat `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > cat.log 2>&1 &
     #	real    113m29.462s
     cat fb.mm10.chainOryLat2Link.txt
     #	51344841 bases of 2652783500 (1.936%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzOryLat2.2012-04-02 lastz.oryLat2
 
     #	and for the swap
     mkdir /hive/data/genomes/oryLat2/bed/blastz.mm10.swap
     cd /hive/data/genomes/oryLat2/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzOryLat2.2012-04-02/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
     #	real    7m52.846s
     cat  fb.oryLat2.chainMm10Link.txt
     #	45954178 bases of 700386597 (6.561%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/oryLat2/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 #########################################################################
 # lastz lamprey petMar1 (DONE - 2012-04-02 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10PetMar1
     mkdir /hive/data/genomes/mm10/bed/lastzPetMar1.2012-04-02
     cd /hive/data/genomes/mm10/bed/lastzPetMar1.2012-04-02
 
     cat << '_EOF_' > DEF
 # Mouse vs. lamprey
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: lamprey petMar1
 SEQ2_DIR=/scratch/data/petMar1/petMar1.2bit
 SEQ2_LEN=/scratch/data/petMar1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=200
 
 BASE=/hive/data/genomes/mm10/bed/lastzPetMar1.2012-04-02
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # when not present, SEQ2_LIMIT is a default 100
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
     #	real    77m3.923s
     # broken swarm, continuing:
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         -qRepeats=windowmaskerSdust -continue=cat `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > cat.log 2>&1 &
     # missing qRepeats specification
     rm axtChain/mm10.petMar1.net
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         -qRepeats=windowmaskerSdust -continue=load `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > load.log 2>&1 &
     #	real    6m31.527s
     cat fb.mm10.chainPetMar1Link.txt
     #	29205053 bases of 2652783500 (1.101%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzPetMar1.2012-04-02 lastz.petMar1
 
     #	and for the swap
     mkdir /hive/data/genomes/petMar1/bed/blastz.mm10.swap
     cd /hive/data/genomes/petMar1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzPetMar1.2012-04-02/DEF \
         -qRepeats=windowmaskerSdust -workhorse=hgwdev \
 	-smallClusterHub=encodek -bigClusterHub=swarm \
         -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
     #   real    17m40.196s
     cat  fb.petMar1.chainMm10Link.txt
     #   26274715 bases of 831696438 (3.159%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/petMar1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 #########################################################################
 ## 60-Way Multiz (DONE - 2011-09-28 - Hiram)
     ssh hgwdev
     mkdir /hive/data/genomes/mm10/bed/multiz60way
     cd /hive/data/genomes/mm10/bed/multiz60way
 
     # from the 62-way in the source tree, do not need ailMis1 and croPor1:
     /cluster/bin/phast/tree_doctor --prune ailMis1,croPor1 \
         /cluster/home/hiram/kent/src/hg/utils/phyloTrees/62way.nh > 60way.nh
 
     # note, newer assemblies: susScr3, dasNov3, felCat5, hetGla2, turTru2,
     #   nomLeu2, oreNil2
 
     #	what that looks like:
     cat 60way.nh
 # (((((((((((((((((((hg19:0.006550,panTro4:0.006840):0.002220,
 # gorGor3:0.008964):0.009693,ponAbe2:0.018940):0.003471,
 # nomLeu2:0.022270):0.012040,(rheMac3:0.007991,
 # papHam1:0.008042):0.029610):0.021830,(calJac3:0.030000,
 # saiBol1:0.040000):0.039650):0.052090,tarSyr1:0.111400):0.020520,
 # (micMur1:0.085600,otoGar3:0.119400):0.020520):0.015494,
 # tupBel1:0.186203):0.004937,(((((mm10:0.084509,rn5:0.091589):0.197773,
 # dipOrd1:0.211609):0.022992,(hetGla2:0.100000,
 # cavPor3:0.125629):0.100000):0.010150,speTri2:0.148468):0.025746,
 # (oryCun2:0.114227,ochPri2:0.201069):0.101463):0.015313):0.020593,
 # (((susScr3:0.120000,(vicPac1:0.087275,(turTru2:0.064688,
 # (oviAri1:0.100000,bosTau7:0.100000):0.023592):0.025153):0.020335):0.020000,
 # ((equCab2:0.109397,(felCat5:0.098612,
 # (canFam3:0.052458,ailMel1:0.050000):0.050000):0.049845):0.006219,
 # (myoLuc2:0.142540,pteVam1:0.113399):0.033706):0.004508):0.011671,
 # (eriEur1:0.221785,sorAra1:0.269562):0.056393):0.021227):0.023664,
 # ((((loxAfr3:0.082242,proCap1:0.155358):0.026990,echTel1:0.245936):0.010000,
 # triMan1:0.100000):0.049697,(dasNov3:0.116664,
 # choHof1:0.096357):0.053145):0.006717):0.234728,(monDom5:0.125686,
 # (sarHar1:0.100000,macEug2:0.072008):0.050000):0.215100):0.071664,
 # ornAna1:0.456592):0.109504,(((((melGal1:0.100000,galGal4:0.065536):0.100000,
 # taeGut1:0.171542):0.199223,melUnd1:0.100000):0.155143,
 # anoCar2:0.539241):0.122371,chrPic1:0.200000):0.010000):0.050000,
 # xenTro3:0.855573):0.100000,latCha1:0.855573):0.311354,
 # ((((((tetNig2:0.224159,fr3:0.203847):0.097590,oreNil2:0.200000):0.097590,
 # gasAcu1:0.316413):0.030000,oryLat2:0.511970):0.030000,
 # gadMor1:0.350000):0.225640,danRer7:0.730752):0.147949):0.526688,
 # petMar1:0.526688);
 
     #	rearrange to get mm10 on top:
     cat << '_EOF_' > mm10.60way.nh
 (((((((((((((((mm10:0.084509,rn5:0.091589):0.197773,dipOrd1:0.211609):0.022992,
 (hetGla2:0.1,cavPor3:0.125629):0.1):0.01015,speTri2:0.148468):0.025746,(oryCun2:0.114227,ochPri2:0.201069):0.101463):0.015313,
 (((((((((hg19:0.00655,panTro4:0.00684):0.00222,gorGor3:0.008964):0.009693,ponAbe2:0.01894):0.003471,
 nomLeu2:0.02227):0.01204,(rheMac3:0.007991,papHam1:0.008042):0.02961):0.02183,
 (calJac3:0.03,saiBol1:0.04):0.03965):0.05209,tarSyr1:0.1114):0.02052,(micMur1:0.0856,otoGar3:0.1194):0.02052):0.015494,
 tupBel1:0.186203):0.004937):0.020593,
 ((susScr3:0.12,(vicPac1:0.087275,(turTru2:0.064688,
 (oviAri1:0.1,bosTau7:0.1):0.023592):0.025153):0.020335):0.01,
 ((((felCat5:0.098612,
 (canFam3:0.052458,ailMel1:0.05):0.05):0.049845,equCab2:0.109397):0.006219,
 (myoLuc2:0.14254,pteVam1:0.113399):0.033706):0.004508,(eriEur1:0.221785,
 sorAra1:0.269562):0.056393):0.021227):0.01):0.013664,((((loxAfr3:0.082242,proCap1:0.155358):0.02699,
 echTel1:0.245936):0.01,triMan1:0.1):0.049697,(dasNov3:0.116664,
 choHof1:0.096357):0.053145):0.006717):0.234728,(monDom5:0.125686,(sarHar1:0.1,
 macEug2:0.072008):0.05):0.2151):0.071664,ornAna1:0.456592):0.109504,
 (((((melGal1:0.1,galGal4:0.065536):0.1,taeGut1:0.171542):0.199223,melUnd1:0.1):0.155143,anoCar2:0.539241):0.122371,
 chrPic1:0.2):0.01):0.05,xenTro3:0.855573):0.1,latCha1:0.855573):0.311354,
 ((((((tetNig2:0.224159,fr3:0.203847):0.09759,oreNil2:0.2):0.09759,gasAcu1:0.316413):0.03,
 oryLat2:0.51197):0.03,gadMor1:0.35):0.22564,danRer7:0.730752):0.147949):0.526688,petMar1:0.526688);
 '_EOF_'
     # << happy emacs
 
     # extract species list from that .nh file
     sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
         mm10.60way.nh | xargs echo | sed 's/ //g; s/,/ /g' \
         | sed 's/[()]//g; s/,/ /g' | tr '[ ]' '[\n]' > species.list.txt
 
     # construct db to name translation list:
     cat species.list.txt | while read DB
 do
 hgsql -N -e "select name,organism from dbDb where name=\"${DB}\";" hgcentraltest
 done | sed -e "s/\t/->/; s/ /_/g;" | sed -e 's/$/;/' | sed -e 's/\./_/g' \
         > db.to.name.txt
 
     # construct a common name .nh file:
     /cluster/bin/phast/tree_doctor --rename \
     "`cat db.to.name.txt`" mm10.60way.nh | sed -e 's/00*)/)/g; s/00*,/,/g' \
         | sed -e 's/X__trop/X._trop/' > mm10.60way.commonNames.nh
 # (((((((((((((((Mouse:0.084509,Rat:0.091589):0.197773,
 # Kangaroo_rat:0.211609):0.022992,(Naked_mole:0.1,
 # Guinea_pig:0.125629):0.1):0.01015,Squirrel:0.148468):0.025746,
 # (Rabbit:0.114227,Pika:0.201069):0.101463):0.015313,
 # (((((((((Human:0.00655,Chimp:0.00684):0.00222,Gorilla:0.008964):0.009693,
 # Orangutan:0.01894):0.003471,Gibbon:0.02227):0.01204,
 # (Chinese_rhesus:0.007991,Baboon:0.008042):0.02961):0.02183,
 # (Marmoset:0.03,Squirrel_monkey:0.04):0.03965):0.05209,
 # Tarsier:0.1114):0.02052,(Mouse_lemur:0.0856,
 # Bushbaby:0.1194):0.02052):0.015494,Tree_shrew:0.186203):0.004937):0.020593,
 # ((Pig:0.12,(Alpaca:0.087275,(Dolphin:0.064688,
 # (Sheep:0.1,Cow:0.1):0.023592):0.025153):0.020335):0.01,
 # ((((Cat:0.098612,(Dog:0.052458,Panda:0.05):0.05):0.049845,
 # Horse:0.109397):0.006219,(Microbat:0.14254,
 # Megabat:0.113399):0.033706):0.004508,(Hedgehog:0.221785,
 # Shrew:0.269562):0.056393):0.021227):0.01):0.013664,
 # ((((Elephant:0.082242,Rock_hyrax:0.155358):0.02699,
 # Tenrec:0.245936):0.01,Manatee:0.1):0.049697,
 # (Armadillo:0.116664,Sloth:0.096357):0.053145):0.006717):0.234728,
 # (Opossum:0.125686,(Tasmanian_devil:0.1,
 # Wallaby:0.072008):0.05):0.2151):0.071664,Platypus:0.456592):0.109504,
 # (((((Turkey:0.1,Chicken:0.065536):0.1,Zebra_finch:0.171542):0.199223,
 # Budgerigar:0.1):0.155143,Lizard:0.539241):0.122371,
 # Painted_turtle:0.2):0.01):0.05,X._tropicalis:0.855573):0.1,
 # Coelacanth:0.855573):0.311354,((((((Tetraodon:0.224159,
 # Fugu:0.203847):0.09759,Nile_tilapia:0.2):0.09759,
 # Stickleback:0.316413):0.03,Medaka:0.51197):0.03,
 # Atlantic_cod:0.35):0.22564,Zebrafish:0.730752):0.147949):0.526688,
 # Lamprey:0.526688);
 
 
     #	Use this specification in the phyloGif tool:
     #	http://genome.ucsc.edu/cgi-bin/phyloGif
     #	to obtain a png image for src/hg/htdocs/images/phylo/mm10_60way.png
 
     /cluster/bin/phast/all_dists mm10.60way.nh | grep mm10 \
         | sed -e "s/mm10^I//" | sort -k2n > 60way.distances.txt
     #	Use this output to create the table below
     head 60way.distances.txt
 # rn5     0.176098
 # speTri2 0.463892
 # micMur1 0.483034
 # dipOrd1 0.493891
 # vicPac1 0.504686
 # hetGla2 0.505274
 # hg19    0.505328
 # gorGor3 0.505522
 # panTro4 0.505618
 # nomLeu2 0.505664
     cat << '_EOF_' > sizeStats.pl
 #!/usr/bin/env perl
 
 # sizeStats.pl - for each line of 60way.distances.txt (db name, distance)
 # print one commented table row: row number, distance, the featureBits
 # chainLink percentage read from the mm10 lastz directory, the percentage
 # read from the other assembly's lastz.mm10 (swap) directory, the organism
 # name looked up in dbDb, and the db name.
 
 use strict;
 use warnings;
 
 # Return the percentage found in a featureBits summary file: the fifth
 # whitespace-separated field of its first line, e.g. "(3.125%)", with the
 # parentheses and percent sign removed.  Returns undef when the file can
 # not be opened, is empty, or has no such field.
 sub featureBitsPercent {
     my ($file) = @_;
     open(my $fh, '<', $file) or return;
     my $line = <$fh>;
     close($fh);
     return unless defined $line;
     my @fields = split(' ', $line);
     return unless defined $fields[4];
     (my $percent = $fields[4]) =~ tr/()%//d;
     return length($percent) ? $percent : undef;
 }
 
 my $distances = "60way.distances.txt";
 open(my $distFh, '<', $distances)
     or die "can not read $distances: $!";
 
 my $count = 0;
 while (my $line = <$distFh>) {
     chomp $line;
     my ($D, $dist) = split(/\s+/, $line);
     # skip blank or incomplete lines instead of printing a bogus row
     next unless (defined $dist && length($D));
 
     # measurement on mm10; a missing or unreadable file is reported as 0.0
     my $chain = "chain" . ucfirst($D);
     my $B = "/hive/data/genomes/mm10/bed/lastz.$D/fb.mm10." .
         $chain . "Link.txt";
     my $chainLinkMeasure = featureBitsPercent($B);
     $chainLinkMeasure = 0.0 unless defined $chainLinkMeasure;
 
     # measurement on the other assembly; stays "N/A" when there is no
     # non-empty swap result file
     my $swapFile =
         "/hive/data/genomes/${D}/bed/lastz.mm10/fb.${D}.chainMm10Link.txt";
     my $swapText = sprintf("%6s", "N/A");
     if ( -s $swapFile ) {
         my $swapMeasure = featureBitsPercent($swapFile);
         $swapMeasure = 0.0 unless defined $swapMeasure;
         $swapText = sprintf("%06.3f", $swapMeasure);
     }
 
     # $D comes from the locally generated distances file
     my $orgName =
     `hgsql -N -e "select organism from dbDb where name='$D';" hgcentraltest`;
     chomp $orgName;
     if (length($orgName) < 1) {
         $orgName = "N/A";
     }
 
     ++$count;
     # the swap column is pre-formatted so "N/A" never reaches a %f conversion
     printf "# %02d  %.4f (%% %06.3f) (%% %s) - %s %s\n", $count, $dist,
         $chainLinkMeasure, $swapText, $orgName, $D;
 }
 close($distFh);
 '_EOF_'
     # << happy emacs
     chmod +x ./sizeStats.pl
     ./sizeStats.pl
 #
 
 #	If you can fill in all the numbers in this table, you are ready for
 #	the multiple alignment procedure
 
 #       featureBits chainLink measures
 #               chainAnoCar2Link
 #  N distance  on mm10  on other     other species
 # 01  0.1761 (% 67.353) (% 70.278) - Rat rn5
 # 02  0.4639 (% 34.217) (% 39.244) - Squirrel speTri2
 # 03  0.4830 (% 26.636) (% 37.574) - Mouse lemur micMur1
 # 04  0.4939 (% 19.460) (% 27.512) - Kangaroo rat dipOrd1
 # 05  0.5047 (% 22.636) (% 31.769) - Alpaca vicPac1
 # 06  0.5053 (% 32.753) (% 37.989) - Naked mole rat hetGla2
 # 07  0.5053 (% 38.226) (% 35.249) - Human hg19
 # 08  0.5055 (% 33.987) (% 34.349) - Gorilla gorGor3
 # 09  0.5056 (% 34.674) (% 31.924) - Chimp panTro4
 # 10  0.5057 (% 34.031) (% 32.274) - Gibbon nomLeu2
 # 11  0.5058 (% 34.496) (% 30.610) - Orangutan ponAbe2
 # 12  0.5073 (% 30.267) (% 33.492) - Dolphin turTru2
 # 13  0.5088 (% 24.560) (% 24.986) - Tarsier tarSyr1
 # 14  0.5090 (% 33.931) (% 33.464) - Chinese rhesus rheMac3
 # 15  0.5090 (% 33.577) (% 32.023) - Baboon papHam1
 # 16  0.5168 (% 29.795) (% 32.926) - Bushbaby otoGar3
 # 17  0.5171 (% 25.685) (% 29.445) - Pig susScr3
 # 18  0.5192 (% 32.450) (% 31.301) - Marmoset calJac3
 # 19  0.5284 (% 34.415) (% 37.138) - Horse equCab2
 # 20  0.5292 (% 32.339) (% 33.848) - Squirrel monkey saiBol1
 # 21  0.5309 (% 28.447) (% 29.115) - Guinea pig cavPor3
 # 22  0.5470 (% 18.019) (% 23.687) - Sloth choHof1
 # 23  0.5472 (% 26.546) (% 24.649) - Manatee triMan1
 # 24  0.5476 (% 19.766) (% 25.144) - Tree shrew tupBel1
 # 25  0.5569 (% 25.248) (% 25.677) - Rabbit oryCun2
 # 26  0.5599 (% 27.345) (% 38.627) - Megabat pteVam1
 # 27  0.5662 (% 26.255) (% 25.383) - Cow bosTau7
 # 28  0.5662 (% 15.341) (% 31.925) - Sheep oviAri1
 # 29  0.5664 (% 25.823) (% 21.616) - Elephant loxAfr3
 # 30  0.5673 (% 25.201) (% 21.066) - Armadillo dasNov3
 # 31  0.5675 (% 29.725) (% 32.244) - Cat felCat5
 # 32  0.5689 (% 30.979) (% 35.562) - Panda ailMel1
 # 33  0.5713 (% 29.144) (% 31.624) - Dog canFam3
 # 34  0.5891 (% 24.363) (% 33.650) - Microbat myoLuc2
 # 35  0.6395 (% 15.147) (% 16.214) - Rock hyrax proCap1
 # 36  0.6437 (% 14.542) (% 19.908) - Pika ochPri2
 # 37  0.6865 (% 09.856) (% 12.264) - Hedgehog eriEur1
 # 38  0.7031 (% 10.947) (% 14.117) - Tenrec echTel1
 # 39  0.7343 (% 09.382) (% 13.569) - Shrew sorAra1
 # 40  0.9626 (% 04.353) (% 04.448) - Wallaby macEug2
 # 41  0.9663 (% 09.584) (% 07.205) - Opossum monDom5
 # 42  0.9906 (% 08.479) (% 07.888) - Tasmanian devil sarHar1
 # 43  1.0166 (% 04.731) (% 05.488) - Painted turtle chrPic1
 # 44  1.1537 (% 05.348) (% 07.334) - Platypus ornAna1
 # 45  1.1942 (% 03.589) (% 07.350) - Budgerigar melUnd1
 # 46  1.4589 (% 03.676) (% 08.100) - Chicken galGal4
 # 47  1.4649 (% 03.599) (% 07.304) - Zebra finch taeGut1
 # 48  1.4782 (% 03.331) (% 04.988) - Lizard anoCar2
 # 49  1.4934 (% 03.511) (% 08.211) - Turkey melGal1
 # 50  1.7122 (% 03.125) (% 06.651) - X. tropicalis xenTro3
 # 51  1.8122 (% 02.715) (% 03.380) - Coelacanth latCha1
 # 52  1.9916 (% 01.726) (% 06.810) - Atlantic cod gadMor1
 # 53  1.9992 (% 01.957) (% 06.091) - Nile tilapia oreNil2
 # 54  2.0180 (% 02.016) (% 10.927) - Stickleback gasAcu1
 # 55  2.1006 (% 01.789) (% 12.134) - Fugu fr3
 # 56  2.1209 (% 01.735) (% 13.642) - Tetraodon tetNig2
 # 57  2.1467 (% 02.602) (% 05.107) - Zebrafish danRer7
 # 58  2.1835 (% 01.936) (% 06.561) - Medaka oryLat2
 # 59  2.3214 (% 01.101) (% 03.159) - Lamprey petMar1
 
 # None of this concern for distances matters in building the first step, the
 # maf files.
 
     # create species list and stripped down tree for autoMZ
     sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
 	mm10.60way.nh > tmp.nh
     echo `cat tmp.nh` > tree-commas.nh
     echo `cat tree-commas.nh` | sed 's/ //g; s/,/ /g' > tree.nh
     sed 's/[()]//g; s/,/ /g' tree.nh > species.list
 
     #	bash shell syntax here ...
     cd /hive/data/genomes/mm10/bed/multiz60way
     export H=/hive/data/genomes/mm10/bed
     mkdir mafLinks
     # link each species' pairwise maf files into mafLinks/<db>, taking the
     # first alignment type found: recipBest, then synNet, then plain net
     for G in $(sed -e "s/mm10 //" species.list); do
 	mkdir mafLinks/$G
 	if [ -s ${H}/lastz.${G}/mafRBestNet/chr1.maf.gz ]; then
 	    echo "$G - recipBest"
 	    ln -s ${H}/lastz.$G/mafRBestNet/*.maf.gz ./mafLinks/$G
 	elif [ -s ${H}/lastz.${G}/mafSynNet/chr1.maf.gz ]; then
 	    echo "$G - synNet"
 	    ln -s ${H}/lastz.$G/mafSynNet/*.maf.gz ./mafLinks/$G
 	elif [ -s ${H}/lastz.${G}/mafNet/chr1.maf.gz ]; then
 	    echo "$G - mafNet"
 	    ln -s ${H}/lastz.$G/mafNet/*.maf.gz ./mafLinks/$G
 	else
 	    echo "missing directory lastz.${G}/*Net"
 	fi
     done
 
     #	verify the alignment type is correct:
     for D in `grep -v mm10 /hive/users/hiram/bigWays/mm10.60way/ordered.list`
 do
     ls -l mafLinks/$D/chr1.maf.gz | awk '{print $NF}'
 done
     #	compare to the list at:
     #	http://genomewiki.ucsc.edu/index.php/Mm10_Genome_size_statistics
 
     #	need to split these things up into smaller pieces for
     #	efficient kluster run.
     cd /hive/data/genomes/mm10/bed/multiz60way
     mkdir mafSplit
     cd mafSplit
     #	mafSplitPos splits on gaps or repeat areas that will not have
     #	any chains, approx 5 Mbp intervals, gaps at least 10,000
     mafSplitPos -minGap=10000 mm10 5 stdout | sort -u \
 	| sort -k1,1 -k2,2n > mafSplit.bed
     #	There is a splitRegions.pl script here (copied from previous hg19 46way)
     #	that can create a custom track from this mafSplit.bed file.
     #	Take a look at that in the browser and see if it looks OK,
     #	check the number of sections on each chrom to verify none are
     #	too large.  Despite the claim above, it does appear that some
     #	areas are split where actual chains exist.
     ./splitRegions.pl mafSplit.bed > splitRegions.ct
 
     # to see the sizes of the regions:
     grep "^chr" splitRegions.ct | awk '{print $3-$2,$0}' | sort -rn | less
 
     #	run a kluster job to split them all
     ssh swarm
     cd /hive/data/genomes/mm10/bed/multiz60way/mafSplit
     cat << '_EOF_' > runOne
 #!/bin/csh -ef
 # runOne - split one species' maf for one chromosome into the pieces
 # defined by mafSplit.bed, leaving gzipped pieces in a directory named
 # after the species.
 #   $1 = species database name (directory under ../mafLinks)
 #   $2 = chromosome name
 set G = $1
 set C = $2
 mkdir -p $G
 pushd $G > /dev/null
 if ( -s ../../mafLinks/${G}/${C}.maf.gz ) then
     # remove uncompressed pieces left behind by an earlier attempt
     if ( -s mm10_${C}.00.maf ) then
         /bin/rm -f mm10_${C}.*.maf
     endif
     /cluster/bin/x86_64/mafSplit ../mafSplit.bed mm10_ ../../mafLinks/${G}/${C}.maf.gz
     /bin/gzip mm10_${C}.*.maf
 else
     # no alignment file for this chromosome: gzip an empty .00.maf so the
     # output file named in the batch template still gets created
     /bin/touch mm10_${C}.00.maf
     /bin/gzip mm10_${C}.00.maf
 endif
 popd > /dev/null
 '_EOF_'
     # << happy emacs
     chmod +x runOne
 
     cat << '_EOF_' > template
 #LOOP
 runOne $(root1) $(root2) {check out exists+ $(root1)/mm10_$(root2).00.maf.gz}
 #ENDLOOP
 '_EOF_'
     # << happy emacs
 
     for G in `sed -e "s/mm10 //" ../species.list`
 do
     echo $G
 done > species.list
     cut -f 1 ../../../chrom.sizes > chr.list
 
     gensub2 species.list chr.list template jobList
     para -ram=8g create jobList
     para try ... check ... push ... etc...
 # Completed: 3894 of 3894 jobs
 # CPU time in finished jobs:      18929s     315.49m     5.26h    0.22d  0.001 y
 # IO & Wait Time:                 62908s    1048.46m    17.47h    0.73d  0.002 y
 # Average job time:                  21s       0.35m     0.01h    0.00d
 # Longest finished job:             346s       5.77m     0.10h    0.00d
 # Submission to last job:           471s       7.85m     0.13h    0.01d
 
     # construct a list of all possible maf file names.
     # they do not all exist in each of the species directories
     find . -type f | grep "maf.gz" | wc -l
     # 19733
     find . -type f | grep ".maf.gz$" | xargs -L 1 basename | sort -u > maf.list
     wc -l maf.list
     #   336 maf.list
 
     mkdir /hive/data/genomes/mm10/bed/multiz60way/splitRun
     cd /hive/data/genomes/mm10/bed/multiz60way/splitRun
     mkdir maf run
     cd run
     mkdir penn
     cp -p /cluster/bin/penn/multiz.2009-01-21/multiz penn
     cp -p /cluster/bin/penn/multiz.2009-01-21/maf_project penn
     cp -p /cluster/bin/penn/multiz.2009-01-21/autoMZ penn
 
     #	set the db and pairs directories here
     cat > autoMultiz.csh << '_EOF_'
 #!/bin/csh -ef
 # autoMultiz.csh - run autoMZ for one split maf piece, working in a
 # private scratch directory and copying the result back at the end.
 #   $1 = name of the split maf piece, looked up for each species as
 #        $pairs/<species>/$1.gz or $pairs/<species>/$1
 #   $2 = path of the result file to write
 set db = mm10
 set c = $1
 set result = $2
 set run = `/bin/pwd`
 set tmp = /scratch/tmp/$db/multiz.$c
 set pairs = /hive/data/genomes/mm10/bed/multiz60way/mafSplit
 # start from a clean scratch directory holding the tree and species list
 /bin/rm -fr $tmp
 /bin/mkdir -p $tmp
 /bin/cp -p ../../tree.nh ../../species.list $tmp
 pushd $tmp > /dev/null
 # stage one input maf per non-reference species as $db.<species>.sing.maf
 foreach s (`/bin/sed -e "s/$db //" species.list`)
     set in = $pairs/$s/$c
     set out = $db.$s.sing.maf
     if (-e $in.gz) then
         /bin/zcat $in.gz > $out
         # an empty piece is replaced by a maf containing only the header
         if (! -s $out) then
             echo "##maf version=1 scoring=autoMZ" > $out
         endif
     else if (-e $in) then
         /bin/ln -s $in $out
     else
         # no piece at all for this species: header-only maf
         echo "##maf version=1 scoring=autoMZ" > $out
     endif
 end
 # put the local copies of the penn tools first on the path
 set path = ($run/penn $path); rehash
 $run/penn/autoMZ + T=$tmp E=$db "`cat tree.nh`" $db.*.sing.maf $c \
         > /dev/null
 popd > /dev/null
 # replace any previous result, then remove the scratch directory
 /bin/rm -f $result
 /bin/cp -p $tmp/$c $result
 /bin/rm -fr $tmp
 '_EOF_'
 # << happy emacs
     chmod +x autoMultiz.csh
 
     cat  << '_EOF_' > template
 #LOOP
 ./autoMultiz.csh $(root1) {check out line+ /hive/data/genomes/mm10/bed/multiz60way/splitRun/maf/$(root1)}
 #ENDLOOP
 '_EOF_'
 # << happy emacs
 
     ln -s ../../mafSplit/maf.list maf.list
     ssh swarm
     cd /hive/data/genomes/mm10/bed/multiz60way/splitRun/run
     # the tac reverses the list to get the small jobs first
     gensub2 maf.list single template stdout | tac > jobList
     para -ram=8g create jobList
 # Completed: 336 of 336 jobs
 # CPU time in finished jobs:    2828651s   47144.19m   785.74h   32.74d  0.090 y
 # IO & Wait Time:                200533s    3342.21m    55.70h    2.32d  0.006 y
 # Average job time:                9015s     150.26m     2.50h    0.10d
 # Longest finished job:           47029s     783.82m    13.06h    0.54d
 # Submission to last job:         48982s     816.37m    13.61h    0.57d
 
     # put the split maf results back together into a single maf file
     #	eliminate duplicate comments
     ssh hgwdev
     cd /hive/data/genomes/mm10/bed/multiz60way/splitRun
     mkdir ../maf
     #	the sed edits take out partitioning name information from the comments
     #	so the multiple parts will condense to smaller number of lines
     #	this takes almost 2 hours of time, resulting in a bit over 150 Gb,
     #	almost all chrom files over 1 Gb, up to almost 10 Gb for chr2
     #	HOWEVER, this is actually not necessary to maintain these comments,
     #	they are lost during the mafAddIRows
 
     cat << '_EOF_' > runOne
 #!/bin/csh -fe
 # runOne - put the split multiz results for one chromosome back together
 # as ../maf/<chrom>.maf
 #   $1 = chromosome name
 set C = $1
 # remove a compressed result left from an earlier run
 if ( -s ../maf/${C}.maf.gz ) then
     rm -f ../maf/${C}.maf.gz
 endif
 # the first line of each piece, reduced to unique lines, starts the file
 head -q -n 1 maf/mm10_${C}.*.maf | sort -u > ../maf/${C}.maf
 # other comment lines: drop the version and eof markers, edit out the
 # piece names so that equivalent comments reduce to one line
 grep -h "^#" maf/mm10_${C}.*.maf | egrep -v "maf version=1|eof maf" | \
     sed -e "s#${C}.[0-9][0-9]*#${C}#g; s#_MZ_[^ ]* # #g;" \
         | sort -u >> ../maf/${C}.maf
 # alignment blocks, taking the pieces in numeric order of the piece number
 grep -h -v "^#" `ls maf/mm10_${C}.*.maf | sort -t. -k2,2n` >> ../maf/${C}.maf
 # the last line of each piece, reduced to unique lines, ends the file
 tail -q -n 1 maf/mm10_${C}.*.maf | sort -u >> ../maf/${C}.maf
 '_EOF_'
     # << happy emacs
     chmod +x runOne
 
     # write a fresh template (not append) so a repeat of this step does
     # not leave two #LOOP sections in the file
     cat << '_EOF_' > template
 #LOOP
 runOne $(root1) {check out exists+ ../maf/$(root1).maf}
 #ENDLOOP
 '_EOF_'
     # << happy emacs
 
     cut -f1 ../../../chrom.sizes > chr.list
     ssh encodek
     cd /hive/data/genomes/mm10/bed/multiz60way/splitRun
     gensub2 chr.list single template jobList
     para -ram=8g create jobList
     para try ... check ... push ... etc ...
 # Completed: 62 of 66 jobs
 # Crashed: 4 jobs
 # CPU time in finished jobs:        461s       7.68m     0.13h    0.01d  0.000 y
 # IO & Wait Time:                 17863s     297.72m     4.96h    0.21d  0.001 y
 # Average job time:                 296s       4.93m     0.08h    0.00d
 # Longest finished job:            1144s      19.07m     0.32h    0.01d
 # Submission to last job:          1156s      19.27m     0.32h    0.01d
 
     # these four have empty results:
 #       chrUn_GL456383
 #       chrUn_GL456389
 #       chrUn_GL456390
 #       chrUn_GL456396
 
     # Load into database
     ssh hgwdev
     mkdir -p /gbdb/mm10/multiz60way
     cd /hive/data/genomes/mm10/bed/multiz60way/maf
     ln -s `pwd`/*.maf /gbdb/mm10/multiz60way
 
     # this generates an immense multiz60way.tab file in the directory
     #	where it is running.  Best to run this over in scratch.
     #   This is going to take all day.
     cd /scratch/tmp
     time nice -n +19 hgLoadMaf mm10 multiz60way
     #   Loaded 56185270 mafs in 66 files from /gbdb/mm10/multiz60way
     #   real    72m45.513s
 # -rw-rw-r-- 1 2857704841 Apr 18 10:49 multiz60way.tab
 
     time cat /gbdb/mm10/multiz60way/*.maf \
         | nice -n +19 hgLoadMafSummary -verbose=2 -minSize=30000 \
 	-mergeGap=1500 -maxSize=200000 mm10 multiz60waySummary stdin
     #   Created 12012784 summary blocks from 1074134156 components and
     #   56185270 mafs from stdin
     #   real    104m2.107s
 
     wc -l multiz60way*.tab
     #   56185270 multiz60way.tab
     #   12012784 multiz60waySummary.tab
     #   68198054 total
     #   -rw-rw-r-- 1 2857704841 Apr 18 10:49 multiz60way.tab
     #   -rw-rw-r-- 1  567210414 Apr 18 17:28 multiz60waySummary.tab
 
     rm multiz60way*.tab
 
 #######################################################################
 # GAP ANNOTATE MULTIZ60WAY MAF AND LOAD TABLES (DONE - 2012-05-31 - Hiram)
     # mafAddIRows has to be run on single chromosome maf files, it does not
     #	function correctly when more than one reference sequence
     #	are in a single file.
     mkdir -p /hive/data/genomes/mm10/bed/multiz60way/anno
     cd /hive/data/genomes/mm10/bed/multiz60way/anno
 
     cd /hive/data/genomes/mm10/bed/multiz60way/anno
     # check for N.bed files everywhere:
     for DB in `cat ../species.list`
 do
     if [ ! -s /hive/data/genomes/${DB}/${DB}.N.bed ]; then
         echo "MISS: ${DB}"
         cd /hive/data/genomes/${DB}
         twoBitInfo -nBed ${DB}.2bit ${DB}.N.bed
     else
         echo "  OK: ${DB}"
     fi
 done
 
     cd /hive/data/genomes/mm10/bed/multiz60way/anno
     for DB in `cat ../species.list`
 do
     echo "${DB} "
     ln -s  /hive/data/genomes/${DB}/${DB}.N.bed ${DB}.bed
     echo ${DB}.bed  >> nBeds
     ln -s  /hive/data/genomes/${DB}/chrom.sizes ${DB}.len
     echo ${DB}.len  >> sizes
 done
     # make sure they all are successful symLinks:
     ls -ogrtL
 
     screen -S mm10      # use a screen to control this longish job
     ssh swarm
     cd /hive/data/genomes/mm10/bed/multiz60way/anno
     mkdir result
     # NEXT TIME: this template should have a check out exists+ statement
     cat << '_EOF_' > template
 #LOOP
 mafAddIRows -nBeds=nBeds $(path1) /hive/data/genomes/mm10/mm10.2bit {check out line+ result/$(file1)}
 #ENDLOOP
 '_EOF_'
     # << happy emacs
 
     ls ../maf/*.maf > maf.list
     # the tac puts the short jobs first
     gensub2 maf.list single template stdout | tac > jobList
     # limit jobs to one per node with the ram=8g requirement
     para -ram=8g create jobList
     para try ... check ... push ...
 # Completed: 46 of 66 jobs
 # CPU time in finished jobs:        350s       5.83m     0.10h    0.00d  0.000 y
 # IO & Wait Time:                   603s      10.06m     0.17h    0.01d  0.000 y
 # Average job time:                  21s       0.35m     0.01h    0.00d
 # Longest finished job:              54s       0.90m     0.01h    0.00d
 # Submission to last job:           113s       1.88m     0.03h    0.00d
 
     # a number of these jobs did not finish due to memory limitations.
     # The jobs would sit on the nodes appearing to occupy 8 Gb of memory,
     # but did not see any swapping or CPU time accumulation.  Stop the
     # batch and run the rest manually on hgwdev:
 #!/bin/sh
 
 export maxMem=188743680
 ulimit -S -m $maxMem -v $maxMem
 
 mafAddIRows -nBeds=nBeds ../maf/chrX.maf /hive/data/genomes/mm10/mm10.2bit hgwdev/chrX.maf &
 mafAddIRows -nBeds=nBeds ../maf/chr9.maf /hive/data/genomes/mm10/mm10.2bit hgwdev/chr9.maf &
 mafAddIRows -nBeds=nBeds ../maf/chr8.maf /hive/data/genomes/mm10/mm10.2bit hgwdev/chr8.maf &
 mafAddIRows -nBeds=nBeds ../maf/chr7.maf /hive/data/genomes/mm10/mm10.2bit hgwdev/chr7.maf &
 wait
 mafAddIRows -nBeds=nBeds ../maf/chr6.maf /hive/data/genomes/mm10/mm10.2bit hgwdev/chr6.maf &
 mafAddIRows -nBeds=nBeds ../maf/chr5.maf /hive/data/genomes/mm10/mm10.2bit hgwdev/chr5.maf &
 mafAddIRows -nBeds=nBeds ../maf/chr4.maf /hive/data/genomes/mm10/mm10.2bit hgwdev/chr4.maf &
 mafAddIRows -nBeds=nBeds ../maf/chr3.maf /hive/data/genomes/mm10/mm10.2bit hgwdev/chr3.maf &
 wait
 ... etc ...
     # the run time for those 20 jobs:
     #   real    159m49.217s
 
     # verify all result files have some content, look for 0 size files:
     find . -type f -size 0
     # should see none
 
     # combine into one file  (realized after this, that we do *not* need
     #                           this single file.  Individual files are OK.)
     head -q -n 1 result/chrM.maf > mm10.60way.maf
     time for F in hgwdev/*.maf result/*.maf
 do
     grep -h -v "^#" ${F}
 done >> mm10.60way.maf
     #   real    1082m47.484s -> 18 hours !
 # -rw-rw-r-- 1 261567878241 Jun  8 10:30 mm10.60way.maf
     du -hsc mm10.60way.maf
     #   244G    mm10.60way.maf
 
     #	these maf files do not have the end marker, this does nothing:
     #	tail -q -n 1 result/chrM.maf >> mm10.60way.maf
     # How about an official end marker:
     echo "##eof maf" >> mm10.60way.maf
 
     # construct symlinks to get the individual maf files into gbdb:
     mkdir /gbdb/mm10/multiz60way/maf
     ln -s `pwd`/result/*.maf `pwd`/hgwdev/*.maf /gbdb/mm10/multiz60way/maf/
 
     # Load into database
     rm /gbdb/mm10/multiz60way/*.maf   # remove previous results
     cd /scratch/tmp
     time nice -n +19 hgLoadMaf -pathPrefix=/gbdb/mm10/multiz60way/maf \
         mm10 multiz60way
     #   Loaded 58087742 mafs in 66 files from /gbdb/mm10/multiz60way/maf
     #   real    868m28.108s
 
     time (cat /gbdb/mm10/multiz60way/maf/*.maf \
         | hgLoadMafSummary -verbose=2 -minSize=30000 \
 	-mergeGap=1500 -maxSize=200000 mm10 multiz60waySummary stdin)
 
     #   -rw-rw-r-- 1 3009209972 Jun  9 03:23 multiz60way.tab
     #   -rw-rw-r-- 1  591235982 Jun 11 18:34 multiz60waySummary.tab
 
     rm multiz60way*.tab
 
 #######################################################################
 # MULTIZ60WAY MAF FRAMES (DONE - 2012-05-30 - Hiram)
     ssh hgwdev
     mkdir /hive/data/genomes/mm10/bed/multiz60way/frames
     cd /hive/data/genomes/mm10/bed/multiz60way/frames
 #   survey all the genomes to find out what kinds of gene tracks they have
     cat << '_EOF_' > showGenes.csh
 #!/bin/csh -fe
 # showGenes.csh - for each database in ../species.list print one line:
 # the row counts of the gene tables of interest that exist in that
 # database, followed by the count of rows in hg19.gbCdnaInfo for the
 # organism (matched through the scientific name in hgcentraltest.dbDb).
 foreach db (`cat ../species.list`)
     echo -n "${db}: "
     set tables = `hgsql $db -N -e "show tables like '%Gene%'"`
     foreach table ($tables)
         if ($table == "ensGene" || $table == "refGene" || \
            $table == "mgcGenes" || $table == "knownGene" || \
            $table == "xenoRefGene" ) then
             set count = `hgsql $db -N -e "select count(*) from $table"`
             echo -n "${table}: ${count}, "
         endif
     end
     set orgName = `hgsql hgcentraltest -N -e \
             "select scientificName from dbDb where name='$db'"`
     set orgId = `hgsql hg19 -N -e \
             "select id from organism where name='$orgName'"`
     # The expansion must be quoted: when the organism is not found the
     # variable is empty and an unquoted test becomes ( == ""), which csh
     # rejects as an expression syntax error.
     if ("$orgId" == "") then
         echo "Mrnas: 0"
     else
         set count = `hgsql hg19 -N -e "select count(*) from gbCdnaInfo where organism=$orgId"`
         echo "Mrnas: ${count}"
     endif
 end
 '_EOF_'
     # << happy emacs
     chmod +x ./showGenes.csh
     time ./showGenes.csh > showGenes.txt
     #   real    9m11.678s
 
     #   rearrange that output to create five sections, and place these names
     #           in .list files here:
     #   1. knownGene: hg19
     #   2. refGene: bosTau7 danRer7 galGal4 mm10 rheMac3 rn5 susScr3 xenTro3
     #   3. ensGene: ailMel1 anoCar2 calJac3 cavPor3 choHof1 dipOrd1 echTel1
     #               equCab2 eriEur1 fr3 gasAcu1 gorGor3 loxAfr3 melGal1
     #               micMur1 monDom5 myoLuc2 ochPri2 ornAna1 oryCun2 oryLat2
     #               panTro4 ponAbe2 proCap1 pteVam1 sorAra1 taeGut1 tarSyr1
     #               tetNig2 tupBel1 vicPac1
     #   4. xenoRefGene: canFam3 chrPic1 dasNov3 felCat5 hetGla2 latCha1 macEug2
     #               nomLeu2 otoGar3 oviAri1 papHam1 petMar1 saiBol1 sarHar1
     #               triMan1
     #   5. genscan: gadMor1 melUnd1 oreNil2 speTri2 turTru2
 
     mkdir genes
     #   1. knownGene: hg19
     hgsql -N -e "select name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds from knownGene" hg19 \
       | genePredSingleCover stdin stdout | gzip -2c \
         > genes/hg19.gp.gz
     #   2. refGene, want the full extended genePred:
     for DB in `cat refGene.list`
 do
 hgsql -N -e "select * from refGene" ${DB} | cut -f2- \
       | genePredSingleCover stdin stdout | gzip -2c \
         > /scratch/tmp/${DB}.tmp.gz
     mv /scratch/tmp/${DB}.tmp.gz genes/$DB.gp.gz
     echo "${DB} done"
 done
     #   3. ensGene, want the full extended genePred:
     for DB in `cat ensGene.list`
 do
 hgsql -N -e "select * from ensGene" ${DB} | cut -f2- \
       | genePredSingleCover stdin stdout | gzip -2c \
         > /scratch/tmp/${DB}.tmp.gz
     mv /scratch/tmp/${DB}.tmp.gz genes/$DB.gp.gz
     echo "${DB} done"
 done
     #   4. xenoRefGene, want the full extended genePred:
     for DB in `cat xenoRG.list`
 do
 hgsql -N -e "select * from xenoRefGene" ${DB} | cut -f2- \
       | genePredSingleCover stdin stdout | gzip -2c \
         > /scratch/tmp/${DB}.tmp.gz
     mv /scratch/tmp/${DB}.tmp.gz genes/$DB.gp.gz
     echo "${DB} done"
 done
     #   5. genscan: gadMor1 melUnd1 oreNil2 speTri2 turTru2
     # this was done in error the first time, mistakenly using
     # the xenoRefGene table instead of genscan
     for DB in `cat genscan.list`
 do
 hgsql -N -e "select * from genscan" ${DB} | cut -f2- \
       | genePredSingleCover stdin stdout | gzip -2c \
         > /scratch/tmp/${DB}.tmp.gz
     mv /scratch/tmp/${DB}.tmp.gz genes/$DB.gp.gz
     echo "${DB} done"
 done
 
     # verify counts for genes are reasonable:
     for T in genes/*.gz
 do
     echo -n "# $T: "
     zcat $T | cut -f1 | sort | uniq -c | wc -l
 done
 
 # genes/ailMel1.gp.gz: 19204
 # genes/anoCar2.gp.gz: 17766
 # genes/bosTau7.gp.gz: 12958
 # genes/calJac3.gp.gz: 20843
 # genes/canFam3.gp.gz: 20652
 # genes/cavPor3.gp.gz: 18631
 # genes/choHof1.gp.gz: 12403
 # genes/chrPic1.gp.gz: 19433
 # genes/danRer7.gp.gz: 13902
 # genes/dasNov3.gp.gz: 29551
 # genes/dipOrd1.gp.gz: 15784
 # genes/echTel1.gp.gz: 16499
 # genes/equCab2.gp.gz: 20403
 # genes/eriEur1.gp.gz: 11712
 # genes/felCat5.gp.gz: 19512
 # genes/fr3.gp.gz: 18014
 # genes/gadMor1.gp.gz: 27572
 # genes/galGal4.gp.gz: 4892
 # genes/gasAcu1.gp.gz: 20631
 # genes/gorGor3.gp.gz: 20759
 # genes/hetGla2.gp.gz: 25749
 # genes/hg19.gp.gz: 20718
 # genes/latCha1.gp.gz: 18786
 # genes/loxAfr3.gp.gz: 19986
 # genes/macEug2.gp.gz: 26006
 # genes/melGal1.gp.gz: 14050
 # genes/melUnd1.gp.gz: 15296
 # genes/micMur1.gp.gz: 16240
 # genes/mm10.gp.gz: 20985
 # genes/monDom5.gp.gz: 19188
 # genes/myoLuc2.gp.gz: 19685
 # genes/nomLeu2.gp.gz: 22996
 # genes/ochPri2.gp.gz: 15970
 # genes/oreNil2.gp.gz: 18636
 # genes/ornAna1.gp.gz: 17728
 # genes/oryCun2.gp.gz: 18921
 # genes/oryLat2.gp.gz: 19576
 # genes/otoGar3.gp.gz: 24061
 # genes/oviAri1.gp.gz: 17890
 # genes/panTro4.gp.gz: 18647
 # genes/papHam1.gp.gz: 27842
 # genes/petMar1.gp.gz: 11089
 # genes/ponAbe2.gp.gz: 19895
 # genes/proCap1.gp.gz: 16043
 # genes/pteVam1.gp.gz: 16966
 # genes/rheMac3.gp.gz: 5580
 # genes/rn5.gp.gz: 16393
 # genes/saiBol1.gp.gz: 23419
 # genes/sarHar1.gp.gz: 20694
 # genes/sorAra1.gp.gz: 13156
 # genes/speTri2.gp.gz: 22377
 # genes/susScr3.gp.gz: 3771
 # genes/taeGut1.gp.gz: 17354
 # genes/tarSyr1.gp.gz: 13615
 # genes/tetNig2.gp.gz: 19539
 # genes/triMan1.gp.gz: 19514
 # genes/tupBel1.gp.gz: 15407
 # genes/turTru2.gp.gz: 28375
 # genes/vicPac1.gp.gz: 11754
 # genes/xenTro3.gp.gz: 8447
 
     # kluster job to annotate each maf file
     screen -S mm10      # manage long running procedure with screen
     ssh swarm
     cd /hive/data/genomes/mm10/bed/multiz60way/frames
     cat << '_EOF_' > runOne
 #!/bin/csh -fe
 # runOne - run genePredToMafFrames for one chromosome maf with the gene
 # set of one species.
 #   $1 = chromosome name; input is ../maf/$1.maf
 #   $2 = species database name; genes are read from genes/$2.gp.gz
 # The gzipped result goes to parts/$1.$2.mafFrames.gz
 
 set C = $1
 set G = $2
 
 cat ../maf/${C}.maf | genePredToMafFrames mm10 stdin stdout \
         ${G} genes/${G}.gp.gz | gzip > parts/${C}.${G}.mafFrames.gz
 '_EOF_'
     # << happy emacs
     chmod +x runOne
 
     # older instructions excluded mm10 from the gene.list
     #   this was a mistake.  mm10 can be annotated too.
     #   Mistakenly did this the first run through, had to manually
     #   do the mm10 genes separately on hgwdev after this was done
     ls ../maf | sed -e "s/.maf//" > chr.list
     ls genes | sed -e "s/.gp.gz//" > gene.list
 
     cat << '_EOF_' > template
 #LOOP
 runOne $(root1) $(root2) {check out exists+ parts/$(root1).$(root2).mafFrames.gz}
 #ENDLOOP
 '_EOF_'
     # << happy emacs
 
     mkdir parts
     gensub2 chr.list gene.list template jobList
     para -ram=8g create jobList
     para try ... check ... push
 # Completed: 3960 of 3960 jobs
 # CPU time in finished jobs:      85610s    1426.83m    23.78h    0.99d  0.003 y
 # IO & Wait Time:               2030956s   33849.27m   564.15h   23.51d  0.064 y
 # Average job time:                 534s       8.91m     0.15h    0.01d
 # Longest finished job:            3877s      64.62m     1.08h    0.04d
 # Submission to last job:         12974s     216.23m     3.60h    0.15d
 
     # collect all results into one file:
     cd /hive/data/genomes/mm10/bed/multiz60way/frames
     find ./parts -type f | while read F
 do
     zcat ${F}
 done | sort -k1,1 -k2,2n > multiz60wayFrames.bed
     #   -rw-rw-r-- 1 1164299719 May 30 11:28 multiz60wayFrames.bed
 
     # verify there are frames on everything:
     cut -f4 multiz60wayFrames.bed | sort | uniq -c | sort -n \
         > annotation.survey.txt
     # should be 60 species:
     wc -l annotation.survey.txt
     #   60 annotation.survey.txt
     # and the minimum numbers:
     head annotation.survey.txt
     #   43900 susScr3
     #   59839 rheMac3
     #   153246 petMar1
     #   162501 choHof1
     # ... etc ...
 
     #   load the resulting file
     ssh hgwdev
     cd /hive/data/genomes/mm10/bed/multiz60way/frames
     time gzip multiz60wayFrames.bed
     #   real    0m51.826s
     # reloading this table 2012-10-11 with more accurate frames:
     time hgLoadMafFrames mm10 multiz60wayFrames multiz60wayFrames.bed.gz
     #   real    3m2.449s
     time featureBits -countGaps mm10 multiz60wayFrames
     #   57707702 bases of 2730871774 (2.113%) in intersection
     #   real    1m45.141s
 
     # reload table to fix frames problems 2014-03-19 - Hiram
     time featureBits -countGaps mm10 multiz60wayFrames
     # 79955378 bases of 2730871774 (2.928%) in intersection
 
     #   enable the trackDb entries:
 # frames multiz60wayFrames
 # irows on
     #   appears to work OK
 
 #########################################################################
 # Phylogenetic tree from 60-way (DONE - 2012-05-31 - 2012-06-12 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/multiz60way/4d
     cd /hive/data/genomes/mm10/bed/multiz60way/4d
 
     # the annotated maf's are in:
     ../anno/result/*.maf
 
     # using ensGene for mm10, only transcribed genes and nothing
     #	from the randoms and other misc.
     hgsql mm10 -Ne \
     "select * from ensGene WHERE cdsEnd > cdsStart;" | cut -f 2-20 \
 	| egrep -E -v "chrM|chrUn|random|_hap" > ensGene.gp
     wc -l *.gp
     #   55423 ensGene.gp
 
     genePredSingleCover ensGene.gp stdout | sort > ensGeneNR.gp
     wc -l ensGeneNR.gp
     #	22457 ensGeneNR.gp
 
     ssh encodek
     mkdir /hive/data/genomes/mm10/bed/multiz60way/4d/run
     cd /hive/data/genomes/mm10/bed/multiz60way/4d/run
     mkdir ../mfa
 
     # newer versions of msa_view have a slightly different operation
     # the sed of the gp file inserts the reference species in the chr name
     cat << '_EOF_' > 4d.csh
 #!/bin/csh -fe
 # 4d.csh - run msa_view --4d on one annotated maf file using the mm10
 # ensGeneNR.gp genes found on that sequence.
 #   $1 = sequence (chromosome) name
 #   $2 = maf file name under $r/anno/result
 #   $3 = output file path, relative to $r/4d/run
 set PHASTBIN = /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin
 set r = "/hive/data/genomes/mm10/bed/multiz60way"
 set c = $1
 set infile = $r/anno/result/$2
 set outfile = $3
 # intermediate files are written in local scratch space
 cd /scratch/tmp
 # 'clean' maf
 # on each 's' line keep only the part of the source name before the first dot
 perl -wpe 's/^s ([^.]+)\.\S+/s $1/' $infile > $c.maf
 # genes on this sequence only, with the name column rewritten as mm10.<name>
 awk -v C=$c '$2 == C {print}' $r/4d/ensGeneNR.gp | sed -e "s/\t$c\t/\tmm10.$c\t/" > $c.gp
 set NL=`wc -l $c.gp| gawk '{print $1}'`
 if ("$NL" != "0") then
     $PHASTBIN/msa_view --4d --features $c.gp -i MAF $c.maf -o SS > $c.ss
     $PHASTBIN/msa_view -i SS --tuple-size 1 $c.ss > $r/4d/run/$outfile
 else
     # no genes on this sequence: the output is a single newline
     echo "" > $r/4d/run/$outfile
 endif
 rm -f $c.gp $c.maf $c.ss
 '_EOF_'
     # << happy emacs
     chmod +x 4d.csh
 
     ls -1S /hive/data/genomes/mm10/bed/multiz60way/anno/result/*.maf \
 	| sed -e "s#.*multiz60way/anno/result/##" \
 	> maf.list
 
     cat << '_EOF_' > template
 #LOOP
 4d.csh $(root1) $(path1) {check out line+ ../mfa/$(root1).mfa}
 #ENDLOOP
 '_EOF_'
     # << happy emacs
 
     # the tac puts the quick jobs at the front
     gensub2 maf.list single template stdout | tac > jobList
     para create jobList
     para try ... check
     para -maxJob=5 push
     para time
 # Completed: 66 of 66 jobs
 # CPU time in finished jobs:      13176s     219.60m     3.66h    0.15d  0.000 y
 # IO & Wait Time:                 31790s     529.84m     8.83h    0.37d  0.001 y
 # Average job time:                 681s      11.36m     0.19h    0.01d
 # Longest finished job:            2883s      48.05m     0.80h    0.03d
 # Submission to last job:          2925s      48.75m     0.81h    0.03d
 
     # combine mfa files
     ssh hgwdev
     cd /hive/data/genomes/mm10/bed/multiz60way/4d
     # remove the broken empty files, size 0 and size 1:
     find ./mfa -type f -size 0 | xargs rm -f
     # most interesting, this did not identify files of size 1:
 #    find ./mfa -type f -size 1
     ls -og mfa | awk '$3 == 1' | awk '{print $NF}' > empty.list
     sed -e "s#^#mfa/##" empty.list | xargs rm -f
     #want comma-less species.list
     /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/msa_view \
 	--aggregate "`cat ../species.list`" mfa/*.mfa | sed s/"> "/">"/ \
 	    > 4d.all.mfa
     # check they are all in there:
     grep "^>" 4d.all.mfa | wc -l
     #   60
 
     # use phyloFit to create tree model (output is phyloFit.mod)
     time nice -n +19 \
 	/cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/phyloFit \
 	    --EM --precision MED --msa-format FASTA --subst-mod REV \
 		--tree ../tree-commas.nh 4d.all.mfa
     #   real    98m59.203s
     mv phyloFit.mod all.mod
 
     grep TREE all.mod
 #TREE: (((((((((((((((mm10:0.0855383,rn5:0.0922719):0.202381,dipOrd1:0.210819):0.0258471,(hetGla2:0.0917322,cavPor3:0.136876):0.0994271):0.00910944,speTri2:0.145483):0.0274969,(oryCun2:0.109639,ochPri2:0.200966):0.102067):0.0141654,(((((((((hg19:0.00674057,panTro4:0.00692231):0.00309904,gorGor3:0.00918625):0.00954082,ponAbe2:0.0191843):0.00356049,nomLeu2:0.0218207):0.0116848,(rheMac3:0.00814945,papHam1:0.0079848):0.0289473):0.0208338,(calJac3:0.0342405,saiBol1:0.0333221):0.0359171):0.0594469,tarSyr1:0.137467):0.011091,(micMur1:0.0918138,otoGar3:0.127231):0.0351527):0.0153171,tupBel1:0.18879):0.0042463):0.0214646,((susScr3:0.121641,(vicPac1:0.109818,(turTru2:0.0635753,(oviAri1:0.0392493,bosTau7:0.0315816):0.0939861):0.0203711):0.00368417):0.0444758,((((felCat5:0.0897448,(canFam3:0.0888602,ailMel1:0.0767935):0.021837):0.05011,equCab2:0.109367):0.00605998,(myoLuc2:0.137144,pteVam1:0.114013):0.0339604):0.00395001,(eriEur1:0.226934,sorAra1:0.270619):0.0628319):0.00292667):0.0291403):0.0231397,((((loxAfr3:0.078841,proCap1:0.160295):0.00825096,echTel1:0.266786):0.0031636,triMan1:0.0685675):0.0736043,(dasNov3:0.112086,choHof1:0.0974658):0.0535724):0.00739115):0.245967,(monDom5:0.139913,(sarHar1:0.132596,macEug2:0.111778):0.0294309):0.21273):0.0770867,ornAna1:0.50425):0.135096,(((((melGal1:0.067697,galGal4:0.05253):0.13729,taeGut1:0.202681):0.00899388,melUnd1:0.127774):0.216078,anoCar2:0.575186):0.0128221,chrPic1:0.201659):0.137011):0.113527,xenTro3:0.943162):0.0646458,latCha1:0.596956):0.463611,((((((tetNig2:0.223213,fr3:0.198755):0.263107,oreNil2:0.33649):0.0139699,gasAcu1:0.314841):0.0573697,oryLat2:0.430105):0.185668,gadMor1:0.562778):0.169352,danRer7:0.753326):0.117017):0.501088,petMar1:0.501088);
 
     #   four different subset lists:
     paste glire.list euarchontoglires.list placental.list all.list
 # mm10    mm10    mm10    mm10
 # rn5     rn5     rn5     rn5
 # dipOrd1 dipOrd1 dipOrd1 dipOrd1
 # hetGla2 hetGla2 hetGla2 hetGla2
 # cavPor3 cavPor3 cavPor3 cavPor3
 # speTri2 speTri2 speTri2 speTri2
 # oryCun2 oryCun2 oryCun2 oryCun2
 # ochPri2 ochPri2 ochPri2 ochPri2
 #         tupBel1 tupBel1 tupBel1
 #         hg19    hg19    hg19
 #         gorGor3 gorGor3 gorGor3
 #         panTro4 panTro4 panTro4
 #         nomLeu2 nomLeu2 nomLeu2
 #         ponAbe2 ponAbe2 ponAbe2
 #         tarSyr1 tarSyr1 tarSyr1
 #         rheMac3 rheMac3 rheMac3
 #         papHam1 papHam1 papHam1
 #         otoGar3 otoGar3 otoGar3
 #         calJac3 calJac3 calJac3
 #         micMur1 micMur1 micMur1
 #         saiBol1 saiBol1 saiBol1
 #                 equCab2 equCab2
 #                 vicPac1 vicPac1
 #                 turTru2 turTru2
 #                 susScr3 susScr3
 #                 bosTau7 bosTau7
 #                 oviAri1 oviAri1
 #                 pteVam1 pteVam1
 #                 myoLuc2 myoLuc2
 #                 felCat5 felCat5
 #                 canFam3 canFam3
 #                 ailMel1 ailMel1
 #                 eriEur1 eriEur1
 #                 sorAra1 sorAra1
 #                 choHof1 choHof1
 #                 dasNov3 dasNov3
 #                 proCap1 proCap1
 #                 echTel1 echTel1
 #                 triMan1 triMan1
 #                 loxAfr3 loxAfr3
 #                         macEug2
 #                         sarHar1
 #                         monDom5
 #                         ornAna1
 #                         galGal4
 #                         taeGut1
 #                         melGal1
 #                         melUnd1
 #                         anoCar2
 #                         chrPic1
 #                         xenTro3
 #                         latCha1
 #                         gadMor1
 #                         gasAcu1
 #                         fr3
 #                         oreNil2
 #                         tetNig2
 #                         danRer7
 #                         oryLat2
 #                         petMar1
 
     # on organisms that do not have all species in all files, the file names
     #	need to be filtered.  Using this perl script to extract from
     # the full mfa files, only the subset of species from the four lists:
     cat << '_EOF_' > filterMfa.pl
 #!/usr/bin/env perl
 
 # filterMfa.pl - copy every mfa/*.mfa file into <subset>Mfa/, keeping only
 #	the sequences whose names are listed in <subset>.list
 #
 # usage: filterMfa.pl <subset.list>
 #	<subset.list> has one sequence (database) name per line.
 #	The output directory name is the list file name with its .list suffix
 #	replaced by Mfa (glire.list -> glireMfa); it must already exist.
 #	Each output sequence header is written as "> name".
 
 use strict;
 use warnings;
 
 my $argc = scalar(@ARGV);
 
 if ($argc != 1) {
     printf STDERR "usage: filterMfa.pl <subset.list>\n";
     exit 255;
 }
 
 # names of the sequences to keep
 my %dbList;
 my $listFile = shift;
 open (my $listFh, '<', $listFile) or die "can not read $listFile: $!";
 # plain print: the file name must not be interpreted as a printf format
 print STDERR "using list: $listFile\n";
 while (my $db = <$listFh>) {
     chomp $db;
     next if ($db eq "");	# a blank line is not a sequence name
     $dbList{$db} = 1;
 }
 close ($listFh);
 
 # remove only a literal, trailing ".list"; the unescaped and unanchored
 # pattern /.list/ would match any character followed by "list" anywhere
 my $dirName = $listFile;
 $dirName =~ s/\.list$//;
 $dirName .= "Mfa";
 
 # glob returns the matching names sorted, with no external ls process
 foreach my $mfaFile (glob("mfa/*.mfa")) {
     my $chr = $mfaFile;
     $chr =~ s#^mfa/##;
 #    printf STDERR "processing: %s into %s/%s\n", $mfaFile, $dirName, $chr;
     open (my $in, '<', $mfaFile) or die "can not read $mfaFile: $!";
     open (my $out, '>', "$dirName/$chr")
         or die "can not write to $dirName/$chr: $!";
     my $inGroup = 0;	# true while reading a sequence that is being kept
     while (my $line = <$in>) {
         if ($line =~ m/^> /) {
             chomp $line;
             # header lines are "> dbName"; the first field is the ">"
             my (undef, $faDbName) = split('\s+', $line);
             if (defined($faDbName) && exists($dbList{$faDbName})) {
                 $inGroup = 1;
                 printf $out "> %s\n", $faDbName;
             } else {
                 $inGroup = 0;
             }
         } elsif ($inGroup) {
             print $out $line;
         }
     }
     close ($in);
     # buffered write errors only show up at close time
     close ($out) or die "can not close $dirName/$chr: $!";
 }
 '_EOF_'
     # << happy emacs
     chmod +x filterMfa.pl
 
     mkdir glireMfa euarchontogliresMfa placentalMfa vertebrateMfa
 
     # extract each set from the full mfa files, run msa_view on
     #   each subset and construct .nh tree for that subset
     for N in glire euarchontoglires placental vertebrate
 do
     ./filterMfa.pl ${N}.list
     /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/msa_view \
 	--aggregate "`cat ${N}.list|xargs echo`" ${N}Mfa/*.mfa \
         | sed s/"> "/">"/ > 4d.${N}.mfa
     /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/tree_doctor \
         --no-branchlen --prune-all-but="`cat ${N}.list|xargs echo`" \
         ../tree-commas.nh > tree-commas.${N}.nh
 done
 
     ### XXX ### MOST INTERESTING, this phyloFit operation was repeated
     ### to verify that the full 60 species vertebrate operation produced the
     ### same result as the original "all" subset.  This phyloFit appears to
     ### produce a different result each time ?
     # use phyloFit to create tree model (output is phyloFit.mod)
     for N in glire euarchontoglires placental vertebrate
 do
     time nice -n +19 \
 	/cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/phyloFit \
 	    --EM --precision MED --msa-format FASTA --subst-mod REV \
 		--tree ./tree-commas.${N}.nh 4d.${N}.mfa
     mv phyloFit.mod ${N}.mod
     grep TREE ${N}.mod | sed 's/TREE\:\ //' > ${N}.Nway.nh
 done
     #   real    0m15.747s
     #   real    4m5.526s
     #   real    20m45.982s
     #   real    141m21.248s
 
 #######################################################################
 # phastCons 60-way (DONE - 2012-06-12, 2012-08-21 - Hiram)
     #	was unable to split the full chrom MAF files, now working on the
     #	maf files as they were split up during multiz
 
     # split 60way mafs into 10M chunks and generate sufficient statistics
     # files for phastCons
     ssh encodek
     mkdir -p /hive/data/genomes/mm10/bed/multiz60way/cons/ss
     mkdir -p /hive/data/genomes/mm10/bed/multiz60way/cons/msa.split
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/msa.split
 
     cat << '_EOF_' > doSplit.csh
 #!/bin/csh -ef
 # doSplit.csh - split the annotated MAF of one chrom into SS window files
 #
 # usage: doSplit.csh <chrom> <doneFile>
 #   <chrom>    names the input anno/result/<chrom>.maf and the output
 #              directory cons/ss/<chrom>
 #   <doneFile> marker file; a date line is appended to it on completion
 #
 # Nothing is done when <doneFile> or <doneFile>.running is non-empty, so a
 # finished chrom, or one that has a .running marker, is not redone.
 set c = $1
 set MAF = /hive/data/genomes/mm10/bed/multiz60way/anno/result/$c.maf
 set WINDOWS = /hive/data/genomes/mm10/bed/multiz60way/cons/ss/$c
 # WC = total lines in the MAF, NL = lines that begin with #
 set WC = `cat $MAF | wc -l`
 set NL = `grep "^#" $MAF | wc -l`
 if ( -s $2 ) then
     exit 0
 endif
 if ( -s $2.running ) then
     exit 0
 endif
 
 # mark this chrom as in progress
 date >> $2.running
 
 # always start from an empty output directory
 rm -fr $WINDOWS
 mkdir $WINDOWS
 pushd $WINDOWS > /dev/null
 # when every line begins with # the MAF has no alignments: skip msa_split
 if ( $WC != $NL ) then
 # MAF input, SS output, output names start with $WINDOWS/$c
 # NOTE(review): -w 10000000,0 presumably is the 10M window with no overlap;
 # see the msa_split help for the exact meaning of -w, -I and -B
 /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/msa_split \
     $MAF -i MAF -o SS -r $WINDOWS/$c -w 10000000,0 -I 1000 -B 5000
 endif
 popd > /dev/null
 # record completion and clear the in-progress marker
 date >> $2
 rm -f $2.running
 '_EOF_'
     # << happy emacs
     chmod +x doSplit.csh
 
     cat << '_EOF_' > template
 #LOOP
 doSplit.csh $(root1) {check out line+ $(root1).done}
 #ENDLOOP
 '_EOF_'
     # << happy emacs
 
     #	do the easy ones first to see some immediate results
     ls -1S -r ../../anno/result | sed -e "s/.maf//;" > maf.list
 
     gensub2 maf.list single template jobList
     para -ram=8g create jobList
     para try ... check ... etc
 # Completed: 64 of 66 jobs
 # Crashed: 2 jobs
 # CPU time in finished jobs:     347730s    5795.49m    96.59h    4.02d  0.011 y
 # IO & Wait Time:                102813s    1713.56m    28.56h    1.19d  0.003 y
 # Average job time:                7040s     117.33m     1.96h    0.08d
 # Longest finished job:           42666s     711.10m    11.85h    0.49d
 # Submission to last job:        150336s    2505.60m    41.76h    1.74d
     # finish the last two on hgwdev with more memory.
 # linux data memory, in 1024-byte units
 export M=188743680
 ulimit -S -m $M -v $M
 ./doSplit.csh chr1 chr1.done &
 ./doSplit.csh chr2 chr2.done
 wait
     #   real    864m53.235s
 
     # Run phastCons
     #	This job is I/O intensive in its output files, beware where this
     #	takes place or do not run too many at once.
     ssh swarm
     mkdir -p /hive/data/genomes/mm10/bed/multiz60way/cons/run.cons
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/run.cons
 
     #	there are going to be several different phastCons runs using
     #	this same script.  They trigger off of the current working directory
     #	$cwd:t which is the "grp" in this script.  The runs below use:
     #	all glire euarchontoglires primate
 
     cat << '_EOF_' > doPhast.csh
 #!/bin/csh -fe
 # doPhast.csh - run phastCons on one SS window file
 #
 # usage: doPhast.csh <chrom> <ssFile> <len> <cov> <rho>
 #   <chrom>  chrom name, passed to --seqname and --idpref
 #   <ssFile> SS file name without the .ss suffix
 #   <len>    value for --expected-length
 #   <cov>    value for --target-coverage
 #   <rho>    value for --rho
 #
 # The group name is the last component of the current working directory.
 # The model is read from cons/<grp>/<grp>.mod and, when it is non-empty,
 # cons/<grp>/<grp>.non-inf supplies the --not-informative list.
 # Results are left in pp/<chrom>/<ssFile>.pp and bed/<chrom>/<ssFile>.bed
 # under the current working directory.
 set PHASTBIN = /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin
 set c = $1
 set f = $2
 set len = $3
 set cov = $4
 set rho = $5
 set grp = $cwd:t
 set cons = /hive/data/genomes/mm10/bed/multiz60way/cons
 # private scratch directory for this window
 set tmp = $cons/tmp/$f
 mkdir -p $tmp
 set ssSrc = $cons/ss
 set useGrp = "$grp.mod"
 # link the inputs into the scratch directory; both branches link the model
 # and the SS file, only the first also links the non-inf list
 if (-s $cons/$grp/$grp.non-inf) then
   ln -s $cons/$grp/$grp.mod $tmp
   ln -s $cons/$grp/$grp.non-inf $tmp
   ln -s $ssSrc/$c/$f.ss $tmp
 else
   ln -s $ssSrc/$c/$f.ss $tmp
   ln -s $cons/$grp/$grp.mod $tmp
 endif
 pushd $tmp > /dev/null
 # scores go to $f.pp on stdout, most conserved elements to $f.bed
 if (-s $grp.non-inf) then
   $PHASTBIN/phastCons $f.ss $useGrp \
     --rho $rho --expected-length $len --target-coverage $cov --quiet \
     --not-informative `cat $grp.non-inf` \
     --seqname $c --idpref $c --most-conserved $f.bed --score > $f.pp
 else
   $PHASTBIN/phastCons $f.ss $useGrp \
     --rho $rho --expected-length $len --target-coverage $cov --quiet \
     --seqname $c --idpref $c --most-conserved $f.bed --score > $f.pp
 endif
 popd > /dev/null
 mkdir -p pp/$c bed/$c
 # NOTE(review): the sleep and touch presumably give the shared filesystem
 # time to show the new directories before the moves below -- confirm
 sleep 4
 touch pp/$c bed/$c
 # remove results of any earlier run, then move the new ones into place
 rm -f pp/$c/$f.pp
 rm -f bed/$c/$f.bed
 mv $tmp/$f.pp pp/$c
 mv $tmp/$f.bed bed/$c
 rm -fr $tmp
 '_EOF_'
     # << happy emacs
     chmod a+x doPhast.csh
 
     #	this template will serve for all runs
     #	root1 == chrom name, file1 == ss file name without .ss suffix
     cat << '_EOF_' > template
 #LOOP
 ../run.cons/doPhast.csh $(root1) $(file1) 45 0.3 0.3 {check out line+ pp/$(root1)/$(file1).pp}
 #ENDLOOP
 '_EOF_'
     # << happy emacs
 
     ls -1S ../ss/chr*/chr* | sed -e "s/.ss$//" > ss.list
 
     # Create parasol batch and run it
     ############################ run for all species
     cd /hive/data/genomes/mm10/bed/multiz60way/cons
     mkdir all
     cd all
     cp -p ../../4d/all.mod ./all.mod
 
     gensub2 ../run.cons/ss.list single ../run.cons/template jobList
     para -ram=8g create jobList
     para try ... check ... push ... etc.
 # Completed: 314 of 314 jobs
 # CPU time in finished jobs:      36286s     604.77m    10.08h    0.42d  0.001 y
 # IO & Wait Time:                 10101s     168.35m     2.81h    0.12d  0.000 y
 # Average job time:                 148s       2.46m     0.04h    0.00d
 # Longest finished job:             219s       3.65m     0.06h    0.00d
 # Submission to last job:          4383s      73.05m     1.22h    0.05d
 
     # create Most Conserved track
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/all
     # For every chrom name in chrom.sizes that has a bed/<chrom> directory
     # (ls errors for missing directories are discarded), gather its
     # per-window bed files, sort them by column 1 and then numerically by
     # start, and rewrite each line as five tab-separated columns:
     #   chrom name from the loop, start, end, "lod=<column 5>", <column 5>
     cut -f1 ../../../../chrom.sizes | while read C
 do
     ls -d bed/${C} 2> /dev/null | while read D
     do
         cat ${D}/${C}*.bed
     done | sort -k1,1 -k2,2n \
     | awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", "'${C}'", $2, $3, $5, $5;}'
 done > tmpMostConserved.bed
     #   -rw-rw-r--  1 230642249 Jun 15 11:48 tmpMostConserved.bed
     /cluster/bin/scripts/lodToBedScore tmpMostConserved.bed > mostConserved.bed
     #   -rw-rw-r--  1 236425914 Jun 15 11:52 mostConserved.bed
 
     # load into database
     ssh hgwdev
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/all
     time nice -n +19 hgLoadBed mm10 phastConsElements60way mostConserved.bed
     #   Read 6748481 elements of size 5 from mostConserved.bed
     #   real    2m20.950s
 
     # Try for 5% overall cov, and 70% CDS cov
     featureBits mm10 -enrichment refGene:cds phastConsElements60way
     #	--rho 0.3 --expected-length 45 --target-coverage 0.3
     #   refGene:cds 1.281%, phastConsElements60way 6.517%,
     #   both 0.913%, cover 71.29%, enrich 10.94x
     time featureBits mm10 -enrichment ensGene:cds phastConsElements60way
     #   ensGene:cds 1.357%, phastConsElements60way 6.517%, both 0.942%, cover
     #   69.39%, enrich 10.65x
     #   real    0m54.109s
     time featureBits mm10 -enrichment knownGene:cds phastConsElements60way
     #   knownGene:cds 1.325%, phastConsElements60way 6.517%, both 0.930%,
     #   cover 70.18%, enrich 10.77x
     #   real    0m50.472s
 
     # Create merged posterior probability file and wiggle track data files
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/all
     mkdir downloads
 
     # Concatenate the per-window .pp files of each chrom into one gzipped
     # wigFix file.  The first sed splits each file name into fields (every
     # "." becomes " d ", the first "-" becomes " m ") so that sort -k3,3n
     # can order the files numerically on field 3; the second sed puts the
     # names back together before xargs cat reads them.
     # NOTE(review): assumes names like pp/chrN/chrN.START-END.pp, which
     # makes field 3 the window start -- confirm against the msa_split names
     for D in `ls -d pp/chr* | sed -e 's#pp/##'`
 do
     echo "working: $D"
     find ./pp/${D} -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
 	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
         | gzip -c > downloads/${D}.phastCons60way.wigFix.gz
 done
     #   real    102m58.496s
 
     #	encode those files into wiggle data
     time (zcat downloads/*.wigFix.gz \
 	| wigEncode stdin phastCons60way.wig phastCons60way.wib)
     #   Converted stdin, upper limit 1.00, lower limit 0.00
     #   real    9m32.980s
     du -hsc *.wi?
     #   1.8G    phastCons60way.wib
     #   298M    phastCons60way.wig
     #   2.1G    total
 
     #	encode into a bigWig file:
     #	(warning wigToBigWig process grows to about 36 Gb)
     #	in bash, to avoid the 32 Gb memory limit, set 180 Gb here:
 sizeG=188743680
 export sizeG
 ulimit -d $sizeG
 ulimit -v $sizeG
     time (zcat downloads/*.wigFix.gz \
         | wigToBigWig stdin ../../../../chrom.sizes phastCons60way.bw)
     #   real    27m1.039s
     #   -rw-rw-r--  1 4671685725 Jun 18 10:24 phastCons60way.bw
     bigWigInfo phastCons60way.bw
 version: 4
 isCompressed: yes
 isSwapped: 0
 primaryDataSize: 3,333,510,917
 primaryIndexSize: 100,774,056
 zoomLevels: 10
 chromCount: 59
 basesCovered: 1,929,686,275
 mean: 0.149660
 min: 0.000000
 max: 1.000000
 std: 0.282516
 
     #	if you wanted to use the bigWig file, loading bigWig table:
     #   but we don't use the bigWig file
     mkdir /gbdb/mm10/bbi
     ln -s `pwd`/phastCons60way.bw /gbdb/mm10/bbi
     hgsql mm10 -e 'drop table if exists phastCons60way; \
             create table phastCons60way (fileName varchar(255) not null); \
             insert into phastCons60way values
 	("/gbdb/mm10/bbi/phastCons60way.bw");'
 
     # Load gbdb and database with wiggle.
     ssh hgwdev
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/all
     ln -s `pwd`/phastCons60way.wib /gbdb/mm10/multiz60way/phastCons60way.wib
     time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/mm10/multiz60way mm10 \
 	phastCons60way phastCons60way.wig
     #   real    0m54.546s
 
     wigTableStats.sh mm10 phastCons60way
 # db.table      min max mean count sumData
 # mm10.phastCons60way     0 1 0.14966 1929686275 2.88797e+08
 #       stdDev viewLimits
 #       0.282516 viewLimits=0:1
 
     #  Create histogram to get an overview of all the data
     ssh hgwdev
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/all
     time nice -n +19 hgWiggle -doHistogram -db=mm10 \
 	-hBinSize=0.001 -hBinCount=1000 -hMinVal=0.0 -verbose=2 \
 	    phastCons60way > histogram.data 2>&1
     #	real    7m37.212s
 
     #	create plot of histogram:
 
     cat << '_EOF_' | gnuplot > histo.png
 set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
 set size 1.4, 0.8
 set key left box
 set grid noxtics
 set grid ytics
 set title " Mouse Mm10 Histogram phastCons60way track"
 set xlabel " phastCons60way score"
 set ylabel " Relative Frequency"
 set y2label " Cumulative Relative Frequency (CRF)"
 set y2range [0:1]
 set y2tics
 set yrange [0:0.02]
 
 plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
         "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
 '_EOF_'
     #	<< happy emacs
 
     display histo.png &
 
     ########################################################################
     ### Create a phastCons data set for Glires
 
     # setup glire-only run
     ssh swarm
     mkdir /hive/data/genomes/mm10/bed/multiz60way/cons/glire
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/glire
     # glire-only: get the glire only tree from the 4d directory
     cp -p ../../4d/glire.mod ./glire.mod
     # and all the others become the non-informative list for phastCons to ignore
     sort ../../4d/glire.list > glire.list
     sort ../../4d/vertebrate.list > vertebrate.list
     comm -13 glire.list vertebrate.list | xargs echo \
         | sed -e "s/ /,/g" > glire.non-inf
 
     gensub2 ../run.cons/ss.list single ../run.cons/template jobList
     para -ram=8g create jobList
     para try ... check ... push ... etc.
 # Completed: 314 of 314 jobs
 # CPU time in finished jobs:      12411s     206.85m     3.45h    0.14d  0.000 y
 # IO & Wait Time:                117850s    1964.16m    32.74h    1.36d  0.004 y
 # Average job time:                 415s       6.91m     0.12h    0.00d
 # Longest finished job:             658s      10.97m     0.18h    0.01d
 # Submission to last job:           796s      13.27m     0.22h    0.01d
 
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/glire
     # create Most Conserved track
     cut -f1 ../../../../chrom.sizes | while read C
 do
     ls -d bed/${C} 2> /dev/null | while read D
     do
         cat ${D}/${C}*.bed
     done | sort -k1,1 -k2,2n \
     | awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", "'${C}'", $2, $3, $5, $5;}'
 done > tmpMostConserved.bed
     #   real    0m32.945s
     /cluster/bin/scripts/lodToBedScore tmpMostConserved.bed > mostConserved.bed
     #   real    0m19.122s
 
     featureBits mm10 mostConserved.bed
     #   117058023 bases of 2652783500 (4.413%) in intersection
     #   real    0m21.506s
 
     # load into database
     ssh hgwdev
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/glire
     time nice -n +19 hgLoadBed mm10 phastConsElements60wayGlire \
 	mostConserved.bed
     #	Loaded 1336504 elements of size 6
     #	real    0m13.672s
     # verify coverage
     time featureBits mm10 phastConsElements60wayGlire
     #   117058023 bases of 2652783500 (4.413%) in intersection
     #   real    0m15.041s
 
     #	--rho 0.3 --expected-length 45 --target-coverage 0.3
     featureBits mm10 -enrichment refGene:cds phastConsElements60wayGlire
     #   refGene:cds 1.282%, phastConsElements60wayGlire 4.413%,
     #   both 0.944%, cover 73.60%, enrich 16.68x
 
     featureBits mm10 -enrichment knownGene:cds phastConsElements60wayGlire
     #   knownGene:cds 1.325%, phastConsElements60wayGlire 4.413%,
     #   both 0.957%, cover 72.22%, enrich 16.37x
 
 #	Create the downloads .pp files, from which the phastCons wiggle data
     #	is calculated
     # sort by chromName, chromStart so that items are in numerical order
     #  for wigEncode
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/glire
     mkdir downloads
     for D in `ls -d pp/chr* | sed -e 's#pp/##'`
 do
     echo "working: $D"
     find ./pp/${D} -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
 	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
         | gzip -c > downloads/${D}.phastCons60way.glire.wigFix.gz
 done
 
     # Create merged posterior probability file and wiggle track data files
     time (zcat downloads/chr*.wigFix.gz \
 	 | wigEncode stdin phastCons60wayGlire.wig phastCons60wayGlire.wib) &
     #   Converted stdin, upper limit 1.00, lower limit 0.00
     #   real    10m26.712s
 
     #	encode to bigWig
     #	(warning wigToBigWig process grows to about 36 Gb)
     #	in bash, to avoid the 32 Gb memory limit:
 export sizeG=188743680
 ulimit -d $sizeG
 ulimit -v $sizeG
 
     time (zcat downloads/*.wigFix.gz \
         | wigToBigWig -verbose=2 stdin ../../../../chrom.sizes \
         phastCons60wayGlire.bw > bigWig.log 2>&1) &
     #   real    52m17.108s
     grep VmPeak bigWig.log
     # pid=5552: VmPeak:     20926360 kB
 
     bigWigInfo phastCons60wayGlire.bw
 version: 4
 isCompressed: yes
 isSwapped: 0
 primaryDataSize: 3,631,413,425
 primaryIndexSize: 100,774,056
 zoomLevels: 10
 chromCount: 59
 basesCovered: 1,929,686,275
 mean: 0.142675
 min: 0.000000
 max: 1.000000
 std: 0.252347
 
     #	if desired to use the bigWig file, loading bigWig table:
     ln -s `pwd`/phastCons60wayGlire.bw /gbdb/mm10/bbi
     hgsql mm10 -e 'drop table if exists phastCons60wayGlire; \
             create table phastCons60wayGlire \
 		(fileName varchar(255) not null); \
             insert into phastCons60wayGlire values
 	("/gbdb/mm10/bbi/phastCons60wayGlire.bw");'
 
     ## load table with wiggle data
     ssh hgwdev
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/glire
     ln -s `pwd`/phastCons60wayGlire.wib \
 	/gbdb/mm10/multiz60way/phastCons60wayGlire.wib
     time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/mm10/multiz60way mm10 \
 	phastCons60wayGlire phastCons60wayGlire.wig
     #   real    0m56.786s
 
     wigTableStats.sh mm10 phastCons60wayGlire
 # db.table      min max mean count sumData
 # mm10.phastCons60wayGlire     0 1 0.142675 1929686275 2.75318e+08
 #	stdDev viewLimits
 #       0.252347 viewLimits=0:1
 
     #  Create histogram to get an overview of all the data
     time nice -n +19 hgWiggle -doHistogram \
 	-hBinSize=0.001 -hBinCount=1000 -hMinVal=0.0 -verbose=2 \
 	    -db=mm10 phastCons60wayGlire  > histogram.data 2>&1
     #	real    4m28.743s
 
     #	create plot of histogram:
 
     cat << '_EOF_' | gnuplot > histo.png
 # Histogram plot of the phastCons60wayGlire scores, PNG written to stdout.
 # Reads histogram.data from the current directory: column 5 against
 # column 2 is drawn as impulses on the left axis (RelFreq) and column 7
 # against column 2 as a line on the right axis (CRF).
 set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
 set size 1.4, 0.8
 set key left box
 set grid noxtics
 set grid ytics
 # the assembly is mm10; the title used to say Hg19
 set title " Mouse Mm10 Histogram phastCons60wayGlire track"
 set xlabel " phastCons60wayGlire score"
 set ylabel " Relative Frequency"
 set y2label " Cumulative Relative Frequency (CRF)"
 # right axis spans 0 to 1, left axis is limited to 0.02
 set y2range [0:1]
 set y2tics
 set yrange [0:0.02]
 
 plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
         "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
 '_EOF_'
     #	<< happy emacs
 
     display histo.png &
 
     ########################################################################
     ### Create a phastCons data set for Euarchontoglires
 
     # setup euarchontoglires-only run
     ssh swarm
     mkdir /hive/data/genomes/mm10/bed/multiz60way/cons/euarchontoglires
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/euarchontoglires
     # euarchontoglires-only: get the euarchontoglires only tree from the 4d directory
     cp -p ../../4d/euarchontoglires.mod ./euarchontoglires.mod
     # and all the others become the non-informative list for phastCons to ignore
     sort ../../4d/euarchontoglires.list > euarchontoglires.list
     sort ../../4d/vertebrate.list > vertebrate.list
     comm -13 euarchontoglires.list vertebrate.list | xargs echo \
         | sed -e "s/ /,/g" > euarchontoglires.non-inf
 
     gensub2 ../run.cons/ss.list single ../run.cons/template jobList
     para -ram=8g create jobList
     para try ... check ... push ... etc.
 # Completed: 314 of 314 jobs
 # CPU time in finished jobs:      17421s     290.36m     4.84h    0.20d  0.001 y
 # IO & Wait Time:                 37430s     623.83m    10.40h    0.43d  0.001 y
 # Average job time:                 175s       2.91m     0.05h    0.00d
 # Longest finished job:             343s       5.72m     0.10h    0.00d
 # Submission to last job:          2403s      40.05m     0.67h    0.03d
 
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/euarchontoglires
     # create Most Conserved track
     cut -f1 ../../../../chrom.sizes | while read C
 do
     ls -d bed/${C} 2> /dev/null | while read D
     do
         cat ${D}/${C}*.bed
     done | sort -k1,1 -k2,2n \
     | awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", "'${C}'", $2, $3, $5, $5;}'
 done > tmpMostConserved.bed
     #   real    0m32.945s
     /cluster/bin/scripts/lodToBedScore tmpMostConserved.bed > mostConserved.bed
     #   real    0m19.122s
 
     featureBits mm10 mostConserved.bed
     #   127113541 bases of 2652783500 (4.792%) in intersection
     #   real    0m21.506s
 
     # load into database
     ssh hgwdev
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/euarchontoglires
     time nice -n +19 hgLoadBed mm10 phastConsElements60wayEuarchontoGlires \
 	mostConserved.bed
     #	Loaded 2327130 elements of size 6
     #	real    0m24.591s
     # verify coverage
     time featureBits mm10 phastConsElements60wayEuarchontoGlires
     #   127113541 bases of 2652783500 (4.792%) in intersection
     #   real    0m18.857s
 
     #	--rho 0.3 --expected-length 45 --target-coverage 0.3
     featureBits mm10 -enrichment refGene:cds phastConsElements60wayEuarchontoGlires
     #   refGene:cds 1.282%, phastConsElements60wayEuarchontoGlires 4.792%,
     #   both 0.929%, cover 72.46%, enrich 15.12x
 
     featureBits mm10 -enrichment knownGene:cds phastConsElements60wayEuarchontoGlires
     #   knownGene:cds 1.325%, phastConsElements60wayEuarchontoGlires 4.792%,
     #   both 0.943%, cover 71.16%, enrich 14.85x
 
     #	Create the downloads .pp files, from which the phastCons wiggle data
     #	is calculated
     # sort by chromName, chromStart so that items are in numerical order
     #  for wigEncode
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/euarchontoglires
     mkdir downloads
     for D in `ls -d pp/chr* | sed -e 's#pp/##'`
 do
     echo "working: $D"
     find ./pp/${D} -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
 	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
         | gzip -c > downloads/${D}.phastCons60way.euarchontoglires.wigFix.gz
 done
 
     # Create merged posterior probability file and wiggle track data files
     time (zcat downloads/chr*.wigFix.gz \
 	 | wigEncode stdin phastCons60wayEuarchontoGlires.wig phastCons60wayEuarchontoGlires.wib \
         > wigEncode.log 2>&1) &
     # Converted stdin, upper limit 1.00, lower limit 0.00
     #   real    9m49.080s
 
     #	encode to bigWig
     #	(warning wigToBigWig process grows to about 36 Gb)
     #	in bash, to avoid the 32 Gb memory limit:
 export sizeG=188743680
 ulimit -d $sizeG
 ulimit -v $sizeG
 
     time (zcat downloads/*.wigFix.gz \
         | wigToBigWig stdin ../../../../chrom.sizes phastCons60wayEuarchontoGlires.bw \
         > bigWig.log 2>&1 ) &
     #   real    26m0.111s
     bigWigInfo phastCons60wayEuarchontoGlires.bw
 version: 4
 isCompressed: yes
 isSwapped: 0
 primaryDataSize: 3,411,704,465
 primaryIndexSize: 100,774,056
 zoomLevels: 10
 chromCount: 59
 basesCovered: 1,929,686,275
 mean: 0.133253
 min: 0.000000
 max: 1.000000
 std: 0.256320
 
     #	if desired to use the bigWig file, loading bigWig table:
     ln -s `pwd`/phastCons60wayEuarchontoGlires.bw /gbdb/mm10/bbi
     hgsql mm10 -e 'drop table if exists phastCons60wayEuarchontoGlires; \
             create table phastCons60wayEuarchontoGlires \
 		(fileName varchar(255) not null); \
             insert into phastCons60wayEuarchontoGlires values
 	("/gbdb/mm10/bbi/phastCons60wayEuarchontoGlires.bw");'
 
     ## load table with wiggle data
     ssh hgwdev
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/euarchontoglires
     ln -s `pwd`/phastCons60wayEuarchontoGlires.wib \
 	/gbdb/mm10/multiz60way/phastCons60wayEuarchontoGlires.wib
     time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/mm10/multiz60way mm10 \
 	phastCons60wayEuarchontoGlires phastCons60wayEuarchontoGlires.wig
     #   real    0m50.676s
 
     time wigTableStats.sh mm10 phastCons60wayEuarchontoGlires
 # db.table      min max mean count sumData
 # mm10.phastCons60wayEuarchontoGlires  0 1 0.133253 1929686275 2.57137e+08
 #	stdDev viewLimits
 #       0.25632 viewLimits=0:1
     #   real    0m21.964s
 
     #  Create histogram to get an overview of all the data
     time nice -n +19 hgWiggle -doHistogram \
 	-hBinSize=0.001 -hBinCount=1000 -hMinVal=0.0 -verbose=2 \
 	    -db=mm10 phastCons60wayEuarchontoGlires  > histogram.data 2>&1
     #	real    3m31.112s
 
     #	create plot of histogram:
 
     cat << '_EOF_' | gnuplot > histo.png
 set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
 set size 1.4, 0.8
 set key left box
 set grid noxtics
 set grid ytics
 set title " Mouse Mm10 Histogram phastCons60wayEuarchontoGlires track"
 set xlabel " phastCons60wayEuarchontoGlires score"
 set ylabel " Relative Frequency"
 set y2label " Cumulative Relative Frequency (CRF)"
 set y2range [0:1]
 set y2tics
 set yrange [0:0.02]
 
 plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
         "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
 '_EOF_'
     #	<< happy emacs
 
     display histo.png &
 
     ########################################################################
     ### Create a phastCons data set for primate ***### This was constructed
     ### and examined, but not used in the release
 
     # setup primate-only run
     ssh swarm
     mkdir /hive/data/genomes/mm10/bed/multiz60way/cons/primate
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/primate
     # primate-only: get the primate only tree from the 4d directory
     cp -p ../../4d/primate.mod ./primate.mod
     # and all the others become the non-informative list for phastCons to ignore
     cat ../../4d/glire.list ../../4d/placental.list ../../4d/vertebrate.list \
         | grep -v mm10 | sort | xargs echo | sed -e "s/ /,/g" \
         > primate.non-inf
 
     gensub2 ../run.cons/ss.list single ../run.cons/template jobList
     para -ram=8g create jobList
     para try ... check ... push ... etc.
 # Completed: 314 of 314 jobs
 # CPU time in finished jobs:      13884s     231.39m     3.86h    0.16d  0.000 y
 # IO & Wait Time:                130791s    2179.86m    36.33h    1.51d  0.004 y
 # Average job time:                 461s       7.68m     0.13h    0.01d
 # Longest finished job:             741s      12.35m     0.21h    0.01d
 # Submission to last job:           910s      15.17m     0.25h    0.01d
 
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/primate
     # create Most Conserved track
     cut -f1 ../../../../chrom.sizes | while read C
 do
     ls -d bed/${C} 2> /dev/null | while read D
     do
         cat ${D}/${C}*.bed
     done | sort -k1,1 -k2,2n \
     | awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", "'${C}'", $2, $3, $5, $5;}'
 done > tmpMostConserved.bed
 
     /cluster/bin/scripts/lodToBedScore tmpMostConserved.bed > mostConserved.bed
     #   real    0m27.199s
 
     featureBits mm10 mostConserved.bed
     #   112908553 bases of 2652783500 (4.256%) in intersection
 
     # load into database
     ssh hgwdev
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/primate
     time nice -n +19 hgLoadBed mm10 phastConsElements60wayPrimate \
 	mostConserved.bed
     #	Loaded 1119924 elements of size 6
     #	real    0m17.423s
     # verify coverage
     featureBits mm10 phastConsElements60wayPrimate
     #   112908553 bases of 2652783500 (4.256%) in intersection
     #   real    0m13.684s
 
     #	--rho 0.3 --expected-length 45 --target-coverage 0.3
     featureBits mm10 -enrichment refGene:cds phastConsElements60wayPrimate
     #   refGene:cds 1.281%, phastConsElements60wayPrimate 4.256%,
     #   both 0.897%, cover 69.98%, enrich 16.44x
 
     featureBits mm10 -enrichment knownGene:cds phastConsElements60wayPrimate
     #   knownGene:cds 1.325%, phastConsElements60wayPrimate 4.256%,
     #   both 0.909%, cover 68.64%, enrich 16.13x
 
     featureBits mm10 -enrichment ensGene:cds phastConsElements60wayPrimate
     #   ensGene:cds 1.357%, phastConsElements60wayPrimate 4.256%, both 0.913%,
     #   cover 67.30%, enrich 15.81x
 
     #	Create the downloads .pp files, from which the phastCons wiggle data
     #	is calculated
     # sort by chromName, chromStart so that items are in numerical order
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/primate
     mkdir downloads
     for D in `ls -d pp/chr* | sed -e 's#pp/##'`
 do
     echo "working: $D"
     find ./pp/${D} -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
 	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
         | gzip -c > downloads/${D}.phastCons60way.primate.wigFix.gz
 done
 
     # Create merged posterior probability file and wiggle track data files
     zcat downloads/chr*.wigFix.gz \
 	 | wigEncode stdin phastCons60wayPrimate.wig phastCons60wayPrimate.wib
     # Converted stdin, upper limit 1.00, lower limit 0.00
     #	real    12m22.465s
 
     #	encode to bigWig
     #	(warning wigToBigWig process grows to about 36 Gb)
     #	in bash, to avoid the 32 Gb memory limit:
 export sizeG=188743680
 ulimit -d $sizeG
 ulimit -v $sizeG
 
     zcat downloads/*.wigFix.gz \
         | wigToBigWig stdin ../../../../chrom.sizes phastCons60wayPrimate.bw
     #    real 31m44.517s
     bigWigInfo phastCons60wayPrimate.bw
 version: 4
 isCompressed: yes
 isSwapped: 0
 primaryDataSize: 2,431,379,060
 primaryIndexSize: 100,774,056
 zoomLevels: 10
 chromCount: 59
 basesCovered: 1,929,686,275
 mean: 0.093847
 min: 0.000000
 max: 1.000000
 std: 0.233892
 
     #	if desired to use the bigWig file, loading bigWig table:
     ln -s `pwd`/phastCons60wayPrimate.bw /gbdb/mm10/bbi
     hgsql mm10 -e 'drop table if exists phastCons60wayPrimate; \
             create table phastCons60wayPrimate \
 		(fileName varchar(255) not null); \
             insert into phastCons60wayPrimate values
 	("/gbdb/mm10/bbi/phastCons60wayPrimate.bw");'
 
     ## load table with wiggle data
     ssh hgwdev
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/primate
     ln -s `pwd`/phastCons60wayPrimate.wib \
 	/gbdb/mm10/multiz60way/phastCons60wayPrimate.wib
     time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/mm10/multiz60way mm10 \
 	phastCons60wayPrimate phastCons60wayPrimate.wig
     #   real    1m24.188s
 
     wigTableStats.sh mm10 phastCons60wayPrimate
 # db.table      min max mean count sumData
 # mm10.phastCons60wayPrimate 0 1 0.0938475 1929686275 1.81096e+08
 #       0.233892 viewLimits=0:1
 
     #  Create histogram to get an overview of all the data
     time nice -n +19 hgWiggle -doHistogram \
 	-hBinSize=0.001 -hBinCount=1000 -hMinVal=0.0 -verbose=2 \
 	    -db=mm10 phastCons60wayPrimate  > histogram.data 2>&1
     #   real    7m3.198s
 
     #	create plot of histogram:
 
     cat << '_EOF_' | gnuplot > histo.png
 set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
 set size 1.4, 0.8
 set key left box
 set grid noxtics
 set grid ytics
 set title " Mouse Mm10 Histogram phastCons60wayPrimate track"
 set xlabel " phastCons60wayPrimate score"
 set ylabel " Relative Frequency"
 set y2label " Cumulative Relative Frequency (CRF)"
 set y2range [0:1]
 set y2tics
 set yrange [0:0.02]
 
 plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
         "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
 '_EOF_'
     #	<< happy emacs
 
     display histo.png &
 
 #########################################################################
     ### Create a phastCons data set for Placental
 
     # setup placental-only run
     ssh swarm
     mkdir /hive/data/genomes/mm10/bed/multiz60way/cons/placental
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/placental
 
     # placental-only: get the placental only tree from the 4d directory
     cp -p ../../4d/placental.mod ./placental.mod
     # and all the others become the non-informative list for phastCons to ignore
     sort ../../4d/placental.list > placental.list
     sort ../../4d/vertebrate.list > vertebrate.list
     comm -13 placental.list vertebrate.list | xargs echo \
         | sed -e "s/ /,/g" > placental.non-inf
 
     gensub2 ../run.cons/ss.list single ../run.cons/template jobList
     para create jobList
     para try ... check ... push ... etc.
 # Completed: 314 of 314 jobs
 # CPU time in finished jobs:      27853s     464.21m     7.74h    0.32d  0.001 y
 # IO & Wait Time:                128981s    2149.69m    35.83h    1.49d  0.004 y
 # Average job time:                 499s       8.32m     0.14h    0.01d
 # Longest finished job:             785s      13.08m     0.22h    0.01d
 # Submission to last job:          5970s      99.50m     1.66h    0.07d
 
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/placental
     # create Most Conserved track
     cut -f1 ../../../../chrom.sizes | while read C
 do
     ls -d bed/${C} 2> /dev/null | while read D
     do
         cat ${D}/${C}*.bed
     done | sort -k1,1 -k2,2n \
     | awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", "'${C}'", $2, $3, $5, $5;}'
 done > tmpMostConserved.bed
     #   real    0m44.506s
     /cluster/bin/scripts/lodToBedScore tmpMostConserved.bed > mostConserved.bed
     #   real    0m44.170s
 
     featureBits mm10 mostConserved.bed
     #   144041584 bases of 2652783500 (5.430%) in intersection
     #   real    0m54.927s
 
     # load into database
     ssh hgwdev
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/placental
     time nice -n +19 hgLoadBed mm10 phastConsElements60wayPlacental \
 	mostConserved.bed
     #	Loaded 5257437 elements of size 6
     #	real    0m56.788s
 
     # verify coverage, should be the same as the file measured above
     time featureBits mm10 phastConsElements60wayPlacental
     #   144041584 bases of 2652783500 (5.430%) in intersection
     #   real    0m39.537s
 
     #	--rho 0.3 --expected-length 45 --target-coverage 0.3
     time featureBits mm10 -enrichment refGene:cds phastConsElements60wayPlacental
     #   refGene:cds 1.282%, phastConsElements60wayPlacental 5.430%,
     #   both 0.920%, cover 71.73%, enrich 13.21x
     #   real    0m39.833s
 
     time featureBits mm10 -enrichment knownGene:cds phastConsElements60wayPlacental
     #   knownGene:cds 1.325%, phastConsElements60wayPlacental 5.430%,
     #   both 0.934%, cover 70.47%, enrich 12.98x
     #   real    0m44.567s
 
     time featureBits mm10 -enrichment ensGene:cds phastConsElements60wayPlacental
     #   ensGene:cds 1.357%, phastConsElements60wayPlacental 5.430%,
     #   both 0.941%, cover 69.32%, enrich 12.77x
     #   real    0m43.093s
 
     #	Create the downloads .pp files, from which the phastCons wiggle data
     #	is calculated
     # sort by chromName, chromStart so that items are in numerical order
     #  for wigEncode
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/placental
     mkdir downloads
     for D in `ls -d pp/chr* | sed -e 's#pp/##'`
 do
     echo "working: $D"
     find ./pp/${D} -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
 	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
         | gzip -c > downloads/${D}.phastCons60way.placental.wigFix.gz
 done
 
     # Create merged posterior probability file and wiggle track data files
     time (zcat downloads/chr*.wigFix.gz \
 	 | wigEncode stdin phastCons60wayPlacental.wig \
         phastCons60wayPlacental.wib > wigEncode.log 2>&1) &
     # Converted stdin, upper limit 1.00, lower limit 0.00
     #   real    9m48.237s
 
     #	encode to bigWig
     #	(warning wigToBigWig process grows to about 36 Gb)
     #	in bash, to avoid the 32 Gb memory limit:
 export sizeG=188743680
 ulimit -d $sizeG
 ulimit -v $sizeG
 
     time (zcat downloads/*.wigFix.gz \
         | wigToBigWig stdin ../../../../chrom.sizes \
         phastCons60wayPlacental.bw > bigWig.log 2>&1) &
     #   real    25m18.556s
     bigWigInfo phastCons60wayPlacental.bw
 version: 4
 isCompressed: yes
 isSwapped: 0
 primaryDataSize: 3,271,676,156
 primaryIndexSize: 100,774,056
 zoomLevels: 10
 chromCount: 59
 basesCovered: 1,929,686,275
 mean: 0.135703
 min: 0.000000
 max: 1.000000
 std: 0.266432
 
     #	if desired to use the bigWig file, loading bigWig table:
     ln -s `pwd`/phastCons60wayPlacental.bw /gbdb/mm10/bbi
     hgsql mm10 -e 'drop table if exists phastCons60wayPlacental; \
             create table phastCons60wayPlacental \
 		(fileName varchar(255) not null); \
             insert into phastCons60wayPlacental values
 	("/gbdb/mm10/bbi/phastCons60wayPlacental.bw");'
 
     ## load table with wiggle data
     ssh hgwdev
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/placental
     ln -s `pwd`/phastCons60wayPlacental.wib \
 	/gbdb/mm10/multiz60way/phastCons60wayPlacental.wib
     time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/mm10/multiz60way mm10 \
 	phastCons60wayPlacental phastCons60wayPlacental.wig
     #   real    0m41.999s
 
     time wigTableStats.sh mm10 phastCons60wayPlacental
 # db.table      min max mean count sumData
 # mm10.phastCons60wayPlacental 0 1 0.135703 1929686275 2.61864e+08
 #	stdDev viewLimits
 #       0.266432 # viewLimits=0:1
     #   real    0m21.723s
 
 
     #  Create histogram to get an overview of all the data
     time nice -n +19 hgWiggle -doHistogram \
 	-hBinSize=0.001 -hBinCount=1000 -hMinVal=0.0 -verbose=2 \
 	    -db=mm10 phastCons60wayPlacental  > histogram.data 2>&1
     #   real    2m39.659s
 
     #	create plot of histogram:
 
     cat << '_EOF_' | gnuplot > histo.png
 set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
 set size 1.4, 0.8
 set key left box
 set grid noxtics
 set grid ytics
 set title " Mouse Mm10 Histogram phastCons60wayPlacental track"
 set xlabel " phastCons60wayPlacental score"
 set ylabel " Relative Frequency"
 set y2label " Cumulative Relative Frequency (CRF)"
 set y2range [0:1]
 set y2tics
 set yrange [0:0.02]
 
 plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
         "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
 '_EOF_'
     #	<< happy emacs
 
     display histo.png &
 
 #########################################################################
     ### Create a phastCons data set for Vertebrate
 
     # setup vertebrate-only run
     ssh swarm
     mkdir /hive/data/genomes/mm10/bed/multiz60way/cons/vertebrate
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/vertebrate
 
     # vertebrate-only: get the vertebrate only tree from the 4d directory
     cp -p ../../4d/vertebrate.mod ./vertebrate.mod
     # they are all in this one, no need for non-informative list
 
     gensub2 ../run.cons/ss.list single ../run.cons/template jobList
     para create jobList
     para try ... check ... push ... etc.
 # Completed: 313 of 314 jobs
 # Crashed: 1 jobs
 # CPU time in finished jobs:      36058s     600.97m    10.02h    0.42d  0.001 y
 # IO & Wait Time:                125496s    2091.59m    34.86h    1.45d  0.004 y
 # Average job time:                 516s       8.60m     0.14h    0.01d
 # Longest finished job:             912s      15.20m     0.25h    0.01d
 # Submission to last job:          2681s      44.68m     0.74h    0.03d
     # the one failed job was completed manually on hgwdev
 
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/vertebrate
     # create Most Conserved track
     cut -f1 ../../../../chrom.sizes | while read C
 do
     ls -d bed/${C} 2> /dev/null | while read D
     do
         cat ${D}/${C}*.bed
     done | sort -k1,1 -k2,2n \
     | awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", "'${C}'", $2, $3, $5, $5;}'
 done > tmpMostConserved.bed
     #   real    0m44.506s
     /cluster/bin/scripts/lodToBedScore tmpMostConserved.bed > mostConserved.bed
 
     time featureBits mm10 mostConserved.bed
     #   172842314 bases of 2652783500 (6.516%) in intersection
     #   real    1m23.298s
 
     # load into database
     ssh hgwdev
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/vertebrate
     time nice -n +19 hgLoadBed mm10 phastConsElements60wayVertebrate \
 	mostConserved.bed
     #   Read 6747163 elements of size 5 from mostConserved.bed
     #   real    1m15.122s
 
     # verify coverage
     featureBits mm10 phastConsElements60wayVertebrate
     #   172842314 bases of 2652783500 (6.516%) in intersection
 
     #	--rho 0.3 --expected-length 45 --target-coverage 0.3
     featureBits mm10 -enrichment refGene:cds phastConsElements60wayVertebrate
     #   refGene:cds 1.282%, phastConsElements60wayVertebrate 6.516%,
     #   both 0.914%, cover 71.26%, enrich 10.94x
 
     time featureBits mm10 -enrichment ensGene:cds phastConsElements60wayVertebrate
     #   ensGene:cds 1.357%, phastConsElements60wayVertebrate 6.516%,
     #   both 0.942%, cover 69.39%, enrich 10.65x
     #   real    0m51.139s
 
     time featureBits mm10 -enrichment knownGene:cds phastConsElements60wayVertebrate
     #   knownGene:cds 1.325%, phastConsElements60wayVertebrate 6.516%,
     #   both 0.930%, cover 70.18%, enrich 10.77x
     #   real    0m51.545s
 
     #	Create the downloads .pp files, from which the phastCons wiggle data
     #	is calculated
     # sort by chromName, chromStart so that items are in numerical order
     #  for wigEncode
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/vertebrate
     mkdir downloads
     for D in `ls -d pp/chr* | sed -e 's#pp/##'`
 do
     echo "working: $D"
     find ./pp/${D} -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
 	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
         | gzip -c > downloads/${D}.phastCons60way.vertebrate.wigFix.gz
 done
 
     # Create merged posterior probability file and wiggle track data files
     time (zcat downloads/chr*.wigFix.gz \
 	 | wigEncode stdin phastCons60wayVertebrate.wig \
         phastCons60wayVertebrate.wib > wigEncode.log 2>&1 ) &
     # Converted stdin, upper limit 1.00, lower limit 0.00
     #   real    9m48.554s
 
     #	encode to bigWig
     #	(warning wigToBigWig process grows to about 36 Gb)
     #	in bash, to avoid the 32 Gb memory limit:
 export sizeG=188743680
 ulimit -d $sizeG
 ulimit -v $sizeG
 
     time (zcat downloads/*.wigFix.gz \
         | wigToBigWig stdin ../../../../chrom.sizes \
         phastCons60wayVertebrate.bw > bigWig.log 2>&1) &
     #   real    25m8.630s
 
     bigWigInfo phastCons60wayVertebrate.bw
 version: 4
 isCompressed: yes
 isSwapped: 0
 primaryDataSize: 3,333,348,984
 primaryIndexSize: 100,774,056
 zoomLevels: 10
 chromCount: 59
 basesCovered: 1,929,686,275
 mean: 0.149646
 min: 0.000000
 max: 1.000000
 std: 0.282502
 
     #	if desired to use the bigWig file, loading bigWig table:
     ln -s `pwd`/phastCons60wayVertebrate.bw /gbdb/mm10/bbi
     hgsql mm10 -e 'drop table if exists phastCons60wayVertebrate; \
             create table phastCons60wayVertebrate \
 		(fileName varchar(255) not null); \
             insert into phastCons60wayVertebrate values
 	("/gbdb/mm10/bbi/phastCons60wayVertebrate.bw");'
 
     ## load table with wiggle data
     ssh hgwdev
     cd /hive/data/genomes/mm10/bed/multiz60way/cons/vertebrate
     ln -s `pwd`/phastCons60wayVertebrate.wib \
 	/gbdb/mm10/multiz60way/phastCons60wayVertebrate.wib
     time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/mm10/multiz60way mm10 \
 	phastCons60wayVertebrate phastCons60wayVertebrate.wig
     #   real    0m45.432s
 
     time wigTableStats.sh mm10 phastCons60wayVertebrate
 # db.table      min max mean count sumData
 # mm10.phastCons60wayVertebrate 0 1 0.149646 1929686275 2.8877e+08
 #	stdDev viewLimits
 #       0.282502 viewLimits=0:1
     #   real    0m22.224s
 
     #  Create histogram to get an overview of all the data
     time nice -n +19 hgWiggle -doHistogram \
 	-hBinSize=0.001 -hBinCount=1000 -hMinVal=0.0 -verbose=2 \
 	    -db=mm10 phastCons60wayVertebrate  > histogram.data 2>&1
     #   real    2m52.041s
 
     #	create plot of histogram:
 
     cat << '_EOF_' | gnuplot > histo.png
 set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
 set size 1.4, 0.8
 set key left box
 set grid noxtics
 set grid ytics
 set title " Mouse Mm10 Histogram phastCons60wayVertebrate track"
 set xlabel " phastCons60wayVertebrate score"
 set ylabel " Relative Frequency"
 set y2label " Cumulative Relative Frequency (CRF)"
 set y2range [0:1]
 set y2tics
 set yrange [0:0.02]
 
 plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
         "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
 '_EOF_'
     #	<< happy emacs
 
     display histo.png &
 
 #########################################################################
 # phyloP conservation for 60-way (DONE - 2012-06-15 - 2012-08-21 - Hiram)
 #
 # Vertebrate, Glire, Primate, Placental
 #
     # split SS files into 1M chunks, this business needs smaller files
     #   to complete
 
     # many of these jobs run too much memory to finish on a kluster node
     # can run all of this on hgwdev
 
     mkdir /hive/data/genomes/mm10/bed/multiz60way/consPhyloP
     cd /hive/data/genomes/mm10/bed/multiz60way/consPhyloP
     mkdir ss run.split
     cd run.split
 
     # doSplit.csh <chrom> <doneFile>
     #   $1 = MAF base name ($c); input is .../multiz60way/anno/result/$c.maf
     #   $2 = marker file: a non-empty $2 (finished) or $2.running (in
     #        progress) makes the script exit 0 without doing anything
     #   Recreates consPhyloP/ss/$c from scratch and, unless every line of the
     #   MAF is a "#" comment line (WC == NL), runs msa_split to write SS
     #   files using $WINDOWS/$c as the output root.  -w 1000000,0 is the
     #   "1M chunks" window size with 0 overlap; see the msa_split usage
     #   message for the exact meaning of -I and -B.
     #   The date is appended to $2.running at start and to $2 at the end,
     #   and $2.running is removed when finished.
     cat << '_EOF_' > doSplit.csh
 #!/bin/csh -ef
 set c = $1
 set MAF = /hive/data/genomes/mm10/bed/multiz60way/anno/result/$c.maf
 set WINDOWS = /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/ss/$c
 set WC = `cat $MAF | wc -l`
 set NL = `grep "^#" $MAF | wc -l`
 if ( -s $2 ) then
     exit 0
 endif
 if ( -s $2.running ) then
     exit 0
 endif
 
 date >> $2.running
 
 rm -fr $WINDOWS
 mkdir $WINDOWS
 pushd $WINDOWS > /dev/null
 if ( $WC != $NL ) then
 /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/msa_split \
     $MAF -i MAF -o SS -r $WINDOWS/$c -w 1000000,0 -I 1000 -B 5000
 endif
 popd > /dev/null
 date >> $2
 rm -f $2.running
 '_EOF_'
 # << happy emacs
 
     #	do the easy ones first to see some immediate results
     ls -1S -r ../../anno/result | sed -e "s/.maf//;" > maf.list
 
     cat << '_EOF_' > template
 #LOOP
 ./doSplit.csh $(root1) $(root1).done
 #ENDLOOP
 '_EOF_'
 # << happy emacs
 
     gensub2 maf.list single template jobList
     # copy the jobList to runEm.sh, edit to make all the commands run in
     #   the background, with wait statements every few commands to run
     #   a small number of these at once, no more than four at once with
     #   the large chroms, the small randoms can run a bunch at once, they
     #   finish quickly.
     time ./runEm.sh
     # about 11h30m
 
     # run phyloP with score=LRT
     ssh swarm
     cd /cluster/data/mm10/bed/multiz60way/consPhyloP
     mkdir run.phyloP
     cd run.phyloP
 
     # Adjust model file base composition background and rate matrix to be
     # representative of the chromosomes in play
     grep BACKGROUND ../../cons/all/all.mod | awk '{printf "%0.3f\n", $3 + $4}'
     #	0.525
     /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/modFreqs \
 	../../cons/all/all.mod 0.525 > all.mod
     grep BACKGROUND ../../cons/glire/glire.mod \
 	| awk '{printf "%0.3f\n", $3 + $4}'
     #	0.531
     /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/modFreqs \
 	../../cons/glire/glire.mod 0.531 > glire.mod
     grep BACKGROUND ../../cons/primate/primate.mod \
 	| awk '{printf "%0.3f\n", $3 + $4}'
     #	0.509
     /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/modFreqs \
 	../../cons/primate/primate.mod 0.509 > primate.mod
     grep BACKGROUND ../../cons/euarchontoglires/euarchontoglires.mod \
 	| awk '{printf "%0.3f\n", $3 + $4}'
     #	0.518
     /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/modFreqs \
 	../../cons/euarchontoglires/euarchontoglires.mod 0.518 \
         > euarchontoglires.mod
 
     grep BACKGROUND ../../cons/placental/placental.mod \
 	| awk '{printf "%0.3f\n", $3 + $4}'
     #	0.525
     /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/modFreqs \
 	../../cons/placental/placental.mod 0.525 > placental.mod
     grep BACKGROUND ../../cons/vertebrate/vertebrate.mod \
 	| awk '{printf "%0.3f\n", $3 + $4}'
     #	0.525
     /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/modFreqs \
 	../../cons/vertebrate/vertebrate.mod 0.525 > vertebrate.mod
 
     # doPhyloP.csh <chunkPath> <outFile>
     #   $1 = chunk path as listed in ss.list, e.g. chrM/chrM.1-16296
     #        (cName = $1:h, ssFile = $1:t; input is $cons/ss/$cName/$ssFile.ss)
     #   $2 = destination for the resulting .wigFix file; its directory is
     #        created if needed
     #   The model is selected by the name of the directory the script is
     #   started in (grp = $cwd:t): $cons/run.phyloP/$grp.mod is symlinked
     #   into the scratch directory, so run it from consPhyloP/all,
     #   consPhyloP/glire, etc.
     #   phyloP runs in the scratch directory $cons/tmp/$grp/$1, which is
     #   removed after the result has been moved to $2.
     #   NOTE(review): "set n" is never used afterwards, and the reason for
     #   the "sleep 4" before the mv is not recorded here.
     cat << '_EOF_' > doPhyloP.csh
 #!/bin/csh -fex
 set PHASTBIN = /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin
 set f = $1
 set ssFile = $1:t
 echo "ssFile: $ssFile"
 set out = $2
 set cName = $f:h
 echo "cName: $cName"
 set n = $f:r:e
 set grp = $cwd:t
 set cons = /hive/data/genomes/mm10/bed/multiz60way/consPhyloP
 set tmp = $cons/tmp/$grp/$f
 rm -fr $tmp
 mkdir -p $tmp
 set ssSrc = "$cons/ss/$cName/$ssFile"
 set useGrp = "$grp.mod"
 ln -s $cons/run.phyloP/$grp.mod $tmp
 pushd $tmp > /dev/null
 echo source: $ssSrc.ss
 $PHASTBIN/phyloP --method LRT --mode CONACC --wig-scores --chrom $cName \
     -i SS $useGrp $ssSrc.ss > $ssFile.wigFix
 popd > /dev/null
 mkdir -p $out:h
 sleep 4
 mv $tmp/$ssFile.wigFix $out
 rm -fr $tmp
 '_EOF_'
     # << happy emacs
     chmod +x doPhyloP.csh
 
     # Create list of chunks
     find ../ss -type f | sed -e "s/.ss$//; s#../ss/##;" > ss.list
 
     # Create template file
     #	file1 == $chr/$chunk/file name without .ss suffix
     cat << '_EOF_' > template
 #LOOP
 ../run.phyloP/doPhyloP.csh $(path1) {check out line+ wigFix/$(dir1)/$(file1).wigFix}
 #ENDLOOP
 '_EOF_'
     # << happy emacs
 
     ######################   Running all species  #######################
     # setup run for all species
     mkdir /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/all
     cd /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/all
     rm -fr wigFix
     mkdir wigFix
 
     gensub2 ../run.phyloP/ss.list single ../run.phyloP/template jobList
     para create jobList
     para try ... check ... push ... etc ...
     para time
 # Completed: 2708 of 2708 jobs
 # CPU time in finished jobs:    1832980s   30549.67m   509.16h   21.22d  0.058 y
 # IO & Wait Time:                217434s    3623.90m    60.40h    2.52d  0.007 y
 # Average job time:                 757s      12.62m     0.21h    0.01d
 # Longest finished job:            1458s      24.30m     0.41h    0.02d
 # Submission to last job:          3647s      60.78m     1.01h    0.04d
 
     # missed chrM in the original run:
     ../run.phyloP/doPhyloP.csh chrM/chrM.1-16296 wigFix/chrM/chrM.1-16296.wigFix
 
     # Assemble the per-chromosome download files for the all-species run.
     # Work in the run directory created above (consPhyloP/all): that is where
     # the wigFix/ results were written and the depth at which the
     # ../../../../chrom.sizes reference used below resolves to mm10/chrom.sizes
     ssh hgwdev
     cd /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/all
     mkdir downloads
     # chunk files are named chrN.start-end.wigFix: the first sed turns each
     # "." into " d " and the first "-" into " m " so the start coordinate is
     # field 3 for a numeric sort, the second sed restores the file names
     for D in `ls -d wigFix/chr* | sed -e 's#wigFix/##'`
 do
     echo "working: $D"
     find ./wigFix/${D} -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
 	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
         | gzip -c > downloads/${D}.phyloP60way.wigFix.gz
 done
     #   real    38m15.538s
 
     zcat downloads/*.wigFix.gz \
 	| wigEncode stdin phyloP60way.wig phyloP60way.wib > wigEncode.log 2>&1 &
     #   Converted stdin, upper limit 7.53, lower limit -20.00
     #   real    27m53.384s
 
 export sizeG=188743680
 ulimit -d $sizeG
 ulimit -v $sizeG
     time (zcat downloads/*.wigFix.gz \
         | wigToBigWig stdin ../../../../chrom.sizes phyloP60way.bw)
     #   real    30m10.440s
 
     bigWigInfo phyloP60way.bw
 version: 4
 isCompressed: yes
 isSwapped: 0
 primaryDataSize: 4,533,501,426
 primaryIndexSize: 100,775,272
 zoomLevels: 10
 chromCount: 59
 basesCovered: 1,929,686,275
 mean: 0.169761
 min: -20.000000
 max: 7.532000
 std: 0.942744
 
     #	if you wanted to use the bigWig file, loading bigWig table:
     ln -s `pwd`/phyloP60way.bw /gbdb/mm10/bbi
     hgsql mm10 -e 'drop table if exists phyloP60wayAll; \
             create table phyloP60wayAll \
 		(fileName varchar(255) not null); \
             insert into phyloP60wayAll values
 	("/gbdb/mm10/bbi/phyloP60way.bw");'
 
     #	loading the wiggle table:
     ln -s `pwd`/phyloP60way.wib /gbdb/mm10/multiz60way
     time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/mm10/multiz60way mm10 \
 	phyloP60wayAll phyloP60way.wig
     #   real    1m16.934s
 
     wigTableStats.sh mm10 phyloP60wayAll
 # db.table      min max mean count sumData
 # mm10.phyloP60wayAll     -20 7.532 0.169761 1929686275 3.27586e+08
 #	stdDev viewLimits
 #       0.942744 viewLimits=-4.54396:4.88348
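     #	that range is: 4.54396+4.88348 = 9.42744; 1,000 bins over that range
     #   would be -hBinSize=0.00942744.  The -hBinSize=0.0942744 used below is
     #   ten times that, i.e. 100 bins across the viewLimits range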
 
     #  Create histogram to get an overview of all the data
     time nice -n +19 hgWiggle -doHistogram \
 	-hBinSize=0.0942744 -hBinCount=1000 -hMinVal=-4.54396 -verbose=2 \
 	    -db=mm10 phyloP60wayAll > histogram.data 2>&1
     #   real    5m58.309s
 
     #	create plot of histogram:
 
     cat << '_EOF_' | gnuplot > histo.png
 set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
 set size 1.4, 0.8
 set key left box
 set grid noxtics
 set grid ytics
 set title " Mouse Mm10 Histogram phyloP60way track, all 60 vertebrates"
 set xlabel " phyloP60way score, all 60 vertebrates"
 set ylabel " Relative Frequency"
 set y2label " Cumulative Relative Frequency (CRF)"
 set y2range [0:1]
 set y2tics
 set yrange [0:0.2]
 set xrange [-2:2]
 
 plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
         "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
 '_EOF_'
     #	<< happy emacs
 
     display histo.png &
 
     ######################   Running the glire  #######################
     mkdir /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/glire
     cd /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/glire
     rm -fr wigFix
     mkdir wigFix
 
     gensub2 ../run.phyloP/ss.list single ../run.phyloP/template jobList
     para create jobList
     para try ... check ... push ... etc ...
     para time
 # Completed: 2709 of 2709 jobs
 # CPU time in finished jobs:     206723s    3445.39m    57.42h    2.39d  0.007 y
 # IO & Wait Time:                256366s    4272.76m    71.21h    2.97d  0.008 y
 # Average job time:                 171s       2.85m     0.05h    0.00d
 # Longest finished job:             487s       8.12m     0.14h    0.01d
 # Submission to last job:          1926s      32.10m     0.54h    0.02d
 
     # Assemble the per-chromosome download files for the glire run.
     cd /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/glire
     mkdir downloads
     # XXX - copy and paste error: the output files below should have been
     #	named phyloP60way and not phastCons60way.  The command is left as it
     #	was run so the names match the files that were created.
     # chunk files are named chrN.start-end.wigFix: the first sed turns each
     # "." into " d " and the first "-" into " m " so the start coordinate is
     # field 3 for a numeric sort, the second sed restores the file names
     for D in `ls -d wigFix/chr* | sed -e 's#wigFix/##'`
 do
     echo "working: $D"
     find ./wigFix/${D} -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
 	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
         | gzip -c > downloads/${D}.phastCons60way.glire.wigFix.gz
 done
 
     time (zcat downloads/chr*.wigFix.gz  \
 	| wigEncode stdin phyloP60wayGlire.wig phyloP60wayGlire.wib \
 	> wigEncode.log 2>&1) &
     #   Converted stdin, upper limit 1.17, lower limit -4.35
     #   real     20m31.753s
 
 export sizeG=188743680
 ulimit -d $sizeG
 ulimit -v $sizeG
     time (zcat downloads/chr*.wigFix.gz \
 	| wigToBigWig stdin ../../../../chrom.sizes phyloP60wayGlire.bw) &
     #   real    37m9.063s
     bigWigInfo phyloP60wayGlire.bw
 version: 4
 isCompressed: yes
 isSwapped: 0
 primaryDataSize: 3,158,091,915
 primaryIndexSize: 100,775,272
 zoomLevels: 10
 chromCount: 59
 basesCovered: 1,929,686,275
 mean: 0.073187
 min: -4.346000
 max: 1.165000
 std: 0.602992
 
     #	if you wanted to use the bigWig file, loading bigWig table:
     ln -s `pwd`/phyloP60wayGlire.bw /gbdb/mm10/bbi
     hgsql mm10 -e 'drop table if exists phyloP60wayGlire; \
             create table phyloP60wayGlire \
 		(fileName varchar(255) not null); \
             insert into phyloP60wayGlire values
 	("/gbdb/mm10/bbi/phyloP60wayGlire.bw");'
 
     #	loading the wiggle table:
     ln -s `pwd`/phyloP60wayGlire.wib /gbdb/mm10/multiz60way
     time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/mm10/multiz60way mm10 \
 	phyloP60wayGlire phyloP60wayGlire.wig
     #   real    0m58.536s
 
     wigTableStats.sh mm10 phyloP60wayGlire
 # db.table      min max mean count sumData
 # mm10.phyloP60wayGlire -4.346 1.165 0.0731873 1929686275 1.41229e+08
 #	stdDev viewLimits
 #       0.602992 viewLimits=-2.94177:1.165
     #	that range is: 4.346+1.165 = 5.511 -> hBinSize=0.005511
 
     #  Create histogram to get an overview of all the data
     time nice -n +19 hgWiggle -doHistogram \
 	-hBinSize=0.005511 -hBinCount=1000 -hMinVal=-4.346 -verbose=2 \
 	    -db=mm10 phyloP60wayGlire > histogram.data 2>&1
     #   real    8m23.088s
 
     #	create plot of histogram:
     # plot relative frequency (column 5, impulses, left axis) and cumulative
     # relative frequency (column 7, line, right axis) against column 2 of
     # histogram.data; the title names the mouse (mm10) assembly
     cat << '_EOF_' | gnuplot > histo.png
 set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
 set size 1.4, 0.8
 set key left box
 set grid noxtics
 set grid ytics
 set title " Mouse Mm10 Histogram phyloP60wayGlire track"
 set xlabel " phyloP60wayGlire score"
 set ylabel " Relative Frequency"
 set y2label " Cumulative Relative Frequency (CRF)"
 set y2range [0:1]
 set y2tics
 set yrange [0:0.15]
 set xrange [-2:1.2]
 
 plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
         "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
 '_EOF_'
     #	<< happy emacs
 
     display histo.png &
 
     ###################   Running the euarchontoglires  #######################
     mkdir /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/euarchontoglires
     cd /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/euarchontoglires
     rm -fr wigFix
     mkdir wigFix
 
     gensub2 ../run.phyloP/ss.list single ../run.phyloP/template jobList
     para create jobList
     para try ... check ... push ... etc ...
     para time
 # Completed: 2709 of 2709 jobs
 # CPU time in finished jobs:     542547s    9042.45m   150.71h    6.28d  0.017 y
 # IO & Wait Time:                 75914s    1265.23m    21.09h    0.88d  0.002 y
 # Average job time:                 228s       3.80m     0.06h    0.00d
 # Longest finished job:             430s       7.17m     0.12h    0.00d
 # Submission to last job:          4149s      69.15m     1.15h    0.05d
 
     # Assemble the per-chromosome download files for the euarchontoglires run.
     cd /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/euarchontoglires
     mkdir downloads
     # XXX - copy and paste error: the output files below should have been
     #	named phyloP60way and not phastCons60way.  The command is left as it
     #	was run so the names match the files that were created.
     # chunk files are named chrN.start-end.wigFix: the first sed turns each
     # "." into " d " and the first "-" into " m " so the start coordinate is
     # field 3 for a numeric sort, the second sed restores the file names
     for D in `ls -d wigFix/chr* | sed -e 's#wigFix/##'`
 do
     echo "working: $D"
     find ./wigFix/${D} -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
 	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
         | gzip -c > downloads/${D}.phastCons60way.euarchontoglires.wigFix.gz
 done
 
     time (zcat downloads/chr*.wigFix.gz  \
 	| wigEncode stdin phyloP60wayEuarchontoGlires.wig phyloP60wayEuarchontoGlires.wib \
 	> wigEncode.log 2>&1) &
     #   Converted stdin, upper limit 1.75, lower limit -12.70
     #   real    10m52.064s
 
 export sizeG=188743680
 ulimit -d $sizeG
 ulimit -v $sizeG
     time (zcat downloads/chr*.wigFix.gz \
 	| wigToBigWig stdin ../../../../chrom.sizes phyloP60wayEuarchontoGlires.bw) &
     #   real    26m47.912s
     bigWigInfo phyloP60wayEuarchontoGlires.bw
 version: 4
 isCompressed: yes
 isSwapped: 0
 primaryDataSize: 3,970,501,521
 primaryIndexSize: 100,775,272
 zoomLevels: 10
 chromCount: 59
 basesCovered: 1,929,686,275
 mean: 0.078739
 min: -12.704000
 max: 1.753000
 std: 0.689759
 
     #	if you wanted to use the bigWig file, loading bigWig table:
     ln -s `pwd`/phyloP60wayEuarchontoGlires.bw /gbdb/mm10/bbi
     hgsql mm10 -e 'drop table if exists phyloP60wayEuarchontoGlires; \
             create table phyloP60wayEuarchontoGlires \
 		(fileName varchar(255) not null); \
             insert into phyloP60wayEuarchontoGlires values
 	("/gbdb/mm10/bbi/phyloP60wayEuarchontoGlires.bw");'
 
     #	loading the wiggle table:
     ln -s `pwd`/phyloP60wayEuarchontoGlires.wib /gbdb/mm10/multiz60way
     time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/mm10/multiz60way mm10 \
 	phyloP60wayEuarchontoGlires phyloP60wayEuarchontoGlires.wig
     #   real    0m51.777s
 
     time wigTableStats.sh mm10 phyloP60wayEuarchontoGlires
 # db.table      min max mean count
 # mm10.phyloP60wayEuarchontoGlires -12.704 1.753 0.0787387 1929686275
 #	sumData stdDev viewLimits
 #       1.51941e+08 0.689759 viewLimits=-3.37006:1.753
     #   real    0m26.197s
 
     #	that range is: 12.704+1.753 = 14.457 -> hBinSize=0.014457
 
     #  Create histogram to get an overview of all the data
     time nice -n +19 hgWiggle -doHistogram \
 	-hBinSize=0.014457 -hBinCount=1000 -hMinVal=-12.704 -verbose=2 \
 	    -db=mm10 phyloP60wayEuarchontoGlires > histogram.data 2>&1
     #   real    3m22.205s
 
     #	create plot of histogram:
     cat << '_EOF_' | gnuplot > histo.png
 set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
 set size 1.4, 0.8
 set key left box
 set grid noxtics
 set grid ytics
 set title " Mouse Mm10 Histogram phyloP60wayEuarchontoGlires track"
 set xlabel " phyloP60wayEuarchontoGlires score"
 set ylabel " Relative Frequency"
 set y2label " Cumulative Relative Frequency (CRF)"
 set y2range [0:1]
 set y2tics
 set yrange [0:0.15]
 set xrange [-2:1.2]
 
 plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
         "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
 '_EOF_'
     #	<< happy emacs
 
     display histo.png &
 
     ######################   Running the primate  #######################
     ### ***### This was constructed
     ### and examined, but not used in the release
     mkdir /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/primate
     cd /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/primate
     rm -fr wigFix
     mkdir wigFix
 
     gensub2 ../run.phyloP/ss.list single ../run.phyloP/template jobList
     para -ram=8g create jobList
     para try ... check ... push ... etc ...
     para time
 # Completed: 2709 of 2709 jobs
 # CPU time in finished jobs:     307901s    5131.68m    85.53h    3.56d  0.010 y
 # IO & Wait Time:                 42937s     715.62m    11.93h    0.50d  0.001 y
 # Average job time:                 130s       2.16m     0.04h    0.00d
 # Longest finished job:             234s       3.90m     0.07h    0.00d
 # Submission to last job:          5975s      99.58m     1.66h    0.07d
 
    cd /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/primate
     mkdir downloads
     for D in `ls -d wigFix/chr* | sed -e 's#wigFix/##'`
 do
     echo "working: $D"
     find ./wigFix/${D} -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
 	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
         | gzip -c > downloads/${D}.phastCons60way.primate.wigFix.gz
    # XXX - copy and paste error, should have been phyloP60way and not phastCons
 done
 
     time (zcat downloads/chr*.wigFix.gz \
 	| wigEncode stdin phyloP60wayPrimate.wig phyloP60wayPrimate.wib \
 	> wigEncode.log 2>&1) &
     #   real    9m37.055s
     #   Converted stdin, upper limit 0.93, lower limit -10.63
 export sizeG=188743680
 ulimit -d $sizeG
 ulimit -v $sizeG
     time (zcat downloads/chr*.wigFix.gz \
 	| wigToBigWig stdin ../../../../chrom.sizes phyloP60wayPrimate.bw) &
     #   real    24m18.842s
     bigWigInfo phyloP60wayPrimate.bw
 version: 4
 isCompressed: yes
 isSwapped: 0
 primaryDataSize: 2,715,332,211
 primaryIndexSize: 100,775,272
 zoomLevels: 10
 chromCount: 59
 basesCovered: 1,929,686,275
 mean: 0.060017
 min: -10.633000
 max: 0.930000
 std: 0.518027
 
     #	loading bigWig table:
     ln -s `pwd`/phyloP60wayPrimate.bw /gbdb/mm10/bbi
     hgsql mm10 -e 'drop table if exists phyloP60wayPrimate; \
             create table phyloP60wayPrimate \
 		(fileName varchar(255) not null); \
             insert into phyloP60wayPrimate values
 	("/gbdb/mm10/bbi/phyloP60wayPrimate.bw");'
 
     #	loading the wiggle table:
     ln -s `pwd`/phyloP60wayPrimate.wib /gbdb/mm10/multiz60way
     time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/mm10/multiz60way mm10 \
 	phyloP60wayPrimate phyloP60wayPrimate.wig
     #   real    0m45.837s
 
     wigTableStats.sh mm10 phyloP60wayPrimate
# db.table      min max mean count sumData
 # mm10.phyloP60wayPrimate  -10.633 0.93 0.0600168 1929686275 1.15814e+08
 #	stdDev viewLimits
 #       0.518027 viewLimits=-2.53012:0.93
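    #	that range is: 10.633+0.93 = 11.563 for the hBinSize=0.11563
    #	(note: 11.563/1000 bins is 0.011563; the run below used 0.11563,
    #	 i.e. bins 10x wider than in the other phyloP60way histograms)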
 
     #  Create histogram to get an overview of all the data
     time nice -n +19 hgWiggle -doHistogram \
 	-hBinSize=0.11563 -hBinCount=1000 -hMinVal=-10.633 -verbose=2 \
 	    -db=mm10 phyloP60wayPrimate > histogram.data 2>&1
     #   real    4m36.379s
     # to see yrange:
     grep -v "^#" histogram.data | ave -col=5 stdin
 
     #	create plot of histogram:
 
     cat << '_EOF_' | gnuplot > histo.png
 set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
 set size 1.4, 0.8
 set key left box
 set grid noxtics
 set grid ytics
set title " Mouse Mm10 Histogram phyloP60wayPrimate track"
 set xlabel " phyloP60wayPrimate score"
 set ylabel " Relative Frequency"
 set y2label " Cumulative Relative Frequency (CRF)"
 set y2range [0:1]
 set y2tics
 set yrange [0:0.472]
 set xrange [-2.5:1.0]
 
 plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
         "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
 '_EOF_'
     #	<< happy emacs
 
     display histo.png &
 
     ######################   Running the placental  #######################
     mkdir /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/placental
     cd /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/placental
     rm -fr wigFix
     mkdir wigFix
 
     gensub2 ../run.phyloP/ss.list single ../run.phyloP/template jobList
     para create jobList
     para try ... check ... push ... etc ...
     para time
 # Completed: 2709 of 2709 jobs
 # CPU time in finished jobs:    1188036s   19800.60m   330.01h   13.75d  0.038 y
 # IO & Wait Time:                209859s    3497.65m    58.29h    2.43d  0.007 y
 # Average job time:                 516s       8.60m     0.14h    0.01d
 # Longest finished job:            1672s      27.87m     0.46h    0.02d
 # Submission to last job:          6336s     105.60m     1.76h    0.07d
 
     cd /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/placental
     mkdir downloads
     for D in `ls -d wigFix/chr* | sed -e 's#wigFix/##'`
 do
     echo "working: $D"
     find ./wigFix/${D} -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
 	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
         | gzip -c > downloads/${D}.phastCons60way.placental.wigFix.gz
    # XXX - copy and paste error, should have been phyloP60way and not phastCons
 done
     time (zcat downloads/chr*.wigFix.gz \
 	| wigEncode stdin phyloP60wayPlacental.wig phyloP60wayPlacental.wib \
 	> wigEncode.log 2>&1) &
     #	Converted stdin, upper limit 3.30, lower limit -20.00
     #   real    11m54.289s
 
 export sizeG=188743680
 ulimit -d $sizeG
 ulimit -v $sizeG
     time (zcat downloads/chr*.wigFix.gz \
	| wigToBigWig stdin ../../../../chrom.sizes phyloP60wayPlacental.bw \
        > bigWig.log 2>&1) &
     #   real    28m4.576s
     bigWigInfo phyloP60wayPlacental.bw
 version: 4
 isCompressed: yes
 isSwapped: 0
 primaryDataSize: 4,423,832,009
 primaryIndexSize: 100,775,272
 zoomLevels: 10
 chromCount: 59
 basesCovered: 1,929,686,275
 mean: 0.109489
 min: -20.000000
 max: 3.296000
 std: 0.810657
 
     #	loading bigWig table if that is what you wanted to do:
     ln -s `pwd`/phyloP60wayPlacental.bw /gbdb/mm10/bbi
     hgsql mm10 -e 'drop table if exists phyloP60wayPlacental; \
             create table phyloP60wayPlacental \
 		(fileName varchar(255) not null); \
             insert into phyloP60wayPlacental values
 	("/gbdb/mm10/bbi/phyloP60wayPlacental.bw");'
 
     #	loading the wiggle table:
     ln -s `pwd`/phyloP60wayPlacental.wib /gbdb/mm10/multiz60way
     time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/mm10/multiz60way mm10 \
 	phyloP60wayPlacental phyloP60wayPlacental.wig
     #   real    0m50.284s
 
     wigTableStats.sh mm10 phyloP60wayPlacental
 # db.table      min max mean count sumData
 # mm10.phyloP60wayPlacental -20 3.296 0.109489 1929686275 2.11279e+08
 #       stdDev viewLimits
 #       0.810657 viewLimits=-3.9438:3.296
 
     #	that range is: 20+3.296 = 23.296 for hBinSize=0.023296
     #  Create histogram to get an overview of all the data
     time nice -n +19 hgWiggle -doHistogram \
 	-hBinSize=0.023296 -hBinCount=1000 -hMinVal=-20 -verbose=2 \
 	    -db=mm10 phyloP60wayPlacental > histogram.data 2>&1
     #   real    3m24.650s
     # to see yrange:
     grep -v "^#" histogram.data | ave -col=5 stdin
 
     #	create plot of histogram:
 
     cat << '_EOF_' | gnuplot > histo.png
 set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
 set size 1.4, 0.8
 set key left box
 set grid noxtics
 set grid ytics
 set title " Mouse Mm10 Histogram phyloP60wayPlacental track"
 set xlabel " phyloP60wayPlacental score"
 set ylabel " Relative Frequency"
 set y2label " Cumulative Relative Frequency (CRF)"
 set y2range [0:1]
 set y2tics
 set yrange [0:0.084]
 set xrange [-2.5:2.5]
 
 plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
         "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
 '_EOF_'
     #	<< happy emacs
 
     display histo.png &
 
     ######################   Running the vertebrate  #######################
     mkdir /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/vertebrate
     cd /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/vertebrate
     rm -fr wigFix
     mkdir wigFix
 
     gensub2 ../run.phyloP/ss.list single ../run.phyloP/template jobList
     para create jobList
     para try ... check ... push ... etc ...
     para time
 # Completed: 2709 of 2709 jobs
 # CPU time in finished jobs:    1825414s   30423.56m   507.06h   21.13d  0.058 y
 # IO & Wait Time:                211040s    3517.34m    58.62h    2.44d  0.007 y
 # Average job time:                 752s      12.53m     0.21h    0.01d
 # Longest finished job:            1530s      25.50m     0.42h    0.02d
 # Submission to last job:          6045s     100.75m     1.68h    0.07d
 
     cd /hive/data/genomes/mm10/bed/multiz60way/consPhyloP/vertebrate
     mkdir downloads
     for D in `ls -d wigFix/chr* | sed -e 's#wigFix/##'`
 do
     echo "working: $D"
     find ./wigFix/${D} -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
 	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
         | gzip -c > downloads/${D}.phastCons60way.vertebrate.wigFix.gz
    # XXX - copy and paste error, should have been phyloP60way and not phastCons
 done
 
     time (zcat downloads/chr*.wigFix.gz \
 	| wigEncode stdin phyloP60wayVertebrate.wig phyloP60wayVertebrate.wib \
 	> wigEncode.log 2>&1) &
     #	Converted stdin, upper limit 7.53, lower limit -20.00
     #   real    12m2.774s
 
 export sizeG=188743680
 ulimit -d $sizeG
 ulimit -v $sizeG
     time (zcat downloads/chr*.wigFix.gz \
 	| wigToBigWig stdin ../../../../chrom.sizes phyloP60wayVertebrate.bw \
         > bigWig.log 2>&1) &
     #   real    27m6.791s
     bigWigInfo phyloP60wayVertebrate.bw
 version: 4
 isCompressed: yes
 isSwapped: 0
 primaryDataSize: 4,529,467,614
 primaryIndexSize: 100,775,272
 zoomLevels: 10
 chromCount: 59
 basesCovered: 1,929,686,275
 mean: 0.169653
 min: -20.000000
 max: 7.532000
 std: 0.942808
 
     #	loading bigWig table:
     ln -s `pwd`/phyloP60wayVertebrate.bw /gbdb/mm10/bbi
     hgsql mm10 -e 'drop table if exists phyloP60wayVertebrate; \
             create table phyloP60wayVertebrate \
 		(fileName varchar(255) not null); \
             insert into phyloP60wayVertebrate values
 	("/gbdb/mm10/bbi/phyloP60wayVertebrate.bw");'
 
     #	loading the wiggle table:
     ln -s `pwd`/phyloP60wayVertebrate.wib /gbdb/mm10/multiz60way
     time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/mm10/multiz60way mm10 \
 	phyloP60wayVertebrate phyloP60wayVertebrate.wig
     #   real    0m56.535s
 
     time wigTableStats.sh mm10 phyloP60wayVertebrate
# db.table      min max mean count sumData
 # mm10.phyloP60wayVertebrate -20 7.532 0.169653 1929686275 3.27377e+08
 #	stdDev viewLimits
 #       0.942808 viewLimits=-4.54439:4.88369
     #   real    0m25.320s
 
     #	that range is: 20+7.532 = 27.532 for hBinSize=0.027532
 
     #  Create histogram to get an overview of all the data
     time nice -n +19 hgWiggle -doHistogram \
 	-hBinSize=0.027532 -hBinCount=1000 -hMinVal=-20 -verbose=2 \
 	    -db=mm10 phyloP60wayVertebrate > histogram.data 2>&1
     #   real    3m26.565s
     # to see yrange:
     egrep -v "^#|udcfileOpen" histogram.data  | ave -col=5 stdin
 
     #	create plot of histogram:
 
     cat << '_EOF_' | gnuplot > histo.png
 set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
 set size 1.4, 0.8
 set key left box
 set grid noxtics
 set grid ytics
 set title " Mouse Mm10 Histogram phyloP60wayVertebrate track"
 set xlabel " phyloP60wayVertebrate score"
 set ylabel " Relative Frequency"
 set y2label " Cumulative Relative Frequency (CRF)"
 set y2range [0:1]
 set y2tics
 set yrange [0:0.1123]
 set xrange [-2.5:2.5]
 
 plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
         "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
 '_EOF_'
     #	<< happy emacs
 
     display histo.png &
 
 #########################################################################
 # construct download files for 60-way (DONE - 2012-06-27 - 2012-08-21 - Hiram)
 mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/multiz60way
 mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/multiz60way/maf
 mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/multiz60way/alignments
 mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons60way
 mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons60way/glire
 mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons60way/primate
 mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons60way/euarchontoglire
 mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons60way/placental
 mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons60way/vertebrate
 mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons60way/mm10.60way.phastCons
 mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP60way
 mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP60way/glire
 mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP60way/primate
 mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP60way/euarchontoglire
 mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP60way/placental
 mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP60way/vertebrate
 mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP60way/mm10.60way.phyloP60way
     mkdir /hive/data/genomes/mm10/bed/multiz60way/downloads
     cd /hive/data/genomes/mm10/bed/multiz60way/downloads
     mkdir multiz60way phastCons60way phyloP60way
     cd multiz60way
     mkdir maf alignments
     cd maf
     time cp -p ../../../anno/result/chr*.maf .
     #   real    735m35.723s
     time gzip *.maf
     #   real    700m23.340s
     md5sum *.maf.gz > md5sum.txt
     ln -s `pwd`/*.maf.gz `pwd`/md5sum.txt \
         /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/multiz60way/maf
     cd ..
     du -hsc maf
     #   24G     maf
     du -hsc ../../anno/result/
     #   244G    ../../anno/result/
     ln -s ../../mm10.60way.nh .
     ln -s ../../mm10.60way.commonNames.nh .
     ln -s `pwd`/*.nh \
         /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/multiz60way
 
     #####################################################################
     cd /hive/data/genomes/mm10/bed/multiz60way/downloads/phastCons60way
     mkdir glire euarchontoglire primate placental vertebrate mm10.60way.phastCons
     cd glire
     ln -s ../../../cons/glire/downloads/chr*.gz .
     time md5sum *.gz > md5sum.txt &
     ln -s `pwd`/*.gz `pwd`/md5sum.txt \
         /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons60way/glire
     #   real    5m50.001s
     cd ../euarchontoglire
     ln -s ../../../cons/euarchontoglires/downloads/chr*.gz .
     time md5sum *.gz > md5sum.txt &
     #   real    1m14.103s
     ln -s `pwd`/*.gz `pwd`/md5sum.txt \
       /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons60way/euarchontoglire
     cd ../primate
     ln -s ../../../cons/primate/downloads/chr*.gz .
     time md5sum *.gz > md5sum.txt &
     ln -s `pwd`/*.gz `pwd`/md5sum.txt \
       /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons60way/primate
     #   real    5m39.288s
     cd ../placental
     ln -s ../../../cons/placental/downloads/chr*.gz .
     time md5sum *.gz > md5sum.txt &
     ln -s `pwd`/*.gz `pwd`/md5sum.txt \
   /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons60way/placental
     #   real    5m9.762s
     cd ../vertebrate
     ln -s ../../../cons/vertebrate/downloads/chr*.gz .
     time md5sum *.gz > md5sum.txt &
     ln -s `pwd`/*.gz `pwd`/md5sum.txt \
   /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons60way/vertebrate
     #   real    0m45.408s
     cd ../mm10.60way.phastCons
     ln -s ../../../cons/all/downloads/chr*.gz .
     time md5sum *.gz > md5sum.txt &
     ln -s `pwd`/*.gz `pwd`/md5sum.txt \
   /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons60way/mm10.60way.phastCons
     #   real    6m11.158s
     cd ..
     ln -s ../../cons/all/all.mod mm10.60way.phastCons.mod
     ln -s ../../cons/glire/glire.mod mm10.60way.phastCons.glire.mod
     ln -s ../../cons/primate/primate.mod mm10.60way.phastCons.primate.mod
     ln -s ../../cons/euarchontoglires/euarchontoglires.mod mm10.60way.phastCons.euarchontoglire.mod
     ln -s ../../cons/placental/placental.mod mm10.60way.phastCons.placental.mod
     ln -s ../../cons/vertebrate/vertebrate.mod mm10.60way.phastCons.vertebrate.mod
     ln -s ../../cons/all/phastCons60way.bw mm10.60way.phastCons.bw
     ln -s ../../cons/glire/phastCons60wayGlire.bw \
         mm10.60way.phastCons60wayGlire.bw
     ln -s ../../cons/placental/phastCons60wayPlacental.bw \
         mm10.60way.phastCons60wayPlacental.bw
     ln -s ../../cons/euarchontoglires/phastCons60wayEuarchontoGlires.bw \
         mm10.60way.phastCons60wayEuarchontoGlire.bw
     ln -s ../../cons/primate/phastCons60wayPrimate.bw \
         mm10.60way.phastCons60wayPrimate.bw
     ln -s ../../cons/vertebrate/phastCons60wayVertebrate.bw \
         mm10.60way.phastCons60wayVertebrate.bw
     time md5sum *.mod *.bw > md5sum.txt
     #   real    20m11.260s
     # obtain the README.txt from hg19/phastCons46way and update for this
     #   situation
     ln -s `pwd`/*.mod `pwd`/*.bw `pwd`/README.txt \
       /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons60way
 
     #####################################################################
     cd /hive/data/genomes/mm10/bed/multiz60way/downloads/phyloP60way
     mkdir glire euarchontoglire primate placental vertebrate mm10.60way.phyloP60way
     cd glire
     ln -s ../../../consPhyloP/glire/downloads/chr*.gz .
     time md5sum *.gz > md5sum.txt &
     ln -s `pwd`/*.gz `pwd`/md5sum.txt \
         /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP60way/glire
     #   real    6m5.733s
     cd ../euarchontoglire
     ln -s ../../../consPhyloP/euarchontoglires/downloads/chr*.gz .
     time md5sum *.gz > md5sum.txt &
     ln -s `pwd`/*.gz `pwd`/md5sum.txt \
 /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP60way/euarchontoglire
     #   real    5m40.272s
     cd ../primate
     ln -s ../../../consPhyloP/primate/downloads/chr*.gz .
     time md5sum *.gz > md5sum.txt &
     ln -s `pwd`/*.gz `pwd`/md5sum.txt \
         /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP60way/primate
     #   real    7m22.623s
     cd ../placental
     ln -s ../../../consPhyloP/placental/downloads/chr*.gz .
     time md5sum *.gz > md5sum.txt &
     ln -s `pwd`/*.gz `pwd`/md5sum.txt \
       /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP60way/placental
     #   real    7m39.269s
     cd ../vertebrate
     ln -s ../../../consPhyloP/vertebrate/downloads/chr*.gz .
     time md5sum *.gz > md5sum.txt &
     ln -s `pwd`/*.gz `pwd`/md5sum.txt \
       /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP60way/vertebrate
     cd ../mm10.60way.phyloP60way
     ln -s ../../../consPhyloP/all/downloads/chr*.gz .
     time md5sum *.gz > md5sum.txt &
     ln -s `pwd`/*.gz `pwd`/md5sum.txt \
       /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP60way/mm10.60way.phyloP60way
     #   real    8m5.777s
 
     cd ..
     ln -s ../../consPhyloP/run.phyloP/all.mod mm10.60way.phyloP60way.mod
     ln -s ../../consPhyloP/run.phyloP/glire.mod ./mm10.phyloP.glire.mod
     ln -s ../../consPhyloP/run.phyloP/placental.mod ./mm10.phyloP.placental.mod
     ln -s ../../consPhyloP/run.phyloP/euarchontoglires.mod ./mm10.phyloP.euarchontoglire.mod
     ln -s ../../consPhyloP/run.phyloP/primate.mod ./mm10.phyloP.primate.mod
     ln -s ../../consPhyloP/run.phyloP/vertebrate.mod ./mm10.60way.vertebrate.mod
 
     ln -s ../../consPhyloP/all/phyloP60way.bw mm10.60way.phyloP60way.bw
     ln -s ../../consPhyloP/glire/phyloP60wayGlire.bw \
         mm10.60way.phyloP60wayGlire.bw
     ln -s ../../consPhyloP/vertebrate/phyloP60wayVertebrate.bw \
         mm10.60way.phyloP60wayVertebrate.bw
     ln -s ../../consPhyloP/placental/phyloP60wayPlacental.bw \
         mm10.60way.phyloP60wayPlacental.bw
     ln -s ../../consPhyloP/euarchontoglires/phyloP60wayEuarchontoGlires.bw \
         mm10.60way.phyloP60wayEuarchontoglire.bw
     ln -s ../../consPhyloP/primate/phyloP60wayPrimate.bw \
         mm10.60way.phyloP60wayPrimate.bw
 
     time md5sum *.mod *.bw > md5sum.txt &
     #   real    20m17.082s
 
     # obtain the README.txt from hg19/phyloP46way and update for this
     #   situation
     ln -s `pwd`/*.mod `pwd`/*.bw `pwd`/md5sum.txt `pwd`/README.txt \
       /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP60way
 
     ###########################################################################
     ## create upstream refGene maf files
    cd /hive/data/genomes/mm10/bed/multiz60way/downloads/multiz60way
     # bash script
 #!/bin/sh
 for S in 1000 2000 5000
 do
     echo "making upstream${S}.maf"
     featureBits mm10 refGene:upstream:${S} -fa=/dev/null -bed=stdout \
         | perl -wpe 's/_up[^\t]+/\t0/' | sort -k1,1 -k2,2n \
         | /cluster/bin/$MACHTYPE/mafFrags mm10 multiz60way \
                 stdin stdout \
                 -orgs=/hive/data/genomes/mm10/bed/multiz60way/species.list \
         | gzip -c > upstream${S}.maf.gz
     echo "done upstream${S}.maf.gz"
 done
     #   real    199m45.558s
 
     md5sum *.nh *.maf.gz > md5sum.txt
     #   real    27m59.778s
 
     # obtain the README.txt from hg19/multiz46way and update for this
     #   situation
     ln -s `pwd`/*.nh `pwd`/*.maf.gz `pwd`/*.txt \
         /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/multiz60way
 
 #############################################################################
 # hgPal downloads (DONE - 2012-07-05 - 2012-07-09 - Hiram)
 #   FASTA from 60-way for refGene
 
     ssh hgwdev
     screen -S mm10HgPal
     mkdir /hive/data/genomes/mm10/bed/multiz60way/pal
     cd /hive/data/genomes/mm10/bed/multiz60way/pal
     cat ../species.list | tr '[ ]' '[\n]' > order.list
 
     export mz=multiz60way
     export gp=refGene
     export db=mm10
     export I=0
     mkdir exonAA exonNuc
     for C in `sort -nk2 ../../../chrom.sizes | cut -f1`
     do
         I=`echo $I | awk '{print $1+1}'`
 	echo "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.list stdout | gzip -c > exonNuc/$C.exonNuc.fa.gz &"
 	echo "mafGene -chrom=$C -exons $db $mz $gp order.list stdout | gzip -c > exonAA/$C.exonAA.fa.gz &"
         if [ $I -gt 6 ]; then
             echo "date"
             echo "wait"
             I=0
         fi
     done > $gp.jobs
     echo "date" >> $gp.jobs
     echo "wait" >> $gp.jobs
 
     time sh -x $gp.jobs > $gp.jobs.log 2>&1 &
     #   real    93m34.376s
 
     mz=multiz60way
     gp=refGene
     db=mm10
     time zcat exonAA/*.gz | gzip -c > $gp.$mz.exonAA.fa.gz
     #   real    1m16.821s
     zcat exonNuc/*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz
 
     rm -rf exonAA exonNuc
 
     # we're only distributing exons at the moment
     mz=multiz60way
     gp=refGene
     db=mm10
     pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
     mkdir -p $pd
     md5sum *.fa.gz > md5sum.txt
     ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
     ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
     ln -s `pwd`/md5sum.txt $pd/
 
 
 #########################################################################
 # lastz nile tilapia oreNil2 (DONE - 2012-04-02 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10OreNil2
     mkdir /hive/data/genomes/mm10/bed/lastzOreNil2.2012-04-11
     cd /hive/data/genomes/mm10/bed/lastzOreNil2.2012-04-11
 
     cat << '_EOF_' > DEF
 # Mouse vs. nile tilapia
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: nile tilapia oreNil2
 SEQ2_DIR=/hive/data/genomes/oreNil2/oreNil2.2bit
 SEQ2_LEN=/hive/data/genomes/oreNil2/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=10
 
 BASE=/hive/data/genomes/mm10/bed/lastzOreNil2.2012-04-11
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # when not present, SEQ2_LIMIT is a default 100
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
     #   real    108m51.232s
 
     cat fb.mm10.chainOreNil2Link.txt
     #   51909908 bases of 2652783500 (1.957%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzOreNil2.2012-04-11 lastz.oreNil2
 
     #	and for the swap
     mkdir /hive/data/genomes/oreNil2/bed/blastz.mm10.swap
     cd /hive/data/genomes/oreNil2/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzOreNil2.2012-04-11/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
     #   real    9m8.213s
     cat  fb.oreNil2.chainMm10Link.txt
     #   49704887 bases of 816084674 (6.091%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/oreNil2/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 #########################################################################
 # LASTZ pig susScr3 (DONE - 2012-04-13 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10SusScr3
     mkdir /hive/data/genomes/mm10/bed/lastzSusScr3.2012-04-13
     cd /hive/data/genomes/mm10/bed/lastzSusScr3.2012-04-13
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # pig vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: pig SusScr3
 SEQ2_DIR=/hive/data/genomes/susScr3/susScr3.2bit
 SEQ2_LEN=/hive/data/genomes/susScr3/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=30
 
 BASE=/hive/data/genomes/mm10/bed/lastzSusScr3.2012-04-13
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #   real    1086m29.992s
     cat fb.mm10.chainSusScr3Link.txt
     #   681359766 bases of 2652783500 (25.685%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzSusScr3.2012-04-13 lastz.susScr3
 
     mkdir /hive/data/genomes/susScr3/bed/blastz.mm10.swap
     cd /hive/data/genomes/susScr3/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzSusScr3.2012-04-13/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #   real    104m56.258s
     cat fb.susScr3.chainMm10Link.txt
     #   743574150 bases of 2525294057 (29.445%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/susScr3/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ armadillo dasNov3 (DONE - 2012-04-13 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10DasNov3
     mkdir /hive/data/genomes/mm10/bed/lastzDasNov3.2012-04-13
     cd /hive/data/genomes/mm10/bed/lastzDasNov3.2012-04-13
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # armadillo vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: armadillo DasNov3
 SEQ2_DIR=/hive/data/genomes/dasNov3/dasNov3.2bit
 SEQ2_LEN=/hive/data/genomes/dasNov3/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=200
 
 BASE=/hive/data/genomes/mm10/bed/lastzDasNov3.2012-04-13
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #   real    1125m34.124s
     cat fb.mm10.chainDasNov3Link.txt
     #   668529920 bases of 2652783500 (25.201%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzDasNov3.2012-04-13 lastz.dasNov3
 
     # better to have reciprocal best for this one since it is low coverage:
     cd /hive/data/genomes/mm10/bed/lastzDasNov3.2012-04-13
     time doRecipBest.pl mm10 dasNov3 -buildDir=`pwd` -workhorse=hgwdev \
 	> best.log 2>&1 &
     #   real    116m51.114s
 
     mkdir /hive/data/genomes/dasNov3/bed/blastz.mm10.swap
     cd /hive/data/genomes/dasNov3/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzDasNov3.2012-04-13/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    150m51.653s
     cat fb.dasNov3.chainMm10Link.txt
     #   695161920 bases of 3299882059 (21.066%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/dasNov3/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ cat felCat5 (DONE - 2012-04-13 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10FelCat5
     mkdir /hive/data/genomes/mm10/bed/lastzFelCat5.2012-04-13
     cd /hive/data/genomes/mm10/bed/lastzFelCat5.2012-04-13
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # cat vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: cat FelCat5
 SEQ2_DIR=/hive/data/genomes/felCat5/felCat5.2bit
 SEQ2_LEN=/hive/data/genomes/felCat5/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=30
 
 BASE=/hive/data/genomes/mm10/bed/lastzFelCat5.2012-04-13
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #   real    1029m54.494s
     cat fb.mm10.chainFelCat5Link.txt
     #   788544084 bases of 2652783500 (29.725%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzFelCat5.2012-04-13 lastz.felCat5
 
     # better to have reciprocal best for this one since it is low coverage:
     cd /hive/data/genomes/mm10/bed/lastzFelCat5.2012-04-13
     time doRecipBest.pl mm10 felCat5 -buildDir=`pwd` -workhorse=hgwdev \
 	> best.log 2>&1 &
     #	real    106m30.011s
 
     mkdir /hive/data/genomes/felCat5/bed/blastz.mm10.swap
     cd /hive/data/genomes/felCat5/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzFelCat5.2012-04-13/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	 real 124m25.850s
     cat fb.felCat5.chainMm10Link.txt
     #   762344436 bases of 2364296207 (32.244%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/felCat5/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ naked mole rat hetGla2 (DONE - 2012-04-14 - Hiram)
     #	establish a screen to control this job
     screen -S mm10HetGla2
     mkdir /hive/data/genomes/mm10/bed/lastzHetGla2.2012-04-14
     cd /hive/data/genomes/mm10/bed/lastzHetGla2.2012-04-14
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # naked mole rat vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: naked mole rat HetGla2
 SEQ2_DIR=/hive/data/genomes/hetGla2/hetGla2.2bit
 SEQ2_LEN=/hive/data/genomes/hetGla2/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=30
 
 BASE=/hive/data/genomes/mm10/bed/lastzHetGla2.2012-04-14
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #   real    690m7.626s
     cat fb.mm10.chainHetGla2Link.txt
     #	853221843 bases of 2652783500 (32.163%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzHetGla2.2012-04-14 lastz.hetGla2
 
     mkdir /hive/data/genomes/hetGla2/bed/blastz.mm10.swap
     cd /hive/data/genomes/hetGla2/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzHetGla2.2012-04-14/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #   real    92m24.775s
     cat fb.hetGla2.chainMm10Link.txt
     #   879356778 bases of 2314771103 (37.989%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/hetGla2/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ dolphin turTru2 (DONE - 2012-04-14 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10TurTru2
     mkdir /hive/data/genomes/mm10/bed/lastzTurTru2.2012-04-14
     cd /hive/data/genomes/mm10/bed/lastzTurTru2.2012-04-14
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # dolphin vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: dolphin TurTru2
 SEQ2_DIR=/hive/data/genomes/turTru2/turTru2.2bit
 SEQ2_LEN=/hive/data/genomes/turTru2/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=500
 
 BASE=/hive/data/genomes/mm10/bed/lastzTurTru2.2012-04-14
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #   real    624m36.508s
     cat fb.mm10.chainTurTru2Link.txt
     #   802921354 bases of 2652783500 (30.267%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzTurTru2.2012-04-14 lastz.turTru2
 
     # better to have reciprocal best for this one since it is low coverage:
     cd /hive/data/genomes/mm10/bed/lastzTurTru2.2012-04-14
     time doRecipBest.pl mm10 turTru2 -buildDir=`pwd` -workhorse=hgwdev \
 	> best.log 2>&1 &
     #   real    44m47.753s
 
     mkdir /hive/data/genomes/turTru2/bed/blastz.mm10.swap
     cd /hive/data/genomes/turTru2/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzTurTru2.2012-04-14/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #   real    124m17.088s
     cat fb.turTru2.chainMm10Link.txt
     #   781169007 bases of 2332402443 (33.492%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/turTru2/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ Gibbon nomLeu2 (DONE - 2012-04-14 - Hiram)
     screen -S mm10NomLeu2
     mkdir /hive/data/genomes/mm10/bed/lastzNomLeu2.2012-04-14
     cd /hive/data/genomes/mm10/bed/lastzNomLeu2.2012-04-14
 
     cat << '_EOF_' > DEF
 # gibbon vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Gibbon NomLeu2
 SEQ2_DIR=/hive/data/genomes/nomLeu2/nomLeu2.2bit
 SEQ2_LEN=/hive/data/genomes/nomLeu2/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=100
 
 BASE=/hive/data/genomes/mm10/bed/lastzNomLeu2.2012-04-14
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     #	establish a screen to control this job
     screen -S mm10NomLeu2
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #   real    621m38.251s
     cat fb.mm10.chainNomLeu2Link.txt
     #   902774780 bases of 2652783500 (34.031%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzNomLeu2.2012-04-14 lastz.nomLeu2
 
     mkdir /hive/data/genomes/nomLeu2/bed/blastz.mm10.swap
     cd /hive/data/genomes/nomLeu2/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzNomLeu2.2012-04-14/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #   real 92m24.775s
     cat fb.nomLeu2.chainMm10Link.txt
     #   889660339 bases of 2756609047 (32.274%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/nomLeu2/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 #####################################################################
 # tRNAs track (DONE 2012-04-02 Chin)
 #
     # Please refer to the generic tRNAs track build documentation
     #   ~/kent/src/hg/makeDb/doc/tRNAsTrack.txt
     # for details about how the track was built.
 
 ##############################################################################
 # orfeome 2012-03-16  (markd)
 
 enabled ORFeome tracks in etc/genbank.conf and reload genbank
 
 ############################################################################
 # construct liftOver to mm9 (DONE - 2012-04-30 - Hiram)
     screen -S 10        # manage this longish running job in a screen
     mkdir /hive/data/genomes/mm10/bed/blat.mm9.2012-04-30
     cd /hive/data/genomes/mm10/bed/blat.mm9.2012-04-30
     # check it with -debug first to see if it is going to work:
     time doSameSpeciesLiftOver.pl -buildDir=`pwd` -bigClusterHub=swarm \
         -ooc=/scratch/data/mm10/mm10.11.ooc \
         -debug -dbHost=hgwdev -workhorse=hgwdev mm10 mm9 > do.log 2>&1
     # if that is OK, then run it:
     time doSameSpeciesLiftOver.pl -buildDir=`pwd` -bigClusterHub=swarm \
         -ooc=/scratch/data/mm10/mm10.11.ooc \
         -dbHost=hgwdev -workhorse=hgwdev mm10 mm9 > do.log 2>&1
     #   real    95m21.635s
 
     # verify this file exists:
     ls -og -L /gbdb/mm10/liftOver/mm10ToMm9.over.chain.gz
 # -rw-rw-r-- 1 535855 Feb  9 12:07 /gbdb/mm9/liftOver/mm9ToMm10.over.chain.gz
 
     # and try out the conversion on genome-test from mm9 to mm10
 ############################################################################
 # EXONIPHY MM10, lifted from hg19 (DONE - braney 2012-05-29)
 #	needed for ucscGenes building
     # create a syntenic liftOver chain file
     cd /cluster/data/hg19/bed/lastz.mm10/axtChain
     time nice -n +19 netFilter -syn hg19.mm10.net.gz \
 	| netChainSubset -verbose=0 stdin hg19.mm10.all.chain.gz stdout \
 	| chainStitchId stdin stdout | gzip -c > hg19.mm10.syn.chain.gz
     #real    2m38.915s
     #user    3m29.458s
     #sys     0m16.033s
 
     #	slightly smaller than the ordinary liftOver chain file:
 -rw-rw-r-- 1 78419424 Mar  7 18:40 hg19.mm10.over.chain.gz
 -rw-rw-r-- 1 74588027 May 29 12:29 hg19.mm10.syn.chain.gz
 
     # exoniphyMm10.gp is prepared as follows
     mkdir /cluster/data/mm10/bed/exoniphy
     cd /cluster/data/mm10/bed/exoniphy
     hgsql hg19 -e "select * from exoniphy" -N | cut  -f 2-16 > exoniphyHg19.gp
     time nice -n +19 liftOver -genePred exoniphyHg19.gp \
 	/cluster/data/hg19/bed/lastz.mm10/axtChain/hg19.mm10.syn.chain.gz \
 	    exoniphyMm10.gp unmapped
     # real    16m0.334s
     # user    15m46.462s
     # sys     0m7.115s
 
     wc -l *
     # 186601 exoniphyHg19.gp
     # 178821 exoniphyMm10.gp
     # 15560 unmapped
 
     cd /cluster/data/mm10/bed/exoniphy
     nice -n +19 hgLoadGenePred -genePredExt mm10 exoniphy exoniphyMm10.gp
     nice -n +19 featureBits mm10 exoniphy
     # 26795543 bases of 2652783500 (1.010%) in intersection
     nice -n +19 featureBits mm9 exoniphy
     #	25931742 bases of 2620346127 (0.990%) in intersection
 
 ##############################################################################
 # LASTZ cow bosTau6 (DONE - 2012-06-19 - Chin)
     # establish a screen to control this job with a name to indicate
     # what it is
     screen -S mm10BosTau6
     mkdir /hive/data/genomes/mm10/bed/lastzBosTau6.2012-06-19
     cd /hive/data/genomes/mm10/bed/lastzBosTau6.2012-06-19
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #   number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # cow vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: cow BosTau6
 SEQ2_DIR=/scratch/data/bosTau6/bosTau6.2bit
 SEQ2_LEN=/scratch/data/bosTau6/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=50
 
 BASE=/hive/data/genomes/mm10/bed/lastzBosTau6.2012-06-19
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
     `pwd`/DEF \
     -syntenicNet \
     -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
     -chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #   real    212m21.604s
 
     cat fb.mm10.chainBosTau6Link.txt
     #   700039696 bases of 2652783500 (26.389%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzBosTau6.2012-06-19 lastz.bosTau6
 
     # swap
     mkdir /hive/data/genomes/bosTau6/bed/blastz.mm10.swap
     cd /hive/data/genomes/bosTau6/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
     /hive/data/genomes/mm10/bed/lastzBosTau6.2012-06-19/DEF \
     -swap -syntenicNet \
     -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
     -chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #   real    72m13.925s
     cat fb.bosTau6.chainMm10Link.txt
     #   688651806 bases of 2649682029 (25.990%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/bosTau6/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # lastz Medium Ground Finch geoFor1 (DONE - 2012-07-29 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10
     mkdir /hive/data/genomes/mm10/bed/lastzGeoFor1.2012-07-29
     cd /hive/data/genomes/mm10/bed/lastzGeoFor1.2012-07-29
 
     cat << '_EOF_' > DEF
 # Mouse vs. medium ground finch
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Medium Ground Finch GeoFor1
 SEQ2_DIR=/hive/data/genomes/geoFor1/geoFor1.2bit
 SEQ2_LEN=/hive/data/genomes/geoFor1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=50
 
 BASE=/hive/data/genomes/mm10/bed/lastzGeoFor1.2012-07-29
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # when not present, SEQ2_LIMIT is a default 100
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
     #   real    251m4.194s
     cat fb.mm10.chainGeoFor1Link.txt
     #   93984241 bases of 2652783500 (3.543%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzGeoFor1.2012-07-29 lastz.geoFor1
 
     #	and for the swap
     mkdir /hive/data/genomes/geoFor1/bed/blastz.mm10.swap
     cd /hive/data/genomes/geoFor1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzGeoFor1.2012-07-29/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
     #   real    10m0.875s
     cat  fb.geoFor1.chainMm10Link.txt
     #   80273915 bases of 1041286029 (7.709%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/geoFor1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 #########################################################################
 # construct assembly fragments table (DONE - 2012-09-11 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/assemblyFrags
     cd /hive/data/genomes/mm10/bed/assemblyFrags
     zgrep -h -v "^#" "${F}"
 
     zgrep -h -v "^#" ../../genbank/Primary_Assembly/assembled_chromosomes/AGP/*.comp.agp.gz \
     | awk '$5 != "N"' \
         | awk '{printf "%s\t%d\t%d\t%s\t0\t%s\n", $1,$2-1,$3,$6,$9}' \
     | sed -e 's/CM000994.2/chr1/; s/CM000995.2/chr2/; s/CM000996.2/chr3/; s/CM000997.2/chr4/; s/CM000998.2/chr5/; s/CM000999.2/chr6/; s/CM001000.2/chr7/; s/CM001001.2/chr8/; s/CM001002.2/chr9/; s/CM001003.2/chr10/; s/CM001004.2/chr11/; s/CM001005.2/chr12/; s/CM001006.2/chr13/; s/CM001007.2/chr14/; s/CM001008.2/chr15/; s/CM001009.2/chr16/; s/CM001010.2/chr17/; s/CM001011.2/chr18/; s/CM001012.2/chr19/; s/CM001013.2/chrX/; s/CM001014.2/chrY/;' > chr.asmFrag.bed
 
     zgrep -h -v "^#" ../../genbank/Primary_Assembly/unlocalized_scaffolds/AGP/*.agp.gz \
         | awk '$5 != "N"' \
         | awk '{printf "%s\t%d\t%d\t%s\t0\t%s\n", $1,$2-1,$3,$6,$9}' \
     | sed -e "s#GL456233.1#chrX_GL456233_random#; s#GL456216.1#chr4_GL456216_random#; s#JH584299.1#chr5_JH584299_random#; s#JH584301.1#chrY_JH584301_random#; s#JH584300.1#chrY_JH584300_random#; s#JH584303.1#chrY_JH584303_random#; s#JH584302.1#chrY_JH584302_random#; s#JH584298.1#chr5_JH584298_random#; s#JH584297.1#chr5_JH584297_random#; s#JH584296.1#chr5_JH584296_random#; s#JH584295.1#chr4_JH584295_random#; s#JH584294.1#chr4_JH584294_random#; s#JH584293.1#chr4_JH584293_random#; s#JH584292.1#chr4_JH584292_random#; s#GL456354.1#chr5_GL456354_random#; s#GL456350.1#chr4_GL456350_random#; s#GL456221.1#chr1_GL456221_random#; s#GL456219.1#chr7_GL456219_random#; s#GL456213.1#chr1_GL456213_random#; s#GL456212.1#chr1_GL456212_random#; s#GL456211.1#chr1_GL456211_random#; s#GL456210.1#chr1_GL456210_random#;" > chrUL.asmFrag.bed
 
     zgrep -h -v "^#" ../../genbank/Primary_Assembly/unplaced_scaffolds/AGP/*.agp.gz \
         | awk '$5 != "N"' | sed -e 's/\.1\t/\t/' \
         | awk '{printf "chrUn_%s\t%d\t%d\t%s\t0\t%s\n", $1,$2-1,$3,$6,$9}' \
         > chrUn.asmFrag.bed
 
     cat chr.asmFrag.bed chrUL.asmFrag.bed chrUn.asmFrag.bed > mm10.asmFrag.bed
     # add the chrM identity
     echo -e "chrM\t0\t1629\tAY172335.1\t0\t+" >> mm10.asmFrag.bed
     hgLoadBed mm10 assemblyFrags mm10.asmFrag.bed
     featureBits mm10 assemblyFrags
     #   2652769048 bases of 2652783500 (99.999%) in intersection
     # should be silent when all chr names are correct:
     checkTableCoords mm10 assemblyFrags
 
 #########################################################################
 # construct ucscToEnsembl table (DONE - 2012-09-11 - Hiram)
     mkdir /hive/data/genomes/mm10/ensembl
     cd /hive/data/genomes/mm10/ensembl
     wget --timestamping \
 'ftp://ftp.ensembl.org/pub/release-68/fasta/mus_musculus/dna/Mus_musculus.GRCm38.68.dna.toplevel.fa.gz'
 
     wget --timestamping \
 'ftp://ftp.ensembl.org/pub/release-68/fasta/mus_musculus/dna/Mus_musculus.GRCm38.68.dna.nonchromosomal.fa.gz'
 
     faCount *.fa.gz > faCount.txt
     egrep -v "total|seq" faCount.txt  | awk '{print $1,$2}' \
         | sort -u | sort -k2nr | sed -e "s/ /\t/" > ensembl.chrom.sizes
 
     mkdir /hive/data/genomes/mm10/bed/ucscToEnsembl
     cd /hive/data/genomes/mm10/bed/ucscToEnsembl
     awk '{printf "%d\t%s\n", $2,$1}' ../../chrom.sizes | sort > sizes.chrom.ucsc
     awk '{printf "%d\t%s\n", $2,$1}' ../../ensembl/ensembl.chrom.sizes \
         | sort > sizes.chrom.ensembl
     join sizes.chrom.ucsc sizes.chrom.ensembl \
         | awk '{printf "%s\t%s\n", $2,$3}' > ucscToEnsembl.tab
 
     cut -f1 ucscToEnsembl.tab | awk '{print length($1)}' | sort -rn | head -1
     #   20
 
     cat << '_EOF_' > ucscToEnsembl.sql
 # UCSC to Ensembl chr name translation
 CREATE TABLE ucscToEnsembl (
     ucsc varchar(255) not null,        # UCSC chromosome name
     ensembl varchar(255) not null,     # Ensembl chromosome name
               #Indices
     PRIMARY KEY(ucsc(20))
 );
 '_EOF_'
 
     hgLoadSqlTab mm10 ucscToEnsembl ucscToEnsembl.sql ucscToEnsembl.tab
 
 #########################################################################
 # GRC Incident database (DONE - 2012-09-21 - Hiram)
     # updated the automatic scripts to include the build of this track
     #   on Mm10
     # this procedure is run as a cron job in Hiram's account:
 
     #	43 09 * * * /hive/data/outside/grc/incidentDb/runUpdate.sh makeItSo
 
     # using the two scripts there: runUpdate.sh and update.sh
     # which are checked into the source tree as files:
     #	src/hg/utils/automation/grcIncidentUpdate.sh
     #	src/hg/utils/automation/grcRunIncidentUpdate.sh
 
     # they fetch the XML files from NCBI, convert them to SQL text
     # files, construct a bigBed file, and push it to genomewiki if
     # it is an update from the previous version
 
     # the table in the dataBase is: grcIncidentDb
     # which is the URL to the bb file, a single row:
     #   http://genomewiki.ucsc.edu/images/a/a4/Mm10.grcIncidentDb.bb
 
     # construct the table after running the script once manually:
     hgBbiDbLink mm10 grcIncidentDb \
         "http://genomewiki.ucsc.edu/images/a/a4/Mm10.grcIncidentDb.bb"
 
 #########################################################################
 # GRCm38.p1 patch 1 (DONE - 2012-09-21 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/patch1
     cd /hive/data/genomes/mm10/bed/patch1
     rsync -a -P rsync://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Mus_musculus/GRCm38.p1/ ./genbank/
     # slight modifications to this script from hg19 patch9 work:
     ./gatherNames.pl genbank > ucscNames.patch1.txt
     # examine the names for sanity:
     awk '{print $NF}' ucscNames.patch1.txt | sort
     # and they should not be longer than 31 characters:
     awk '{print $NF}' ucscNames.patch1.txt | sort | awk '{print length($0)}' \
         | sort -n | tail
     # script from hg19 patch9, update the variable patchName
      ./mkTables.pl patches.chrom.sizes ucscNames.patch1.txt genbank/PATCHES/alt_scaffolds/AGP/alt.scaf.agp.gz
     # output to stdout is the contents of alt.scaf.agp.gz
     # constructs ctgPos.txt chromInfo.txt gap.txt gold.txt
     # script from hg19 patch9, update the variable patchName
     ./mkCtgPos2.pl ucscNames.patch1.txt patches.chrom.sizes > ctgPos2.txt
     cp -p ../patch5/mkHapLocate.pl .
     ./mkHapLocate.pl ctgPos.txt \
 	PATCHES/alt_scaffolds/alt_scaffold_placement.txt \
 	> haplotypeLocations.bed
     cp -p haplotypeLocations.bed altSequence.bed
 
     ./mkFasta.pl ucscNames.patch1.txt > mm10.patch1.fa
     # the build of mm10Patch1 can be seen in mm10Patch1.txt
 
     egrep -v "32,32,190" altSequence.bed  \
 	| awk '{printf "%s\t%d\t%d\t%s\t%d\t%s\n", $1,$2,$3,$4,$5,$6}' \
 	    > altSeqPatchesP1.tab
     # no haplotypes yet, this is nothing:
     egrep "32,32,190" altSequence.bed  \
 	| awk '{printf "%s\t%d\t%d\t%s\t%d\t%s\n", $1,$2,$3,$4,$5,$6}' \
 	    > altSeqHaplotypesP1.tab
     # verify none lost
     wc -l altSequence.bed altSeqPatchesP1.tab altSeqHaplotypesP1.tab
     #   9 altSequence.bed
     #   9 altSeqPatchesP1.tab
     #   0 altSeqHaplotypesP1.tab
 
     # not necessary, there are none yet:
     hgLoadBed mm10 altSeqHaplotypesP1 altSeqHaplotypesP1.tab
     #	Loaded 75 elements of size 6
     hgLoadBed mm10 altSeqPatchesP1 altSeqPatchesP1.tab
     #   Read 9 elements of size 6 from altSeqPatchesP1.tab
 
     #    these tables are part of mouse/mm10/altSeqComposite1.ra
 
 ##############################################################################
 # Haplotype track (WORKING - 2012-10-01 - Hiram)
 
 # Warning: these are all actually alternate scaffolds from OTHER mouse strains
 # These haplotypes are NOT from mm10.  Probably the table should have been called NonMm10Haplotypes!
 
 # The directory after genbank/ identifies the strain, e.g. 129S2_SvPas
 #../../../mm10/genbank/129S2_SvPas/alt_scaffolds/alt_scaffold_placement.txt
 #../../../mm10/genbank/129P2_OlaHsd/alt_scaffolds/alt_scaffold_placement.txt
 #../../../mm10/genbank/NOD_ShiLtJ/alt_scaffolds/alt_scaffold_placement.txt
 #../../../mm10/genbank/A_J/alt_scaffolds/alt_scaffold_placement.txt
 #../../../mm10/genbank/CAST_Ei/alt_scaffolds/alt_scaffold_placement.txt
 #../../../mm10/genbank/129X1_SvJ/alt_scaffolds/alt_scaffold_placement.txt
 #../../../mm10/genbank/AKR_J/alt_scaffolds/alt_scaffold_placement.txt
 #../../../mm10/genbank/RIII/alt_scaffolds/alt_scaffold_placement.txt
 #../../../mm10/genbank/129S6_SvEvTac/alt_scaffolds/alt_scaffold_placement.txt
 #../../../mm10/genbank/129S7_SvEvBrd-Hprt-b-m2/alt_scaffolds/alt_scaffold_placement.txt
 #../../../mm10/genbank/BALB_c/alt_scaffolds/alt_scaffold_placement.txt
 #../../../mm10/genbank/129S1_SvImJ/alt_scaffolds/alt_scaffold_placement.txt
 #../../../mm10/genbank/NOD_MrkTac/alt_scaffolds/alt_scaffold_placement.txt
 
 
     cat << '_EOF_' > mkBedFile.pl
 #!/usr/bin/env perl
 
 # mkBedFile.pl - scan a genbank directory hierarchy for alt_scaffolds
 # placement files and print one bedDetail row per placed scaffold to stdout.
 #
 # Argument: the directory to search (it must exist).
 # Output columns (tab separated), taken from the whitespace-split fields of
 # each placement line:
 #   chr<field 5>, field 11, field 12, field 0, 0, field 8, field 3,
 #   "<B>Region&nbsp;name:&nbsp;</B><field 7>"
 # The name of each placement file processed is echoed to stderr.
 
 use strict;
 use warnings;
 
 # Print the usage message to stderr and exit with status 255.
 sub usage {
     print STDERR "usage: ./mkBedFile.pl ../../mm10/genbank > mm10Haplotypes.bed\n";
     print STDERR "expecting the Mus_musculus/GRCm38.p1/ hierarchy in ./genbank from NCBI\n";
     exit 255;
 }
 
 # exactly one argument is required: the directory to scan
 usage() if (scalar(@ARGV) != 1);
 
 my $patchDir = shift;
 
 if ( ! -d $patchDir ) {
     # trailing newline keeps this message separate from the usage text
     print STDERR "ERROR: given directory $patchDir is not a directory or does not exist\n";
     usage();
 }
 
 # all placement files under alt_scaffolds directories, skipping any path
 # that mentions UNKNOWN
 my @placeList = split('\n',`find $patchDir -type f | grep placement.txt | grep alt_scaffolds | grep -v UNKNOWN`);
 foreach my $placeFile (@placeList) {
     printf STDERR "# %s\n", $placeFile;
     # read the file directly; comment lines are filtered below instead of
     # piping through an external grep with an unquoted path
     open (my $fh, '<', $placeFile) or die "can not read $placeFile: $!";
     while (my $line = <$fh>) {
         next if ($line =~ m/^#/);	# header/comment lines
         chomp $line;
         my @a = split('\s+', $line);
         # blank or truncated lines do not have the fields used below
         next if (scalar(@a) < 13);
         # "na" in the start column means there is no placement to report
         next if ($a[11] eq "na");
         # a strand of "b" is reported as "+"
         $a[8] = "+" if ($a[8] eq "b");
         my $descr = sprintf("<B>Region&nbsp;name:&nbsp;</B>%s", $a[7]);
         # NOTE(review): field 11 is written unchanged as chromStart; if the
         # placement file coordinates are 1-based this may need to be
         # $a[11]-1 for BED - confirm before reusing this script
         printf "chr%s\t%d\t%d\t%s\t0\t%s\t%s\t%s\n",
                 $a[5], $a[11], $a[12], $a[0], $a[8], $a[3], $descr;
     }
     close ($fh);
 }
 '_EOF_'
     # << happy emacs
     chmod +x mkBedFile.pl
 
     ./mkBedFile.pl ../../../mm10/genbank > mm10Haplotypes.bedDetail
     cat << '_EOF_' > mm10Haplotypes.sql
 CREATE TABLE mm10Haplotypes (
     chrom varchar(255) not null,   # Reference sequence chromosome or scaffold
     chromStart int unsigned not null,   # Start position in chromosome
     chromEnd int unsigned not null,     # End position in chromosome
     name varchar(255) not null, # Short Name of item
     score int unsigned, # Score from 0-1000
     strand char(1),     # + or -
     id varchar(255) not null,   # ID to bed used in URL to link back
     description longblob not null, # Long description of item for the details page
     #Indices
     INDEX(chrom, chromStart)
 );
 '_EOF_'
 
     hgLoadSqlTab mm10 mm10Haplotypes mm10Haplotypes.sql mm10Haplotypes.bedDetail
 
     # trackDb entry:
 track mm10Haplotypes
 shortLabel Alt. strains
 longLabel Alternate mouse strains, mapped to reference as haplotypes
 group varRep
 priority 111
 visibility hide
 type bedDetail 8
 url http://www.ncbi.nlm.nih.gov/nuccore/$$
 urlLabel NCBI Nucleotide:
 
 ##########################################################################
 ##  CYTOBAND - ideogram track (DONE - 2012-10-19 - Hiram)
     ssh hgwdev
     mkdir -p /hive/data/outside/ncbi/ideogram/2012-10
     cd /hive/data/outside/ncbi/ideogram/2012-10
     # fetch all the ideogram files:
     rsync -a -P rsync://ftp.ncbi.nlm.nih.gov/pub/gdp/ ./
 
     mkdir /hive/data/genomes/mm10/bed/cytoband
     cd /hive/data/genomes/mm10/bed/cytoband
 
     # Create bed file
     $HOME/kent/src/utils/ncbi/createNcbiCytoBand.pl \
 /hive/data/outside/ncbi/ideogram/2012-10/ideogram_10090_GCF_000000055.19_NA_V2
 
     ## can now verify before load:
     $HOME/kent/src/utils/ncbi/cytoBandVerify.pl
     #	everything checks out OK on 21 chroms
     # Load the bed file
     hgLoadBed -noBin -sqlTable=$HOME/kent/src/hg/lib/cytoBand.sql \
 	mm10 cytoBand cytoBand.bed
     #   Read 403 elements of size 5 from cytoBand.bed
     # Make cytoBandIdeo track for ideogram gif on hgTracks page.
     # For mouse cytoBandIdeo is just a replicate of the cytoBand track.
     hgsql -e "drop table cytoBandIdeo;" mm10
     hgsql mm10 -e "create table cytoBandIdeo (index(chrom(10),chromStart)) as select * from cytoBand;"
 
 ##########################################################################
 # CYTOBANDIDEO update -  (DONE - 2013-02-27 - kuhn)
 # adding rows for chroms with no cytology
 # this is just for navigation/orientation on those chroms
 
     set db=mm10
     set sql=~/kent/src/hg/lib/cytoBandIdeo.sql
     # make backup of existing table
     hgsql -e "CREATE TABLE cytoBandIdeoCopy SELECT * FROM cytoBandIdeo" $db
     # dump existing table
     hgsql -N -e "SELECT * FROM cytoBandIdeo" $db > $db.cytoBandIdeo
 
     # find chroms already covered
     hgsql -N -e 'SELECT chrom FROM cytoBandIdeo' $db \
        | sort -u > $db.coveredNames
     # make cytoBand records for chroms not already covered
     hgsql -N -e 'SELECT chrom, size FROM chromInfo' $db \
       | grep -wvf $db.coveredNames \
       | awk '{print $1"\t0\t"$2"\t\tgneg"}' > $db.cytoBandNew
     # check
     wc -l $db.*
     # combine and sort
     cat $db.cytoBandNew $db.cytoBandIdeo > $db.cytoBandIdeoFull
     bedSort $db.cytoBandIdeoFull $db.cytoBandIdeoFull
     # replace existing table
     hgsql -e "DROP TABLE cytoBandIdeo" $db
     hgLoadSqlTab $db cytoBandIdeo $sql $db.cytoBandIdeoFull
     # check and then drop copy
 
 ##########################################################################
 # lastz Lamprey petMar2 (DONE - 2012-10-17 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S petMar2
     mkdir /hive/data/genomes/mm10/bed/lastzPetMar2.2012-10-19
     cd /hive/data/genomes/mm10/bed/lastzPetMar2.2012-10-19
 
     cat << '_EOF_' > DEF
 # Mouse vs. Lamprey
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_M=50
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Lamprey PetMar2
 SEQ2_DIR=/hive/data/genomes/petMar2/petMar2.2bit
 SEQ2_LEN=/hive/data/genomes/petMar2/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=60
 
 BASE=/hive/data/genomes/mm10/bed/lastzPetMar2.2012-10-19
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # when not present, SEQ2_LIMIT is a default 100
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-qRepeats=windowmaskerSdust \
         -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 &
     #   real    218m29.078s
 
     cat fb.mm10.chainPetMar2Link.txt
     #   28262565 bases of 2652783500 (1.065%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzPetMar2.2012-10-19 lastz.petMar2
 
     #	and for the swap
     mkdir /hive/data/genomes/petMar2/bed/blastz.mm10.swap
     cd /hive/data/genomes/petMar2/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzPetMar2.2012-10-19/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -swap -chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
     #   real    7m2.754s
     cat  fb.petMar2.chainMm10Link.txt
     #	20923095 bases of 647368134 (3.232%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/petMar2/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 #########################################################################
 # lastz White Rhino cerSim1 (DONE - 2012-10-23 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10CerSim1
     mkdir /hive/data/genomes/mm10/bed/lastzCerSim1.2012-10-23
     cd /hive/data/genomes/mm10/bed/lastzCerSim1.2012-10-23
 
     cat << '_EOF_' > DEF
 # Mouse vs. White Rhino
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 SEQ1_LIMIT=10
 
 # QUERY: White Rhino CerSim1
 SEQ2_DIR=/hive/data/genomes/cerSim1/cerSim1.2bit
 SEQ2_LEN=/hive/data/genomes/cerSim1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=20
 
 BASE=/hive/data/genomes/mm10/bed/lastzCerSim1.2012-10-23
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # when not present, SEQ2_LIMIT is a default 100
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #   real    992m45.890s
 
     cat fb.mm10.chainCerSim1Link.txt
     #   942281365 bases of 2652783500 (35.520%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzCerSim1.2012-10-23 lastz.cerSim1
 
     #	and for the swap
     mkdir /hive/data/genomes/cerSim1/bed/blastz.mm10.swap
     cd /hive/data/genomes/cerSim1/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzCerSim1.2012-10-23/DEF \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         -swap -chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #   real    62m44s
     cat  fb.cerSim1.chainMm10Link.txt
     #	926131511 bases of 2366858012 (39.129%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/cerSim1/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 #########################################################################
 # QPCR PRIMERS (DONE - 2012-12-10 - Chin)
 # The track name is changed to "qPCR Primers"
 # Reload table with new track_mouse.BED (2013-01-28)
     # Download
     mkdir /hive/data/outside/Weizmann/qPcrPrimers
     cd /hive/data/outside/Weizmann/qPcrPrimers
     wget http://www.weizmann.ac.il/complex/compphys/software/Amit/primers/mouse/track_mouse.BED
     mkdir -p /hive/data/genomes/mm10/bed/qPcrPrimers
     cat track_mouse.BED | grep -v track \
      > /hive/data/genomes/mm10/bed/qPcrPrimers/qPcrPrimers_mm10.bed
 
     cd /hive/data/genomes/mm10/bed/qPcrPrimers
     hgLoadBed -bedDetail -tab -renameSqlTable \
       -sqlTable=$HOME/kent/src/hg/lib/bedDetail.sql \
       mm10 qPcrPrimers qPcrPrimers_mm10.bed
     # Reading qPcrPrimers_mm10.bed
     # Read 518230 elements of size 14 from qPcrPrimers_mm10.bed
     # Sorted
     # Creating table definition for qPcrPrimers
     # Saving bed.tab
     # Loading mm10
 
     # set the description column to NULL in every row
     hgsql mm10 -ne "UPDATE qPcrPrimers SET description = NULL;"
 
 #########################################################################
 # DBSNP B137 / SNP137 (DONE 12/20/12 angie)
 # Redmine #7043
 
     mkdir -p /hive/data/outside/dbSNP/137/mouse
     cd /hive/data/outside/dbSNP/137/mouse
     # Look at the directory listing of ftp://ftp.ncbi.nih.gov/snp/database/organism_data/
     # to find the subdir name to use as orgDir below (mouse_10090 in this case).
     # Then click into that directory and look for file names like
     #    b(1[0-9][0-9])_*_([0-9]+_[0-9])
     # -- use the first num for build and the second num_num for buildAssembly.
     # jkStuff/liftContigs.lft maps NCBI contig names to chroms; use that for liftUp.
     #
     # Some trial and error was required to get the config.ra just right --
     # the b* filenames don't include buildAssembly!
     # patch contigs needed to be filtered out:
     cat > config.ra <<EOF
 db mm10
 orgDir mouse_10090
 build 137
 buildAssembly
 liftUp /hive/data/genomes/mm10/jkStuff/liftContigs.lft
 EOF
     ~/kent/src/hg/utils/automation/doDbSnp.pl config.ra >& do.log & tail -f do.log
     # Script ended with feedback about needing refAssemblyLabel because dbSNP
     # mapped to more than one assembly; add the label that clearly corresponds to
     # mm10, GRCm38, to config.ra and try again:
     cat > config.ra <<EOF
 db mm10
 orgDir mouse_10090
 build 137
 buildAssembly
 liftUp /hive/data/outside/dbSNP/137/mouse/suggested.lft
 refAssemblyLabel GRCm38
 EOF
     ~/kent/src/hg/utils/automation/doDbSnp.pl -continue=loadDbSnp \
       config.ra >>& do.log & tail -f do.log
     # Script ended with feedback about unrecognized NT_* contigs from dbSNP.
     # Inspect the script-generated suggested.lft for liftUp; it's usually right.
     # For contigs that are labeled as part of GRCm38 but not liftable to mm10,
     # listed in script-generated cantLiftUpSeqNames.txt, do some entrez
     # nucleotide searches for contig IDs and convince yourself that they're all
     # for alt assembly sequences that we don't include in mm10 (e.g. patches,
     # other strains).  Then tell the script to filter out those contigs:
     cut -f 2 cantLiftUpSeqNames.txt > ignoreAltAssemblyContigs.txt
     cat > config.ra <<EOF
 db mm10
 orgDir mouse_10090
 build 137
 buildAssembly
 liftUp /hive/data/outside/dbSNP/137/mouse/suggested.lft
 refAssemblyLabel GRCm38
 ignoreDbSnpContigsFile /hive/data/outside/dbSNP/137/mouse/ignoreAltAssemblyContigs.txt
 EOF
     ~/kent/src/hg/utils/automation/doDbSnp.pl -continue=loadDbSnp \
       config.ra >>& do.log & tail -f do.log
 # ...
 #MultipleAlignments      1667342 This variant aligns in more than one location.
 #ObservedMismatch        4561144 UCSC reference allele does not match any observed allele from dbSNP.
 #
 # *** All done!
     # That is an unusually high count of ObservedMismatch... follow up with dbSNP.
 
 
 #############################################################################
 # FILTER SNP137 (DONE 12/21/12 angie)
     # Redmine #7043
     # Make several tracks that are filtered subsets of snp137:
     # First, filter out the multiply-aligned and/or weight >1 SNPs -> snp137Mult
     # Second, siphon off the common variants -> snp137Common
     # Third, take the (uniquely mapped, not known to be common) variants
     # w/dbSNP's "clinically-assoc" flag -> snp137Flagged
     cd /hive/data/outside/dbSNP/137/mouse
     zcat snp137.bed.gz \
     | perl -we \
       '$minTotal2N = 10; \
        ($multCount, $comCount, $flagCount, $miscCount) = (0,0,0,0); \
        open($mult, "| gzip -c > snp137Mult.bed.gz") || die; \
        open($common,    "| gzip -c > snp137Common.bed.gz") || die; \
        open($flagged,   "| gzip -c > snp137Flagged.bed.gz") || die; \
        open($misc,      "| gzip -c > snp137Misc.bed.gz") || die; \
        while (<>) { \
          @w = split("\t"); \
          if ($w[16] > 1 || $w[17] =~ /MultipleAlignments/) { \
            print $mult $_; \
            $multCount++; \
          } else { \
            my ($alleleFreqCount, $nStr, $freqStr) = ($w[20], $w[22], $w[23]); \
            my @alNs = split(",", $nStr);      die unless scalar(@alNs) == $alleleFreqCount; \
            my @freqs = split(",", $freqStr);  die unless scalar(@freqs) == $alleleFreqCount; \
            my ($total2N, $maxAlleleFreq) = (0, 0); \
            for (my $i = 0;  $i < $alleleFreqCount;  $i++) { \
              $total2N += $alNs[$i]; \
              $maxAlleleFreq = $freqs[$i] if ($freqs[$i] > $maxAlleleFreq); \
            } \
            if ($alleleFreqCount >= 2 && $total2N >= $minTotal2N && $maxAlleleFreq <= 0.99) { \
              print $common $_; \
              $comCount++; \
            } elsif($w[24] =~ /clinically-assoc/)  { \
              print $flagged $_; \
              $flagCount++; \
            } else { \
              print $misc $_; \
              $miscCount++; \
            } \
          } \
        } \
        close($mult);  close($common); close($flagged);  close($misc); \
        print "snp137Mult:    $multCount\nsnp137Common:  $comCount\nsnp137Flagged: $flagCount\n" . \
              "leftover:      $miscCount\n";'
 #snp137Mult:    1671771
 #snp137Common:  2709532
 #snp137Flagged: 0
 #leftover:      66537658
     # It's expected for snp137Flagged to be empty because that's for human SNPs.
 
     # Load tables
     foreach subset (Mult Common)
       hgLoadBed -tab -onServer -tmpDir=/data/tmp -allowStartEqualEnd -renameSqlTable \
         mm10 snp137$subset -sqlTable=snp137.sql snp137$subset.bed.gz
     end
 
 
 ############################################################################
 # DBSNP CODING ANNOTATIONS (137) (DONE 12/21/12 angie)
     # Redmine #7043
     cd /hive/data/outside/dbSNP/137/mouse
     # ncbiFuncAnnotations.txt has NCBI coords: 0-based, fully closed.
     # For anything except an insertion (0 bases between flanks),
     # we need to add 1 to the end coord.  For an insertion, we need
     # to add 1 to the start coord.  Make a hash of the insertion IDs,
     # then look up each ID in ncbiFuncAnnotations.txt to tell which
     # transform to apply.
     # Note: sort -u with the keys below is too restrictive -- we need full line uniq.
     zcat ncbiFuncAnnotations.txt.gz \
     | perl -we 'open($IDS, "zcat ncbiFuncInsertions.ctg.bed.gz |") || die "ids: $!"; \
               while (<$IDS>) { chomp; $ids{$_} = 1; } \
               close($IDS); \
               %coding = (2=>1, 3=>1, 4=>1, 8=>1, 9=>1, 41=>1, 42=>1, 43=>1, 44=>1, 45=>1); \
               while (<>) { \
                 chomp;  @w = split("\t"); # id, ctg, start, end, ... \
                 next unless $coding{$w[5]}; \
                 $bed4 = join("\t", $w[1], $w[2], $w[3], $w[0]); \
                 if (exists $ids{$bed4} && $w[3] == $w[2]+1) { \
                   $w[2]++; # 2-base insertions: increment start coord \
                 } else { \
                   $w[3]++; # increment end coord to get half-open \
                 } \
                 print join("\t", @w) . "\n"; \
               }' \
     | sort -k1n,1n -k2,2 -k3n,3n -k5,5 -k6n,6n \
     | uniq \
       > ncbiCodingAnnotations.txt
     wc -l ncbiCodingAnnotations.txt
 #1884989 ncbiCodingAnnotations.txt
 
     # How many & what kinds of function types?
     cut -f 6 ncbiCodingAnnotations.txt \
     | sort -n | uniq -c
 # 371388 3   (coding-synon)
 #1301099 8   (cds-reference -- ignored)
 #   3465 41  (nonsense)
 # 199148 42  (missense)
 #    319 43  (stop-loss)
 #   7422 44  (frameshift)
 #   2148 45  (cds-indel)
     # In b137, the functional annotations include non-coding (frame = NULL),
     # which we'll exclude here because this is supposed to be just coding stuff...
     # probably need to update how we show dbSNP's func annos anyway, e.g.
     # it is a shame that we toss out codon number and transcript offset.
     # Gather up multiple annotation lines into one line per {snp, gene, frame}:
     perl -e  'while (<>) { chomp; \
                 my ($rsId, $ctg, $s, $e, $txId, $fxn, $frm, $nt, $aa, $codon) = split("\t"); \
                 next if ($fxn == 8 && ($frm eq "NULL" && $aa eq "NULL" && $codon eq "NULL")); \
                 if (defined $lastRs && \
                     ($lastRs != $rsId || $lastCtg ne $ctg || $lastS != $s || \
                      $lastTx ne $txId || $lastFrm ne $frm)) { \
                   if (defined $refRow) { \
                     $fxns = "$refRow->[0],$fxns";  $nts = "$refRow->[1],$nts"; \
                     $aas = "$refRow->[2],$aas";    $codons = "$refRow->[3],$codons"; \
                   } \
                   $lineOut = "$lastCtg\t$lastS\t$lastE\trs$lastRs\t$lastTx\t$lastFrm\t" . \
                         "$count\t$fxns\t$nts\t$codons\t$aas\n"; \
                   $lineOut =~ s@NULL@n/a@g; \
                   print $lineOut; \
                   $refRow = undef;  @rows = ();  ($count, $fxns, $nts, $codons, $aas) = (); \
                 } \
                 ($lastRs, $lastCtg, $lastS, $lastE, $lastTx, $lastFrm) = \
                     ($rsId, $ctg, $s, $e, $txId, $frm); \
                 $count++; \
                 if ($fxn == 8) { \
                   $refRow = [$fxn, $nt, $aa, $codon]; \
                 } else { \
                  $fxns .= "$fxn,";  $nts .= "$nt,";  $aas .= "$aa,";  $codons .= "$codon,"; \
                 } \
               } \
               if (defined $refRow) { \
                 $fxns = "$refRow->[0],$fxns";  $nts = "$refRow->[1],$nts"; \
                 $aas = "$refRow->[2],$aas";    $codons = "$refRow->[3],$codons"; \
               } \
               $lineOut = "$lastCtg\t$lastS\t$lastE\trs$lastRs\t$lastTx\t$lastFrm\t" . \
                     "$count\t$fxns\t$nts\t$codons\t$aas\n"; \
               $lineOut =~ s@NULL@n/a@g; \
               print $lineOut;' \
       ncbiCodingAnnotations.txt \
     | liftUp snp137CodingDbSnp.bed /hive/data/outside/dbSNP/137/mouse/suggested.lft warn stdin
 
     hgLoadBed mm10 snp137CodingDbSnp -sqlTable=$HOME/kent/src/hg/lib/snp125Coding.sql \
       -renameSqlTable -tab -notItemRgb -allowStartEqualEnd \
       snp137CodingDbSnp.bed
 #Read 552120 elements of size 11 from snp137CodingDbSnp.bed
 
 
 #########################################################################
 # RETROPOSED GENES ucscRetro track VERSION 2
 # (2013-04-03 - 2013-04-17, baertsch,hartera DONE)
 # create the RetroFinder working directory (the same path the cd below
 # enters) and the mm10 directory that receives the final track files
 mkdir -p /hive/groups/gencode/pseudogenes/retroFinder/mm10.20130403
 cd /hive/groups/gencode/pseudogenes/retroFinder/mm10.20130403
 
 mkdir -p /hive/data/genomes/mm10/bed/retro/
 cd /hive/groups/gencode/pseudogenes/retroFinder/mm10.20130403
 cat << '_EOF_' > DEF
 
 RETRO_OPTIONS="-verbose=4 -minAli=0.98 -nearTop=0.005 "
 RUNDATE="2013-04-03"
 DB=mm10
 SCORETHRESH=510
 GENOMENAME='Mus musculus'
 GBDB=mm
 DATE=20130403
 MRNABASE=/hive/data/genomes/$DB/bed/mrnaBlastz
 TMPMRNA=/hive/groups/gencode/pseudogenes/retroFinder/mm10.${DATE}/mrnaBlastz/$DB
 TMPEST=/hive/groups/gencode/pseudogenes/retroFinder/mm10.${DATE}/est/$DB
 BINDIR=/hive/users/hartera/GencodeWG/retroFinder/trunk/bin
 EST=all_est
 SPLICED_EST=intronEst
 SPLIT_EST=0
 SPLIT_SPLICED_EST=1
 SCRIPT=/hive/users/hartera/GencodeWG/retroFinder/trunk/src/pipeline
 GENOME=/hive/data/genomes
 TWOBIT=$GENOME/$DB/$DB.2bit
 RETRODIR=$GENOME/$DB/bed/retro
 BASE=/hive/groups/gencode/pseudogenes/retroFinder/mm10.${DATE}/retro
 VERSION=2
 OUTDIR=${BASE}/${DB}.${VERSION}
 RESULT=$OUTDIR/result
 LOG=$OUTDIR/log
 OUT=$OUTDIR/out
 OVERLAPDIR=$OUTDIR/run.o
 TABLE=ucscRetroInfo$VERSION
 ORTHOTABLE=ucscRetroOrtho$VERSION
 ALIGN=ucscRetroAli$VERSION
 LOCAL=/scratch/data/$DB
 NIB=$LOCAL/nib
 RMSK=rmsk
 NET1=netHg19
 NET2=netCanFam3
 NET3=netRn5
 GENE1=knownGene
 GENE2=refGene
 GENE3=ensGene
 CLUSTER=swarm
 SPECIES="hg19 mm10"
 ROOTDIR="/cluster/home/$USER/public_html/retro/mm10Apr13"
 WEBROOT=$ROOTDIR/retro.$RUNDATE
 WEBSERVER=http://hgwdev-hartera.soe.ucsc.edu
 EXPDIR=exp
 GENEPFAM=knownGene
 PFAM=knownToPfam
 PFAMIDFIELD=name
 PFAMDOMAIN=value
 ARRAY=gnfAtlas2
 AFFYPROBE=affyGnf1m
 ARRAYMEDIAN=hgFixed.gnfMouseAtlas2Median
 ARRAYRATIO=hgFixed.gnfMouseAtlas2AllRatio
 ARRAYABS=hgFixed.gnfMouseAtlas2All
 ARRAYEXP=hgFixed.gnfMouseAtlas2MedianExps
 ARRAYEXPALL=hgFixed.gnfMouseAtlas2AllExps
 # ARRAYLOOKUP=knownToGnfAtlas2
 #ARRAYPSLS="/hive/data/genomes/mm9/bed/geneAtlas2/affyGnf1m.psl"
 ALTSPLICE=sibTxGraph
 SPLITBYAGE=splitRetrosByAgeMouse
 PDB=proteins121210
 BREAKS=0,8,16,24,32
 XLIM=34
 YLIM=0.1
 YLIM1=4000
 YLIM2=160
 MAXDIVERGENCE=32
 '_EOF_'
     # << happy emacs
 chmod +x DEF
 mkdir mrnaBlastz
 cd mrnaBlastz
 cp ../DEF .
 # Create S1.len:
 cp /hive/data/genomes/mm10/chrom.sizes S1.len
 # Edit S1.len and remove chrM and random chroms then copy over to mm10
 # genomes directory
 mkdir -p /hive/data/genomes/mm10/bed/mrnaBlastz
 cp S1.len /hive/data/genomes/mm10/bed/mrnaBlastz
 
 screen
 # Run steps 1 to 6 of RetroFinder pipeline from scripts in CCDS SVN source tree:
 retroFinder/trunk/src/pipeline/ucscStep1.sh DEF
 # check cluster job on swarm
 retroFinder/trunk/src/pipeline/ucscStep2.sh DEF
 retroFinder/trunk/src/pipeline/ucscStep3.sh DEF
 #check cluster job
 retroFinder/trunk/src/pipeline/ucscStep4.sh DEF
 #check cluster job
     # Load the track
 retroFinder/trunk/src/pipeline/ucscStep5.sh DEF
 cd /hive/groups/gencode/pseudogenes/retroFinder/mm10.20130403/retro/mm10.2
 retroFinder/trunk/src/pipeline/filterMrna.sh DEF
 retroFinder/trunk/src/pipeline/filterEst.sh DEF
 retroFinder/trunk/src/pipeline/analyseExpress.sh DEF
 cd /hive/groups/gencode/pseudogenes/retroFinder/mm10.20130403/mrnaBlastz
 retroFinder/trunk/src/pipeline/ucscStep6.sh DEF
 #added ucscRetroAli to trackDb.ra
 # copied
 # /hive/groups/gencode/pseudogenes/retroFinder/mm10.20130403/retro/mm10.2/trackDb.retro
 # entry to kent/src/hg/makeDb/trackDb/mouse/mm10/trackDb.ra
 # and edited it to add version number and date.
 # Scripts copied ucscRetroAli2.psl, ucscRetroInfo2.bed and ucscRetroCds2.tab
 # to /hive/data/genomes/mm10/bed/retro/
 
 ##############################################################################
 # LASTZ shrew sorAra2 (DONE - 2013-06-12 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10SorAra2
     mkdir /hive/data/genomes/mm10/bed/lastzSorAra2.2013-06-12
     cd /hive/data/genomes/mm10/bed/lastzSorAra2.2013-06-12
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # shrew vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: shrew SorAra2
 SEQ2_DIR=/hive/data/genomes/sorAra2/sorAra2.2bit
 SEQ2_LEN=/hive/data/genomes/sorAra2/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=40
 
 BASE=/hive/data/genomes/mm10/bed/lastzSorAra2.2013-06-12
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     # real    785m32.163s
 
     cat fb.mm10.chainSorAra2Link.txt
     #   354499462 bases of 2652783500 (13.363%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzSorAra2.2013-06-12 lastz.sorAra2
 
     # better to have reciprocal best for this one since it is low coverage:
     cd /hive/data/genomes/mm10/bed/lastzSorAra2.2013-06-12
     time doRecipBest.pl mm10 sorAra2 -buildDir=`pwd` -workhorse=hgwdev \
 	> best.log 2>&1 &
     #	real    24m38.069s
 
     mkdir /hive/data/genomes/sorAra2/bed/blastz.mm10.swap
     cd /hive/data/genomes/sorAra2/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzSorAra2.2013-06-12/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     # real    39m53.463s
     cat fb.sorAra2.chainMm10Link.txt
     #  343760052 bases of 2192103426 (15.682%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/sorAra2/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ tenrec echTel2 (DONE - 2013-06-12 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10EchTel2
     mkdir /hive/data/genomes/mm10/bed/lastzEchTel2.2013-06-12
     cd /hive/data/genomes/mm10/bed/lastzEchTel2.2013-06-12
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # tenrec vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: tenrec EchTel2
 SEQ2_DIR=/hive/data/genomes/echTel2/echTel2.2bit
 SEQ2_LEN=/hive/data/genomes/echTel2/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=30
 
 BASE=/hive/data/genomes/mm10/bed/lastzEchTel2.2013-06-12
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #   real    1006m3.874s
 
     cat fb.mm10.chainEchTel2Link.txt
     #	384570981 bases of 2652783500 (14.497%) in intersection
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzEchTel2.2013-06-12 lastz.echTel2
 
     # better to have reciprocal best for this one since it is low coverage:
     cd /hive/data/genomes/mm10/bed/lastzEchTel2.2013-06-12
     time doRecipBest.pl mm10 echTel2 -buildDir=`pwd` -workhorse=hgwdev \
 	> best.log 2>&1 &
     #   real    27m58.816s
 
     # and, for the swap
     mkdir /hive/data/genomes/echTel2/bed/blastz.mm10.swap
     cd /hive/data/genomes/echTel2/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzEchTel2.2013-06-12/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #   real    43m0.194s
     cat fb.echTel2.chainMm10Link.txt
     #	380872172 bases of 2605196361 (14.620%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/echTel2/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ##############################################################################
 # LASTZ alpaca vicPac2 (DONE - 2013-06-19 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10VicPac2
     mkdir /hive/data/genomes/mm10/bed/lastzVicPac2.2013-06-19
     cd /hive/data/genomes/mm10/bed/lastzVicPac2.2013-06-19
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     cat << '_EOF_' > DEF
 # mouse vs alpaca
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: alpaca VicPac2
 SEQ2_DIR=/hive/data/genomes/vicPac2/vicPac2.2bit
 SEQ2_LEN=/hive/data/genomes/vicPac2/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LAP=0
 SEQ2_LIMIT=500
 
 BASE=/hive/data/genomes/mm10/bed/lastzVicPac2.2013-06-19
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #   real    2156m48.687s
 
     cat fb.mm10.chainVicPac2Link.txt
     #	797843091 bases of 2652783500 (30.076%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/mm10/bed
     ln -s lastzVicPac2.2013-06-19 lastz.vicPac2
 
     # better to have reciprocal best for this one since it is low coverage:
     cd /hive/data/genomes/mm10/bed/lastzVicPac2.2013-06-19
     time doRecipBest.pl mm10 vicPac2 -buildDir=`pwd` -workhorse=hgwdev \
 	> best.log 2>&1 &
     #	real    33m49.271s
 
     mkdir /hive/data/genomes/vicPac2/bed/blastz.mm10.swap
     cd /hive/data/genomes/vicPac2/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzVicPac2.2013-06-19/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #   real    85m53.924s
 
     cat fb.vicPac2.chainMm10Link.txt
     #	783682127 bases of 2078582856 (37.703%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/vicPac2/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 #########################################################################
 # create ucscToINSDC name mapping (DONE - 2013-08-15 - Hiram)
     # this allows the "ensembl" blue bar button to appear
     mkdir /hive/data/genomes/mm10/bed/ucscToINSDC
     cd /hive/data/genomes/mm10/bed/ucscToINSDC
 
     cat << '_EOF_' > translateNames.sh
 #!/bin/sh
 
 grep -v "^#" ../../genbank/Primary_Assembly/assembled_chromosomes/chr2acc \
    | sed -e 's/^/chr/'
 
 zcat ../../genbank/Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz | grep -v "^#" | cut -f1 | sort -u \
    | sed -e 's/^\([A-Za-z0-9]*\).\([0-9]*\)/chrUn_\1\t\1.\2/;'
 
 grep -v "^#" \
   ../../genbank/Primary_Assembly/unlocalized_scaffolds/unlocalized.chr2scaf \
     | sed -e 's/^\([A-Za-z0-9]*\)\t\([A-Za-z0-9]*\).\([0-9]*\)/chr\1_\2_random\t\2.\3/;'
 
 
 echo -e "chrM\tNC_005089.1"
 '_EOF_'
     # << happy emacs
 
     chmod +x translateNames.sh
     ./translateNames.sh | sort > ucscToINSDC.txt
     join <(sort ../../chrom.sizes) ucscToINSDC.txt \
         | awk '{printf "%s\t0\t%d\t%s\n", $1, $2, $3}' > ucscToINSDC.tab
 
     # maximum size of UCSC chrom name for SQL index
     cut -f1 ucscToINSDC.tab | awk '{print length($0)}' | sort -n | tail -1
     #   20
 
     sed -e 's/21/20/' $HOME/kent/src/hg/lib/ucscToINSDC.sql \
       | hgLoadSqlTab mm10 ucscToINSDC stdin ucscToINSDC.tab
 
     # verify the track link to INSDC functions
 
 ##############################################################################
 # MGI LIFTOVER FROM mm9 ( 2013-11-14 Pauline)
     ssh kolossus
     mkdir /cluster/data/mm10/bed/jaxLiftOver
     cd /cluster/data/mm10/bed/jaxLiftOver
 
     liftOver -minBlocks=0.5 /cluster/data/mm9/bed/jax/2011_06/jaxQtl.bed \
       /cluster/data/mm9/bed/liftOver/mm9ToMm10.over.chain.gz \
       -bedPlus=6 -tab jaxQtlLift.{bed,unmapped}
     wc -l jaxQtlLift.{bed,unmapped}
 #Old  1539 jaxQtlLift.bed
 #Old    12 jaxQtlLift.unmapped
 #     1883 jaxQtlLift.bed
 #       14 jaxQtlLift.unmapped
 # Numbers are of same order of magnitude (yay?) proceeding...
 
     # Load lifted track tables and original auxiliary tables:
     ssh hgwdev
     cd /cluster/data/mm10/bed/jaxLiftOver
 
     # jaxQTLLift
 #didn't run this sed command (prob already been done to this file?)
     sed -e 's/jaxQTL/jaxQTLLift/g'\
       ~/kent/src/hg/lib/jaxQTL.sql  > jaxQTLLift.sql
 
 #ran this (used this instead of hgLoadBed at Hiram's suggestion):
     hgLoadSqlTab  mm10 JaxQtl $HOME/kent/src/hg/lib/jaxQtl.sql \
 /cluster/data/mm10/bed/jaxLiftOver/jaxQtlLift.bed
 
     checkTableCoords mm10 JaxQTLLift
 #got no output (yay!)
 
 #found out hgLoadSqlTab doesn't load a positionally sorted table, sorting bed
 #file and reloading:
 
     sort -k1,1 -k2,2n jaxQtlLift.bed > jaxQtlLiftSorted.bed
 
     hgLoadSqlTab  mm10 jaxQtl $HOME/kent/src/hg/lib/jaxQtl.sql \
 /cluster/data/mm10/bed/jaxLiftOver/jaxQtlLiftSorted.bed
 
 
 ##############################################################################
 # DBSNP B138 / SNP138 (DONE 1/17/14 angie)
     # RedMine #12490
     screen
     mkdir -p /hive/data/outside/dbSNP/138/mouse
     cd /hive/data/outside/dbSNP/138/mouse
     # Look at the directory listing of ftp://ftp.ncbi.nih.gov/snp/database/organism_data/
     # to find the subdir name to use as orgDir below (mouse_10090 in this case).
     # Then click into that directory and look for file names like
     #    b(1[0-9][0-9])_
     # -- use the first num for build setting in config.ra
     # The buildAssembly setting in config.ra is empty because dbSNP stopped including
     # that in file names.
     cat > config.ra <<EOF
 db mm10
 orgDir mouse_10090
 build 138
 buildAssembly
 EOF
     ~/kent/src/hg/utils/automation/doDbSnp.pl config.ra >& do.log & tail -f do.log
     # Some trial and error was required to get the config.ra just right.
     # First stop: need a refAssemblyLabel:
 # *** This release contains more than one assembly label.
 # *** Please examine this list in case we need to exclude any of these:
 #
 #GRCm38.p1
 #Mm_Celera
 # *** Add refAssemblyLabel to config.ra.  If keeping all labels, it will
 # *** look like this:
 #
 #refAssemblyLabel GRCm38.p1,Mm_Celera
 #
 # *** Edit out any of those that are not included in mm10 (e.g. Celera).
 # *** Then restart this script with -continue=loadDbSnp .
     cat >> config.ra <<EOF
 refAssemblyLabel GRCm38.p1
 EOF
     ~/kent/src/hg/utils/automation/doDbSnp.pl config.ra -continue=loadDbSnp >>& do.log &
     tail -f do.log
     # Second stop: need to grab the NCBI Assembly Reports file for GRCm38; the
     # script will do its best to deduce the needed liftUp entries and contigs
     # to ignore (because they are for alternate mouse strains, or patch contigs etc).
 #*** b138_ContigInfo has coords for 119 sequences; these have been written to
 #*** /hive/data/outside/dbSNP/138/mouse/suggested.lft .
 #*** 152 lines of b138_ContigInfo.bcp.gz either had no lift-coords
 #*** or had unrecognized chrom names; see
 #*** /hive/data/outside/dbSNP/138/mouse/cantLiftUpSeqNames.txt .
 #
 #*** You must account for those in config.ra, in the liftUp file
 #*** and/or ignoreDbSnpContigsFile or the ignoreDbSnpContigs regex.
 #*** Then run again with -continue=loadDbSnp .
 #
 #*** NOTE: If you add the ncbiAssemblyReportFile setting to config.ra and
 #***       run again with -continue=loadDbSnp, this script may be able to
 #***       construct those files for you.
     # Look at the doDbSnp.pl -help message for instructions about how to find the
     # Assembly Reports file for GRCm38 on the NCBI web site.
     wget ftp://ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/All/GCF_000001635.22.assembly.txt
     cat >> config.ra <<EOF
 ncbiAssemblyReportFile GCF_000001635.22.assembly.txt
 EOF
     ~/kent/src/hg/utils/automation/doDbSnp.pl config.ra -continue=loadDbSnp >>& do.log &
     tail -f do.log
     # Third stop: review the list of dbSNP contigs that we can't map, and if they're
     # all contigs not in our assembly, tell config.ra to ignore them.
 #*** b138_ContigInfo has coords for 119 sequences; these have been written to
 #*** /hive/data/outside/dbSNP/138/mouse/suggested.lft .
 #
 #*** GCF_000001635.22.assembly.txt has mappings for 44 sequences;
 #*** these have been written to
 #*** /hive/data/outside/dbSNP/138/mouse/suggested.lft .
 #
 #*** 108 lines of b138_ContigInfo.bcp.gz contained contig names that
 #*** could not be mapped to chrom.size via their GenBank contig mappings; see
 #*** /hive/data/outside/dbSNP/138/mouse/cantLiftUpSeqNames.txt .
 #
 #*** You must account for all 271 contig_acc values in config.ra,
 #*** in the liftUp file and/or ignoreDbSnpContigsFile.
 #*** Then run again with -continue=loadDbSnp .
     cut -f 2 cantLiftUpSeqNames.txt > contigsNotInUCSC.txt
     cat >> config.ra <<EOF
 liftUp suggested.lft
 ignoreDbSnpContigsFile contigsNotInUCSC.txt
 EOF
     ~/kent/src/hg/utils/automation/doDbSnp.pl config.ra -continue=loadDbSnp >>& do.log &
     tail -f do.log
 
     # The script died with an error implying that a perl command in a pipe got
     # empty input from sort which was getting input from an hgsql query to join
     # Batch submitter handles with rs# snp_id's.  Looks like the mysql connection
     # was lost or something.  Anyway, re-running that part of addToDbSnp.csh
     # in 2 parts and continuing manually through the end of addToDbSnp.csh:
     pushd `cat workingDir `
     hgsql mm10snp138 -NBe 'select SNPSubSNPLink.snp_id, handle from SubSNP, SNPSubSNPLink, Batch \
                        where SubSNP.subsnp_id = SNPSubSNPLink.subsnp_id and \
                              SubSNP.batch_id = Batch.batch_id' \
     | sort -k1n,1n -k2,2 -u \
       > tmp.txt
     perl -we 'while (<>) { \
               chomp; my ($id, $handle) = split("\t"); \
               if (defined $prevId && $prevId != $id) { \
                 print "$prevId\t$handleCount\t$handleBlob\n"; \
                 $handleCount = 0;  $handleBlob = ""; \
               } \
               $handleCount++; \
               $handleBlob .= "$handle,"; \
               $prevId = $id; \
             } \
             print "$prevId\t$handleCount\t$handleBlob\n";' \
       tmp.txt > ucscHandles.txt
 
     cat > ucscHandles.sql <<EOF
 CREATE TABLE ucscHandles (
         snp_id int NOT NULL,
         handleCount int unsigned NOT NULL,
         handles longblob NOT NULL,
         INDEX snp_id (snp_id)
 );
 EOF
     hgLoadSqlTab mm10snp138 ucscHandles{,.sql,.txt}
 
     # I added 'if (0) then' around the parts of addToDbSnp.csh that completed successfully;
     # complete the step by running the modified script:
     # Pop back out of workingDir
     popd
     addToDbSnp.csh >>& do.log &
     tail -f do.log
 
     # Now continue with the next step:
     ~/kent/src/hg/utils/automation/doDbSnp.pl config.ra -continue=bigJoin >>& do.log &
     tail -f do.log
 # *** All done!
 
 
 ##############################################################################
 # FILTER SNP138 (DONE 1/17/14 angie)
    cd /hive/data/outside/dbSNP/138/mouse
    zcat snp138.bed.gz \
    | ~/kent/src/hg/utils/automation/categorizeSnps.pl
 #Mult:     3066546
 #Common:   8082414
 #Flagged:  0
 #leftover: 60824824
    foreach f ({Mult,Common}.bed.gz)
      mv $f snp138$f
    end
    # Load tables
    foreach subset (Mult Common)
      hgLoadBed -tab -onServer -tmpDir=/data/tmp -allowStartEqualEnd -renameSqlTable \
        mm10 snp138$subset -sqlTable=snp138.sql snp138$subset.bed.gz
    end
 
 
 ##############################################################################
 # DBSNP CODING ANNOTATIONS (138) (DONE 1/17/14 angie)
    cd /hive/data/outside/dbSNP/138/mouse
    # ncbiFuncAnnotations.txt has NCBI coords: 0-based, fully closed.
    # For anything except an insertion (0 bases between flanks),
    # we need to add 1 to the end coord.  For an insertion, we need
    # to add 1 to the start coord.  Make a hash of the insertion IDs,
    # then look up each ID in ncbiFuncAnnotations.txt to tell which
    # transform to apply.
    # Note: sort -u with the keys below is too restrictive -- we need full line uniq.
    zcat ncbiFuncAnnotations.txt.gz \
    | perl -we 'open($IDS, "zcat ncbiFuncInsertions.ctg.bed.gz |") || die "ids: $!"; \
              while (<$IDS>) { chomp; $ids{$_} = 1; } \
              close($IDS); \
              %coding = (2=>1, 3=>1, 4=>1, 8=>1, 9=>1, 41=>1, 42=>1, 43=>1, 44=>1, 45=>1); \
              while (<>) { \
                chomp;  @w = split("\t"); # id, ctg, start, end, ... \
                next unless $coding{$w[5]}; \
                $bed4 = join("\t", $w[1], $w[2], $w[3], $w[0]); \
                if (exists $ids{$bed4} && $w[3] == $w[2]+1) { \
                  $w[2]++; # 2-base insertions: increment start coord \
                } else { \
                  $w[3]++; # increment end coord to get half-open \
                } \
                print join("\t", @w) . "\n"; \
              }' \
    | sort -k1n,1n -k2,2 -k3n,3n -k5,5 -k6n,6n \
    | uniq \
      > ncbiCodingAnnotations.txt
    wc -l ncbiCodingAnnotations.txt
 #1584257 ncbiCodingAnnotations.txt
    # How many & what kinds of function types?
    cut -f 6 ncbiCodingAnnotations.txt \
    | sort -n | uniq -c
 # 372821 3   (coding-synon)
 # 552828 8   (cds-reference -- ignored)
 #    376 41  (nonsense)
 # 181984 42  (missense)
 #     49 43  (stop-loss)
 #   3382 44  (frameshift)
 # 472817 45  (cds-indel)
 
    # In b138, the functional annotations include non-coding (frame = NULL),
    # which we'll exclude here because this is supposed to be just coding stuff...
    # probably need to update how we show dbSNP's func annos anyway, e.g.
    # it is a shame that we toss out codon number and transcript offset.
    # Gather up multiple annotation lines into one line per {snp, gene, frame}:
    perl -e  'while (<>) { chomp; \
                my ($rsId, $ctg, $s, $e, $txId, $fxn, $frm, $nt, $aa, $codon) = split("\t"); \
                next if ($fxn == 8 && ($frm eq "NULL" && $aa eq "NULL" && $codon eq "NULL")); \
                if (defined $lastRs && \
                    ($lastRs != $rsId || $lastCtg ne $ctg || $lastS != $s || \
                     $lastTx ne $txId || $lastFrm ne $frm)) { \
                  if (defined $refRow) { \
                    $fxns = "$refRow->[0],$fxns";  $nts = "$refRow->[1],$nts"; \
                    $aas = "$refRow->[2],$aas";    $codons = "$refRow->[3],$codons"; \
                  } \
                  $lineOut = "$lastCtg\t$lastS\t$lastE\trs$lastRs\t$lastTx\t$lastFrm\t" . \
                        "$count\t$fxns\t$nts\t$codons\t$aas\n"; \
                  $lineOut =~ s@NULL@n/a@g; \
                  print $lineOut; \
                  $refRow = undef;  @rows = ();  ($count, $fxns, $nts, $codons, $aas) = (); \
                } \
                ($lastRs, $lastCtg, $lastS, $lastE, $lastTx, $lastFrm) = \
                    ($rsId, $ctg, $s, $e, $txId, $frm); \
                $count++; \
                if ($fxn == 8) { \
                  $refRow = [$fxn, $nt, $aa, $codon]; \
                } else { \
                 $fxns .= "$fxn,";  $nts .= "$nt,";  $aas .= "$aa,";  $codons .= "$codon,"; \
                } \
              } \
              if (defined $refRow) { \
                $fxns = "$refRow->[0],$fxns";  $nts = "$refRow->[1],$nts"; \
                $aas = "$refRow->[2],$aas";    $codons = "$refRow->[3],$codons"; \
              } \
              $lineOut = "$lastCtg\t$lastS\t$lastE\trs$lastRs\t$lastTx\t$lastFrm\t" . \
                    "$count\t$fxns\t$nts\t$codons\t$aas\n"; \
              $lineOut =~ s@NULL@n/a@g; \
              print $lineOut;' \
      ncbiCodingAnnotations.txt \
    | liftUp snp138CodingDbSnp.bed suggested.lft warn stdin
    hgLoadBed mm10 snp138CodingDbSnp -sqlTable=$HOME/kent/src/hg/lib/snp125Coding.sql \
      -renameSqlTable -tab -notItemRgb -allowStartEqualEnd \
      snp138CodingDbSnp.bed
 #Read 1025678 elements of size 11 from snp138CodingDbSnp.bed
 
 
 ##############################################################################
 # 2013-12-13: import of UCSC GENCODE group processing of GENCODE VM2 (markd)
     mkdir -p /hive/data/genomes/mm10/bed/gencodeVM2
     cd /hive/data/genomes/mm10/bed/gencodeVM2
 
     # create Makefile from previous one.
     cp /hive/data/genomes/hg19/bed/gencodeV19/Makefile .
 
     # download, build and load tables
     (time nice make -j 10) >&build.1.out&
 
     # compare tables from previous release to see if number changed makes
     # sense.
         # NOT DONE THIS TIME, SINCE THIS is the first mouse.
         make cmpRelease >gencode-cmp.tsv
 
     ## Copy and update trackDb files from previous release.
     ## Change version and use lower priority so it sorts to top of
     ## super track page.
     ## Important to make sure filter attrs.transcriptType matches current set
     ## figured out with
     select distinct transcriptType from wgEncodeGencodeAttrsVM2 order by transcriptType;
     cd kent/src/hg/makeDb/trackDb
     cp human/mm10/wgEncodeGencodeV18.ra human/mm10/wgEncodeGencodeVM2.ra
     cp human/mm10/wgEncodeGencodeV18.html human/mm10/wgEncodeGencodeVM2.html
 
     # edit these plus human/mm10/trackDb.wgEncode.ra
     # - set priorities in wgEncodeGencodeVM2.ra in reverse order with previous
     #   tracks so newest shows up first
     #     priority - set to previous version priority minus 0.001
     #     searchPriority - set each to previous -0.001
     # - make current track default to pack and hide previous [ONLY if it's going to be pushed]
     #     superTrack wgEncodeGencodeSuper pack
     # - Update wgEncodeGencodeSuper.html to describe new release and to
     #   pick up other updates.
 
     ### IMPORTANT: make sure that hgTracks/gencodeTracks.c registers
     ### track handler for this version of gencode:
     registerTrackHandler("wgEncodeGencodeVM2", gencodeGeneMethods);
 
     # update all.joiner and validate
     # look for the last section `begin Gencode V??' in all.joiner
     # and copy and update version
     # repeat this until happy, editing minCheck as needed
     cd /hive/data/genomes/mm10/bed/gencodeVM2
     make joinerCheck
     # see output in check/joiner.out
 ##############################################################################
 # SEGMENTAL DUPLICATIONS (WORKING 4/14/14 Pauline)
     # File emailed from John Huddleston (jlhudd@uw.edu) in the Eichler Lab.
     mkdir /hive/data/genomes/mm10/bed/genomicSuperDups
     cd /hive/data/genomes/mm10/bed/genomicSuperDups
 
     wget --timestamping 'http://mouseparalogy.gs.washington.edu/GRCm38/genomicSuperDup.tab'
 
     mv genomicSuperDup.tab mm10_WGAC.tab
 
     awk '($3 - $2) >= 1000 && ($9 - $8) >= 1000 {print;}' mm10_WGAC.tab \
     | hgLoadBed mm10 genomicSuperDups stdin \
       -tab -sqlTable=$HOME/kent/src/hg/lib/genomicSuperDups.sql
 
     # mm8 version of track had issue where strand values were "+" and "_" --
     # checked and found same issue - so ran same fix:
     hgsql mm10 -e 'update genomicSuperDups set strand = "-" where strand = "_";'
 
     #new mm10 version has a lot more stuff than version on mm8:
     #featureBits mm8 genomicSuperDups
     #157417547 bases of 2567283971 (6.132%) in intersection
     #featureBits mm10 genomicSuperDups
     #214917441 bases of 2652783500 (8.102%) in intersection
     #select count(*) from genomicSuperDups;
     #659775 (vs. 277816 in mm8)
 #
 #########################################################################
 # hgPal downloads (DONE braney 2009-11-03)
 #   FASTA from 60way for refGene, knownGene, knownCanonical
 
     ssh hgwdev
     screen
     bash
     rm -rf /cluster/data/mm10/bed/multiz60way/pal
     mkdir /cluster/data/mm10/bed/multiz60way/pal
     cd /cluster/data/mm10/bed/multiz60way/pal
     for i in `cat ../species.list`; do echo $i; done > order.lst
 
     mz=multiz60way
     gp=refGene
     db=mm10
     mkdir exonAA exonNuc ppredAA ppredNuc
     for j in `sort -nk 2 /cluster/data/$db/chrom.sizes | awk '{print $1}'`
     do
 	echo "date"
 	echo "mafGene -chrom=$j  $db $mz $gp order.lst stdout | \
 	    gzip -c > ppredAA/$j.ppredAA.fa.gz"
 	echo "mafGene -chrom=$j -noTrans $db $mz $gp order.lst stdout | \
 	    gzip -c > ppredNuc/$j.ppredNuc.fa.gz"
 	echo "mafGene -chrom=$j -exons -noTrans $db $mz $gp order.lst stdout | \
 	    gzip -c > exonNuc/$j.exonNuc.fa.gz"
 	echo "mafGene -chrom=$j -exons $db $mz $gp order.lst stdout | \
 	    gzip -c > exonAA/$j.exonAA.fa.gz"
     done > $gp.jobs
 
     nice time sh -x $gp.jobs > $gp.jobs.log 2>&1 &
     sleep 1
     tail -f $gp.jobs.log
 
 # 1817.21user 233.92system 4:54:04elapsed 11%CPU (0avgtext+0avgdata
 # 920192maxresident)k
 # 6024inputs+0outputs (7major+1648126minor)pagefaults 0swaps
 
     mz=multiz60way
     gp=refGene
     db=mm10
     zcat exonAA/*.gz | gzip -c > $gp.$mz.exonAA.fa.gz
     zcat exonNuc/*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz
     zcat ppredAA/*.gz | gzip -c > $gp.$mz.ppredAA.fa.gz
     zcat ppredNuc/*.gz | gzip -c > $gp.$mz.ppredNuc.fa.gz
 
     rm -rf exonAA exonNuc ppredAA ppredNuc
 
     # we're only distributing exons at the moment
     mz=multiz60way
     gp=refGene
     db=mm10
     pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
     mkdir -p $pd
     ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
     ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
 
     mz=multiz60way
     gp=knownGene
     db=mm10
     mkdir exonAA exonNuc ppredAA ppredNuc
     for j in `sort -nk 2 /cluster/data/$db/chrom.sizes | awk '{print $1}'`
     do
 	echo "date"
 	echo "mafGene -chrom=$j  $db $mz $gp order.lst stdout | \
 	    gzip -c > ppredAA/$j.ppredAA.fa.gz"
 	echo "mafGene -chrom=$j -noTrans $db $mz $gp order.lst stdout | \
 	    gzip -c > ppredNuc/$j.ppredNuc.fa.gz"
 	echo "mafGene -chrom=$j -exons -noTrans $db $mz $gp order.lst stdout | \
 	    gzip -c > exonNuc/$j.exonNuc.fa.gz"
 	echo "mafGene -chrom=$j -exons $db $mz $gp order.lst stdout | \
 	    gzip -c > exonAA/$j.exonAA.fa.gz"
     done > $gp.$mz.jobs
 
     time sh -x $gp.$mz.jobs > $gp.$mz.job.log 2>&1 &
     sleep 1
     tail -f $gp.$mz.job.log
 
 # oops... missed the timing
 
 
     mz=multiz60way
     gp=knownGene
     db=mm10
 
     zcat exonAA/c*.gz | gzip -c > $gp.$mz.exonAA.fa.gz
     zcat exonNuc/c*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz
     zcat ppredAA/c*.gz | gzip -c > $gp.$mz.ppredAA.fa.gz
     zcat ppredNuc/c*.gz | gzip -c > $gp.$mz.ppredNuc.fa.gz
 
     rm -rf exonAA exonNuc ppredAA ppredNuc
 
     mz=multiz60way
     gp=knownGene
     db=mm10
     pd=/usr/local/apache/htdocs/goldenPath/$db/$mz/alignments
     mkdir -p $pd
     ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
     ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
 
     # now do the canonical set
     cd /cluster/data/mm10/bed/multiz60way/pal
     mz=multiz60way
     gp=knownCanonical
     db=mm10
     for j in `awk '{print $1}' /cluster/data/mm10/chrom.sizes`
     do
 	echo "select chrom, chromStart, chromEnd, transcript from knownCanonical where chrom='$j'" | hgsql $db | tail -n +2 > $j.known.bed
     done
 
     mkdir exonAA exonNuc ppredAA ppredNuc
     for j in `sort -nk 2 /cluster/data/$db/chrom.sizes | awk '{print $1}'`
     do
 	echo "date"
 	echo "mafGene -geneBeds=$j.known.bed  $db $mz knownGene order.lst stdout | \
 	    gzip -c > ppredAA/$j.ppredAA.fa.gz"
 	echo "mafGene -geneBeds=$j.known.bed -noTrans $db $mz knownGene order.lst stdout | \
 	    gzip -c > ppredNuc/$j.ppredNuc.fa.gz"
 	echo "mafGene -geneBeds=$j.known.bed -exons -noTrans $db $mz knownGene order.lst stdout | \
 	    gzip -c > exonNuc/$j.exonNuc.fa.gz"
 	echo "mafGene -geneBeds=$j.known.bed -exons $db $mz knownGene order.lst stdout | \
 	    gzip -c > exonAA/$j.exonAA.fa.gz"
     done > $gp.$mz.jobs
 
     time sh -x $gp.$mz.jobs > $gp.$mz.job.log 2>&1 &
     sleep 1
     tail -f $gp.$mz.job.log
 
 # real    302m20.489s
 # user    27m31.179s
 # sys     5m30.071s
 
 
     rm *.known.bed
     mz=multiz60way
     gp=knownCanonical
     db=mm10
     zcat exonAA/c*.gz | gzip -c > $gp.$mz.exonAA.fa.gz
     zcat exonNuc/c*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz
     zcat ppredAA/c*.gz | gzip -c > $gp.$mz.ppredAA.fa.gz
     zcat ppredNuc/c*.gz | gzip -c > $gp.$mz.ppredNuc.fa.gz
 
     rm -rf exonAA exonNuc ppredAA ppredNuc
 
     mz=multiz60way
     gp=knownCanonical
     db=mm10
     pd=/usr/local/apache/htdocs/goldenPath/$db/$mz/alignments
     mkdir -p $pd
     ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
     ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
 ##############################################################################
 # LASTZ Rhesus rheMac2 (DONE - 2014-05-23 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzRheMac2.2014-05-23
     cd /hive/data/genomes/mm10/bed/lastzRheMac2.2014-05-23
 
     cat << '_EOF_' > DEF
 # rhesus vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Rhesus RheMac2
 SEQ2_DIR=/scratch/data/rheMac2/rheMac2.2bit
 SEQ2_LEN=/scratch/data/rheMac2/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=300
 
 BASE=/hive/data/genomes/mm10/bed/lastzRheMac2.2014-05-23
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
 
     #	establish a screen to control this job
     screen -S mm10RheMac2
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1 &
     #	real    257m21.255s
 
     cat fb.mm10.chainRheMac2Link.txt
     #	895296744 bases of 2652783500 (33.749%) in intersection
 
     mkdir /hive/data/genomes/rheMac2/bed/blastz.mm10.swap
     cd /hive/data/genomes/rheMac2/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzRheMac2.2014-05-23/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
 	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
     #	real    74m36.098s
 
     cat fb.rheMac2.chainMm10Link.txt
     #	875700775 bases of 2646704109 (33.086%) in intersection
 
 ############################################################################
 # FaceBase Microarray track (DONE - 2014-05-21 - Pauline)
     # establish a screen to control this job with a name to indicate what it is
     mkdir /hive/data/genomes/mm10/bed/FaceBase24SampleTypesAvg
     cd /hive/data/genomes/mm10/bed/FaceBase24SampleTypesAvg
 
     wget --timestamping http://genomebrowser.facebase.org/myHub/mm10/FaceBase_24Samp_Types_Averaged.bed
     hgLoadBed mm10 FaceBase24SampleTypesAvg FaceBase_24Samp_Types_Averaged.bed
 
     #For microarray tracks also need to add a section to
     #/cluster/home/pauline/kent/src/hg/makeDb/hgCgiData/Mouse/microarrayGroups.ra
 
 ##############################################################################
 # RepeatMasker Visualization track (DONE - 2014-07-25 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/rmskJoined
     cd /hive/data/genomes/mm10/bed/rmskJoined
 
     ln -s ../repeatMasker/mm10.sorted.fa.out .
     ln -s ../repeatMasker/mm10.fa.align.gz .
 
     # working on fixing this script for the next release of RM
     # since mm10 was an older version of RM, this conversion needs the
     # bedtools, thus the extra PATH business
 
     export PATH=/cluster/bin/bedtools:$PATH
         /scratch/data/RepeatMasker140131/util/nextVerRmToUCSCTables.pl \
             -out mm10.sorted.fa.out -align mm10.fa.align.gz
 
     hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/rmskJoined.sql \
         -renameSqlTable -verbose=4 -tab \
             -type=bed4+9 -as=$HOME/kent/src/hg/lib/rmskJoined.as mm10 \
                 rmskJoinedBaseline mm10.sorted.fa.join.bed \
                     > loadJoined.log 2>&1
 
     hgLoadSqlTab mm10 rmskAlignBaseline \
         /cluster/home/hiram/kent/src/hg/lib/rmskAlign.sql \
             mm10.fa.align.tsv > loadAlign.log 2>&1
 
     hgLoadOutJoined -verbose=2 mm10 mm10.sorted.fa.out > loadOut.log 2>&1
 
     featureBits -countGaps mm10 rmskJoinedBaseline
     #    2243474717 bases of 2730871774 (82.152%) in intersection
 
 ##############################################################################
 # cloneEnds (DONE - 2014-08-11 - Steve)
 
     mkdir /hive/data/genomes/mm10/bed/cloneEnds
     cd /hive/data/genomes/mm10/bed/cloneEnds
 
     # fetch the NCBI INSDC name correspondence file:
     rsync -a -P rsync://ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/All/GCF_000001635.23.assembly.txt ./
 
     # fetch the clone reports
     mkdir reports
     rsync -a -P \
 rsync://ftp.ncbi.nih.gov/repository/clone/reports/Mus_musculus/*.GCF_000001635.22.103.*.gff \
        ./reports/
 
     # script to establish refSeq to UCSC chrom names:
 
     cat << '_EOF_' > refSeqNames.pl
 #!/usr/bin/env perl
 
 # Read the NCBI assembly report and print a two-column, tab-separated
 # mapping:  <RefSeq accession> <UCSC chrom name>
 # Rows whose first column contains an underscore are named
 # chr<N>_<contig><suffix>; all other rows are named chr<N>.
 
 use strict;
 use warnings;
 
 open (FH, "<GCF_000001635.23.assembly.txt") or die "can not read GCF_000001635.23.assembly.txt";
 while (my $line = <FH>) {
   chomp $line;
   next if ($line =~ m/^#/);          # skip report header/comment lines
   my @a = split('\t', $line);
   # columns used (0-based): 1 = sequence type/role, 2 = chromosome name,
   # 4 = contig accession, 6 = RefSeq accession
   my $chrN = $a[2];
   my $refSeq = $a[6];
   my $contig = $a[4];
   my $type = $a[1];
   # short or blank lines lack these columns; nothing to map
   next if (!defined $type);
   next if (!defined $refSeq);
   next if (!defined $contig);
   my $suffix = "";
   if ($type eq "alt-scaffold") {
      $suffix = "_alt";
   } elsif ($type eq "unlocalized-scaffold") {
      $suffix = "_random";
   } elsif ($type eq "unplaced-scaffold") {
      $chrN = "Un";
   }
   $chrN = "M" if ($chrN eq "MT");
   if ($a[0] =~ m/_/) {
     # drop the trailing accession version (".1", ".12", ...); anchored and
     # multi-digit so a version >= 10 does not leave a stray digit behind
     $contig =~ s/\.[0-9]+$//;
     printf "%s\tchr%s_%s%s\n", $refSeq, $chrN, $contig, $suffix;
   } else {
     printf "%s\tchr%s\n", $refSeq, $chrN;
   }
 }
 close (FH);
 '_EOF_'
     # << happy emacs
 
     chmod +x refSeqNames.pl
 
     ./refSeqNames.pl > refSeq.ucscName.tab
 
     # establish full library list:
     ls reports/*.GCF_000001635.22.103.*.gff | sed -e 's#reports/##' \
        | cut -d"." -f1 | sort -u > library.list.txt
 
     # a script to scan the GFF files, with the refSeq.ucscName.tab
     # name correspondence to construct bed files
 
     cat << '_EOF_' > mm10.pl
 #!/usr/bin/env perl
 
 # Convert clone report GFF files into bed records, using
 # refSeq.ucscName.tab to translate sequence names to UCSC chrom names
 # and mm10 chrom.sizes to bound the coordinates.
 
 use strict;
 use warnings;
 
 my $argc = scalar(@ARGV);
 
 # at least one GFF report file is required on the command line
 if ($argc < 1) {
   printf STDERR "usage: ./mm10.pl <report.gff> [moreReports.gff]\n";
   exit 255;
 }
 
 my %refSeqToUcsc;   # key is refSeq name, value is UCSC chrom name
 open (FH, "<refSeq.ucscName.tab") or die "can not read refSeq.ucscName.tab";
 while (my $line = <FH>) {
   chomp $line;
   my ($refSeq, $ucsc) = split('\t', $line);
   $refSeqToUcsc{$refSeq} = $ucsc;
 }
 close (FH);
 
 my %chromSizes;    # key is UCSC chrom name, value is chrom size
 open (FH, "</hive/data/genomes/mm10/chrom.sizes") or die "can not read mm10/chrom.sizes";
 while (my $line = <FH>) {
   chomp $line;
   my ($chr, $size) = split('\t', $line);
   $chromSizes{$chr} = $size;
 }
 close (FH);
 
 # Process each GFF report named on the command line.  Two outputs:
 #   STDERR: one bed6-style line per GFF feature, plus '#' diagnostics
 #           (the invoking loop redirects STDERR into tmp.bed6)
 #   STDOUT: one bed12 line per clone insert that has both a start and
 #           an end feature on the same chrom as the insert
 while (my $file = shift) {
 my %starts;   # key is parent ID, value is start end coordinates start,end
 my %ends;	# key is parent ID, value is end end coordinates start,end
 my %parents;	# key is parent ID, value is the UCSC chrom of the clone_insert
 my %endNames;   # key is parent ID, value is the Name of the parent clone_insert
 
 printf STDERR "# processing $file\n";
 
 open (FH, "<$file") or die "can not read $file";
 while (my $line = <FH>) {
   chomp $line;
   next if ($line=~ m/^#/);
   my @a = split('\t', $line);
   next if (scalar(@a) < 1);
   # reduce the GFF sequence name (e.g. "ref|NC_000067.6|") to the bare accession
   my $contig = $a[0];
   $contig =~ s/ref.//;
   $contig =~ s/\|//;
   my $ucscChr = $refSeqToUcsc{$contig};
   if (!defined($ucscChr)) {
     printf STDERR "# ERR: contig not in refSeqToUcsc: '$contig'\n";
     next;
   }
   # silently skip sequences that are not part of the mm10 assembly
   next if (! exists($chromSizes{$ucscChr}));
   my $chromSize = $chromSizes{$ucscChr};
   # GFF is 1-based closed; bed is 0-based half-open
   my $chromStart = $a[3] - 1;
   my $chromEnd = $a[4];
   # clamp coordinates that run past the end of the chrom
   if ($chromStart > $chromSize) {
     printf STDERR "# warning chromStart over size $ucscChr $chromStart $chromEnd\n";
     $chromStart = $chromSize-1;
   }
   if ($chromEnd > $chromSize) {
     my $overRun = $chromEnd - $chromSize;
     printf STDERR "# warning chromEnd over size by $overRun -> $ucscChr $chromStart $chromEnd\n";
     $chromEnd = $chromSize;
   }
   # pull ID, Parent and Name out of the GFF attributes column
   my $id="notFound";
   my $name="notFound";
   my $parent="notFound";
   my @b = split(';', $a[8]);
   for (my $i = 0; $i < scalar(@b); ++$i) {
      my ($tag, $value) = split('=', $b[$i]);
      if ($tag eq "ID") {
         $id = $value;
         # an ID without a '-' is registered as a parent (clone insert)
         if ($id !~ m/-/) {
           if (exists($parents{$id})) {
             # newline is required: STDERR also carries the bed6 item lines,
             # and an unterminated warning would fuse with the next record
             printf STDERR "# WARN: duplicate parent: $id\n";
           } else {
             $parents{$id} = $ucscChr;
           }
         }
      } elsif ($tag eq "Parent") {
         $parent = $value;
      } elsif ($tag eq "Name") {
         $name = $value;
      }
   }
   # classify the feature; the more specific start/end patterns must be
   # tested before the plain clone_insert pattern, which they also match
   my $type="notFound";
   my $insertType = $a[2];
   if ($insertType =~ m/clone_insert_start/) {
      $type = "start";
      if ($parent eq "notFound") {
        printf STDERR "# ERR: can not find parent for start $name Ttype $id\n";
      } else {
        if (!exists($parents{$parent})) {
          printf STDERR "# ERR: start found $name  with no parent $parent declared\n";
        } elsif (exists($starts{$parent})) {
          printf STDERR "# ERR: duplicate start for $parent\n";
        } elsif ($ucscChr eq $parents{$parent}) {
          $starts{$parent} = sprintf("%s\t%s", $chromStart, $chromEnd);
        } else {
          printf STDERR "# ERR: start on different chrom $ucscChr than parent $parent $parents{$parent}\n";
        }
      }
   } elsif ($insertType =~ m/clone_insert_end/) {
      $type = "end";
      if ($parent eq "notFound") {
        printf STDERR "# ERR: can not find parent for end $name Ttype $id\n";
      } else {
        if (!exists($parents{$parent})) {
          printf STDERR "# ERR: end found $name  with no parent $parent declared\n";
        } elsif (exists($ends{$parent})) {
          printf STDERR "# ERR: duplicate end for $parent\n";
        } elsif ($ucscChr eq $parents{$parent}) {
          $ends{$parent} = sprintf("%s\t%s", $chromStart, $chromEnd);
        } else {
          printf STDERR "# ERR: end on different chrom $ucscChr than parent $parent $parents{$parent}\n";
        }
      }
   } elsif ($insertType =~ m/clone_insert/) {
      $type = "insert";
      $endNames{$id} = $name;
   }
   $name =~ s/gi\|//g;
   $id =~ s/gi\|//g;
   # bed6-style record for every feature, deliberately written to STDERR
   printf STDERR "%s\t%d\t%d\t%s_%s_%s\t0\t%s\n", $ucscChr, $chromStart, $chromEnd, $name, $type, $id, $a[6];
 }       # while (my $line = <FH>)
 
 close (FH);
 
 # emit one two-block bed12 record per parent that has both ends
 foreach my $parent (keys %parents) {
   if (! exists($starts{$parent}) ) {
     printf STDERR "# ERR: no start for $parent\n";
   } elsif (! exists($ends{$parent}) ) {
     printf STDERR "# ERR: no end for $parent\n";
   } else {
     my $strand = "+";
     my $chrStart = 0;
     my $chrEnd = 0;
     my $blockStart = 0;
     my ($sStart, $sEnd) = split('\t', $starts{$parent});
     my ($eStart, $eEnd) = split('\t', $ends{$parent});
     my $startSize = $sEnd - $sStart;
     my $endSize = $eEnd - $eStart;
     if ($eStart < $sStart) {
       # end feature lies upstream of the start feature: negative strand,
       # so the end feature becomes the first block
       $chrStart = $eStart;
       $chrEnd = $sEnd;
       $blockStart = $sStart - $chrStart;
       $strand = "-";
       $startSize = $eEnd - $eStart;
       $endSize = $sEnd - $sStart;
     } else {
       $chrStart = $sStart;
       $chrEnd = $eEnd;
       $blockStart = $eStart - $chrStart;
     }
     # the first block must not run past the start of the second block
     if ($startSize > $blockStart) {
       printf STDERR "# startSize > blockStart $endNames{$parent}\n";
     } else {
       printf "%s\t%d\t%d\t%s\t0\t%s\t%d\t%d\t0\t2\t%d,%d\t0,%d\n", $parents{$parent}, $chrStart, $chrEnd, $endNames{$parent}, $strand, $chrStart, $chrEnd, $startSize, $endSize, $blockStart;
     }
   }
 }
 
 }
 '_EOF_'
     # << happy emacs
 
     chmod +x mm10.pl
 
     # process GFF files into bed files into separateLibs/ directory
 for L in `cat library.list.txt`
 do
    export db="`pwd -P | awk -F'/' '{print $5}'`"
    export destDir="separateLibs/${L}"
    echo "working: ${L}"
    mkdir -p "${destDir}"
    ./${db}.pl reports/${L}.GCF_000001635.22.103.*.gff \
        2> ${destDir}/tmp.bed6 | sort -k1,1 -k2,2n > ${destDir}/${db}.${L}.bed
    sort -k1,1 -k2,2n ${destDir}/tmp.bed6 > ${destDir}/${db}.${L}.items.bed6
 done
 
     # use only those libraries with more than 20,000 clone ends
     wc -l separateLibs/*/*.bed | sort -n | grep -v total | awk '$1 > 20000' \
         | sed -e 's#.*separateLibs/##; s#/.*##' > libs.over20K.list
 
     # note those libraries with 20,000 or fewer clone ends
     # (<= so a library with exactly 20,000 ends lands in one of the two lists)
     wc -l separateLibs/*/*.bed | grep -v total | awk '$1 <= 20000' | sed -e 's#.*separateLibs/##; s#/.*##' > libs.under20K.list
 
     # filter out bad ends, length must be < median size times three
     # for each library: mm10.median3X.bed keeps the items under the limit,
     # mm10.badMap.bed collects the items at or over the limit
 cat libs.over20K.list | while read L
 do
    # item lengths are computed once and reused on re-runs
    if [ ! -s separateLibs/${L}/lengths.txt ]; then
       awk '{print $3-$2}' separateLibs/${L}/mm10.${L}.bed > separateLibs/${L}/lengths.txt
    fi
    median3X=`ave separateLibs/${L}/lengths.txt | grep median | awk '{printf "%d", $2*3}'`
    awk '($3-$2) < '$median3X'' separateLibs/${L}/mm10.${L}.bed > separateLibs/${L}/mm10.median3X.bed
    awk '($3-$2) >= '$median3X'' separateLibs/${L}/mm10.${L}.bed > separateLibs/${L}/mm10.badMap.bed
    before=`cat separateLibs/${L}/mm10.${L}.bed | wc -l`
    after=`cat separateLibs/${L}/mm10.median3X.bed | wc -l`
    dropped=`echo $before $after | awk '{print $1-$2}'`
    perCent=`echo $dropped $before | awk '{printf "%.2f", 100*$1/$2}'`
    echo "$L $before - $after = $dropped -> % $perCent dropped"
 done
 
 # B6Ng01 96548 - 95837 = 711 -> % 0.74 dropped
 # C3H 42705 - 42378 = 327 -> % 0.77 dropped
 # CH29 51200 - 50621 = 579 -> % 1.13 dropped
 # DN 101826 - 100472 = 1354 -> % 1.33 dropped
 # MHPN 59859 - 58582 = 1277 -> % 2.13 dropped
 # MHPP 29074 - 28550 = 524 -> % 1.80 dropped
 # MSMg01 81802 - 78772 = 3030 -> % 3.70 dropped
 # RP23 83424 - 83062 = 362 -> % 0.43 dropped
 # RP24 51112 - 50849 = 263 -> % 0.51 dropped
 # WI1 326662 - 324259 = 2403 -> % 0.74 dropped
 # bMQ 73519 - 72540 = 979 -> % 1.33 dropped
 
    # loading the median3X files
 mkdir -p filteredEnds
 for L in `cat libs.over20K.list`
 do
     echo $L 1>&2
     hgLoadBed -type=bed12 mm10 cloneEnd_${L} \
        separateLibs/${L}/mm10.median3X.bed \
         > filteredEnds/loadBed.${L}.log 2>&1
 done
 
     # construct multiple mapped ends:
     # count how often each clone name occurs across all filtered libraries
 cat separateLibs/*/mm10.median3X.bed | cut -f4 | sort | uniq -c | sort -rn > allEnds.names.count.txt
 
     # names seen more than once
 awk '$1 > 1' allEnds.names.count.txt | awk '{print $2}' | sort > multiples.names.txt
 
     # pull the full bed12 rows for those names; join needs a single-character
     # delimiter, so the tab is spelled $'\t' (bash) instead of a literal
     # tab that can be expanded to spaces by an editor
 cat separateLibs/*/mm10.median3X.bed | sort -k4 > allEnds.nameSorted.bed
 join -t$'\t' -o "2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8,2.9,2.10,2.11,2.12" -2 4 multiples.names.txt allEnds.nameSorted.bed | sort -k1,1 -k2,2n > allEnds.multiple.locations.bed
 
 hgLoadBed -type=bed12 mm10 cloneEnd_multipleMaps \
    allEnds.multiple.locations.bed > load.multipleMaps.log 2>&1
 
     # construct bad mapped ends:
 mkdir -p filteredDroppedEnds
 
 for L in `cat libs.over20K.list`
 do
     echo $L 1>&2
     cat separateLibs/${L}/mm10.badMap.bed
 done | sort -k1,1 -k2,2n > filteredDroppedEnds/badEnds.bed
 
 hgLoadBed -type=bed12 mm10 cloneEndbadEnds filteredDroppedEnds/badEnds.bed \
        > filteredDroppedEnds/loadBed.badEnds.log 2>&1
 
     # construct coverage bigWig files:
 cat separateLibs/*/mm10.median3X.bed | awk '$6 == "+"' | sort -k1,1 -k2,2n \
     | bedItemOverlapCount mm10 stdin > allEnds.forward.bedGraph
 
 cat separateLibs/*/mm10.median3X.bed | awk '$6 == "-"' | sort -k1,1 -k2,2n \
     | bedItemOverlapCount mm10 stdin > allEnds.reverse.bedGraph
 
 bedGraphToBigWig allEnds.forward.bedGraph /hive/data/genomes/mm10/chrom.sizes \
    cloneEnd_coverageForward.bw
 
 bedGraphToBigWig allEnds.reverse.bedGraph /hive/data/genomes/mm10/chrom.sizes \
    cloneEnd_coverageReverse.bw
 
     mkdir /gbdb/mm10/bbi/cloneEnd
     ln -s `pwd`/cloneEnd_coverageForward.bw /gbdb/mm10/bbi/cloneEnd
     ln -s `pwd`/cloneEnd_coverageReverse.bw /gbdb/mm10/bbi/cloneEnd
 
     hgBbiDbLink mm10 cloneEnd_coverageForward \
         /gbdb/mm10/bbi/cloneEnd/cloneEnd_coverageForward.bw
     hgBbiDbLink mm10 cloneEnd_coverageReverse \
         /gbdb/mm10/bbi/cloneEnd/cloneEnd_coverageReverse.bw
 
     ### Fixup the scores to indicate how many multiple mappings as mentioned
     ### in the hg19 bacEnds description page: one mapping: score = 1000
     ### multiple mappings: score = 1500/count
     ### the sort | uniq -c | awk does this score calculation with the name
     ###   in column 1
     ### The join puts the existing table together with those scores
     ### DONE - 2016-03-02 - Hiram
 
     mkdir /hive/data/genomes/mm10/bed/cloneEnds/addCounts
     cd /hive/data/genomes/mm10/bed/cloneEnds/addCounts
     # work directories: per-name scores, table dumps, and rescored tables
     mkdir score noScore withScore
     for table in cloneEndB6Ng01 cloneEndC3H cloneEndCH29 cloneEndDN \
 cloneEndMHPN cloneEndMHPP cloneEndMSMg01 cloneEndRP23 cloneEndRP24 \
 cloneEndWI1 cloneEndbMQ cloneEndbadEnds cloneEndmultipleMaps
 do
   hgsql -N -e "select name from $table;" mm10 | sort | uniq -c |
       awk '{ if (1 == $1) {printf "%s\t1000\n", $2} else {printf "%s\t%d\n", $2, 1500/$1} }' \
          | sort > score/mm10.$table.score.tab
   hgsql -N -e "select * from $table order by name;" mm10 \
       | sort -k5 > noScore/mm10.$table.tab
   join -t'	' -1 5 noScore/mm10.$table.tab score/mm10.$table.score.tab \
   | awk '{printf "%d\t%s\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t%d\t%d\t%s\t%s\n", $2,$3,$4,$5,$1,$14,$7,$8,$9,$10,$11,$12,$13}' \
     | sort -k2,2 -k3,3n > withScore/mm10.$table.withScore.tab
   hgsql -e "delete from $table;" mm10
   hgsql -e "load data local infile \"withScore/mm10.$table.withScore.tab\" into table $table;" mm10
 done
 
     for table in cloneEndB6Ng01 cloneEndC3H cloneEndCH29 cloneEndDN \
 cloneEndMHPN cloneEndMHPP cloneEndMSMg01 cloneEndRP23 cloneEndRP24 \
 cloneEndWI1 cloneEndbMQ cloneEndbadEnds cloneEndmultipleMaps
 do
     hgsql -N -e "select count(*) from $table;" mm10 | cat
 done
 #     95837
 #     42378
 #     50621
 #    100472
 #     58582
 #     28550
 #     78772
 #     83062
 #     50849
 #    324259
 #     72540
 #     11809
 #      4269
 
 ##############################################################################
 # 2014-08-17: import of UCSC GENCODE group processing of GENCODE VM3 (markd)
 
     # download files
     mkdir -p /hive/data/genomes/mm10/bed/gencodeVM3/data
     cd /hive/data/genomes/mm10/bed/gencodeVM3
 
     # download gencode release
     cd data
     wget -nv -r -np ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_mouse/release_M3
     mv ftp.sanger.ac.uk/pub/gencode/Gencode_mouse/release_M3 .
     rm -rf ftp.sanger.ac.uk
 
     cd ..
     # create Makefile from previous one.
     cp /hive/data/genomes/mm10/bed/gencodeVM2/Makefile .
 
     # build and load tables
     (time nice make -j 10) >&build.1.out&
 
     # compare tables with the previous release to see whether the number of
     # changes makes sense.
         make cmpRelease >gencode-cmp.tsv
 
     ## Copy and update trackDb files from previous release.
     ## Change version and use lower priority so it sorts to top of
     ## super track page.
     ## Important to make sure filter attrs.transcriptType matches current set
     ## figured out with
     select distinct transcriptType from wgEncodeGencodeAttrsVM3 order by transcriptType;
     cd kent/src/hg/makeDb/trackDb
     cp mouse/mm10/wgEncodeGencodeVM2.ra mouse/mm10/wgEncodeGencodeVM3.ra
     cp mouse/mm10/wgEncodeGencodeVM2.html mouse/mm10/wgEncodeGencodeVM3.html
 
     # edit these plus mouse/mm10/trackDb.wgEncode.ra
     # - set priorities in wgEncodeGencodeVM3.ra in reverse order with previous
     #   tracks so newest shows up first
     #     priority - set to previous version priority minus 0.001
     #     searchPriority - set each to previous minus 0.001
     # - make current track default to pack and hide previous [ONLY if it's going to be pushed]
     #     superTrack wgEncodeGencodeSuper pack
     # - Update wgEncodeGencodeSuper.html to describe new release and to
     #   pick up other updates.
 
     ### IMPORTANT: make sure that hgTracks/gencodeTracks.c registers
     ### track handler for this version of gencode:
     registerTrackHandler("wgEncodeGencodeVM3", gencodeGeneMethods);
 
     # update all.joiner and validate
     # look for the last section `begin Gencode VM?' in all.joiner
     # and copy and update version
     # repeat this until happy, editing minCheck as needed
     cd /hive/data/genomes/mm10/bed/gencodeVM3
     make joinerCheck  # output in check/joiner.out
 
 ##############################################################################
 # LASTZ Cow bosTau8 (DONE - 2014-10-15 - Steve)
     mkdir /hive/data/genomes/mm10/bed/lastzBosTau8.2014-10-15
     cd /hive/data/genomes/mm10/bed/lastzBosTau8.2014-10-15
 
     cat << '_EOF_' > DEF
 # mouse vs cow
 # maximum M allowed with lastz is only 254
 BLASTZ_M=254
 
 # TARGET: Mouse mm10
 SEQ1_DIR=/hive/data/genomes/mm10/nib
 SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
 SEQ1_CHUNK=10000000
 SEQ1_LAP=10000
 
 # QUERY: Cow bosTau8
 SEQ2_DIR=/hive/data/genomes/bosTau8/bosTau8.2bit
 SEQ2_LEN=/hive/data/genomes/bosTau8/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzBosTau8.2014-10-15
 TMPDIR=/scratch/tmp
 '_EOF_'
     # << happy emacs
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF \
         -syntenicNet \
         -noLoadChainSplit \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
         -chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1
     # real    181m30.700s
     cat fb.mm10.chainBosTau8Link.txt
     # 698722925 bases of 2652783500 (26.339%) in intersection
     # Create link
     cd /hive/data/genomes/mm10/bed
     ln -s  lastzBosTau8.2014-10-15 lastz.bosTau8
 
     #   and the swap
     mkdir /hive/data/genomes/bosTau8/bed/blastz.mm10.swap
     cd /hive/data/genomes/bosTau8/bed/blastz.mm10.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/mm10/bed/lastzBosTau8.2014-10-15/DEF \
         -swap -syntenicNet  \
         -noLoadChainSplit \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
         -chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1
     #   real    58m4.272s
     cat fb.bosTau8.chainMm10Link.txt
     # 687270584 bases of 2649307237 (25.942%) in intersection
     # Create link
     cd /hive/data/genomes/bosTau8/bed
     ln -s blastz.mm10.swap lastz.mm10
 
 ############################################################################
 # 2014-12-05: import of UCSC GENCODE group processing of GENCODE VM4 (markd)
 
     # download files
     mkdir -p /hive/data/genomes/mm10/bed/gencodeVM4/data
     cd /hive/data/genomes/mm10/bed/gencodeVM4
 
     # create Makefile from previous one.
     # WARNING: next build should start with hg/makeDb/outside/gencode/gencodeLoad.mk
     cp  /hive/data/genomes/hg38/bed/gencodeV21/Makefile .
 
     # download, build and load tables
     (time nice make -j 10) >&build.1.out&
 
     # compare tables with the previous release to see whether the number of
     # changes makes sense.
         make cmpRelease >gencode-cmp.tsv
 
     ## Copy and update trackDb files from previous release.
     ## Change version and use lower priority so it sorts to top of
     ## super track page.
     ## Important to make sure filter attrs.transcriptType matches current set
     ## figured out with
     select distinct transcriptType from wgEncodeGencodeAttrsVM4 order by transcriptType;
     cd kent/src/hg/makeDb/trackDb
     cp mouse/mm10/wgEncodeGencodeVM2.ra mouse/mm10/wgEncodeGencodeVM4.ra
     cp mouse/mm10/wgEncodeGencodeVM2.html mouse/mm10/wgEncodeGencodeVM4.html
 
     # edit these plus mouse/mm10/trackDb.wgEncode.ra
     # - set priorities in wgEncodeGencodeVM4.ra in reverse order with previous
     #   tracks so newest shows up first
     #     priority - set to previous version priority minus 0.001
     #     searchPriority - set each to previous minus 0.001
     # - make current track default to pack and hide previous [ONLY if it's going to be pushed]
     #     superTrack wgEncodeGencodeSuper pack
     # - Update wgEncodeGencodeSuper.html to describe new release and to
     #   pick up other updates.
 
     ### IMPORTANT: make sure that hgTracks/gencodeTracks.c registers
     ### track handler for this version of gencode:
     registerTrackHandler("wgEncodeGencodeVM4", gencodeGeneMethods);
 
     # update all.joiner and validate
     # look for the last section `begin Gencode VM?' in all.joiner
     # and copy and update version
     # repeat this until happy, editing minCheck as needed
     cd /hive/data/genomes/mm10/bed/gencodeVM4
     make joinerCheck  # output in check/joiner.out
 
 ##############################################################################
 ##############################################################################
 # TransMap V3 tracks. see makeDb/doc/transMapTracks.txt (2014-12-21 markd)
 ##############################################################################
 # LASTZ mouse/mm10 sheep/oviAri3 - (DONE - 2015-01-08 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzOviAri3.2015-01-08
     cd /hive/data/genomes/mm10/bed/lastzOviAri3.2015-01-08
 
     cp -p \
 /hive/users/hiram/multiz/100way/mm10.oviAri3/mm10.oviAri3.tuning.top400.txt \
      ./mm10.oviAri3.tuning.Q.txt
 
     cat << '_EOF_' > DEF
 # mouse vs sheep
 # parameters obtained from a tuning run of lastz_D
 # /hive/users/hiram/multiz/100way/mm10.oviAri3/mm10.oviAri3.tuning.top400.txt
 
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.54/bin/lastz
 BLASTZ_T=2
 BLASTZ_O=400
 BLASTZ_E=30
 BLASTZ_M=254
 BLASTZ_X=890
 BLASTZ_Y=3400
 BLASTZ_Q=/hive/data/genomes/mm10/bed/lastzOviAri3.2015-01-08/mm10.oviAri3.tuning.Q.txt
 #       A     C     G     T
 # A    89  -172   -40  -184
 # C  -172   100  -121   -40
 # G   -40  -121   100  -172
 # T  -184   -40  -172    89
 
 # TARGET: mouse mm10
 SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
 SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
 SEQ1_CHUNK=40000000
 SEQ1_LIMIT=2
 SEQ1_LAP=10000
 
 # QUERY: sheep oviAri3
 SEQ2_DIR=/hive/data/genomes/oviAri3/oviAri3.2bit
 SEQ2_LEN=/hive/data/genomes/oviAri3/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LIMIT=10
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzOviAri3.2015-01-08
 TMPDIR=/dev/shm
 '_EOF_'
     # << happy emacs
 
     time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
         -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > do.log 2>&1
     # real    75m27.412s
 
     cat fb.mm10.chainOviAri3Link.txt
     # 432006690 bases of 2652783500 (16.285%) in intersection
 
     time (doRecipBest.pl -buildDir=`pwd` mm10 oviAri3) > rbest.log 2>&1 &
     # real    17m24.577s
 
     # and for the swap:
     mkdir /hive/data/genomes/oviAri3/bed/blastz.mm10.swap
     cd /hive/data/genomes/oviAri3/bed/blastz.mm10.swap
 
     time (doBlastzChainNet.pl -verbose=2 \
       /hive/data/genomes/mm10/bed/lastzOviAri3.2015-01-08/DEF \
         -swap -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > swap.log 2>&1
     #  real    31m27.481s
 
     cat fb.oviAri3.chainMm10Link.txt
     #422549165 bases of 2534335866 (16.673%) in intersection
 
     time (doRecipBest.pl -buildDir=`pwd` oviAri3 mm10) > rbest.log 2>&1
     # real    16m45.956s
 
 #########################################################################
 # RETROFINDER RETROPOSED GENES ucscRetro track VERSION 6
 # (2015-01-02 - 2015-01-07, hartera, DONE)
 ssh hgwdev
 mkdir -p /hive/groups/gencode/pseudogenes/retroFinder/mm10.20150102
 cd /hive/groups/gencode/pseudogenes/retroFinder/mm10.20150102
 
 cat << '_EOF_' > DEF
 
 RETRO_OPTIONS="-verbose=4 -minAli=0.98 -nearTop=0.005 "
 RUNDATE="2015-01-02"
 DB=mm10
 SCORETHRESH=510
 GENOMENAME='Mus musculus'
 GBDB=mm
 DATE=20150102
 VERSION=6
 RUNDIR=/hive/groups/gencode/pseudogenes/retroFinder/$DB.$DATE
 BINDIR=/hive/users/hartera/GencodeWG/retroFinder/branches/version2/bin
 KENTDIR=/cluster/home/hartera/kent
 KENTBINDIR=/cluster/bin/x86_64
 MRNABASE=/hive/data/genomes/$DB/bed/mrnaBlastz.$VERSION
 TMPMRNA=$RUNDIR/mrnaBlastz/$DB
 TMPEST=$RUNDIR/est/$DB
 USEALTSEQS=0
 EST=all_est
 SPLICED_EST=intronEst
 SPLIT_EST=0
 SPLIT_SPLICED_EST=1
 LASTZPROG=/cluster/bin/penn/x86_64/lastz
 SCRIPT=/hive/users/hartera/GencodeWG/retroFinder/branches/version2/src/pipeline
 GENOME=/hive/data/genomes
 TWOBIT=$GENOME/$DB/$DB.2bit
 RETRODIR=$GENOME/$DB/bed/retro
 BASE=$RUNDIR/retro
 BASE=/hive/groups/gencode/pseudogenes/retroFinder/mm10.${DATE}/retro
 OUTDIR=${BASE}/version${VERSION}/${DB}
 RESULT=$OUTDIR/result
 RESULTSPLIT=$OUTDIR/resultSplit
 LOG=$OUTDIR/log
 OUT=$OUTDIR/out
 OVERLAPDIR=$OUTDIR/run.o
 TABLE=ucscRetroInfo$VERSION
 ORTHOTABLE=ucscRetroOrtho$VERSION
 ALIGN=ucscRetroAli$VERSION
 LOCAL=/scratch/data/$DB
 NIB=$LOCAL/nib
 RMSK=rmsk
 NET1=netHg38
 NET2=netCanFam3
 NET3=netRn5
 GENE1=knownGene
 GENE2=refGene
 GENE3=wgEncodeGencodeCompVM4
 CLUSTER=ku
 SPECIES="hg38 mm10"
 ROOTDIR="~/public_html/retro/mm10Jul14"
 WEBROOT=$ROOTDIR/retro.$RUNDATE
 WEBSERVER=http://hgwdev-hartera.soe.ucsc.edu
 SHUFFLEDIR=shuffle
 SHUFFLEROOT=$WEBROOT/$SHUFFLEDIR
 DUPDIR=dups
 DUPROOT=$WEBROOT/$DUPDIR
 AGEDIR=age
 AGEROOT=$WEBROOT/$AGEDIR
 EXPDIR=exp
 GENEPFAM=knownGene
 PFAM=knownToPfam
 PFAMIDFIELD=name
 PFAMDOMAIN=value
 ARRAY=gnfAtlas2
 AFFYPROBE=affyGnf1m
 ARRAYMEDIAN=hgFixed.gnfMouseAtlas2Median
 ARRAYRATIO=hgFixed.gnfMouseAtlas2AllRatio
 ARRAYABS=hgFixed.gnfMouseAtlas2All
 ARRAYEXP=hgFixed.gnfMouseAtlas2MedianExps
 ARRAYEXPALL=hgFixed.gnfMouseAtlas2AllExps
 # ARRAYLOOKUP=knownToGnfAtlas2
 #ARRAYPSLS="/hive/data/genomes/mm9/bed/geneAtlas2/affyGnf1m.psl"
 ALTSPLICE=sibTxGraph
 SPLITBYAGE=$SCRIPT/splitRetrosByAgeMouse
 PDB=proteins140122
 BREAKS=0,8,16,24,32
 XLIM=34
 YLIM=0.1
 YLIM1=4000
 YLIM2=160
 MAXDIVERGENCE=32
 '_EOF_'
     # << happy emacs
 chmod +x DEF
 
 mkdir -p /hive/data/genomes/mm10/bed/retro
 mkdir -p /hive/data/genomes/mm10/bed/mrnaBlastz.6
 cd /hive/data/genomes/mm10/bed/mrnaBlastz.6
 # Create S1.len file
 foreach c (1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 X Y)
   echo $c
   hgsql -Ne "select chrom, size from chromInfo where chrom='chr${c}';" mm10 \
     >> S1.len
 end
 
 # NOTE: in future, use /hive/data/genomes/mm10/chrom.sizes for S1.len
 # and just remove randoms and chrM.
 cd /hive/groups/gencode/pseudogenes/retroFinder/mm10.20150102
 mkdir mrnaBlastz
 cd mrnaBlastz
 cp ../DEF .
 cp /hive/data/genomes/mm10/bed/mrnaBlastz.6/S1.len .
 
 screen
 # Run steps 1 to 5 of RetroFinder pipeline from scripts in CCDS SVN source tree:
 retroFinder/branches/version2/src/pipeline/ucscStep1.sh DEF
 # check cluster jobs on ku
 retroFinder/branches/version2/src/pipeline/ucscStep2.sh DEF
 retroFinder/branches/version2/src/pipeline/ucscStep3.sh DEF
 #check cluster jobs on ku
 retroFinder/branches/version2/src/pipeline/ucscStep4.sh DEF
 #check cluster jobs on ku
     # Load the track
 retroFinder/branches/version2/src/pipeline/ucscStep5.sh DEF
 cd /hive/groups/gencode/pseudogenes/retroFinder/mm10.20150102/retro/version6/mm10
 retroFinder/branches/version2/src/pipeline/filterMrna.sh
 retroFinder/branches/version2/src/pipeline/filterEst.sh
 # Check cluster jobs on ku
 retroFinder/branches/version2/src/pipeline/analyseExpress.sh
 # Check cluster jobs on ku
 # added ucscRetroAli6 to kent/src/hg/makeDb/trackDb/mouse/mm10/trackDb.ra
 # copied
 # /hive/groups/gencode/pseudogenes/retroFinder/mm10.20150102/retro/version6/mm10/trackDb.retro
 # entry to kent/src/hg/makeDb/trackDb/mouse/mm10/trackDb.ra
 # and edited it to remove the full data and add:
 # dataVersion Jan. 2015
 # Scripts copied ucscRetroAli6.psl, ucscRetroInfo6.bed and ucscRetroCds6.tab
 # to /hive/data/genomes/mm10/bed/retro/
 
 ##############################################################################
 # LASTZ mouse/mm10 tarsier/tarSyr2 - (DONE - 2015-03-27 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzTarSyr2.2015-03-27
     cd /hive/data/genomes/mm10/bed/lastzTarSyr2.2015-03-27
 
     cat << '_EOF_' > DEF
 # tarsier vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.54/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Tarsier TarSyr2
 SEQ2_DIR=/hive/data/genomes/tarSyr2/tarSyr2.2bit
 SEQ2_LEN=/hive/data/genomes/tarSyr2/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LIMIT=800
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzTarSyr2.2015-03-27
 TMPDIR=/dev/shm
 '_EOF_'
     # << happy emacs
 
     time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
       -chainMinScore=3000 -chainLinearGap=medium \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku) > do.log 2>&1
     # real    301m17.238s
 
     time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
       -chainMinScore=3000 -chainLinearGap=medium \
         -continue=syntenicNet -syntenicNet -workhorse=hgwdev \
           -smallClusterHub=ku -bigClusterHub=ku) > synNet.log 2>&1
     # real    16m5.061s
 
     cat fb.mm10.chainTarSyr2Link.txt
     # 856877439 bases of 2652783500 (32.301%) in intersection
 
     time (doRecipBest.pl -buildDir=`pwd` mm10 tarSyr2) > rbest.log 2>&1 &
     #  real    27m4.048s
 
     # and for the swap:
     mkdir /hive/data/genomes/tarSyr2/bed/blastz.mm10.swap
     cd /hive/data/genomes/tarSyr2/bed/blastz.mm10.swap
 
     time (doBlastzChainNet.pl -verbose=2 \
       /hive/data/genomes/mm10/bed/lastzTarSyr2.2015-03-27/DEF \
         -swap -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > swap.log 2>&1
     #  real    181m7.042s
 
     cat fb.tarSyr2.chainMm10Link.txt
     #  900229088 bases of 3405755564 (26.433%) in intersection
 
     time (doRecipBest.pl -buildDir=`pwd` tarSyr2 mm10) > rbest.log 2>&1
     #  real    77m29.742s
 
 #########################################################################
 # UCSC to RefSeq name correspondence (DONE - 2015-04-15 - Hiram)
 
     mkdir /hive/data/genomes/mm10/bed/ucscToRefSeq
     cd /hive/data/genomes/mm10/bed/ucscToRefSeq
 
     rsync -avPL \
   rsync://ftp.ncbi.nlm.nih.gov/genomes/genbank/vertebrate_mammalian/Mus_musculus/all_assembly_versions/GCA_000001635.5_GRCm38.p3/GCA_000001635.5_GRCm38.p3_assembly_report.txt ./
 
     # this assembly_report has "UCSC-style-name" in column 10
     # but it does not name everything
 
     # columns 5 and 7 are the INSDC and RefSeq names
     grep -v "^#" GCA_000001635.5_GRCm38.p3_assembly_report.txt \
       | awk -F'\t' '{printf "%s\t%s\n", $5,$7}' | sort > insdc.refSeq.tab
 
     # chrM/MT confusion fixed by sed
     hgsql -N -e 'select name,chrom,chromStart,chromEnd from ucscToINSDC;' mm10 \
       | sed -e 's/NC_005089.1/AY172335.1/;' | sort > insdc.ucsc.tab
 
     join insdc.ucsc.tab insdc.refSeq.tab | tr '[ ]' '[\t]' \
        | cut -f2- > ucsc.refSeq.tab
 
     export chrSize=`cut -f1 ucsc.refSeq.tab | awk '{print length($0)}' | sort -n | tail -1`
     sed -e "s/21/$chrSize/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \
        | sed -e 's/INSDC/RefSeq/g;' > ucscToRefSeq.sql
     hgLoadSqlTab mm10 ucscToRefSeq ./ucscToRefSeq.sql ucsc.refSeq.tab
 
     checkTableCoords  mm10 -table=ucscToRefSeq
     featureBits -countGaps mm10 ucscToRefSeq
     # 2730871774 bases of 2730871774 (100.000%) in intersection
 
     # fixup 2016-04-11 - Hiram
     # the chrM name is not correct, it was RefSeq instead of Genbank/INSDC:
     hgsql -e 'select * from ucscToINSDC where name="NC_005089.1";' mm10
 +-------+------------+----------+-------------+
 | chrom | chromStart | chromEnd | name        |
 +-------+------------+----------+-------------+
 | chrM  |          0 |    16299 | NC_005089.1 |
 +-------+------------+----------+-------------+
 
     hgsql -e 'update ucscToINSDC set name="AY172335.1" where name="NC_005089.1";' mm10
 
     hgsql -e 'select * from ucscToINSDC where name="AY172335.1";' mm10
 +-------+------------+----------+------------+
 | chrom | chromStart | chromEnd | name       |
 +-------+------------+----------+------------+
 | chrM  |          0 |    16299 | AY172335.1 |
 +-------+------------+----------+------------+
 
 
 
 #########################################################################
 # download and load ncbiGene track ( DONE - 2015-06-09 - Brian)
 
 db=mm10
 mkdir  /cluster/data/genomes/$db/bed/ncbiGene
 cd  /cluster/data/genomes/$db/bed/ncbiGene
 
 ftpFile=ftp://ftp.ncbi.nlm.nih.gov/genomes/M_musculus/GFF/ref_GRCm38.p3_top_level.gff3.gz
 gff3File=`basename $ftpFile`
 
 echo "select * from ucscToRefSeq" | hgsql $db | tail -n +2 | awk '{print 0, $4, $3, $1, $3}' > refSeqToUcsc.lft
 rm -f $gff3File
 wget $ftpFile
 
 /cluster/home/braney/bin/x86_64/gff3ToGenePred -useName -warnAndContinue -attrsOut=attrs -bad=bad.gp $gff3File stdout 2> convertErr.txt | liftUp -type=.gp -extGenePred lift.gp refSeqToUcsc.lft warn  stdin 2> liftErr.txt
 wc -l lift.gp
 # 108567 lift.gp
 wc -l bad.gp
 # 189
 
 tawk '{print $1}'  attrs | sort | uniq > meta
 wc -l meta
 # 110847 meta
 for i in product Dbxref gene gbkey
 do
     echo $i
     tawk -v attr=$i '$2==attr {print $1,$3}' attrs | sort | uniq | join -t $'\t' /dev/stdin meta > out
     mv out meta
 done
 wc -l meta
 # 109420 meta
 
 egrep "^N(M|R|P)" lift.gp > curated.gp
 egrep "^X(M|R)" lift.gp > predicted.gp
 
 wc -l curated.gp predicted.gp
 #33545 curated.gp
 #70587 predicted.gp
 #104132 total
 
 cat curated.gp predicted.gp | awk '{print $1}' | sort -u > tmp1
 cat meta | awk '{print $1}' | sort -u > tmp2
 join -v 1 tmp1 tmp2 | wc -l
 # 0
 
 grep dropping convertErr.txt | wc -l
 #    189
 
 awk '/isn/ {print $1}' liftErr.txt | sort -u
 #    NT_166322.1
 #    NT_187001.1
 
 hgLoadGenePred -genePredExt $db ncbiRefCurated curated.gp
 hgLoadGenePred -genePredExt $db ncbiRefPredicted predicted.gp
 hgLoadSqlTab $db ncbiRefLink $kent/src/hg/lib/ncbiRefLink.sql meta
 
 hgsql -e 'INSERT INTO trackVersion \
     (db, name, who, version, updateTime, comment, source, dateReference) \
     VALUES("mm10", "ncbiRefSeq", "braney", "105", now(), \
     "http://www.ncbi.nlm.nih.gov/genome/annotation_euk/Mus_musculus/105/", \
     "ftp://ftp.ncbi.nlm.nih.gov/genomes/M_musculus", \
     "9 February 2015" );' hgFixed
 
 #
 #############################################################################
 # hgPal downloads (DONE braney 2015-06-02)
 #   CDS FASTA from 60-way for knownGene
 
     ssh hgwdev
     screen -S mm10HgPal
     mkdir /hive/data/genomes/mm10/bed/multiz60way/pal
     cd /hive/data/genomes/mm10/bed/multiz60way/pal
     cat ../species.list | tr '[ ]' '[\n]' > order.lst
 
     export mz=multiz60way
     export gp=knownGene
     export db=mm10
     export I=0
     mkdir exonAA exonNuc
     for C in `sort -nk2 ../../../chrom.sizes | cut -f1`
     do
         I=`echo $I | awk '{print $1+1}'`
 	echo "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.lst stdout | gzip -c > exonNuc/$C.exonNuc.fa.gz &"
 	echo "mafGene -chrom=$C -exons $db $mz $gp order.lst stdout | gzip -c > exonAA/$C.exonAA.fa.gz &"
         if [ $I -gt 6 ]; then
             echo "date"
             echo "wait"
             I=0
         fi
     done > $gp.jobs
     echo "date" >> $gp.jobs
     echo "wait" >> $gp.jobs
 
     time nice sh -x $gp.jobs > $gp.jobs.log 2>&1 &
     #   real    80m36.763s
 
     mz=multiz60way
     gp=knownGene
     db=mm10
     time zcat exonAA/*.gz | gzip -c > $gp.$mz.exonAA.fa.gz
     #   real    1m16.821s
     zcat exonNuc/*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz
 
     rm -rf exonAA exonNuc
 
     # we're only distributing exons at the moment
     mz=multiz60way
     gp=knownGene
     db=mm10
     pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
     mkdir -p $pd
     md5sum *.fa.gz > md5sum.txt
     ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
     ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
     ln -s `pwd`/md5sum.txt $pd/
 
 #
 #############################################################################
 # hgPal downloads (DONE jcasper 2016-06-22)
 #   CDS FASTA from 60-way for knownGene - rebuilt for mm10 ucsc genes v16
 
     ssh hgwdev
     screen -S mm10HgPal
     mkdir /hive/data/genomes/mm10/bed/multiz60way/pal.ucsc16
     cd /hive/data/genomes/mm10/bed/multiz60way/pal.ucsc16
     cat ../species.list | tr '[ ]' '[\n]' > order.lst
 
     export mz=multiz60way
     export gp=knownGene
     export db=mm10
     export I=0
     mkdir exonAA exonNuc
     for C in `sort -nk2 ../../../chrom.sizes | cut -f1`
     do
         I=`echo $I | awk '{print $1+1}'`
 	echo "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.lst stdout | gzip -c > exonNuc/$C.exonNuc.fa.gz &"
 	echo "mafGene -chrom=$C -exons $db $mz $gp order.lst stdout | gzip -c > exonAA/$C.exonAA.fa.gz &"
         if [ $I -gt 6 ]; then
             echo "date"
             echo "wait"
             I=0
         fi
     done > $gp.jobs
     echo "date" >> $gp.jobs
     echo "wait" >> $gp.jobs
 
     time nice sh -x $gp.jobs > $gp.jobs.log 2>&1
     #   real    87m59.962s
 
     mz=multiz60way
     gp=knownGene
     db=mm10
     time zcat exonAA/*.gz | gzip -c > $gp.$mz.exonAA.fa.gz
     #   real    1m48.725s
     zcat exonNuc/*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz
 
     rm -rf exonAA exonNuc
 
     # we're only distributing exons at the moment
     mz=multiz60way
     gp=knownGene
     db=mm10
     pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
     mkdir -p $pd
     rm -f $pd/$gp.exonAA.fa.gz $pd/$gp.exonNuc.fa.gz $pd/md5sum.txt
     ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
     ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
     (cd $pd && md5sum *.fa.gz) > md5sum.txt
     ln -s `pwd`/md5sum.txt $pd/
 
 
 ###########################################################################
 # GENEID GENE PREDICTIONS (DONE - 2015-06-26 - Hiram)
     ssh hgwdev
     mkdir /hive/data/genomes/mm10/bed/geneid
     cd /hive/data/genomes/mm10/bed/geneid
     wget --timestamping \
 http://genome.crg.es/genepredictions/M.musculus/mm10/geneid_v1.4/00README
     wget --timestamping \
 http://genome.crg.es/genepredictions/M.musculus/mm10/geneid_v1.4/mm10.geneid.prot
     wget --timestamping \
 http://genome.crg.es/genepredictions/M.musculus/mm10/geneid_v1.4/mm10.geneid.gtf
     ldHgGene -gtf -genePredExt mm10 geneid mm10.geneid.gtf
 
     #  Read 36771 transcripts in 287332 lines in 1 files
     #  36771 groups 66 seqs 1 sources 3 feature types
     #  36771 gene predictions
 
     featureBits -enrichment mm10 refGene:CDS geneid
 # refGene:CDS 1.292%, geneid 1.584%, both 1.028%, cover 79.51%, enrich 50.19x
     featureBits -enrichment mm9 refGene:CDS geneid
 # refGene:CDS 1.305%, geneid 1.590%, both 1.040%, cover 79.65%, enrich 50.11x
 
     featureBits -countGaps mm10 geneid
 # 42028722 bases of 2730871774 (1.539%) in intersection
     featureBits -countGaps mm9 geneid
 # 41651898 bases of 2725765481 (1.528%) in intersection
 
 ##########################################################################
 # SGP GENES (DONE - 2015-07-30 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/sgpGene
     cd /hive/data/genomes/mm10/bed/sgpGene
     wget --timestamping \
 http://genome.crg.es/genepredictions/M.musculus/mm10/SGP2/hg38/00README
     wget --timestamping \
 http://genome.crg.es/genepredictions/M.musculus/mm10/SGP2/hg38/mm10.sgp2.gtf
     wget --timestamping \
 http://genome.crg.es/genepredictions/M.musculus/mm10/SGP2/hg38/mm10.sgp2.gff3
 
     ldHgGene -gtf -genePredExt mm10 sgpGene mm10.sgp2.gtf
     # Read 35235 transcripts in 287314 lines in 1 files
     #   35235 groups 60 seqs 1 sources 3 feature types
     # 35235 gene predictions
 
     featureBits -enrichment mm10 refGene:CDS sgpGene
 # refGene:CDS 1.292%, sgpGene 1.430%, both 1.101%, cover 85.21%, enrich 59.59x
 
     featureBits -enrichment mm9 refGene:CDS sgpGene
 # refGene:CDS 1.305%, sgpGene 1.439%, both 1.113%, cover 85.23%, enrich 59.23x
 #########################################################################
 # 2015-06-29-13: import of UCSC GENCODE group processing of GENCODE VM5 (markd)
     # download files
     mkdir -p /hive/data/genomes/mm10/bed/gencodeVM5
     cd /hive/data/genomes/mm10/bed/gencodeVM5
 
     # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set
     # release and transcript support versions
 
 
     # download, build and load tables
     (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
 
     # compare tables with the previous release to see whether the number of
     # changes makes sense.
         make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk cmpRelease >gencode-cmp.tsv
 
     ## Copy and update trackDb files from previous release.
     ## Change version and use lower priority so it sorts to top of
     ## super track page.
     ## Important to make sure filter attrs.transcriptType matches current set
     ## figured out with
     select distinct transcriptType from wgEncodeGencodeAttrsVM5 order by transcriptType;
     cd kent/src/hg/makeDb/trackDb
     cp mouse/mm10/wgEncodeGencodeVM4.ra mouse/mm10/wgEncodeGencodeVM5.ra
     cp mouse/mm10/wgEncodeGencodeVM4.html mouse/mm10/wgEncodeGencodeVM5.html
 
     # edit these plus mouse/mm10/trackDb.wgEncode.ra
     # - set priorities in wgEncodeGencodeVM5.ra in reverse order with previous
     #   tracks so newest shows up first
     #     priority - set to previous version priority minus 0.001
     #     searchPriority - set each to previous minus 0.001
     # - make current track default to pack and hide previous [ONLY if it's going to be pushed]
     #     superTrack wgEncodeGencodeSuper pack
     # - Update wgEncodeGencodeSuper.html to describe new release and to
     #   pick up other updates.
 
     # update all.joiner and validate
     # look for the last section `begin Gencode V??' in all.joiner
     # and copy and update version
     # repeat this until happy, editing minCheck as needed
     # output in check/joiner.out
     cd /hive/data/genomes/mm10/bed/gencodeVM5
     make  -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck
 
 #########################################################################
 # lastz zebrafish danRer10 (DONE - 2015-09-11 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10DanRer10
     mkdir /hive/data/genomes/mm10/bed/lastzDanRer10.2015-09-11
     cd /hive/data/genomes/mm10/bed/lastzDanRer10.2015-09-11
 
     cat << '_EOF_' > DEF
 # Mouse vs. zebrafish
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: zebrafish danRer10
 SEQ2_DIR=/hive/data/genomes/danRer10/danRer10.2bit
 SEQ2_LEN=/hive/data/genomes/danRer10/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=50
 
 BASE=/hive/data/genomes/mm10/bed/lastzDanRer10.2015-09-11
 TMPDIR=/dev/shm
 '_EOF_'
     # << happy emacs
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # when not present, SEQ2_LIMIT is a default 100
     time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
         -syntenicNet -chainMinScore=5000 -chainLinearGap=loose) > do.log 2>&1
     #	real    198m3.073s
 
     cat fb.mm10.chainDanRer10Link.txt
     #	73464192 bases of 2652783500 (2.769%) in intersection
 
     time (doRecipBest.pl -buildDir=`pwd` mm10 danRer10) > rbest.log 2>&1 &
     #    real    7m8.599s
 
     #	and for the swap
     mkdir /hive/data/genomes/danRer10/bed/blastz.mm10.swap
     cd /hive/data/genomes/danRer10/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzDanRer10.2015-09-11/DEF \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
         -swap -chainMinScore=5000 -chainLinearGap=loose) > swap.log 2>&1 &
     #	real    16m8.387s
 
     cat  fb.danRer10.chainMm10Link.txt
     #	71611488 bases of 1369683683 (5.228%) in intersection
 
     time (doRecipBest.pl -buildDir=`pwd` danRer10 mm10) > rbest.log 2>&1
     #    real    7m34.259s
 
 #########################################################################
 # 2015-10-02: import of UCSC GENCODE group processing of GENCODE VM7 (markd)
     # download files
     mkdir -p /hive/data/genomes/mm10/bed/gencodeVM7
     cd /hive/data/genomes/mm10/bed/gencodeVM7
 
     # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set
     # release and transcript support versions
 
 
     # download, build and load tables
     (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
 
     # compare tables with the previous release to see whether the number of
     # changes makes sense.
         make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk cmpRelease >gencode-cmp.tsv
 
     ## Copy and update trackDb files from previous release.
     ## Change version and use lower priority so it sorts to top of
     ## super track page.  Follow instructions in ra file to ensure
     ## filters are correct.
     cd kent/src/hg/makeDb/trackDb
     cp mouse/mm10/wgEncodeGencodeVM4.ra mouse/mm10/wgEncodeGencodeVM7.ra
     cp mouse/mm10/wgEncodeGencodeVM4.html mouse/mm10/wgEncodeGencodeVM7.html
 
     # edit these plus mouse/mm10/trackDb.wgEncode.ra
     # - set priorities in wgEncodeGencodeVM7.ra tracks so newest shows up first
     #     priority - set to previous version priority minus 0.001
     #     searchPriority - set each to previous minus 0.001
     # - make current track default to pack and hide previous [ONLY if it's going to be pushed]
     #     superTrack wgEncodeGencodeSuper pack
     # - Update wgEncodeGencodeSuper.html to describe new release and to
     #   pick up other updates.
 
     # update all.joiner and validate
     # look for the last section `begin Gencode V??' in all.joiner
     # and copy and update version
     # repeat this until happy, editing minCheck as needed
     # output in check/joiner.out
     cd /hive/data/genomes/mm10/bed/gencodeVM7
     make  -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck
 
 #########################################################################
 # DBSNP 142 / SNP142 (DONE 2015-11-20 braney)
     # RedMine #15934
     screen -S mm10dbSnp
     mkdir -p /hive/data/outside/dbSNP/142/mouse_mm10
     cd /hive/data/outside/dbSNP/142/mouse_mm10
     # Look at the directory listing of ftp://ftp.ncbi.nih.gov/snp/database/organism_data/
     # to find the subdir name to use as orgDir below (mouse_10090 in this case).
     # Then click into that directory and look for file names like
     #    b(1[0-9][0-9])_
     # -- use the first num for build setting in config.ra
     # The buildAssembly setting in config.ra is empty because dbSNP stopped including
     # that in file names.
     cat > config.ra <<EOF
 db mm10
 orgDir mouse_10090
 build 142
 buildAssembly
 refAssemblyLabel GRCm38.p2
 ncbiAssemblyReportFile GCF_000001635.22.assembly.txt
 ignoreDbSnpContigsFile dbSnpContigsNotInUcsc.txt
 liftUp suggested.lft
 EOF
 
 #actually ran the script a few times to get the above config.ra with values suggested
 
     ~/kent/src/hg/utils/automation/doDbSnp.pl config.ra >& do.log & tail -f do.log
     tail -f do.log
 # *** All done!
 
 
 ##############################################################################
 # FILTER SNP142 (DONE 2015-11-21 braney)
    cd /hive/data/outside/dbSNP/142/mouse_mm10
    zcat snp142.bed.gz \
    | ~/kent/src/hg/utils/automation/categorizeSnps.pl
 #Mult:     3276456
 #Common:   8213470
 #Flagged:  0
 #leftover: 70731318
 
    foreach f ({Mult,Common}.bed.gz)
      mv $f snp142$f
    end
    # Load tables
    foreach subset (Mult Common)
      hgLoadBed -tab -onServer -tmpDir=/data/tmp -allowStartEqualEnd -renameSqlTable \
        mm10 snp142$subset -sqlTable=snp142.sql snp142$subset.bed.gz
    end
 
 
 ##############################################################################
 # DBSNP CODING ANNOTATIONS (142) (DONE 2015-11-21 braney)
    # work in the dbSNP 142 build directory (mouse_mm10, with an underscore,
    # as created by the mkdir in the DBSNP 142 section)
    cd /hive/data/outside/dbSNP/142/mouse_mm10
    # ncbiFuncAnnotations.txt has NCBI coords: 0-based, fully closed.
    # For anything except an insertion (0 bases between flanks),
    # we need to add 1 to the end coord.  For an insertion, we need
    # to add 1 to the start coord.  Make a hash of the insertion IDs,
    # then look up each ID in ncbiFuncAnnotations.txt to tell which
    # transform to apply.
    # Note: sort -u with the keys below is too restrictive -- we need full line uniq.
    zcat ncbiFuncAnnotations.txt.gz \
    | perl -we 'open($IDS, "zcat ncbiFuncInsertions.ctg.bed.gz |") || die "ids: $!"; \
              while (<$IDS>) { chomp; $ids{$_} = 1; } \
              close($IDS); \
              %coding = (2=>1, 3=>1, 4=>1, 8=>1, 9=>1, 41=>1, 42=>1, 43=>1, 44=>1, 45=>1); \
              while (<>) { \
                chomp;  @w = split("\t"); # id, ctg, start, end, ... \
                next unless $coding{$w[5]}; \
                $bed4 = join("\t", $w[1], $w[2], $w[3], $w[0]); \
                if (exists $ids{$bed4} && $w[3] == $w[2]+1) { \
                  $w[2]++; # 2-base insertions: increment start coord \
                } else { \
                  $w[3]++; # increment end coord to get half-open \
                } \
                print join("\t", @w) . "\n"; \
              }' \
    | sort -k1n,1n -k2,2 -k3n,3n -k5,5 -k6n,6n \
    | uniq \
      > ncbiCodingAnnotations.txt
    wc -l ncbiCodingAnnotations.txt
 #3854299 ncbiCodingAnnotations.txt
    # How many & what kinds of function types?
    cut -f 6 ncbiCodingAnnotations.txt \
    | sort -n | uniq -c
 # 1258578 3 (coding-synon)
 # 1882006 8 (cds-reference -- ignored)
 #    4717 41  (nonsense)
 #  624020 42  (missense)
 #     745 43  (stop-loss)
 #   14806 44  (frameshift)
 #   69427 45  (cds-indel)
 
 
    # In b142, the functional annotations include non-coding (frame = NULL),
    # which we'll exclude here because this is supposed to be just coding stuff...
    # probably need to update how we show dbSNP's func annos anyway, e.g.
    # it is a shame that we toss out codon number and transcript offset.
    # Gather up multiple annotation lines into one line per {snp, gene, frame}:
    # Collapse ncbiCodingAnnotations.txt (already sorted, so rows of a group are
    # adjacent) into one output line per {rsId, contig, start, transcript, frame}.
    # Rows with fxn == 8 whose frame, aa and codon are all "NULL" are skipped.
    # Output columns (tab-separated):
    #   contig, start, end, rs<ID>, transcript, frame, number of rows in group,
    #   then comma-separated lists of function codes, nucleotides, codons, aas.
    # The fxn == 8 (cds-reference) row of a group is held in $refRow and
    # prepended to each list, so it always comes first regardless of input order.
    # "NULL" is rewritten to "n/a" on output.  @rows is reset but never filled.
    # The result is piped through liftUp with suggested.lft (presumably contig ->
    # chromosome coordinates; confirm against the doDbSnp.pl run above).
    perl -e  'while (<>) { chomp; \
                my ($rsId, $ctg, $s, $e, $txId, $fxn, $frm, $nt, $aa, $codon) = split("\t"); \
                next if ($fxn == 8 && ($frm eq "NULL" && $aa eq "NULL" && $codon eq "NULL")); \
                if (defined $lastRs && \
                    ($lastRs != $rsId || $lastCtg ne $ctg || $lastS != $s || \
                     $lastTx ne $txId || $lastFrm ne $frm)) { \
                  if (defined $refRow) { \
                    $fxns = "$refRow->[0],$fxns";  $nts = "$refRow->[1],$nts"; \
                    $aas = "$refRow->[2],$aas";    $codons = "$refRow->[3],$codons"; \
                  } \
                  $lineOut = "$lastCtg\t$lastS\t$lastE\trs$lastRs\t$lastTx\t$lastFrm\t" . \
                        "$count\t$fxns\t$nts\t$codons\t$aas\n"; \
                  $lineOut =~ s@NULL@n/a@g; \
                  print $lineOut; \
                  $refRow = undef;  @rows = ();  ($count, $fxns, $nts, $codons, $aas) = (); \
                } \
                ($lastRs, $lastCtg, $lastS, $lastE, $lastTx, $lastFrm) = \
                    ($rsId, $ctg, $s, $e, $txId, $frm); \
                $count++; \
                if ($fxn == 8) { \
                  $refRow = [$fxn, $nt, $aa, $codon]; \
                } else { \
                 $fxns .= "$fxn,";  $nts .= "$nt,";  $aas .= "$aa,";  $codons .= "$codon,"; \
                } \
              } \
              if (defined $refRow) { \
                $fxns = "$refRow->[0],$fxns";  $nts = "$refRow->[1],$nts"; \
                $aas = "$refRow->[2],$aas";    $codons = "$refRow->[3],$codons"; \
              } \
              $lineOut = "$lastCtg\t$lastS\t$lastE\trs$lastRs\t$lastTx\t$lastFrm\t" . \
                    "$count\t$fxns\t$nts\t$codons\t$aas\n"; \
              $lineOut =~ s@NULL@n/a@g; \
              print $lineOut;' \
      ncbiCodingAnnotations.txt \
    | liftUp snp142CodingDbSnp.bed suggested.lft warn stdin
    hgLoadBed mm10 snp142CodingDbSnp -sqlTable=$HOME/kent/src/hg/lib/snp125Coding.sql \
      -renameSqlTable -tab -notItemRgb -allowStartEqualEnd \
      snp142CodingDbSnp.bed
 #Read 1951211 elements of size 11 from snp142CodingDbSnp.bed
 ##############################################################################
 # SNPMASKED SEQUENCE FOR SNP142 (DONE 2015-11-21 braney)
     mkdir /hive/data/genomes/mm10/snp142Mask
     cd /hive/data/genomes/mm10/snp142Mask
     # Identify rsIds with various problems -- we will exclude those.
     zcat /hive/data/outside/dbSNP/142/mouse_mm10/snp142.bed.gz \
     | awk '$18 ~ /MultipleAlignments|ObservedTooLong|ObservedWrongFormat|ObservedMismatch|MixedObserved/ {print $4;}' \
       | sort -u \
       > snp142ExcludeRsIds.txt
     zcat /hive/data/outside/dbSNP/142/mouse_mm10/snp142.bed.gz \
     | grep -vFwf snp142ExcludeRsIds.txt \
       > snp142Cleaned.bed
     wc -l snp142Cleaned.bed
 #76837455 snp142Cleaned.bed
 
     # Substitutions:
     mkdir substitutions
     snpMaskSingle snp142Cleaned.bed /hive/data/genomes/mm10/mm10.2bit stdout diffObserved.txt \
     | faSplit byname stdin substitutions/
 #Masked 66976283 snps in 66976283 out of 2729124706 genomic bases
 # /hive/data/genomes/mm10/mm10.2bit has 2730871774 total bases,
 #but the total number of bases in sequences for which we masked snps is 2729124706 (difference is 1747068)
 
     # Check that 1747068 is the total #bases in sequences with nothing in snp142Cleaned:
     grep -Fw single snp142Cleaned.bed | cut -f 1 | uniq > /data/tmp/1
     grep -vwf /data/tmp/1 ../chrom.sizes \
     | awk 'BEGIN {TOTAL = 0;}  {TOTAL += $2;}  END {printf "%d\n", TOTAL;}'
 #1726860
     calc 1747068-1726860
 #20208
 
     # warnings about differing observed strings at same base position:
     wc -l diffObserved.txt
 #2 diffObserved.txt
     # peanuts!  good.
     # Make sure that sizes are identical, first diffs are normal -> IUPAC,
     # and first diffs' case is preserved:
     mkdir tmpFa
     cd tmpFa
     twoBitToFa /hive/data/genomes/mm10/mm10.2bit stdout | faSplit byname stdin tmpFa
     cd ..
     foreach f (substitutions/chr*.fa.gz)
       faCmp $f tmpFa/`basename $f subst.fa.gz`fa |& grep -v "that differ"
     end
 #chr1 in substitutions/chr1.fa differs from chr1 at ../1/chr1.fa at base 10107 (y != c)
 #chr10 in substitutions/chr10.fa differs from chr10 at ../10/chr10.fa at base 60493 (R != A)
 #...
 #(output OK -- ambiguous bases replacing [agct] at SNP positions)
     foreach f (substitutions/chr*.fa)
       echo $f:t:r
       mv $f $f:r.subst.fa
     end
     # Fire off a bunch of gzip jobs in parallel:
     ls -1 substitutions/*.fa | split -l 5
     foreach f (x??)
       gzip `cat $f` &
     end
     # Wait for backgrounded gzip jobs to complete
     rm x??
 
     # Insertions & deletions not done.  To date we have only offered substs for download.
     # If there is user demand, use template from snp131 above.
 
     # Clean up and prepare for download:
     gzip snp142Cleaned.bed &
     foreach d (substitutions)
       pushd $d
         md5sum *.gz > md5sum.txt
         cp /hive/data/genomes/hg38/snp142Mask/$d/README.txt .
       popd
     end
     # Edit the README.txt.
 
     # Create download links on hgwdev.
     mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/snp142Mask
     ln -s /hive/data/genomes/mm10/snp142Mask/substitutions/* \
       /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/snp142Mask/
 
 ##############################################################################
 # LASTZ Rhesus rheMac8 (DONE - 2016-02-10 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzRheMac8.2016-02-10
     cd /hive/data/genomes/mm10/bed/lastzRheMac8.2016-02-10
 
     printf '# rhesus vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Rhesus RheMac8
 SEQ2_DIR=/hive/data/genomes/rheMac8/rheMac8.2bit
 SEQ2_LEN=/hive/data/genomes/rheMac8/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LAP=0
 SEQ2_LIMIT=500
 
 BASE=/hive/data/genomes/mm10/bed/lastzRheMac8.2016-02-10
 TMPDIR=/dev/shm
 ' > DEF
 
     #	establish a screen to control this job
     screen -S mm10RheMac8
     time (doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
     #   real    239m18.376s
 
     cat fb.mm10.chainRheMac8Link.txt
     #	918841829 bases of 2652783500 (34.637%) in intersection
 
     time (doRecipBest.pl -buildDir=`pwd` mm10 rheMac8) > rbest.log 2>&1 &
     # real    421m31.807s
 
     mkdir /hive/data/genomes/rheMac8/bed/blastz.mm10.swap
     cd /hive/data/genomes/rheMac8/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzRheMac8.2016-02-10/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
     #	real    110m33.219s
 
     cat fb.rheMac8.chainMm10Link.txt
     #	917131079 bases of 3142093174 (29.189%) in intersection
 
     time (doRecipBest.pl -buildDir=`pwd` rheMac8 mm10) > rbest.log 2>&1
     # real    409m8.252s
 
 ##############################################################################
 # Patents (26 Feb 2016, Max)
 # convert SAM to BED
 cd /hive/data/genomes/hg19/bed/patents/data/
 samtools view -S -t ensGenomeMm10/Mus_musculus.GRCm38.75.dna.toplevel.fa.fai Mus_musculus.GRCm38.75.s90c50.sam -h > mm10.sam
 # convert to bed
 function sam2psl_pierre() { java -Dfile.encoding=UTF8 -Xmx500m    -cp "/cluster/bin/jvarkit/htsjdk-1.133/dist/commons-jexl-2.1.1.jar:/cluster/bin/jvarkit/htsjdk-1.133/dist/commons-logging-1.1.1.jar:/cluster/bin/jvarkit/htsjdk-1.133/dist/htsjdk-1.133.jar:/cluster/bin/jvarkit/htsjdk-1.133/dist/snappy-java-1.0.3-rc3.jar:/cluster/bin/jvarkit/dist-1.133/sam2psl.jar" com.github.lindenb.jvarkit.tools.misc.SamToPsl $*; }
 sam2psl_pierre mm10.sam 2> /dev/null > mm10.psl
 pslToBed mm10.psl mm10.bed
 # strip the BAM flag field from the BED name
 # careful: this line includes tab characters
 sed -ri 's/_(16|0)	/	/g' mm10.bed
 
 # now join meta with bed file
 cd ../mm10
 # sort by name (column 4) so the BED can be joined against the summary table.
 # mm10.bed was written in ../data above, and the join below reads
 # ../data/mm10.s4.bed, so sort there rather than in this directory.
 # The -S10G parameter is only supported in newer sort versions;
 # if it complains, just remove it. It will just take longer.
 time sort -k4,4 -S10G --parallel=20 ../data/mm10.bed > ../data/mm10.s4.bed
 join -t $'\t' -1 4 -2 1 ../data/mm10.s4.bed ../data/seqAndPatentSummary.tab -o '1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 1.10 1.11 1.12 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 2.10 2.11 2.12' | patSeqFilterBulkAndAnnotate ../data/htPatents.txt patBulk.bed patNonBulk.bed -c ../data/seqCounts.tab
 bedSort patNonBulk.bed patNonBulk.bed
 bedSort patBulk.bed patBulk.bed
 bedToBigBed patNonBulk.bed /cluster/data/genomes/mm10/chrom.sizes patNonBulk.bb -tab -as=../patSummary.as -type=bed12+
 bedToBigBed patBulk.bed /cluster/data/genomes/mm10/chrom.sizes patBulk.bb -tab -as=../patSummary.as -type=bed12+
 # NOTE(review): these two lines link into the hg19 database and /gbdb/hg19
 # although the bigBeds above were built against mm10 chrom.sizes -- looks like
 # a copy/paste from the hg19 doc; confirm whether mm10 and /gbdb/mm10/bbi/
 # were intended before re-running.
 hgBbiDbLink hg19 patBulk /gbdb/hg19/bbi/patBulk.bb
 hgBbiDbLink hg19 patNonBulk /gbdb/hg19/bbi/patNonBulk.bb
 
 #########################################################################
 # 2016-03-14: import of UCSC GENCODE group processing of GENCODE VM8 (markd)
 # not to be pushed to RR
 
     # download files
     mkdir -p /hive/data/genomes/mm10/bed/gencodeVM8
     cd /hive/data/genomes/mm10/bed/gencodeVM8
 
     # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions
 
     # download, build and load tables
     (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
 
     # compare tables from previous release to see if number changed makes
     # sense.
         make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk cmpRelease >gencode-cmp.tsv
 
     ## Copy and update trackDb files from previous release.
     ## Change version and use lower priority so it sorts to top of
     ## super track page.  Follow instructions in ra file to ensure
     ## filters are correct.
     cd kent/src/hg/makeDb/trackDb
     cp mouse/mm10/wgEncodeGencodeVM7.ra mouse/mm10/wgEncodeGencodeVM8.ra
     cp mouse/mm10/wgEncodeGencodeVM7.html mouse/mm10/wgEncodeGencodeVM8.html
 
     # edit these plus mouse/mm10/trackDb.wgEncode.ra
     # - set priorities in wgEncodeGencodeVM8.ra tracks so newest shows up first
     #     priority - set to previous version priority minus 0.001
     #     searchPriority - set each to previous minus 0.001
     # - make current track default to pack and hide previous [ONLY if it's going to be pushed]
     #     superTrack wgEncodeGencodeSuper pack
     # - Update wgEncodeGencodeSuper.html to describe new release and to
     #   pick up other updates.
 
     # update all.joiner and validate
     # look for the last section `begin Gencode V??' in all.joiner
     # and copy and update version
     # repeat this until happy, editing minCheck as needed
     # output in check/joiner.out
     cd /hive/data/genomes/mm10/bed/gencodeVM8
     make  -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck
 
 #########################################################################
 # 2016-03-14: import of UCSC GENCODE group processing of GENCODE VM9 (markd)
 
     # download files
     mkdir -p /hive/data/genomes/mm10/bed/gencodeVM9
     cd /hive/data/genomes/mm10/bed/gencodeVM9
 
     # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions
 
     # download, build and load tables
     (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
 
     # compare tables from previous release to see if number changed makes
     # sense.
         make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk cmpRelease >gencode-cmp.tsv
 
     ## Copy and update trackDb files from previous release.
     ## Change version and use lower priority so it sorts to top of
     ## super track page.  Follow instructions in ra file to ensure
     ## filters are correct.
     cd kent/src/hg/makeDb/trackDb
     cp mouse/mm10/wgEncodeGencodeVM8.ra mouse/mm10/wgEncodeGencodeVM9.ra
     cp mouse/mm10/wgEncodeGencodeVM8.html mouse/mm10/wgEncodeGencodeVM9.html
 
     # edit these plus mouse/mm10/trackDb.wgEncode.ra
     # - set priorities in wgEncodeGencodeVM9.ra tracks so newest shows up first
     #     priority - set to previous version priority minus 0.001
     #     searchPriority - set each to previous minus 0.001
     # - make current track default to pack and hide previous [ONLY if it's going to be pushed]
     #     superTrack wgEncodeGencodeSuper pack
     # - Update wgEncodeGencodeSuper.html to describe new release and to
     #   pick up other updates.
 
     # update all.joiner and validate
     # look for the last section `begin Gencode V??' in all.joiner
     # and copy and update version
     # repeat this until happy, editing minCheck as needed
     # output in check/joiner.out
     cd /hive/data/genomes/mm10/bed/gencodeVM9
     make  -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck
 
 ##############################################################################
 # LASTZ Rat rn6 (DONE - 2016-04-09 - Jonathan)
     mkdir /hive/data/genomes/mm10/bed/lastzRn6.2016-04-07
     cd /hive/data/genomes/mm10/bed/lastzRn6.2016-04-07
 
     printf '# rat vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Rat Rn6
 SEQ2_DIR=/hive/data/genomes/rn6/rn6.2bit
 SEQ2_LEN=/hive/data/genomes/rn6/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LAP=0
 SEQ2_LIMIT=500
 
 BASE=/hive/data/genomes/mm10/bed/lastzRn6.2016-04-07
 TMPDIR=/dev/shm
 ' > DEF
 
     #	establish a screen to control this job
     screen -S mm10Rn6
     time (doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=5000 -chainLinearGap=medium) > do.log 2>&1
     #   real    501m43.495s
 
     cat fb.mm10.chainRn6Link.txt
     #	1880453869 bases of 2652783500 (70.886%) in intersection
 
     time (doRecipBest.pl -buildDir=`pwd` mm10 rn6) > rbest.log 2>&1 &
     # real    766m50.090s
 
     mkdir /hive/data/genomes/rn6/bed/blastz.mm10.swap
     cd /hive/data/genomes/rn6/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzRn6.2016-04-07/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=5000 -chainLinearGap=medium) > swap.log 2>&1
     #	real    234m59.393s
 
     cat fb.rn6.chainMm10Link.txt
     #   1938597957 bases of 2729860805 (71.015%) in intersection
 
     time (doRecipBest.pl -buildDir=`pwd` rn6 mm10) > rbest.log 2>&1
     # real    882m38.624s
 
 #########################################################################
 ## 4-Way Multiz for UCSC Genes construction (TBD - 2016-04-06 - Jonathan)
 # mm10, hg38, canFam3, rn6
 
     mkdir /hive/data/genomes/mm10/bed/multiz4way
     cd /hive/data/genomes/mm10/bed/multiz4way
 
     # extract a tree for the 4 we need
     /cluster/bin/phast/tree_doctor \
 	--prune-all-but hg38,mm10,canFam3,rn6 $HOME/kent/src/hg/utils/phyloTrees/191way.nh > 4way.nh
 
     # this looks like:
     # ((hg38:0.145908,(mm10:0.084509,rn6:0.091589):0.271974):0.020593,canFam3:0.165928);
 
 
     # Use this specification in the phyloGif tool:
     #	http://genome.ucsc.edu/cgi-bin/phyloGif
     #	to obtain a gif image for htdocs/images/phylo/mm10_4way.gif
 
     /cluster/bin/phast/all_dists 4way.nh > 4way.distances.txt
     #	Use this output to create the table below
     grep -i mm10 4way.distances.txt | sort -k3,3n
 #
 #	If you can fill in all the numbers in this table, you are ready for
 #	the multiple alignment procedure
 #
 #                         featureBits chainLink measures
 #                                        chainMm10Link   chain    linearGap
 #    distance                      on mm10    on other   minScore
 #  1  0.176098 - rat rn6        (% 70.886) (% 71.015)       5000     medium
 #  2  0.502391 - human hg38     (% 35.372) (% 31.653)       3000     medium
 #  3  0.543004 - dog canFam3    (% 29.144) (% 31.624)       3000     medium
 
     #   using the syntenic nets
     cd /cluster/data/mm10/bed/multiz4way
     mkdir mafLinks
     cd mafLinks
     mkdir rn6 canFam3 hg38
 
     for D in hg38 canFam3 rn6
 do
     cd $D
     ln -s ../../../lastz.${D}/mafSynNet/*.maf.gz ./
     cd ..
 done
 
     #   determine what is the newest version of multiz and use that
     cd /hive/data/genomes/mm10/bed/multiz4way
     mkdir penn
     cp -p /cluster/bin/penn/multiz.2009-01-21_patched/multiz penn
     cp -p /cluster/bin/penn/multiz.2009-01-21_patched/maf_project penn
     cp -p /cluster/bin/penn/multiz.2009-01-21_patched/autoMZ penn
 
     # the autoMultiz cluster run
     ssh ku
     cd /hive/data/genomes/mm10/bed/multiz4way
 
     # create species list and stripped down tree for autoMZ
     sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
     4way.nh > tmp.nh
     echo `cat tmp.nh` | sed 's/ //g; s/,/ /g' > tree.nh
     sed 's/[()]//g; s/,/ /g' tree.nh > species.lst
 
     mkdir run maf
     cd run
 
     #   NOTE: you need to set the db and multiz dirname properly in this
     #   script
     # autoMultiz is the per-chromosome cluster job script:
     #   $1 = chromosome name (root of the pairwise maf file names)
     #   $2 = path of the multiple-alignment maf to write
     # For each species in species.lst other than mm10 it takes
     # mafLinks/<species>/<chrom>.maf.gz (or the uncompressed .maf) if present,
     # otherwise writes a header-only maf; it then runs autoMZ over those files
     # in a scratch directory under /dev/shm, copies the resulting <chrom>.maf
     # to $2 and removes the scratch directory.
     cat > autoMultiz << '_EOF_'
 #!/bin/csh -ef
 set db = mm10
 set c = $1
 set maf = $2
 set binDir = /hive/data/genomes/mm10/bed/multiz4way/penn
 set tmp = /dev/shm/$db/multiz.$c
 set pairs = /hive/data/genomes/mm10/bed/multiz4way/mafLinks
 rm -fr $tmp
 mkdir -p $tmp
 cp ../{tree.nh,species.lst} $tmp
 pushd $tmp
 foreach s (`cat species.lst`)
     set in = $pairs/$s/$c.maf
     set out = $db.$s.sing.maf
     if ($s == $db) then
     continue
     endif
     if (-e $in.gz) then
     zcat $in.gz > $out
     else if (-e $in) then
     cp $in $out
     else
     echo "##maf version=1 scoring=autoMZ" > $out
     endif
 end
 set path = ($binDir $path); rehash
 $binDir/autoMZ + T=$tmp E=$db "`cat tree.nh`" $db.*.sing.maf $c.maf
 popd
 cp $tmp/$c.maf $maf
 rm -fr $tmp
 '_EOF_'
     # << happy emacs
     chmod +x autoMultiz
 
 cat  << '_EOF_' > template
 #LOOP
 ./autoMultiz $(root1) {check out line+ /hive/data/genomes/mm10/bed/multiz4way/maf/$(root1).maf}
 #ENDLOOP
 '_EOF_'
     # << happy emacs
 
     cut -f1 /cluster/data/mm10/chrom.sizes > chrom.lst
     gensub2 chrom.lst single template jobList
     para create jobList
     # 66 jobs
     para try ... check ... push ... etc ...
 
 # Completed: 66 of 66 jobs
 # CPU time in finished jobs:      34495s     574.91m     9.58h    0.40d  0.001 y
 # IO & Wait Time:                   826s      13.77m     0.23h    0.01d  0.000 y
 # Average job time:                 535s       8.92m     0.15h    0.01d
 # Longest finished job:            2765s      46.08m     0.77h    0.03d
 # Submission to last job:          2776s      46.27m     0.77h    0.03d
 
     #   combine results into a single file for loading and gbdb reference
     cd /hive/data/genomes/mm10/bed/multiz4way
     grep "^#" maf/chr1_GL456210_random.maf | grep -v "eof maf" > multiz4way.maf
     grep -h -v "^#" maf/*.maf >> multiz4way.maf
     grep "^#" maf/chr1_GL456210_random.maf | grep "eof maf" >> multiz4way.maf
 
     #	makes a 6.5 Gb file:
     #   -rw-rw-r-- 1 6928752890 Apr 12 10:18 multiz4way.maf
 
     # Load into database
     ssh hgwdev
     cd /hive/data/genomes/mm10/bed/multiz4way
     mkdir /gbdb/mm10/multiz4way
     ln -s /hive/data/genomes/mm10/bed/multiz4way/multiz4way.maf \
 	/gbdb/mm10/multiz4way
     #	the hgLoadMaf generates huge tmp files, locate them in /dev/shm
     cd /dev/shm
     time nice -n +19 hgLoadMaf mm10 multiz4way
     #   Loaded 5300158 mafs in 1 files from /gbdb/mm10/multiz4way
     #   real    1m41.656s
 
     cd /hive/data/genomes/mm10/bed/multiz4way
     time (cat /gbdb/mm10/multiz4way/*.maf \
         | hgLoadMafSummary -verbose=2 -minSize=10000 \
 	-mergeGap=500 -maxSize=50000 mm10 multiz4waySummary stdin)
     # Created 1310955 summary blocks from 9774995 components and 5300158 mafs
     # real    2m27.913s
     mv /dev/shm/multiz4way.tab .
 # -rw-rw-r-- 1 277435502 Apr 12 12:11 multiz4way.tab
 # -rw-rw-r-- 1  59271980 Apr 12 12:16 multiz4waySummary.tab
     wc -l multiz4way*.tab
     # 5300158 multiz4way.tab
     # 1310955 multiz4waySummary.tab
     # 6611113 total
 
 #########################################################################
 # LASTZ mouse/mm10 vs. chicken/galGal5 - (DONE - 2016-04-20 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzGalGal5.2016-04-20
     cd /hive/data/genomes/mm10/bed/lastzGalGal5.2016-04-20
 
     printf "# Mouse vs. chicken
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 #      A    C    G    T
 #     91  -90  -25 -100
 #    -90  100 -100  -25
 #    -25 -100  100  -90
 #   -100  -25  -90  91
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: chicken galGal5
 SEQ2_DIR=/hive/data/genomes/galGal5/galGal5.2bit
 SEQ2_LEN=/hive/data/genomes/galGal5/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LAP=0
 SEQ2_LIMIT=50
 
 BASE=/hive/data/genomes/mm10/bed/lastzGalGal5.2016-04-20
 TMPDIR=/dev/shm
 " > DEF
 
     time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
         -chainMinScore=5000 -chainLinearGap=loose \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > do.log 2>&1
     # real    112m25.946s
 
     cat fb.mm10.chainGalGal5Link.txt
     # 102343350 bases of 2652783500 (3.858%) in intersection
 
     time (doRecipBest.pl -buildDir=`pwd` mm10 galGal5) > rbest.log 2>&1 &
     # real    170m24.948s
 
     # and for the swap:
     mkdir /hive/data/genomes/galGal5/bed/blastz.mm10.swap
     cd /hive/data/genomes/galGal5/bed/blastz.mm10.swap
 
     time (doBlastzChainNet.pl -verbose=2 \
       /hive/data/genomes/mm10/bed/lastzGalGal5.2016-04-20/DEF \
         -swap -chainMinScore=5000 -chainLinearGap=loose \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > swap.log 2>&1
     #  real    12m17.175s
 
     cat fb.galGal5.chainMm10Link.txt
     # 95753452 bases of 1218501075 (7.858%) in intersection
 
     time (doRecipBest.pl -buildDir=`pwd` galGal5 mm10) > rbest.log 2>&1
     # real    138m37.610s
 
 #########################################################################
 # LASTZ mouse/mm10 vs. Malayan flying lemur/galVar1 - (DONE - 2016-04-26 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzGalVar1.2016-04-26
     cd /hive/data/genomes/mm10/bed/lastzGalVar1.2016-04-26
 
     printf "# mouse vs Malayan flying lemur
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 BLASTZ_O=400
 BLASTZ_E=30
 BLASTZ_M=254
 # default BLASTZ_Q score matrix:
 #       A     C     G     T
 # A    91  -114   -31  -123
 # C  -114   100  -125   -31
 # G   -31  -125   100  -114
 # T  -123   -31  -114    91
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Malayan flying lemur galVar1
 SEQ2_DIR=/hive/data/genomes/galVar1/galVar1.2bit
 SEQ2_LEN=/hive/data/genomes/galVar1/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LIMIT=400
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzGalVar1.2016-04-26
 TMPDIR=/dev/shm
 " > DEF
     # << happy emacs
 
     time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
         -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > do.log 2>&1
     # real    340m23.106s
 
     cat fb.mm10.chainGalVar1Link.txt
     # 944876157 bases of 2652783500 (35.618%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 galVar1) \
       > rbest.log 2>&1 &
     # real    694m27.183s
 
     # and for the swap:
     mkdir /hive/data/genomes/galVar1/bed/blastz.mm10.swap
     cd /hive/data/genomes/galVar1/bed/blastz.mm10.swap
 
     time (doBlastzChainNet.pl -verbose=2 \
       /hive/data/genomes/mm10/bed/lastzGalVar1.2016-04-26/DEF \
         -swap -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > swap.log 2>&1
     #  real    173m45.678s
 
     cat fb.galVar1.chainMm10Link.txt
     # 1008272821 bases of 2802917674 (35.972%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` galVar1 mm10) \
        > rbest.log 2>&1
     # real    856m16.458s
 
 #########################################################################
 # lastz Chinese softshell turtle pelSin1 (DONE - 2016-05-10 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10PelSin1
     mkdir /hive/data/genomes/mm10/bed/lastzPelSin1.2016-05-10
     cd /hive/data/genomes/mm10/bed/lastzPelSin1.2016-05-10
 
     printf '# Mouse vs. Chinese softshell turtle
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Chinese softshell turtle pelSin1
 SEQ2_DIR=/hive/data/genomes/pelSin1/pelSin1.2bit
 SEQ2_LEN=/hive/data/genomes/pelSin1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=100
 
 BASE=/hive/data/genomes/mm10/bed/lastzPelSin1.2016-05-10
 TMPDIR=/dev/shm
 ' > DEF
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # when not present, SEQ2_LIMIT is a default 100
      time (doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
         -chainMinScore=5000 -chainLinearGap=loose) > do.log 2>&1
     # real    156m43.981s
 
     cat fb.mm10.chainPelSin1Link.txt
     #	113023930 bases of 2652783500 (4.261%) in intersection
 
     # forgot to include syntenicNet:
      time (doBlastzChainNet.pl -verbose=2 \
         -continue=syntenicNet -syntenicNet `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
         -chainMinScore=5000 -chainLinearGap=loose) > synNet.log 2>&1 &
     # real    2m9.196s
 
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 pelSin1) \
        > rbest.log 2>&1 &
     # real    221m37.947s
 
     #	and for the swap
     mkdir /hive/data/genomes/pelSin1/bed/blastz.mm10.swap
     cd /hive/data/genomes/pelSin1/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzPelSin1.2016-05-10/DEF \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
         -syntenicNet -swap -chainMinScore=5000 -chainLinearGap=loose) \
             > swap.log 2>&1
     #	real    16m3.703s
 
     cat  fb.pelSin1.chainMm10Link.txt
     #	102485355 bases of 2106639384 (4.865%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` pelSin1 mm10) \
        > rbest.log 2>&1
     # real    198m33.448s
 
 #########################################################################
 # LASTZ mouse/mm10 Bonobo/panPan2 - (DONE - 2016-05-24 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzPanPan2.2016-05-24
     cd /hive/data/genomes/mm10/bed/lastzPanPan2.2016-05-24
 
     printf '# mouse vs bonobo
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 BLASTZ_M=254
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 # the default matrix is:
 #       A     C     G     T
 # A    91  -114   -31  -123
 # C  -114   100  -125   -31
 # G   -31  -125   100  -114
 # T  -123   -31  -114    91
 
 # QUERY: bonobo panPan2
 SEQ2_DIR=/hive/data/genomes/panPan2/panPan2.2bit
 SEQ2_LEN=/hive/data/genomes/panPan2/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LIMIT=30
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzPanPan2.2016-05-24
 TMPDIR=/dev/shm
 ' > DEF
 
     time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
         -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > do.log 2>&1
     # real    360m9.534s
 
     cat fb.mm10.chainPanPan2Link.txt
     # 928638440 bases of 2652783500 (35.006%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 panPan2) \
       > rbest.log 2>&1 &
     # real    765m26.648s
 
     # and for the swap:
     mkdir /hive/data/genomes/panPan2/bed/blastz.mm10.swap
     cd /hive/data/genomes/panPan2/bed/blastz.mm10.swap
 
     time (doBlastzChainNet.pl -verbose=2 \
       /hive/data/genomes/mm10/bed/lastzPanPan2.2016-05-24/DEF \
         -swap -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > swap.log 2>&1
     #  real    106m54.032s
 
     cat fb.panPan2.chainMm10Link.txt
     # 911279510 bases of 2725937399 (33.430%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` panPan2 mm10) \
        > rbest.log 2>&1
     # real    620m0.039s
 
 #########################################################################
 2016-07-22: import of UCSC GENCODE group processing of GENCODE VM10 (markd)
     # will not be pushed to the RR.
 
     # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions
 
     # download, build and load tables
     mkdir -p /hive/data/genomes/mm10/bed/gencodeVM10
     pushd /hive/data/genomes/mm10/bed/gencodeVM10
     (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
 
     # compare tables from previous release to see if the numbers that changed
     # make sense.  Results are in gencode-cmp.tsv
 
     ## Copy and update trackDb files from previous release.
     ## Change version and use lower priority so it sorts to top of
     ## super track page.  Follow instructions in ra file to ensure
     ## filters are correct.
     cd kent/src/hg/makeDb/trackDb
     cp mouse/mm10/wgEncodeGencodeVM9.ra mouse/mm10/wgEncodeGencodeVM10.ra
     cp mouse/mm10/wgEncodeGencodeVM9.html mouse/mm10/wgEncodeGencodeVM10.html
 
     # edit these plus mouse/mm10/trackDb.wgEncode.ra
     # - set priorities in wgEncodeGencodeVM10.ra tracks so newest shows up first
     #     priority - set to previous version priority minus 0.001
     #     searchPriority - set each to previous minus 0.001
     # - make current track default to pack and hide previous [ONLY if it's going to be pushed]
     #     superTrack wgEncodeGencodeSuper pack
     # - Update wgEncodeGencodeSuper.html to describe new release and to
     #   pick up other updates.  [ONLY if it's going to be pushed]
 
     # update all.joiner and validate
     # look for the last section `begin Gencode V??' in all.joiner
     # and copy and update version
     # repeat this until happy, editing minCheck as needed
     # output in check/joiner.out
     cd /hive/data/genomes/mm10/bed/gencodeVM10
     make  -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck
 
 #########################################################################
 # LASTZ mouse/mm10 Chimp/panTro5 - (DONE - 2016-08-03 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzPanTro5.2016-08-03
     cd /hive/data/genomes/mm10/bed/lastzPanTro5.2016-08-03
 
     printf '# mouse vs chimp
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 BLASTZ_M=254
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 SEQ1_LIMIT=10
 # the default matrix is:
 #       A     C     G     T
 # A    91  -114   -31  -123
 # C  -114   100  -125   -31
 # G   -31  -125   100  -114
 # T  -123   -31  -114    91
 
 # QUERY: chimp panTro5
 SEQ2_DIR=/hive/data/genomes/panTro5/panTro5.2bit
 SEQ2_LEN=/hive/data/genomes/panTro5/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LIMIT=100
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzPanTro5.2016-08-03
 TMPDIR=/dev/shm
 ' > DEF
 
     time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
         -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > do.log 2>&1
     #  real    273m27.335s
 
     cat fb.mm10.chainPanTro5Link.txt
     # 935711523 bases of 2652783500 (35.273%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 panTro5) \
       > rbest.log 2>&1 &
     # real    624m28.225s
 
     # and for the swap:
     mkdir /hive/data/genomes/panTro5/bed/blastz.mm10.swap
     cd /hive/data/genomes/panTro5/bed/blastz.mm10.swap
 
     time (doBlastzChainNet.pl -verbose=2 \
       /hive/data/genomes/mm10/bed/lastzPanTro5.2016-08-03/DEF \
         -swap -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > swap.log 2>&1
     #  real    98m32.623s
 
     cat fb.panTro5.chainMm10Link.txt
     # 965636631 bases of 3132620660 (30.825%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` panTro5 mm10) \
        > rbest.log 2>&1
     # real    560m21.432s
 
 #########################################################################
 # Crispr track. See ../crisprTrack/README.txt (2016-09-15 max)
 # Command: doCrispr.sh mm10 ensGene
 ##############################################################################
 
 #########################################################################
 2016-10-27: import of UCSC GENCODE group processing of GENCODE VM11 (markd)
 
     # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions
 
     # download, build and load tables
     mkdir -p /hive/data/genomes/mm10/bed/gencodeVM11
     cd /hive/data/genomes/mm10/bed/gencodeVM11
     (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
 
     # compare tables from previous release to see if the numbers that changed
     # make sense.
         make -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk cmpRelease >gencode-cmp.tsv
 
     ## Copy and update trackDb files from previous release.
     ## Change version and use lower priority so it sorts to top of
     ## super track page.  Follow instructions in ra file to ensure
     ## filters are correct.
     cd kent/src/hg/makeDb/trackDb
     cp mouse/mm10/wgEncodeGencodeVM8.ra mouse/mm10/wgEncodeGencodeVM11.ra
     cp mouse/mm10/wgEncodeGencodeVM8.html mouse/mm10/wgEncodeGencodeVM11.html
 
     # edit these plus mouse/mm10/trackDb.wgEncode.ra
     # - set priorities in wgEncodeGencodeVM11.ra tracks so newest shows up first
     #     priority - set to previous version priority minus 0.001
     #     searchPriority - set each to previous minus 0.001
     # - make current track default to pack and hide previous [ONLY if it's going to be pushed]
     #     superTrack wgEncodeGencodeSuper pack
     # - Update wgEncodeGencodeSuper.html to describe new release and to
     #   pick up other updates.
 
     # update all.joiner and validate
     # look for the last section `begin Gencode V??' in all.joiner
     # and copy and update version
     # repeat this until happy, editing minCheck as needed
     # output in check/joiner.out
     cd /hive/data/genomes/mm10/bed/gencodeVM11
     make  -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck
 
 ##############################################################################
 2016-12-08: import of UCSC GENCODE group processing of GENCODE VM12 (markd)
             Not being pushed to RR
 
     # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions
 
     # download, build and load tables
     mkdir -p /hive/data/genomes/mm10/bed/gencodeVM12
     cd /hive/data/genomes/mm10/bed/gencodeVM12
     (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
 
     ##  gencode-cmp.tsv- check to see if sizes make sense
 
     ## Copy and update trackDb files from previous release.
     ## Change version and use lower priority so it sorts to top of
     ## super track page.  Follow instructions in ra file to ensure
     ## filters are correct.
     cd kent/src/hg/makeDb/trackDb
     cp mouse/mm10/wgEncodeGencodeVM11.ra mouse/mm10/wgEncodeGencodeVM12.ra
     cp mouse/mm10/wgEncodeGencodeVM11.html mouse/mm10/wgEncodeGencodeVM12.html
 
     # edit these plus mouse/mm10/trackDb.ra
     # - set priorities in wgEncodeGencodeVM12.ra tracks so newest shows up first
     #     priority - set to previous version priority minus 0.001
     #     searchPriority - set each to previous minus 0.001
     # - make current track default to pack and hide previous [ONLY if it's going to be pushed]
     #     superTrack wgEncodeGencodeSuper pack
     # - Update wgEncodeGencodeSuper.html to describe new release and to
     #   pick up other updates.
 
     # DID NOT UPDATE all.joiner SINCE NOT BEING PUSHED PUBLIC
     # update all.joiner and validate
     # look for the last section `begin Gencode V??' in all.joiner
     # and copy and update version
     # repeat this until happy, editing minCheck as needed
     # output in check/joiner.out
     cd /hive/data/genomes/mm10/bed/gencodeVM12
     make  -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck
 
 ############################################################################################
 # Mouse strains VCF (DONE - 2016-11-08 - Hiram)
 
     mkdir /hive/data/genomes/mm10/bed/strainsVCF
     cd /hive/data/genomes/mm10/bed/strainsVCF
 
     # download files:
 wget --timestamping \
 ftp://ftp-mouse.sanger.ac.uk/REL-1505-SNPs_Indels/mgp.v5.merged.indels.dbSNP142.normed.vcf.gz.tbi
 
 wget --timestamping \
 ftp://ftp-mouse.sanger.ac.uk/REL-1505-SNPs_Indels/mgp.v5.merged.snps_all.dbSNP142.vcf.gz.tbi
 
 wget --timestamping \
 ftp://ftp-mouse.sanger.ac.uk/REL-1505-SNPs_Indels/mgp.v5.merged.indels.dbSNP142.normed.vcf.gz
 
 wget --timestamping \
 ftp://ftp-mouse.sanger.ac.uk/REL-1505-SNPs_Indels/mgp.v5.merged.snps_all.dbSNP142.vcf.gz
 
     # change to UCSC chrom names:
 
     zcat mgp.v5.merged.snps_all.dbSNP142.vcf.gz \
        | sed -e "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/" \
           > ucscNames.mgp.v5.merged.snps_all.dbSNP142.vcf
 
     # need to fixup the chrom names in the header, extract the header:
     grep "^#" ucscNames.mgp.v5.merged.snps_all.dbSNP142.vcf > original.header.txt
     # copy that and edit it to fixup the names:
     cp original.header.txt ucscNames.header.txt
 
     # extract the lines not in the header
     grep -v "^#" ucscNames.mgp.v5.merged.snps_all.dbSNP142.vcf > ucscNames.notHeader.txt
 
     # put it back together:
     cat ucscNames.header.txt ucscNames.notHeader.txt > ucsc.mgpV5MergedSNPsAlldbSNP142.vcf
 
     # tabix gzip (about 2 hours)
     export name="ucsc.mgpV5MergedSNPsAlldbSNP142.vcf"
     /cluster/bin/tabix-0.2.6/bgzip $name
     /cluster/bin/tabix-0.2.6/tabix -p vcf $name.gz
 
     # symlink to gbdb
     mkdir /gbdb/mm10/mouseStrains
     ln -s `pwd`/ucsc.mgpV5MergedSNPsAlldbSNP142.vcf.gz \
           /gbdb/mm10/mouseStrains/mgpV5MergedSNPsAlldbSNP142.vcf.gz
     ln -s `pwd`/ucsc.mgpV5MergedSNPsAlldbSNP142.vcf.gz.tbi \
           /gbdb/mm10/mouseStrains/mgpV5MergedSNPsAlldbSNP142.vcf.gz.tbi
 
     hgBbiDbLink mm10 strainSNPs /gbdb/mm10/mouseStrains/mgpV5MergedSNPsAlldbSNP142.vcf.gz
 
     # trackDb entry in trackDb/mouse/mm10/trackDb.ra:
 
 track strainSNPs
 shortLabel Mouse SNPs
 longLabel Annotated SNPs from mouse strain comparison analysis
 group varRep
 type vcfTabix
 visibility hide
 hapClusterHeight 78
 
 #############################################################################
 # lastz turkey melGal5 (DONE - 2017-01-19 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10MelGal5
     mkdir /hive/data/genomes/mm10/bed/lastzMelGal5.2017-01-19
     cd /hive/data/genomes/mm10/bed/lastzMelGal5.2017-01-19
 
     printf '# Mouse vs. turkey
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: turkey melGal5
 SEQ2_DIR=/hive/data/genomes/melGal5/melGal5.2bit
 SEQ2_LEN=/hive/data/genomes/melGal5/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=300
 
 BASE=/hive/data/genomes/mm10/bed/lastzMelGal5.2017-01-19
 TMPDIR=/dev/shm
 ' > DEF
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # when not present, SEQ2_LIMIT is a default 100
     time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
         -syntenicNet -chainMinScore=5000 -chainLinearGap=loose) > do.log 2>&1
     #	real    160m46.030s
 
     cat fb.mm10.chainMelGal5Link.txt
     #	94675126 bases of 2652783500 (3.569%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 melGal5) \
            > rbest.log 2>&1 &
     # real    379m35.317s
 
     #	and for the swap
     mkdir /hive/data/genomes/melGal5/bed/blastz.mm10.swap
     cd /hive/data/genomes/melGal5/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 -syntenicNet \
 	/hive/data/genomes/mm10/bed/lastzMelGal5.2017-01-19/DEF \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
         -swap -chainMinScore=5000 -chainLinearGap=loose) > swap.log 2>&1
     #	real    31m37.466s
 
     cat  fb.melGal5.chainMm10Link.txt
     #	81470789 bases of 1093044709 (7.454%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` melGal5 mm10) \
            > rbest.log 2>&1
     # real    356m16.099s
 
 #############################################################################
 # LASTZ mouse/mm10 Pig-tailed macaque/macNem1 - (DONE - 2017-02-28 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzMacNem1.2017-02-28
     cd /hive/data/genomes/mm10/bed/lastzMacNem1.2017-02-28
 
     printf '# mouse vs Pig-tailed macaque
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 BLASTZ_M=254
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 SEQ1_LIMIT=5
 # the default matrix is:
 #       A     C     G     T
 # A    91  -114   -31  -123
 # C  -114   100  -125   -31
 # G   -31  -125   100  -114
 # T  -123   -31  -114    91
 
 # QUERY: Pig-tailed macaque macNem1
 SEQ2_DIR=/hive/data/genomes/macNem1/macNem1.2bit
 SEQ2_LEN=/hive/data/genomes/macNem1/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LIMIT=30
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzMacNem1.2017-02-28
 TMPDIR=/dev/shm
 ' > DEF
 
     time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
         -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > do.log 2>&1
     #  real    370m19.213s
 
     cat fb.mm10.chainMacNem1Link.txt
     # 918083212 bases of 2652783500 (34.608%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 macNem1) \
       > rbest.log 2>&1 &
     # real    344m11.369s
 
     # and for the swap:
     mkdir /hive/data/genomes/macNem1/bed/blastz.mm10.swap
     cd /hive/data/genomes/macNem1/bed/blastz.mm10.swap
 
     time (doBlastzChainNet.pl -verbose=2 \
       /hive/data/genomes/mm10/bed/lastzMacNem1.2017-02-28/DEF \
         -swap -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > swap.log 2>&1
     #  real    65m14.074s
 
     cat fb.macNem1.chainMm10Link.txt
     # 905682728 bases of 2838503083 (31.907%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` macNem1 mm10) \
        > rbest.log 2>&1
     # real    321m2.285s
 
 #############################################################################
 # LASTZ mouse/mm10 Angolan colobus/colAng1 - (DONE - 2017-02-28 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzColAng1.2017-02-28
     cd /hive/data/genomes/mm10/bed/lastzColAng1.2017-02-28
 
     printf '# mouse vs Angolan colobus
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 BLASTZ_M=254
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 SEQ1_LIMIT=5
 # the default matrix is:
 #       A     C     G     T
 # A    91  -114   -31  -123
 # C  -114   100  -125   -31
 # G   -31  -125   100  -114
 # T  -123   -31  -114    91
 
 # QUERY: Angolan colobus colAng1
 SEQ2_DIR=/hive/data/genomes/colAng1/colAng1.2bit
 SEQ2_LEN=/hive/data/genomes/colAng1/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LIMIT=30
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzColAng1.2017-02-28
 TMPDIR=/dev/shm
 ' > DEF
 
     time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
         -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > do.log 2>&1
     #  real    376m8.949s
 
     cat fb.mm10.chainColAng1Link.txt
     # 902325064 bases of 2652783500 (34.014%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 colAng1) \
       > rbest.log 2>&1 &
     # real    343m38.692s
 
     # and for the swap:
     mkdir /hive/data/genomes/colAng1/bed/blastz.mm10.swap
     cd /hive/data/genomes/colAng1/bed/blastz.mm10.swap
 
     time (doBlastzChainNet.pl -verbose=2 \
       /hive/data/genomes/mm10/bed/lastzColAng1.2017-02-28/DEF \
         -swap -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > swap.log 2>&1
     #  real    62m44.125s
 
     cat fb.colAng1.chainMm10Link.txt
     # 885418780 bases of 2679973137 (33.038%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` colAng1 mm10) \
        > rbest.log 2>&1
     # real    296m19.689s
 
 #############################################################################
 # LASTZ mouse/mm10 Gray mouse lemur/micMur3 - (DONE - 2017-03-03 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzMicMur3.2017-03-03
     cd /hive/data/genomes/mm10/bed/lastzMicMur3.2017-03-03
 
     printf '# mouse vs Gray mouse lemur
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 BLASTZ_M=254
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 SEQ1_LIMIT=4
 # the default matrix is:
 #       A     C     G     T
 # A    91  -114   -31  -123
 # C  -114   100  -125   -31
 # G   -31  -125   100  -114
 # T  -123   -31  -114    91
 
 # QUERY: Gray mouse lemur micMur3
 SEQ2_DIR=/hive/data/genomes/micMur3/micMur3.2bit
 SEQ2_LEN=/hive/data/genomes/micMur3/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LIMIT=20
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzMicMur3.2017-03-03
 TMPDIR=/dev/shm
 ' > DEF
 
     time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
         -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > do.log 2>&1
     #  real    2192m13.661s
 
     cat fb.mm10.chainMicMur3Link.txt
     # 907817373 bases of 2652783500 (34.221%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 micMur3) \
       > rbest.log 2>&1 &
     # real    522m5.587s
 
     # and for the swap:
     mkdir /hive/data/genomes/micMur3/bed/blastz.mm10.swap
     cd /hive/data/genomes/micMur3/bed/blastz.mm10.swap
 
     time (doBlastzChainNet.pl -verbose=2 \
       /hive/data/genomes/mm10/bed/lastzMicMur3.2017-03-03/DEF \
         -swap -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > swap.log 2>&1
     #  real    71m4.702s
 
     cat fb.micMur3.chainMm10Link.txt
     # 905011854 bases of 2386321975 (37.925%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` micMur3 mm10) \
        > rbest.log 2>&1
     # real    508m58.716s
 
 #############################################################################
 # LASTZ mouse/mm10 Chinese tree shrew/tupChi1 - (DONE - 2017-03-09 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzTupChi1.2017-03-09
     cd /hive/data/genomes/mm10/bed/lastzTupChi1.2017-03-09
 
     printf '# mouse vs Chinese tree shrew
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 BLASTZ_M=254
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 SEQ1_LIMIT=4
 # the default matrix is:
 #       A     C     G     T
 # A    91  -114   -31  -123
 # C  -114   100  -125   -31
 # G   -31  -125   100  -114
 # T  -123   -31  -114    91
 
 # QUERY: Chinese tree shrew tupChi1
 SEQ2_DIR=/hive/data/genomes/tupChi1/tupChi1.2bit
 SEQ2_LEN=/hive/data/genomes/tupChi1/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LIMIT=200
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzTupChi1.2017-03-09
 TMPDIR=/dev/shm
 ' > DEF
 
     time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
         -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > do.log 2>&1
     #  real    224m24.608s
 
     cat fb.mm10.chainTupChi1Link.txt
     #  683463709 bases of 2652783500 (25.764%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 tupChi1) \
       > rbest.log 2>&1 &
     # real    385m2.239s
 
     # and for the swap:
     mkdir /hive/data/genomes/tupChi1/bed/blastz.mm10.swap
     cd /hive/data/genomes/tupChi1/bed/blastz.mm10.swap
 
     time (doBlastzChainNet.pl -verbose=2 \
       /hive/data/genomes/mm10/bed/lastzTupChi1.2017-03-09/DEF \
         -swap -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > swap.log 2>&1
     #  real    71m4.702s
 
     cat fb.tupChi1.chainMm10Link.txt
     # 708757944 bases of 2706389135 (26.188%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` tupChi1 mm10) \
        > rbest.log 2>&1
     # real    508m10.564s
 
 #############################################################################
 # LASTZ mouse/mm10 Chinese pangolin/manPen1 - (DONE - 2017-03-15 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzManPen1.2017-03-15
     cd /hive/data/genomes/mm10/bed/lastzManPen1.2017-03-15
 
     printf '# Mouse vs. Chinese pangolin
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 BLASTZ_M=254
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Chinese pangolin manPen1
 SEQ2_DIR=/hive/data/genomes/manPen1/manPen1.2bit
 SEQ2_LEN=/hive/data/genomes/manPen1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=180
 
 BASE=/hive/data/genomes/mm10/bed/lastzManPen1.2017-03-15
 TMPDIR=/dev/shm
 ' > DEF
 
     time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
         -syntenicNet -fileServer=hgwdev \
         -chainMinScore=3000 -chainLinearGap=medium \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku) > do.log 2>&1
     #  real    404m9.925s
 
     cat fb.mm10.chainManPen1Link.txt
     #  724400544 bases of 2652783500 (27.307%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 manPen1) \
       > rbest.log 2>&1 &
     # real    499m21.668s
 
     # and for the swap:
     mkdir /hive/data/genomes/manPen1/bed/blastz.mm10.swap
     cd /hive/data/genomes/manPen1/bed/blastz.mm10.swap
 
     time (doBlastzChainNet.pl -verbose=2 -swap \
         /hive/data/genomes/mm10/bed/lastzManPen1.2017-03-15/DEF \
         -syntenicNet -fileServer=hgwdev \
         -chainMinScore=3000 -chainLinearGap=medium \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku) > swap.log 2>&1
     #  real    71m4.702s
 
     cat fb.manPen1.chainMm10Link.txt
     # 710179682 bases of 1999066070 (35.526%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` manPen1 mm10) \
        > rbest.log 2>&1
     # real    495m7.361s
 
 #############################################################################
 # LASTZ mouse/mm10 vs. Golden eagle/aquChr2 - (DONE - 2017-03-16 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzAquChr2.2017-03-16
     cd /hive/data/genomes/mm10/bed/lastzAquChr2.2017-03-16
 
     printf "# Mouse vs. Golden eagle
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 #      A    C    G    T
 #     91  -90  -25 -100
 #    -90  100 -100  -25
 #    -25 -100  100  -90
 #   -100  -25  -90  91
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Golden eagle aquChr2
 SEQ2_DIR=/hive/data/genomes/aquChr2/aquChr2.2bit
 SEQ2_LEN=/hive/data/genomes/aquChr2/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LAP=0
 SEQ2_LIMIT=50
 
 BASE=/hive/data/genomes/mm10/bed/lastzAquChr2.2017-03-16
 TMPDIR=/dev/shm
 " > DEF
 
     time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
         -chainMinScore=5000 -chainLinearGap=loose \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > do.log 2>&1
     # real    217m29.467s
 
     cat fb.mm10.chainAquChr2Link.txt
     # 105013175 bases of 2652783500 (3.959%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 aquChr2) \
          > rbest.log 2>&1 &
     # real    196m24.435s
 
     # and for the swap:
     mkdir /hive/data/genomes/aquChr2/bed/blastz.mm10.swap
     cd /hive/data/genomes/aquChr2/bed/blastz.mm10.swap
 
     time (doBlastzChainNet.pl -verbose=2 \
       /hive/data/genomes/mm10/bed/lastzAquChr2.2017-03-16/DEF \
         -swap -chainMinScore=5000 -chainLinearGap=loose \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > swap.log 2>&1
     #  real    9m16.569s
 
     cat fb.aquChr2.chainMm10Link.txt
     # 89023131 bases of 1180019022 (7.544%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` aquChr2 mm10) \
          > rbest.log 2>&1
     # real    132m43.886s
 
 #########################################################################
 # LASTZ bison bisBis1 (DONE - 2017-03-17 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzBisBis1.2017-03-17
     cd /hive/data/genomes/mm10/bed/lastzBisBis1.2017-03-17
 
     printf '# Mouse vs. Bison
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 # maximum M allowed with lastz is only 254
 BLASTZ_M=254
 
 # TARGET: Mouse mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 SEQ1_LIMIT=50
 
 # QUERY: bison bisBis1
 SEQ2_DIR=/hive/data/genomes/bisBis1/bisBis1.2bit
 SEQ2_LEN=/hive/data/genomes/bisBis1/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LAP=0
 SEQ2_LIMIT=900
 
 BASE=/hive/data/genomes/mm10/bed/lastzBisBis1.2017-03-17
 TMPDIR=/dev/shm
 ' > DEF
 
     time (doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF \
         -syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
         -chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
     # real    576m23.128s
 
     cat fb.mm10.chainBisBis1Link.txt
     # 688337604 bases of 2652783500 (25.948%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 bisBis1) \
          > rbest.log 2>&1 &
     # real    430m48.078s
 
     #   and the swap
     mkdir /hive/data/genomes/bisBis1/bed/blastz.mm10.swap
     cd /hive/data/genomes/bisBis1/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/mm10/bed/lastzBisBis1.2017-03-17/DEF \
         -swap -syntenicNet  \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
         -chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
     #   real    169m28.369s
 
     cat fb.bisBis1.chainMm10Link.txt
     # 682104798 bases of 2757854331 (24.733%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` bisBis1 mm10) \
          > rbest.log 2>&1
     # real    445m5.636s
 
 ############################################################################
 # lastz frog xenTro9 (DONE - 2017-03-29 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10XenTro9
     mkdir /hive/data/genomes/mm10/bed/lastzXenTro9.2017-03-29
     cd /hive/data/genomes/mm10/bed/lastzXenTro9.2017-03-29
 
     printf '# Mouse vs. frog
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 SEQ1_LIMIT=10
 
 # QUERY: frog xenTro9
 SEQ2_DIR=/hive/data/genomes/xenTro9/xenTro9.2bit
 SEQ2_LEN=/hive/data/genomes/xenTro9/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=20
 
 BASE=/hive/data/genomes/mm10/bed/lastzXenTro9.2017-03-29
 TMPDIR=/dev/shm
 ' > DEF
 
     time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
           -syntenicNet -chainMinScore=5000 -chainLinearGap=loose) \
               > do.log 2>&1 &
     #	real    806m23.459s
 
     cat fb.mm10.chainXenTro9Link.txt
     #	87053836 bases of 2652783500 (3.282%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 xenTro9) \
          > rbest.log 2>&1 &
     # real    617m41.376s
 
     #	and for the swap
     mkdir /hive/data/genomes/xenTro9/bed/blastz.mm10.swap
     cd /hive/data/genomes/xenTro9/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzXenTro9.2017-03-29/DEF \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
         -syntenicNet -swap -chainMinScore=5000 -chainLinearGap=loose) \
 	> swap.log 2>&1 &
     #	real    25m54.516s
 
     cat fb.xenTro9.chainMm10Link.txt
     #	90150612 bases of 1369865365 (6.581%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` xenTro9 mm10) \
          > rbest.log 2>&1 &
     # real    597m52.740s
 
 #########################################################################
 # lastz frog xenLae2 (DONE - 2017-03-29 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10XenLae2
     mkdir /hive/data/genomes/mm10/bed/lastzXenLae2.2017-03-29
     cd /hive/data/genomes/mm10/bed/lastzXenLae2.2017-03-29
 
     printf '# Mouse vs. frog
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 SEQ1_LIMIT=10
 
 # QUERY: frog xenLae2
 SEQ2_DIR=/hive/data/genomes/xenLae2/xenLae2.2bit
 SEQ2_LEN=/hive/data/genomes/xenLae2/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LAP=0
 SEQ2_LIMIT=300
 
 BASE=/hive/data/genomes/mm10/bed/lastzXenLae2.2017-03-29
 TMPDIR=/dev/shm
 ' > DEF
 
     time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
           -syntenicNet -chainMinScore=5000 -chainLinearGap=loose) \
               > do.log 2>&1 &
     #	real    1044m10.115s
 
     cat fb.mm10.chainXenLae2Link.txt
     #	82272699 bases of 2652783500 (3.101%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 xenLae2) \
          > rbest.log 2>&1 &
     # real    656m46.337s
 
     #	and for the swap
     mkdir /hive/data/genomes/xenLae2/bed/blastz.mm10.swap
     cd /hive/data/genomes/xenLae2/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzXenLae2.2017-03-29/DEF \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
         -swap -chainMinScore=5000 -chainLinearGap=loose) > swap.log 2>&1
     #	real    26m14.884s
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzXenLae2.2017-03-29/DEF \
         -continue=syntenicNet -workhorse=hgwdev -smallClusterHub=ku \
 	-bigClusterHub=ku -syntenicNet -swap -chainMinScore=5000 \
 	-chainLinearGap=loose) > syntenicNet.log 2>&1 &
     # real    1m52.642s
 
     cat  fb.xenLae2.chainMm10Link.txt
     #	116001603 bases of 2408724787 (4.816%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` xenLae2 mm10) \
          > rbest.log 2>&1 &
     # real    746m4.542s
 
 #########################################################################
 # lastz turtle chrPic2 (DONE - 2017-04-05 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10ChrPic2
     mkdir /hive/data/genomes/mm10/bed/lastzChrPic2.2017-04-05
     cd /hive/data/genomes/mm10/bed/lastzChrPic2.2017-04-05
 
     printf '# Mouse vs. turtle
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: turtle chrPic2
 SEQ2_DIR=/hive/data/genomes/chrPic2/chrPic2.2bit
 SEQ2_LEN=/hive/data/genomes/chrPic2/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=300
 
 BASE=/hive/data/genomes/mm10/bed/lastzChrPic2.2017-04-05
 TMPDIR=/dev/shm
 ' > DEF
 
      time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
         -syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
         -chainMinScore=5000 -chainLinearGap=loose) > do.log 2>&1 &
     #	real    865m16.816s
 
     # ku difficulties due to /dev/shm/ being full, continuing:
      time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
         -syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
      -continue=cat -chainMinScore=5000 -chainLinearGap=loose) > cat.log 2>&1 &
     # real    13m13.959s
 
     # one big chain causing trouble, continuing:
      time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
         -syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
      -continue=chainMerge -chainMinScore=5000 -chainLinearGap=loose) > chainMerge.log 2>&1 &
     # real    11m47.232s
 
     cat fb.mm10.chainChrPic2Link.txt
     #	112560591 bases of 2652783500 (4.243%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 chrPic2) \
          > rbest.log 2>&1 &
     # real    114m27.445s
 
     #	and for the swap
     mkdir /hive/data/genomes/chrPic2/bed/blastz.mm10.swap
     cd /hive/data/genomes/chrPic2/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzChrPic2.2017-04-05/DEF \
         -syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
         -swap -chainMinScore=5000 -chainLinearGap=loose) > swap.log 2>&1 &
     #	real    12m2.676s
 
     cat  fb.chrPic2.chainMm10Link.txt
     #	106063993 bases of 2173204089 (4.881%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` chrPic2 mm10) \
          > rbest.log 2>&1 &
     # real    110m9.546s
 
 #########################################################################
 2017-04-16: import of UCSC GENCODE group processing of GENCODE VM13 (markd)
 
     # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions
 
     # download, build and load tables
     mkdir -p /hive/data/genomes/mm10/bed/gencodeVM13
     pushd /hive/data/genomes/mm10/bed/gencodeVM13
     (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
 
     ##  gencode-cmp.tsv check to see if sizes make sense
 
     # generate trackDb and joiner blurb
     pushd kent/src/hg/makeDb/trackDb
     ../../makeDb/outside/gencode/gencodeGenerateTrackDbs mm10 M13 88 'March 2017'
 
     # edit mouse/mm10/trackDb.ra to add new .ra file include
     make DBS=mm10
 
     # Update mouse/mm10/wgEncodeGencodeSuper.html and update 'Release Notes'
     # to describe new release. [ONLY if it's going to be pushed]
 
     # edit  all.joiner to add ~/tmp/gencodeVM13.joiner
     # verify with:
     pushd /hive/data/genomes/mm10/bed/gencodeVM13
     make  -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck db=mm10
 
     # commit all
 ##############################################################################
 # LASTZ Chinese hamster ovary cell line CHO-K1  criGriChoV1
 #	(DONE - 2017-04-13 - Hiram)
     #	establish a screen to control this job
     screen -S mm10criGriChoV1
     mkdir /hive/data/genomes/mm10/bed/lastzCriGriChoV1.2017-04-13
     cd /hive/data/genomes/mm10/bed/lastzCriGriChoV1.2017-04-13
 
     printf '# Chinese hamster ovary cell line vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 SEQ1_LIMIT=40
 
 # QUERY: Chinese hamster ovary cell line CHO-K1  criGriChoV1
 SEQ2_DIR=/hive/data/genomes/criGriChoV1/criGriChoV1.2bit
 SEQ2_LEN=/hive/data/genomes/criGriChoV1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LIMIT=250
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzCriGriChoV1.2017-04-13
 TMPDIR=/dev/shm
 ' > DEF
 
     time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
 	-noDbNameCheck -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
     #	real    575m28.254s
 
     cat fb.mm10.chainCriGriChoV1Link.txt
     #	1553371182 bases of 2652783500 (58.556%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev mm10 criGriChoV1 \
       -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
     #	real    732m16.081s
 
     mkdir /hive/data/genomes/criGriChoV1/bed/blastz.mm10.swap
     cd /hive/data/genomes/criGriChoV1/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzCriGriChoV1.2017-04-13/DEF \
 	-noDbNameCheck -swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1 &
     #	real    157m21.977s
 
     cat fb.criGriChoV1.chainMm10Link.txt
     #	1513594461 bases of 2318132242 (65.294%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev criGriChoV1 mm10 \
       -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
     # real    769m8.998s
 
 ##############################################################################
 ## 4-Way Multiz (DONE - 2017-04-20 - Hiram)
     ssh hgwdev
     mkdir /hive/data/genomes/mm10/bed/tupChi1Multiz4way
     cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way
 
     # from the 213-way in the source tree, select out the 4 used here:
     /cluster/bin/phast/tree_doctor \
         --prune-all-but hg38,galVar1,mm10,tupChi1 \
         /cluster/home/hiram/kent/src/hg/utils/phyloTrees/213way.nh \
           > mm10.4way.nh
     cat mm10.4way.nh
     # ((hg38:0.143908,(tupChi1:0.120000,galVar1:0.080000):0.054937):0.002000,
     #     mm10:0.356483);
 
     # using TreeGraph2 on Mac desktop to rearrange tree to get mm10 at top:
 # (mm10:0.356483,(hg38:0.143908,(tupChi1:0.12,galVar1:0.08):0.054937):0.002);
 
     #	what that looks like:
  ~/kent/src/hg/utils/phyloTrees/asciiTree.pl mm10.4way.nh | sed -e 's/^/# /;'
 
 # (mm10:0.356483,
 # (hg38:0.143908,
 # (tupChi1:0.12,
 # galVar1:0.08):0.054937):0.002);
 
     # extract species list from that .nh file
     sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
         mm10.4way.nh | xargs echo | sed 's/ //g; s/,/ /g' \
         | sed 's/[()]//g; s/,/ /g' | tr '[ ]' '[\n]' > species.list.txt
 
     # construct db to name translation list:
     cat species.list.txt | while read DB
 do
 hgsql -N -e "select name,organism from dbDb where name=\"${DB}\";" hgcentraltest
 done | sed -e "s/\t/->/; s/ /_/g;" | sed -e 's/$/;/' | sed -e 's/\./_/g' \
         | sed -e 's/-nosed/_nosed/; s/-eating/_eating/;' > db.to.name.txt
 
     # construct a common name .nh file:
     /cluster/bin/phast/tree_doctor --rename \
     "`cat db.to.name.txt`" mm10.4way.nh | sed -e 's/00*)/)/g; s/00*,/,/g' \
        | $HOME/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \
          > mm10.4way.commonNames.nh
     cat mm10.4way.commonNames.nh | sed -e 's/^/# /;'
 # (Mouse:0.356483,
 # (Human:0.143908,
 # (Chinese_tree_shrew:0.12,
 # Malayan_flying_lemur:0.08):0.054937):0.002);
 
 #	Use this specification in the phyloGif tool:
 #	http://genome.ucsc.edu/cgi-bin/phyloGif
 #	to obtain a png image for src/hg/htdocs/images/phylo/mm10_4way.png
 
     ~/kent/src/hg/utils/phyloTrees/asciiTree.pl mm10.4way.nh > t.nh
     ~/kent/src/hg/utils/phyloTrees/scientificNames.sh t.nh \
        | $HOME/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \
           > mm10.4way.scientificNames.nh
     rm -f t.nh
     cat mm10.4way.scientificNames.nh | sed -e 's/^/# /;'
 # (Mus_musculus:0.356483,
 # (Homo_sapiens:0.143908,
 # (Tupaia_chinensis:0.12,
 # Galeopterus_variegatus:0.08):0.054937):0.002);
 
     /cluster/bin/phast/all_dists mm10.4way.nh | grep mm10 \
         | sed -e "s/mm10.//" | sort -k2n > 4way.distances.txt
     #	Use this output to create the table below
     cat 4way.distances.txt | sed -e 's/^/# /;'
 # galVar1       0.493420
 # hg38  0.502391
 # tupChi1       0.533420
 
     # Write and run sizeStats.pl.  For each "db distance" line of
     # 4way.distances.txt it prints one table row: the chainLink coverage
     # read from the mm10 lastz.<db> featureBits file, the coverage read from
     # the swap featureBits file on the other assembly (N/A when that file is
     # absent or empty) and the organism name from hgcentraltest.dbDb.
     # The script body is a printf format string, so inside it %% stands for
     # a literal percent sign and \\ for a literal backslash.
     printf '#!/usr/bin/env perl
 
 use strict;
 use warnings;
 
 open (my $fh, "<", "4way.distances.txt") or
         die "can not read 4way.distances.txt: $!";
 
 my $count = 0;
 while (my $line = <$fh>) {
     chomp $line;
     my ($D, $dist) = split('"'"'\\s+'"'"', $line);
     my $chain = "chain" . ucfirst($D);
     my $B="/hive/data/genomes/mm10/bed/lastz.$D/fb.mm10." .
         $chain . "Link.txt";
     my $chainLinkMeasure =
         `awk '"'"'{print \\$5}'"'"' ${B} 2> /dev/null | sed -e "s/(//; s/)//"`;
     chomp $chainLinkMeasure;
     $chainLinkMeasure = 0.0 if (length($chainLinkMeasure) < 1);
     $chainLinkMeasure =~ s/\\%%//;
     my $swapFile="/hive/data/genomes/${D}/bed/lastz.mm10/fb.${D}.chainMm10Link.txt";
     # stays "N/A" when there is no swap featureBits file to read, so the
     # placeholder is never pushed through a numeric format
     my $swapText = "N/A";
     if ( -s $swapFile ) {
 	my $swapMeasure =
 	    `awk '"'"'{print \\$5}'"'"' ${swapFile} 2> /dev/null | sed -e "s/(//; s/)//"`;
 	chomp $swapMeasure;
 	$swapMeasure = 0.0 if (length($swapMeasure) < 1);
 	$swapMeasure =~ s/\\%%//;
 	$swapText = sprintf("%%06.3f", $swapMeasure);
     }
     my $orgName=
     `hgsql -N -e "select organism from dbDb where name='"'"'$D'"'"';" hgcentraltest`;
     chomp $orgName;
     if (length($orgName) < 1) {
         $orgName="N/A";
     }
     ++$count;
     printf "# %%02d  %%.4f (%%%% %%06.3f) (%%%% %%s) - %%s %%s\\n", $count, $dist,
         $chainLinkMeasure, $swapText, $orgName, $D;
 }
 close ($fh);
 ' > sizeStats.pl
     chmod +x ./sizeStats.pl
     ./sizeStats.pl
 
 #	If you can fill in all the numbers in this table, you are ready for
 #	the multiple alignment procedure
 
 #       featureBits chainLink measures
 #               chainLink
 #  N distance  on mm10  on other     other species
 # 01  0.4934 (% 35.618) (% 35.972) - Malayan flying lemur galVar1
 # 02  0.5024 (% 35.372) (% 31.653) - Human hg38
 # 03  0.5334 (% 25.764) (% 26.188) - Chinese tree shrew tupChi1
 
 # None of this concern for distances matters in building the first step, the
 # maf files.  The distances will be better calibrated later.
 
     # create species list and stripped down tree for autoMZ
     sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
 	mm10.4way.nh | xargs echo | sed 's/ //g; s/,/ /g' > tree.nh
 
     sed 's/[()]//g; s/,/ /g' tree.nh > species.list
     # mm10 hg38 tupChi1 galVar1
 
 
     # survey N50 for each
     for db in `cat species.list`
 do
 n50.pl /hive/data/genomes/$db/chrom.sizes
 done
 #       reading: /hive/data/genomes/mm10/chrom.sizes
 #       contig count: 66, total size: 2730871774, one half size: 1365435887
 # cumulative    N50 count       contig  contig size
 1312176979      8       chr7    145441459
 1365435887 one half size
 1442871972      9       chr10   130694993
 #       reading: /hive/data/genomes/hg38/chrom.sizes
 #       contig count: 455, total size: 3209286105, one half size: 1604643052
 # cumulative    N50 count       contig  contig size
 1547391171      8       chrX    156040895
 1604643052 one half size
 1692529807      9       chr8    145138636
 #       reading: /hive/data/genomes/tupChi1/chrom.sizes
 #       contig count: 50750, total size: 2846580235, one half size: 1423290117
 # cumulative    N50 count       contig  contig size
 1419920836      231     KB321095        3691413
 1423290117 one half size
 1423590960      232     KB321106        3670124
 #       reading: /hive/data/genomes/galVar1/chrom.sizes
 #       contig count: 179514, total size: 3187660572, one half size: 1593830286
 # cumulative    N50 count       contig  contig size
 1593691350      3422    NW_007730159v1  245222
 1593830286 one half size
 1593936539      3423    NW_007729331v1  245189
 
     #	bash shell syntax here ...
     cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way
     export H=/hive/data/genomes/mm10/bed
     mkdir mafLinks
     # good assemblies can use syntenic net:
     #  hg38
     # Link the syntenic-net maf files of each listed assembly into
     # mafLinks/<db>.  The echo is left unquoted so the log shows the expanded
     # ln command (same form as the recip-best loop that follows).
     for G in hg38
     do
       mkdir mafLinks/$G
       echo ln -s ${H}/lastz.$G/mafSynNet/*.maf.gz ./mafLinks/$G
       ln -s ${H}/lastz.$G/mafSynNet/*.maf.gz ./mafLinks/$G
     done
 
     # other assemblies using recip best net:
     #  galVar1 tupChi1
     for G in galVar1 tupChi1
     do
       mkdir mafLinks/$G
       echo ln -s ${H}/lastz.$G/mafRBestNet/*.maf.gz ./mafLinks/$G
       ln -s ${H}/lastz.$G/mafRBestNet/*.maf.gz ./mafLinks/$G
     done
 
     # verify the symLinks are good:
     ls -ogrtL mafLinks/*/* | sed -e 's/^/# /; s/-rw-rw-r-- 1//;'
     ls -ogrtL mafLinks/*/* | sed -e 's/^/# /; s/-rw-rw-r-- 1//;' | head
 #  52322575 Apr 10  2015 mafLinks/hg38/chr1.maf.gz
 #  35696060 Apr 10  2015 mafLinks/hg38/chr10.maf.gz
 #  36383118 Apr 10  2015 mafLinks/hg38/chr11.maf.gz
 
     ls -ogrtL mafLinks/*/* | sed -e 's/^/# /; s/-rw-rw-r-- 1//;' | tail
 #      3104 Mar 10 00:08 mafLinks/tupChi1/chrUn_GL456379.maf.gz
 #       143 Mar 10 00:08 mafLinks/tupChi1/chrUn_GL456381.maf.gz
 #      1221 Mar 10 00:08 mafLinks/tupChi1/chrUn_GL456382.maf.gz
 
 # XXX - do not need to split - Thu Apr 20 15:02:02 PDT 2017
 
     mkdir /hive/data/genomes/mm10/bed/tupChi1Multiz4way/splitRun
     cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/splitRun
     # construct a list of all possible maf file names.
 
     find ../mafLinks | grep maf.gz | sed -e 's#../mafLinks/##;' \
         | xargs -L 1 basename | sed -e 's/.gz//;' | sort -u > maf.list
 
     wc -l maf.list
     # 52 maf.list
 
     mkdir maf run
     cd run
     mkdir penn
     cp -p /cluster/bin/penn/multiz.2009-01-21_patched/multiz penn
     cp -p /cluster/bin/penn/multiz.2009-01-21_patched/maf_project penn
     cp -p /cluster/bin/penn/multiz.2009-01-21_patched/autoMZ penn
 
     #	set the db and pairs directories here
     # Write autoMultiz.csh, the per-maf cluster job.  Arguments: $1 is the
     # maf file name from maf.list, $2 is the result path.  The job copies
     # tree.nh and species.list to a scratch directory under /dev/shm, unpacks
     # (or links, or stubs out) the pairwise maf for every non-mm10 species,
     # runs autoMZ there, copies the output to $2 and removes the scratch
     # directory.
     printf '#!/bin/csh -ef
 set db = mm10
 set c = $1
 set result = $2
 set run = `/bin/pwd`
 set tmp = /dev/shm/$db/multiz.$c
 set pairs = /hive/data/genomes/mm10/bed/tupChi1Multiz4way/mafLinks
 /bin/rm -fr $tmp
 /bin/mkdir -p $tmp
 /bin/cp -p ../../tree.nh ../../species.list $tmp
 pushd $tmp > /dev/null
 foreach s (`/bin/sed -e "s/$db //" species.list`)
     set in = $pairs/$s/$c
     set out = $db.$s.sing.maf
     if (-e $in.gz) then
         /bin/zcat $in.gz > $out
         if (! -s $out) then
             echo "##maf version=1 scoring=autoMZ" > $out
         endif
     else if (-e $in) then
         /bin/ln -s $in $out
     else
         echo "##maf version=1 scoring=autoMZ" > $out
     endif
 end
 set path = ($run/penn $path); rehash
 $run/penn/autoMZ + T=$tmp E=$db "`cat tree.nh`" $db.*.sing.maf $c \
         > /dev/null
 popd > /dev/null
 /bin/rm -f $result
 /bin/cp -p $tmp/$c $result
 /bin/rm -fr $tmp
 ' > autoMultiz.csh
 
     chmod +x autoMultiz.csh
 
     printf '#LOOP
 ./autoMultiz.csh $(file1) {check out line+ /hive/data/genomes/mm10/bed/tupChi1Multiz4way/splitRun/maf/$(root1).maf}
 #ENDLOOP
 ' > template
 
     ln -s ../maf.list maf.list
     ssh ku
     cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/splitRun/run
     gensub2 maf.list single template jobList
     para create jobList
     para try ... check ... push ... etc...
 # Completed: 52 of 52 jobs
 # CPU time in finished jobs:      44671s     744.52m    12.41h    0.52d  0.001 y
 # IO & Wait Time:                  1129s      18.81m     0.31h    0.01d  0.000 y
 # Average job time:                 881s      14.68m     0.24h    0.01d
 # Longest finished job:            3537s      58.95m     0.98h    0.04d
 # Submission to last job:          5634s      93.90m     1.56h    0.07d
 
     # combine into one file  (the 1>&2 redirect sends the echo to stderr)
     cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way
     head -1 splitRun/maf/chr2.maf > tupChi1Multiz4way.maf
     time for F in splitRun/maf/*.maf
 do
     echo "${F}" 1>&2
     egrep -v "^#" ${F}
 done >> tupChi1Multiz4way.maf
     # real    0m16.400s
 
     tail -1 splitRun/maf/chr2.maf >> tupChi1Multiz4way.maf
 # -rw-rw-r-- 1 5228617390 Apr 20 17:41 tupChi1Multiz4way.maf
 
     # Load into database
     ssh hgwdev
     cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way
     mkdir /gbdb/mm10/tupChi1Multiz4way
     ln -s `pwd`/tupChi1Multiz4way.maf /gbdb/mm10/tupChi1Multiz4way
     cd /dev/shm
     time hgLoadMaf mm10 tupChi1Multiz4way
 # Loaded 5635229 mafs in 1 files from /gbdb/mm10/tupChi1Multiz4way
 # real    1m26.208s
 
 
     time hgLoadMafSummary -verbose=2 -minSize=30000 \
 	-mergeGap=1500 -maxSize=200000 mm10 tupChi1Multiz4waySummary \
 	/gbdb/mm10/tupChi1Multiz4way/tupChi1Multiz4way.maf
 # Created 743966 summary blocks from 10080651 components and 5635229 mafs from /gbdb/mm10/tupChi1Multiz4way/tupChi1Multiz4way.maf
 # real    1m45.053s
 
 # -rw-rw-r-- 1 294659136 Apr 20 21:40 tupChi1Multiz4way.tab
 # -rw-rw-r-- 1  34525860 Apr 20 22:09 tupChi1Multiz4waySummary.tab
 
     wc -l tupChi1Multiz4way*.tab
 #  5635229 tupChi1Multiz4way.tab
 #   743966 tupChi1Multiz4waySummary.tab
 
     rm tupChi1Multiz4way*.tab
 
 ##############################################################################
 # GAP ANNOTATE MULTIZ4WAY MAF AND LOAD TABLES (DONE - 2017-04-20 - Hiram)
     # mafAddIRows has to be run on single-chromosome maf files; it does not
     #	function correctly when more than one reference sequence
     #	is in a single file.  Need to split the maf file into individual
     #   maf files
     mkdir -p /hive/data/genomes/mm10/bed/tupChi1Multiz4way/anno/mafSplit
     cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/anno/mafSplit
 
     time mafSplit -outDirDepth=2 -byTarget -useFullSequenceName \
         /dev/null . ../../tupChi1Multiz4way.maf
     #   real    1m25.202s
     find . -type f | wc -l
     #   52
 
     # check for N.bed files everywhere:
     cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/anno
     for DB in `cat ../species.list`
 do
     if [ ! -s /hive/data/genomes/${DB}/${DB}.N.bed ]; then
         echo "MISS: ${DB}"
 #         cd /hive/data/genomes/${DB}
 #         twoBitInfo -nBed ${DB}.2bit ${DB}.N.bed
     else
         echo "  OK: ${DB}"
     fi
 done
 
     cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/anno
     for DB in `cat ../species.list`
 do
     echo "${DB} "
     ln -s  /hive/data/genomes/${DB}/${DB}.N.bed ${DB}.bed
     echo ${DB}.bed  >> nBeds
     ln -s  /hive/data/genomes/${DB}/chrom.sizes ${DB}.len
     echo ${DB}.len  >> sizes
 done
     # make sure they all are successful symLinks:
     ls -ogrtL
 
     screen -S gapAnno      # use a screen to control this longish job
     ssh ku
     cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/anno
     mkdir result
     find ./mafSplit -type d | sed -e 's#./mafSplit/##' | while read D
 do
     echo mkdir -p result/${D}
     mkdir -p result/${D}
 done
     printf '#LOOP
 mafAddIRows -nBeds=nBeds mafSplit/$(path1) /hive/data/genomes/mm10/mm10.2bit {check out exists+ result/$(path1)}
 #ENDLOOP
 ' > template
     # << happy emacs
 
     find ./mafSplit -type f | sed -e 's#^./mafSplit/##' > maf.list
     gensub2 maf.list single template jobList
     # there isn't the usual job limit problem here, only 52 jobs
     para create jobList
     para try ... check ... push ...
 # Completed: 52 of 52 jobs
 # CPU time in finished jobs:        749s      12.48m     0.21h    0.01d  0.000 y
 # IO & Wait Time:                   119s       1.99m     0.03h    0.00d  0.000 y
 # Average job time:                  17s       0.28m     0.00h    0.00d
 # Longest finished job:              65s       1.08m     0.02h    0.00d
 # Submission to last job:           110s       1.83m     0.03h    0.00d
 
     # verify all result files have some content, look for 0 size files:
     find ./result -type f -size 0
     # should see none
     # or in this manner:
     find ./result -type f | xargs ls -og | sort -k3nr | tail
 
     # combine into one file  (the 1>&2 redirect sends the echo to stderr)
     head -q -n 1 result/4/1/chrUn_GL456381.maf > mm10.4way.maf
     time find ./result -type f | while read F
 do
     echo "${F}" 1>&2
     grep -h -v "^#" ${F}
 done >> mm10.4way.maf
     # real    0m33.237s
 
     #	these maf files do not have the end marker, this does nothing:
     #	tail -q -n 1 result/4/0/NW_007804317v1.maf >> mm10.4way.maf
     # How about an official end marker:
     echo "##eof maf" >> mm10.4way.maf
     ls -og
 # -rw-rw-r-- 1 7580362629 Apr 20 22:27 mm10.4way.maf
 
     du -hsc mm10.4way.maf
     # 7.1G     mm10.4way.maf
 
     # construct symlinks to get the individual maf files into gbdb:
     rm /gbdb/mm10/tupChi1Multiz4way/tupChi1Multiz4way.maf   # remove previous results
     ln -s `pwd`/mm10.4way.maf /gbdb/mm10/tupChi1Multiz4way/tupChi1Multiz4way.maf
 
     # Load into database
     cd /dev/shm
     time hgLoadMaf -pathPrefix=/gbdb/mm10/tupChi1Multiz4way mm10 tupChi1Multiz4way
     # Loaded 6931895 mafs in 1 files from /gbdb/mm10/tupChi1Multiz4way
     # real    1m59.548s
 
     time hgLoadMafSummary -verbose=2 -minSize=30000 \
 	-mergeGap=1500 -maxSize=200000 mm10 tupChi1Multiz4waySummary \
         /gbdb/mm10/tupChi1Multiz4way/tupChi1Multiz4way.maf
     # Created 743966 summary blocks from 10080651 components and 6931895 mafs from /gbdb/mm10/tupChi1Multiz4way/tupChi1Multiz4way.maf
     # real    2m14.237s
 
     # -rw-rw-r-- 1  362918923 Apr 20 22:30 tupChi1Multiz4way.tab
     # -rw-rw-r-- 1   36013792 Apr 20 22:33 tupChi1Multiz4waySummary.tab
 
     rm tupChi1Multiz4way*.tab
 
 ######################################################################
 # MULTIZ4WAY MAF FRAMES (DONE - 2017-04-20 - Hiram)
     ssh hgwdev
     mkdir /hive/data/genomes/mm10/bed/tupChi1Multiz4way/frames
     cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/frames
 #   survey all the genomes to find out what kinds of gene tracks they have
     printf '#!/bin/csh -fe
 foreach db (`cat ../species.list`)
     printf "# ${db}: "
     set tables = `hgsql $db -N -e "show tables" | egrep "Gene|ncbiRefSeq"`
     foreach table ($tables)
         if ($table == "ensGene" || $table == "refGene" || \
            $table == "ncbiRefSeq" || $table == "mgcGenes" || \
            $table == "knownGene" || $table == "xenoRefGene" ) then
            set count = `hgsql $db -N -e "select count(*) from $table"`
             echo -n "${table}: ${count}, "
         endif
     end
     set orgName = `hgsql hgcentraltest -N -e \
             "select scientificName from dbDb where name='"'"'$db'"'"'"`
     set orgId = `hgsql $db -N -e \
             "select id from organism where name='"'"'$orgName'"'"'"`
     if ($orgId == "") then
         echo "Mrnas: 0"
     else
         set count = `hgsql $db -N -e "select count(*) from gbCdnaInfo where organism=$orgId"`
         echo "Mrnas: ${count}"
     endif
 end
 ' > showGenes.csh
 
     chmod +x ./showGenes.csh
     time ./showGenes.csh
 # mm10: ensGene: 103734, knownGene: 63759, mgcGenes: 26777, ncbiRefSeq: 107894, refGene: 36869, xenoRefGene: 179145, Mrnas: 5367574
 # hg38: ensGene: 208239, knownGene: 197782, mgcGenes: 35305, ncbiRefSeq: 159322, refGene: 69527, xenoRefGene: 184852, Mrnas: 11481766
 # tupChi1: refGene: 206, xenoRefGene: 343637, Mrnas: 50709
 # galVar1: ncbiRefSeq: 41547, xenoRefGene: 499145, Mrnas: 0
 
 # real    0m41.291s
 
     # from that summary, use these gene sets:
     # knownGene - hg38 mm10
     # ncbiRefSeq - galVar1
     # xenoRefGene - tupChi1
 
     mkdir genes
     #   1. knownGene: hg38 mm10
     for DB in hg38 mm10
 do
     hgsql -N -e "select name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds from knownGene" ${DB} \
       | genePredSingleCover stdin stdout | gzip -2c \
         > genes/${DB}.gp.gz
     printf "# ${DB}: "
     genePredCheck -db=${DB} genes/${DB}.gp.gz
 done
 # hg38: checked: 21375 failed: 0
 # mm10: checked: 21100 failed: 0
 
     #   2. xenoRefGene: tupChi1
     for DB in tupChi1
 do
 hgsql -N -e "select name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds
 from xenoRefGene" ${DB} \
       | genePredSingleCover stdin stdout | gzip -2c \
         > /dev/shm/${DB}.tmp.gz
     mv /dev/shm/${DB}.tmp.gz genes/$DB.gp.gz
     printf "# ${DB}: "
     genePredCheck -db=${DB} genes/${DB}.gp.gz
 done
 # tupChi1: checked: 30481 failed: 0
 
     #   3. ncbiRefSeq for galVar1
     for DB in galVar1
 do
 hgsql -N -e "select * from ncbiRefSeq" ${DB} | cut -f2- \
       | genePredSingleCover stdin stdout | gzip -2c \
         > /dev/shm/${DB}.tmp.gz
     mv /dev/shm/${DB}.tmp.gz genes/$DB.gp.gz
     echo -n "# ${DB}: "
     genePredCheck -db=${DB} genes/${DB}.gp.gz
 done
 # galVar1: checked: 23389 failed: 0
 
     # verify counts for genes are reasonable:
     for T in genes/*.gz
 do
     echo -n "# $T: "
     zcat $T | cut -f1 | sort | uniq -c | wc -l
 done
 # genes/galVar1.gp.gz: 23054
 # genes/hg38.gp.gz: 21375
 # genes/mm10.gp.gz: 21100
 # genes/tupChi1.gp.gz: 25028
 
     time (cat ../anno/mm10.4way.maf \
 	| genePredToMafFrames mm10 stdin stdout \
           `cat ../species.list.txt | xargs echo \
             | sed -e "s#\([a-zA-Z0-9]*\)#\1 genes/\1.gp.gz#g;"` \
 		| gzip > tupChi1Multiz4wayFrames.bed.gz)
     # real    1m35.311s
 
     # verify there are frames on everything, should be 4 species:
     zcat tupChi1Multiz4wayFrames.bed.gz | awk '{print $4}' | sort | uniq -c \
        | sed -e 's/^/# /;'
 #  233262 galVar1
 #  231021 hg38
 #  190782 mm10
 #  245209 tupChi1
 
     #   load the resulting file
     ssh hgwdev
     cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/frames
     time hgLoadMafFrames mm10 tupChi1Multiz4wayFrames tupChi1Multiz4wayFrames.bed.gz
     #   real    0m9.566s
 
     time featureBits -countGaps mm10 tupChi1Multiz4wayFrames
     # 38594412 bases of 2730871774 (1.413%) in intersection
     # real    0m5.681s
 
     #   enable the trackDb entries:
 # frames tupChi1Multiz4wayFrames
 # irows on
     #   appears to work OK
 
 #########################################################################
 # Phylogenetic tree from 4-way (DONE - 2017-04-20 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/tupChi1Multiz4way/4d
     cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/4d
 
     # using knownGene for mm10, only transcribed genes and nothing
     #	from the randoms and other misc.
     hgsql -Ne "select name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds from knownGene where cdsEnd > cdsStart;" mm10 \
       | egrep -E -v "chrM|chrUn|random|_alt" > knownGene.gp
     wc -l *.gp
     #     93916 knownGene.gp
 
     # verify it is only on the chroms:
     cut -f2 knownGene.gp | sort | uniq -c | sort -rn | sed -e 's/^/    # /;'
     #    3949 chr2
     #    3861 chr7
     #    3496 chr11
     #    2789 chr5
     #    2782 chr4
     #    2698 chr1
     #    2585 chr9
     #    2395 chr6
     #    2304 chr3
     #    2238 chr17
     #    2206 chr8
     #    2166 chr10
     #    1930 chrX
     #    1773 chr14
     #    1717 chr15
     #    1654 chr13
     #    1509 chr12
     #    1496 chr19
     #    1489 chr16
     #    1125 chr18
     #     193 chrY
 
     genePredSingleCover knownGene.gp stdout | sort > knownGeneNR.gp
     wc -l knownGeneNR.gp
     #	21054 knownGeneNR.gp
 
     genePredCheck -db=mm10 knownGeneNR.gp
     #  checked: 21054 failed: 0
 
     # the annotated maf is:
     og ../anno/mm10.4way.maf
 # -rw-rw-r-- 1 7580362629 Apr 20 22:27 ../anno/mm10.4way.maf
 
     mkdir annoSplit
     cd annoSplit
     time mafSplit -verbose=2 -outDirDepth=2 -byTarget -useFullSequenceName \
 	/dev/null . ../../anno/mm10.4way.maf
     # real    2m13.529s
 
     find . -type f | wc -l
     #   52
     ssh ku
     mkdir /hive/data/genomes/mm10/bed/tupChi1Multiz4way/4d/run
     cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/4d/run
     mkdir ../mfa
 
     # newer versions of msa_view have a slightly different operation
     # the sed of the gp file inserts the reference species in the chr name
     printf '#!/bin/csh -fe
 set PHASTBIN = /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin
 set GP = knownGeneNR.gp
 set r = "/hive/data/genomes/mm10/bed/tupChi1Multiz4way"
 set c = $1:r
 set infile = $r/4d/annoSplit/$2
 set outDir = $r/4d/mfa/$3:h
 set outfile = $r/4d/mfa/$3
 /bin/mkdir -p $outDir
 cd /dev/shm
 /bin/awk -v C=$c '"'"'$2 == C {print}'"'"' $r/4d/$GP | sed -e "s/\\t$c\\t/\\tmm10.$c\\t/" > $c.gp
 set NL=`wc -l $c.gp| gawk '"'"'{print $1}'"'"'`
 echo $NL
 if ("$NL" != "0") then
     $PHASTBIN/msa_view --4d --features $c.gp -i MAF $infile -o SS > $c.ss
     $PHASTBIN/msa_view -i SS --tuple-size 1 $c.ss > $outfile
 else
     echo "" > $outfile
 endif
 /bin/rm -f /dev/shm/$c.gp /dev/shm/$c.ss
 ' > 4d.csh
 
     chmod +x 4d.csh
 
     find ../annoSplit -type f | sed -e "s#../annoSplit/##" > maf.list
     wc -l maf.list
 # 52 maf.list
 
     printf '#LOOP
 4d.csh $(file1) $(path1) {check out line+ ../mfa/$(dir1)/$(dir2)$(root1).mfa}
 #ENDLOOP
 ' > template
 
     gensub2 maf.list single template jobList
     # do not have the usual problem with fast jobs here, only 52 of them total
     para create jobList
     para try ... check
     para time
 # Completed: 52 of 52 jobs
 # CPU time in finished jobs:        615s      10.26m     0.17h    0.01d  0.000 y
 # IO & Wait Time:                   122s       2.03m     0.03h    0.00d  0.000 y
 # Average job time:                  14s       0.24m     0.00h    0.00d
 # Longest finished job:              50s       0.83m     0.01h    0.00d
 # Submission to last job:            86s       1.43m     0.02h    0.00d
 
     # Not all results have contents, or finish successfully, that is OK
     # it is because not all contigs have genes, only gene sequences are measured
 
     # combine mfa files
     ssh hgwdev
     cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/4d
     # remove the broken empty files, size 0 and size 1:
     find ./mfa -type f -size 0 | xargs rm -f
     # sometimes this doesn't work, don't know why
     find ./mfa -type f -size 1 | xargs rm -f
     # when it doesn't, use this empty list procedure
     find ./mfa -type f | xargs ls -og | awk '$3 < 2' | awk '{print $NF}' \
         > empty.list
     cat empty.list | xargs rm -f
     # see what is left:
     ls -ogrt mfa/*/*/*.mfa | sort -k3nr | wc
     #       21     147    1081
 
     # want comma-less species.list
     time /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/msa_view \
 	--aggregate "`cat ../species.list`" mfa/*/*/*.mfa | sed s/"> "/">"/ \
 	    > 4d.all.mfa
     # real    0m1.256s
 
     # check they are all in there:
     grep "^>" 4d.all.mfa | wc -l
     # 4
     grep "^>" 4d.all.mfa | sed -e 's/^/# /;'
 # >mm10
 # >hg38
 # >tupChi1
 # >galVar1
 
     sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
 	../mm10.4way.nh | xargs echo | sed -e 's/ //g' > tree_commas.nh
     # tree_commas.nh looks like:
     # (mm10,(hg38,(tupChi1,galVar1)))
 
     # use phyloFit to create tree model (output is phyloFit.mod)
     time /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/phyloFit \
 	    --EM --precision MED --msa-format FASTA --subst-mod REV \
 		--tree tree_commas.nh 4d.all.mfa
     #  real    0m0.727s
 
     mv phyloFit.mod all.mod
 
     grep TREE all.mod
 # TREE:
 # (mm10:0.170506,(hg38:0.114771,
 #	(tupChi1:0.187178,galVar1:0.105148):0.011794):0.170506);
 
     # compare these calculated lengths to the tree extracted from 191way:
     grep TREE all.mod | sed -e 's/TREE: //' \
        | /cluster/bin/phast/all_dists /dev/stdin | grep mm10 \
           | sed -e "s/mm10.//;"  | sort > new.dists
     /cluster/bin/phast/all_dists ../mm10.4way.nh | grep mm10 \
         | sed -e "s/mm10.//;" | sort > old.dists
      # print the 'new', the 'old', the 'difference' and the percent difference
     join new.dists old.dists | awk '{
   printf "#\t%s\t%8.6f\t%8.6f\t%8.6f\t%8.6f\n", $1, $2, $3, $2-$3, 100*($2-$3)/$3 }' \
       | sort -k3n
 #       hg38    0.455783        0.502391        -0.046608       -9.277236
 #       galVar1 0.457954        0.493420        -0.035466       -7.187791
 #       tupChi1 0.539984        0.533420        0.006564        1.230550
 
 #########################################################################
 # phastCons 4-way (DONE - 2017-04-20 - Hiram)
     # split 4way mafs into 10M chunks and generate sufficient statistics
     # files for phastCons
     ssh ku
     mkdir -p /hive/data/genomes/mm10/bed/tupChi1Multiz4way/cons/SS
     cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/cons/SS
     mkdir result done
 
     printf '#!/bin/csh -ef
 set d = $1
 set c = $2
 set doneDir = done/$d
 set MAF = /hive/data/genomes/mm10/bed/tupChi1Multiz4way/anno/result/$d/$c.maf
 set WINDOWS = /hive/data/genomes/mm10/bed/tupChi1Multiz4way/cons/SS/result/$d/$c
 set WC = `cat $MAF | wc -l`
 set NL = `grep "^#" $MAF | wc -l`
 if ( -s $3 ) then
     exit 0
 endif
 if ( -s $3.running ) then
     exit 0
 endif
 
 /bin/mkdir -p $doneDir
 /bin/date >> $3.running
 
 /bin/rm -fr $WINDOWS
 /bin/mkdir -p $WINDOWS
 pushd $WINDOWS > /dev/null
 if ( $WC != $NL ) then
 /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/msa_split \\
     $MAF -i MAF -o SS -r $WINDOWS/$c -w 10000000,0 -I 1000 -B 5000
 endif
 popd > /dev/null
 /bin/date >> $3
 /bin/rm -f $3.running
 ' > mkSS.csh
 
     chmod +x mkSS.csh
 
     printf '#LOOP
 mkSS.csh $(dir1) $(root1) {check out line+ done/$(dir1)/$(root1)}
 #ENDLOOP
 ' > template
 
     find ../../anno/result -type f | sed -e "s#../../anno/result/##" > maf.list
     wc -l maf.list
 # 52 maf.list
 
     ssh ku
     cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/cons/SS
 
     gensub2 maf.list single template jobList
     # no need to worry about fast jobs, only 52 jobs here
     para create jobList
     para try ... check ... etc
     para push
 # Completed: 52 of 52 jobs
 # CPU time in finished jobs:       1064s      17.74m     0.30h    0.01d  0.000 y
 # IO & Wait Time:                   180s       3.00m     0.05h    0.00d  0.000 y
 # Average job time:                  24s       0.40m     0.01h    0.00d
 # Longest finished job:              89s       1.48m     0.02h    0.00d
 # Submission to last job:           127s       2.12m     0.04h    0.00d
 
 
     find ./result -type f | wc -l
     # 290
 
     # Run phastCons
     #	This job is I/O intensive in its output files, beware where this
     #	takes place or do not run too many at once.
     ssh ku
     mkdir -p /hive/data/genomes/mm10/bed/tupChi1Multiz4way/cons/run.cons
     cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/cons/run.cons
 
     #	This is set up for multiple runs based on subsets, but only
     #   the 'all' subset is run here.
     #   It triggers off of the current working directory
     #	$cwd:t which is the "grp" in this script.  Running:
     #	all
 
     printf '#!/bin/csh -fe
 set PHASTBIN = /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin
 set c = $1
 set d = $2
 set f = $3
 set len = $4
 set cov = $5
 set rho = $6
 set grp = $cwd:t
 set cons = /hive/data/genomes/mm10/bed/tupChi1Multiz4way/cons
 set tmp = $cons/tmp/${d}_${c}
 mkdir -p $tmp
 set ssSrc = $cons/SS/result
 set useGrp = "$grp.mod"
 if (-s $cons/$grp/$grp.non-inf) then
   ln -s $cons/$grp/$grp.mod $tmp
   ln -s $cons/$grp/$grp.non-inf $tmp
   ln -s $ssSrc/$d/$f $tmp
 else
   ln -s $ssSrc/$d/$f $tmp
   ln -s $cons/$grp/$grp.mod $tmp
 endif
 pushd $tmp > /dev/null
 if (-s $grp.non-inf) then
   $PHASTBIN/phastCons $f $useGrp \
     --rho $rho --expected-length $len --target-coverage $cov --quiet \\
     --not-informative `cat $grp.non-inf` \\
     --seqname $c --idpref $c --most-conserved $c.bed --score > $c.pp
 else
   $PHASTBIN/phastCons $f $useGrp \\
     --rho $rho --expected-length $len --target-coverage $cov --quiet \\
     --seqname $c --idpref $c --most-conserved $c.bed --score > $c.pp
 endif
 popd > /dev/null
 mkdir -p pp/$d bed/$d
 sleep 4
 touch pp/$d bed/$d
 rm -f pp/$d/$c.pp
 rm -f bed/$d/$c.bed
 mv $tmp/$c.pp pp/$d
 mv $tmp/$c.bed bed/$d
 rm -fr $tmp
 rmdir --ignore-fail-on-non-empty $cons/tmp/$d:h
 ' > doPhast.csh
 
     chmod +x doPhast.csh
 
     #	this template will serve for all runs
     #	root1 == chrom name, file1 == ss file name without .ss suffix
     printf '#LOOP
 ../run.cons/doPhast.csh $(root1) $(dir1) $(file1) 45 0.3 0.3 {check out line+ pp/$(dir1)/$(root1).pp}
 #ENDLOOP
 ' > template
 
     find ../SS/result -type f | sed -e "s#../SS/result/##" > ss.list
     wc -l ss.list
     #	290 ss.list
 
     # Create parasol batch and run it
     # run for all species
     cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/cons
     mkdir -p all
     cd all
     #	Using the .mod tree
     cp -p ../../4d/all.mod ./all.mod
 
     gensub2 ../run.cons/ss.list single ../run.cons/template jobList
     para -maxJob=100 create jobList
     para try ... check ...
     para push
 # Completed: 290 of 290 jobs
 # CPU time in finished jobs:       5576s      92.93m     1.55h    0.06d  0.000 y
 # IO & Wait Time:                  1995s      33.25m     0.55h    0.02d  0.000 y
 # Average job time:                  26s       0.44m     0.01h    0.00d
 # Longest finished job:              33s       0.55m     0.01h    0.00d
 # Submission to last job:            66s       1.10m     0.02h    0.00d
 
     # create Most Conserved track
     cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/cons/all
     time cut -f1 ../../../../chrom.sizes | while read C
 do
     ls -d bed/?/?/${C} 2> /dev/null | while read D
     do
         echo ${D}/${C}*.bed 1>&2
         cat ${D}/${C}*.bed
     done | sort -k1,1 -k2,2n \
     | awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", "'${C}'", $2, $3, $5, $5;}'
 done > tmpMostConserved.bed
     # real    0m12.570s
 
     time /cluster/bin/scripts/lodToBedScore tmpMostConserved.bed \
          > mostConserved.bed
     # real    0m7.235s
 
 # -rw-rw-r-- 1 28670932 Apr 21 00:01 tmpMostConserved.bed
 # -rw-rw-r-- 1 29438194 Apr 21 00:02 mostConserved.bed
 
     # load into database
     ssh hgwdev
     cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/cons/all
     time hgLoadBed mm10 tupChi1PhastConsElements4way mostConserved.bed
     # Read 841312 elements of size 5 from mostConserved.bed
     # real    0m7.635s
 
     # on human we often try for 5% overall cov, and 70% CDS cov
     # most bets are off here for that goal, these alignments are too few
     #	and too far between
     #	--rho 0.3 --expected-length 45 --target-coverage 0.3
     time featureBits mm10 -enrichment knownGene:cds tupChi1PhastConsElements4way
 # knownGene:cds 1.333%, tupChi1PhastConsElements4way 4.368%, both 0.924%,
 #	cover 69.30%, enrich 15.86x
 #  real    0m8.883s
 
     # Create merged posterior probability file and wiggle track data files
     cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/cons/all
     mkdir downloads
 
     # the third sed fixes the chrom names, removing the partition extensions
     time (find ./pp -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
 	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
 	| sed -e 's/\.[0-9][0-9]*-[0-9][0-9]* start/ start/' \
         | gzip -c > downloads/phastCons4way.wigFix.gz)
     #   real    13m32.808s
 
 # -rw-rw-r-- 1 1452731444 Apr 21 00:18 phastCons4way.wigFix.gz
 
     # check integrity of data with wigToBigWig
     time (zcat downloads/phastCons4way.wigFix.gz \
 	| wigToBigWig -verbose=2 stdin /hive/data/genomes/mm10/chrom.sizes \
 	    phastCons4way.bw) > bigWig.log 2>&1
     egrep "real|VmPeak" bigWig.log
     # pid=19728: VmPeak:    12564976 kB
     # real    17m36.198s
 
     bigWigInfo phastCons4way.bw | sed -e 's/^/# /;'
 # version: 4
 # isCompressed: yes
 # isSwapped: 0
 # primaryDataSize: 2,285,833,964
 # primaryIndexSize: 63,248,068
 # zoomLevels: 10
 # chromCount: 37
 # basesCovered: 1,155,614,560
 # mean: 0.166872
 # min: 0.000000
 # max: 1.000000
 # std: 0.286694
 
     #	encode those files into wiggle data
     time (zcat downloads/phastCons4way.wigFix.gz \
 	| wigEncode stdin phastCons4way.wig phastCons4way.wib)
     # Converted stdin, upper limit 1.00, lower limit 0.00
     #  real    6m26.433s
 
     du -hsc *.wi?
     #  1.1G    phastCons4way.wib
     #  184M    phastCons4way.wig
 
     # Load gbdb and database with wiggle.
     ln -s `pwd`/phastCons4way.wib /gbdb/mm10/tupChi1Multiz4way/phastCons4way.wib
     time hgLoadWiggle -pathPrefix=/gbdb/mm10/tupChi1Multiz4way \
 	mm10 tupChi1PhastCons4way phastCons4way.wig
     #   real    0m22.540s
 
     # use to set trackDb.ra entries for wiggle min and max
     # and verify table is loaded correctly
 
     time wigTableStats.sh mm10 tupChi1PhastCons4way
 # db.table                 min max mean       count sumData
 # mm10.tupChi1PhastCons4way  0 1 0.166872 1155614560 1.9284e+08
 #     stdDev  viewLimits
 #	0.286694 viewLimits=0:1
 
 # real    0m9.615s
 
     #  Create histogram to get an overview of all the data
     time hgWiggle -doHistogram -db=mm10 \
 	-hBinSize=0.001 -hBinCount=1000 -hMinVal=0.0 -verbose=2 \
 	    tupChi1PhastCons4way > histogram.data 2>&1
     #	real    1m9.916s
 
     #	create plot of histogram:
 
     printf 'set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff font \
 "/usr/share/fonts/default/Type1/n022004l.pfb"
 set size 1.4, 0.8
 set key left box
 set grid noxtics
 set grid ytics
 set title " Mouse mm10 Histogram tupChi1PhastCons4way track"
 set xlabel " phastCons4way score"
 set ylabel " Relative Frequency"
 set y2label " Cumulative Relative Frequency (CRF)"
 set y2range [0:1]
 set y2tics
 set yrange [0:0.02]
 
 plot "histogram.data" using 2:5 title " RelFreq" with impulses, \\
         "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
 ' | gnuplot > histo.png
 
     display histo.png &
 
 #########################################################################
 # phyloP for 4-way (DONE - 2017-04-20 - Hiram)
     # run phyloP with score=LRT
     ssh ku
     mkdir /cluster/data/mm10/bed/tupChi1Multiz4way/consPhyloP
     cd /cluster/data/mm10/bed/tupChi1Multiz4way/consPhyloP
 
     mkdir run.phyloP
     cd run.phyloP
     # Adjust model file base composition background and rate matrix to be
     # representative of the chromosomes in play
     grep BACKGROUND ../../4d/all.mod | awk '{printf "%0.3f\n", $3 + $4}'
     #	0.571
     /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/modFreqs \
 	../../4d/all.mod 0.571 > all.mod
     # verify, the BACKGROUND should now be paired up:
     grep BACK all.mod
     #   BACKGROUND: 0.219000 0.281000 0.281000 0.219000
 
     printf '#!/bin/csh -fe
 set PHASTBIN = /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin
 set f = $1
 set d = $f:h
 set file1 = $f:t
 set out = $2
 set cName = $f:t:r
 set grp = $cwd:t
 set cons = /hive/data/genomes/mm10/bed/tupChi1Multiz4way/consPhyloP
 set tmp = $cons/tmp/$grp/$f
 /bin/rm -fr $tmp
 /bin/mkdir -p $tmp
 set ssSrc = "/hive/data/genomes/mm10/bed/tupChi1Multiz4way/cons/SS/result/$f"
 set useGrp = "$grp.mod"
 /bin/ln -s $cons/run.phyloP/$grp.mod $tmp
 pushd $tmp > /dev/null
 $PHASTBIN/phyloP --method LRT --mode CONACC --wig-scores --chrom $cName \\
     -i SS $useGrp $ssSrc.ss > $file1.wigFix
 popd > /dev/null
 /bin/mkdir -p $out:h
 sleep 4
 /bin/touch $out:h
 /bin/mv $tmp/$file1.wigFix $out
 /bin/rm -fr $tmp
 /bin/rmdir --ignore-fail-on-non-empty $cons/tmp/$grp/$d
 /bin/rmdir --ignore-fail-on-non-empty $cons/tmp/$grp/$d:h
 /bin/rmdir --ignore-fail-on-non-empty $cons/tmp/$grp
 /bin/rmdir --ignore-fail-on-non-empty $cons/tmp
 ' > doPhyloP.csh
 
     chmod +x doPhyloP.csh
 
     # Create list of chunks
     find ../../cons/SS/result -type f | grep ".ss$" \
 	| sed -e "s/.ss$//; s#^../../cons/SS/result/##" > ss.list
     # make sure the list looks good
     wc -l ss.list
     #	290 ss.list
 
     # Create template file
     #	file1 == $chr/$chunk/file name without .ss suffix
     printf '#LOOP
 ../run.phyloP/doPhyloP.csh $(path1) {check out line+ wigFix/$(dir1)/$(file1).wigFix}
 #ENDLOOP
 ' > template
 
     ######################   Running all species  #######################
     # setup run for all species
     mkdir /hive/data/genomes/mm10/bed/tupChi1Multiz4way/consPhyloP/all
     cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/consPhyloP/all
     rm -fr wigFix
     mkdir wigFix
 
     gensub2 ../run.phyloP/ss.list single ../run.phyloP/template jobList
     #	beware overwhelming the cluster with these fast running high I/O jobs
     para create jobList
     para try ... check ... push ... etc ...
     para -maxJob=53 push
     para time > run.time
 # Completed: 290 of 290 jobs
 # CPU time in finished jobs:       1042s      17.37m     0.29h    0.01d  0.000 y
 # IO & Wait Time:                  2008s      33.47m     0.56h    0.02d  0.000 y
 # Average job time:                  11s       0.18m     0.00h    0.00d
 # Longest finished job:              22s       0.37m     0.01h    0.00d
 # Submission to last job:            84s       1.40m     0.02h    0.00d
 
     mkdir downloads
 
     time (find ./wigFix -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
 	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
 	| gzip -c > downloads/phyloP4way.wigFix.gz)
     #   real    12m14.234s
 # -rw-rw-r-- 1 1357982519 Apr 21 12:39 phyloP4way.wigFix.gz
 
 
     # check integrity of data with wigToBigWig
     time (zcat downloads/phyloP4way.wigFix.gz \
 	| wigToBigWig -verbose=2 stdin /hive/data/genomes/mm10/chrom.sizes \
 	phyloP4way.bw) > bigWig.log 2>&1
     egrep "real|VmPeak" bigWig.log
     # pid=77432: VmPeak:    12564972 kB
     # real    17m47.787s
 
     bigWigInfo phyloP4way.bw | sed -e 's/^/# /;'
 # version: 4
 # isCompressed: yes
 # isSwapped: 0
 # primaryDataSize: 1,672,367,975
 # primaryIndexSize: 63,248,068
 # zoomLevels: 10
 # chromCount: 37
 # basesCovered: 1,155,614,560
 # mean: 0.108291
 # min: -2.306000
 # max: 0.719000
 # std: 0.585706
 
     #	encode those files into wiggle data
     time (zcat downloads/phyloP4way.wigFix.gz \
 	| wigEncode stdin phyloP4way.wig phyloP4way.wib)
     # Converted stdin, upper limit 0.72, lower limit -2.31
     #    real    6m41.352s
 
     du -hsc *.wi?
     # 1.1G    phyloP4way.wib
     # 188M    phyloP4way.wig
 
     # Load gbdb and database with wiggle.
     ln -s `pwd`/phyloP4way.wib /gbdb/mm10/tupChi1Multiz4way/phyloP4way.wib
     time hgLoadWiggle -pathPrefix=/gbdb/mm10/tupChi1Multiz4way mm10 \
 	tupChi1PhyloP4way phyloP4way.wig
     # real    0m22.598s
 
     # use to set trackDb.ra entries for wiggle min and max
     # and verify table is loaded correctly
 
     wigTableStats.sh mm10 tupChi1PhyloP4way
 # db.table                 min     max    mean      count   sumData
 # mm10.tupChi1PhyloP4way  -2.306 0.719 0.108291 1155614560 1.25143e+08
 #       stdDev viewLimits
 #	0.585706 viewLimits=-2.306:0.719
 
     #	that range is: 0.719+2.306 = 3.025 for hBinSize=0.003025
 
     #  Create histogram to get an overview of all the data
     time hgWiggle -doHistogram \
 	-hBinSize=0.003025 -hBinCount=1000 -hMinVal=-2.306 -verbose=2 \
 	    -db=mm10 tupChi1PhyloP4way > histogram.data 2>&1
     # real    1m4.763s
 
     # find the Y range for the 2:5 graph
     grep -v chrom histogram.data | grep "^[0-9]" | ave -col=5 stdin \
       | sed -e 's/^/# /;'
 # Q1 0.000068
 # median 0.000261
 # Q3 0.001051
 # average 0.001280
 # min 0.000000
 # max 0.075274
 # count 781
 # total 1.000007
 # standard deviation 0.003947
 
     # find the X range for the 2:5 graph
     grep "^[0-9]" histogram.data | ave -col=2 stdin \
       | sed -e 's/^/# /;'
 # Q1 -1.558820
 # median -0.965925
 # Q3 -0.366975
 # average -0.917927
 # min -2.306000
 # max 0.719000
 # count 781
 # total -716.901065
 # standard deviation 0.798757
 
     #	create plot of histogram:
     printf 'set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff font \
 "/usr/share/fonts/default/Type1/n022004l.pfb"
 set size 1.4, 0.8
 set key left box
 set grid noxtics
 set grid ytics
 set title " Mouse mm10 Histogram tupChi1PhyloP4way track"
 set xlabel " phyloP4way score"
 set ylabel " Relative Frequency"
 set y2label " Cumulative Relative Frequency (CRF)"
 set y2range [0:1]
 set y2tics
 set xtics
 set xrange [-2.6:0.85]
 set yrange [0:0.033]
 
 plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
         "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
 ' | gnuplot > histo.png
 
     display histo.png &
     # appears to have an odd hole in the data just past X=0 ?
 
 #############################################################################
 # hgPal downloads (DONE - 2017-04-21 - Hiram)
 #   FASTA from 4-way for knownGene (refGene and knownCanonical not run here)
 
     ssh hgwdev
     screen -S mm10HgPal
     mkdir /hive/data/genomes/mm10/bed/tupChi1Multiz4way/pal
     cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/pal
     cat ../species.list | tr '[ ]' '[\n]' > order.list
 
     # this for loop only writes the job script (under a second); running
     # the resulting jobs took about 11 minutes on this assembly
     export mz=tupChi1Multiz4way
     export gp=knownGene
     export db=mm10
     export I=0
     export D=0
     mkdir exonAA exonNuc
     printf '#!/bin/sh\n' > $gp.jobs
 
     time for C in `sort -nk2 ../../../chrom.sizes | cut -f1`
     do
         I=`echo $I | awk '{print $1+1}'`
         D=`echo $D | awk '{print $1+1}'`
         dNum=`echo $D | awk '{printf "%03d", int($1/1000)}'`
         mkdir -p exonNuc/${dNum} > /dev/null
         mkdir -p exonAA/${dNum} > /dev/null
 	echo "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.list stdout | gzip -c > exonNuc/${dNum}/$C.exonNuc.fa.gz &"
 	echo "mafGene -chrom=$C -exons $db $mz $gp order.list stdout | gzip -c > exonAA/${dNum}/$C.exonAA.fa.gz &"
         if [ $I -gt 16 ]; then
             echo "date"
             echo "wait"
             I=0
         fi
     done >> $gp.jobs
     # real    0m0.772s
 
 
     echo "date" >> $gp.jobs
     echo "wait" >> $gp.jobs
 
     chmod +x  knownGene.jobs
 
     time (./$gp.jobs) > $gp.jobs.log 2>&1 &
     # real    11m18.851s
 
     export mz=multiz4way
     export gp=knownGene
     time find ./exonAA -type f | grep exonAA.fa.gz | xargs zcat \
      | gzip -c > $gp.$mz.exonAA.fa.gz
     #  real    0m8.492s
 
     time find ./exonNuc -type f | grep exonNuc.fa.gz | xargs zcat \
      | gzip -c > $gp.$mz.exonNuc.fa.gz
     #   real    0m39.199s
 
 # -rw-rw-r-- 1 33908467 Apr 21 18:49 knownGene.multiz4way.exonAA.fa.gz
 # -rw-rw-r-- 1 55392688 Apr 21 18:49 knownGene.multiz4way.exonNuc.fa.gz
 
     export mz=multiz4way
     export gp=knownGene
     export db=mm10
     export pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
     mkdir -p $pd
     md5sum *.fa.gz > md5sum.txt
     ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
     ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
     ln -s `pwd`/md5sum.txt $pd/
 
     rm -rf exonAA exonNuc
 
 #############################################################################
 # construct download files for 4-way (DONE - 2017-04-21 - Hiram)
     mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/multiz4way
     mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons4way
     mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP4way
     mkdir /hive/data/genomes/mm10/bed/tupChi1Multiz4way/downloads
     cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/downloads
     mkdir multiz4way phastCons4way phyloP4way
     cd multiz4way
     time cp -p ../../anno/mm10.4way.maf .
     #   real    0m15.285s
 
     # -rw-rw-r-- 1 7580362629 Apr 20 22:27 mm10.4way.maf
 
     du -hsc *
     #  7.1G     mm10.4way.maf
 
     time gzip *.maf
     #   real    27m2.122s
 
     # -rw-rw-r-- 1 2040574809 Apr 20 22:27 mm10.4way.maf.gz
 
     du -hsc *.maf.gz
     #  2.0G    mm10.4way.maf.gz
 
     ###########################################################################
     ## create upstream knownGene maf files
     cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/downloads/tupChi1Multiz4way
     # bash script
 #!/bin/sh
 export geneTbl="knownGene"
 for S in 1000 2000 5000
 do
     echo "making upstream${S}.maf"
     featureBits mm10 ${geneTbl}:upstream:${S} -fa=/dev/null -bed=stdout \
         | perl -wpe 's/_up[^\t]+/\t0/' | sort -k1,1 -k2,2n \
         | /cluster/bin/$MACHTYPE/mafFrags mm10 tupChi1Multiz4way \
             stdin stdout \
               -orgs=/hive/data/genomes/mm10/bed/tupChi1Multiz4way/species.list \
         | gzip -c > upstream${S}.${geneTbl}.maf.gz
     echo "done upstream${S}.${geneTbl}.maf.gz"
 done
     #   real    12m55.050s
 
     md5sum *.maf.gz *.nh upstream*.gz README.txt >> md5sum.txt
 
     # some other symlinks were already made above
     # obtain the README.txt from tupChi1/multiz4way and update for this
     #   situation
     ln -s `pwd`/upstream*.gz `pwd`/README.txt \
         /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/tupChi1Multiz4way
 
     grep TREE ../../4d/all.mod | awk '{print $NF}' \
       | ~/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \
          > mm10.4way.nh
     ~/kent/src/hg/utils/phyloTrees/commonNames.sh mm10.4way.nh \
       | ~/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \
          > mm10.4way.commonNames.nh
     ~/kent/src/hg/utils/phyloTrees/scientificNames.sh mm10.4way.nh \
 	| $HOME/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \
 	    > mm10.4way.scientificNames.nh
     time md5sum *.nh *.maf.gz > md5sum.txt
     #   real    0m35.144s
 
     ln -s `pwd`/* \
         /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/multiz4way
 
     du -hsc *.maf.gz ../../anno/mm10.4way.maf
     #  2.0G     mm10.4way.maf.gz
     #  7.1G     ../../anno/mm10.4way.maf
 
     # obtain the README.txt from tupChi1/multiz4way and update for this
     #   situation
 
     #####################################################################
     cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/downloads/phastCons4way
 
     ln -s ../../cons/all/downloads/phastCons4way.wigFix.gz \
         ./mm10.phastCons4way.wigFix.gz
     ln -s ../../cons/all/phastCons4way.bw ./mm10.phastCons4way.bw
     ln -s ../../cons/all/all.mod ./mm10.phastCons4way.mod
     time md5sum *.gz *.mod *.bw > md5sum.txt
     #   real    0m20.354s
 
     # obtain the README.txt from tupChi1/phastCons4way and update for this
     #   situation
     ln -s `pwd`/* \
       /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phastCons4way
 
     #####################################################################
     cd /hive/data/genomes/mm10/bed/tupChi1Multiz4way/downloads/phyloP4way
 
     ln -s ../../consPhyloP/all/downloads/phyloP4way.wigFix.gz \
         ./mm10.phyloP4way.wigFix.gz
     ln -s ../../consPhyloP/run.phyloP/all.mod mm10.phyloP4way.mod
     ln -s ../../consPhyloP/all/phyloP4way.bw mm10.phyloP4way.bw
 
     time md5sum *.mod *.bw *.gz > md5sum.txt
     #   real    0m12.264s
 
     # obtain the README.txt from tupChi1/phyloP4way and update for this
     #   situation
     ln -s `pwd`/* \
       /usr/local/apache/htdocs-hgdownload/goldenPath/mm10/phyloP4way
 
 #############################################################################
 # wiki page for 4-way (DONE - 2017-04-21 - Hiram)
     mkdir /hive/users/hiram/bigWays/mm10.4way
     cd /hive/users/hiram/bigWays
     echo "mm10" > mm10.4way/ordered.list
     awk '{print $1}' /hive/data/genomes/mm10/bed/tupChi1Multiz4way/4way.distances.txt \
        >> mm10.4way/ordered.list
 
     # sizeStats.sh catches up the cached measurements required for data
     # in the tables.  They are usually already mostly done, only new
     # assemblies will have updates.
     ./sizeStats.sh mm10.4way/ordered.list
     # dbDb.sh constructs mm10.4way/Mm10_4-way_conservation_alignment.html
     # may need to add new assembly references to srcReference.list and
     # urlReference.list
     ./dbDb.sh mm10 4way
     # sizeStats.pl constructs mm10.4way/Mm10_4-way_Genome_size_statistics.html
     # this requires entries in coverage.list for new sequences
     ./sizeStats.pl mm10 4way
 
     # defCheck.pl constructs mm10.4way/Mm10_4-way_conservation_lastz_parameters.html
     ./defCheck.pl mm10 4way
 
     # this constructs the html pages in mm10.4way/:
 # -rw-rw-r-- 1 2800 Apr 21 21:22 Mm10_4-way_conservation_alignment.html
 # -rw-rw-r-- 1 4199 Apr 21 21:22 Mm10_4-way_Genome_size_statistics.html
 # -rw-rw-r-- 1 2995 Apr 21 21:22 Mm10_4-way_conservation_lastz_parameters.html
 
     # add those pages to the genomewiki.  Their page names are the
     # names of the .html files without the .html:
 #  Mm10_4-way_conservation_alignment
 #  Mm10_4-way_Genome_size_statistics
 #  Mm10_4-way_conservation_lastz_parameters
 
     # when you view the first one you enter, it will have links to the
     # missing two.
 
 ##############################################################################
 # LASTZ Chinese hamster criGri1 (DONE - 2017-05-12 - Hiram)
     #	establish a screen to control this job
     screen -S mm10criGri1
     mkdir /hive/data/genomes/mm10/bed/lastzCriGri1.2017-05-12
     cd /hive/data/genomes/mm10/bed/lastzCriGri1.2017-05-12
 
     printf '# mouse vs. Chinese hamster
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 SEQ1_LIMIT=40
 
 # QUERY: Chinese hamster criGri1
 SEQ2_DIR=/hive/data/genomes/criGri1/criGri1.2bit
 SEQ2_LEN=/hive/data/genomes/criGri1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LIMIT=100
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzCriGri1.2017-05-12
 TMPDIR=/dev/shm
 ' > DEF
 
     time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
 	-syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
     #	real    289m42.628s
 
     cat fb.mm10.chainCriGri1Link.txt
     #	1577848220 bases of 2652783500 (59.479%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev mm10 criGri1 \
       -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
     #	real    797m59.816s
 
     mkdir /hive/data/genomes/criGri1/bed/blastz.mm10.swap
     cd /hive/data/genomes/criGri1/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzCriGri1.2017-05-12/DEF \
 	-noDbNameCheck -swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
     #	real    172m50.552s
 
     cat fb.criGri1.chainMm10Link.txt
     #	1589449878 bases of 2301325917 (69.067%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev criGri1 mm10 \
       -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
     # real    846m34.982s
 
 ##############################################################################
 # ncbiRefSeq composite gene track (DONE - 2017-05-26 - Hiram)
     mkdir  /hive/data/genomes/mm10/bed/ncbiRefSeq.p5
     cd  /hive/data/genomes/mm10/bed/ncbiRefSeq.p5
 
     ~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \
       -bigClusterHub=ku -dbHost=hgwdev \
       -stop=download -fileServer=hgwdev -smallClusterHub=ku -workhorse=hgwdev \
       refseq vertebrate_mammalian Mus_musculus \
       GCF_000001635.25_GRCm38.p5 mm10
 
     ~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \
       -debug -bigClusterHub=ku -dbHost=hgwdev \
       -continue=process -stop=process -fileServer=hgwdev -smallClusterHub=ku \
       -workhorse=hgwdev refseq vertebrate_mammalian Mus_musculus \
       GCF_000001635.25_GRCm38.p5 mm10
 
     ~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \
       -debug -bigClusterHub=ku -dbHost=hgwdev \
       -continue=load -stop=load -fileServer=hgwdev -smallClusterHub=ku \
       -workhorse=hgwdev refseq vertebrate_mammalian Mus_musculus \
       GCF_000001635.25_GRCm38.p5 mm10
 
     # There are some ncRNAs missing
     faSize -detailed mm10.rna.fa \
       | pslCheck -querySizes=stdin -targetSizes=../../chrom.sizes \
        -db=mm10 ncbiRefSeqPsl
     #  checked: 85224 failed: 18 errors: 18
 
     # and joinerCheck is not completely clean:
  joinerCheck -identifier=ncbiRefSeq -keys -database=mm10 all.joiner
 Checking keys on database mm10
  mm10.ncbiRefSeqLink.id - hits 107479 of 107479 (100.000%) ok
  mm10.ncbiRefSeqCurated.name - hits 32217 of 32217 (100.000%) ok
  mm10.ncbiRefSeqPredicted.name - hits 52989 of 52989 (100.000%) ok
  mm10.ncbiRefSeqPsl.qName - hits 85206 of 85224 (99.979%)
 Error: 18 of 85224 elements (0.021%) of mm10.ncbiRefSeqPsl.qName are not in key ncbiRefSeq.name line 6045 of all.joiner
 Example miss: NR_033199.1
  mm10.ncbiRefSeqCds.id - hits 76076 of 76076 (100.000%) ok
  mm10.seqNcbiRefSeq.acc - hits 85205 of 85205 (100.000%) ok
 
     # These difficulties arise because some of the original GFF items were
     # dropped due to unprocessedRoots.  The fix is to eliminate the remaining
     # unprocessedRoots items from the loaded PSL table.
 
     # discovered that it didn't help to add them in, (procedure included below)
     #   then featureBits went bad:
  joinerCheck -identifier=ncbiRefSeq -keys -database=mm10 all.joiner
 Checking keys on database mm10
  mm10.ncbiRefSeqLink.id - hits 107479 of 107479 (100.000%) ok
  mm10.ncbiRefSeqCurated.name - hits 32217 of 32217 (100.000%) ok
  mm10.ncbiRefSeqPredicted.name - hits 52989 of 52989 (100.000%) ok
  mm10.ncbiRefSeqPsl.qName - hits 85206 of 85224 (99.979%)
 Error: 18 of 85224 elements (0.021%) of mm10.ncbiRefSeqPsl.qName are not in key ncbiRefSeq.name line 6045 of all.joiner
 Example miss: NR_033199.1
  mm10.ncbiRefSeqCds.id - hits 76076 of 76076 (100.000%) ok
  mm10.seqNcbiRefSeq.acc - hits 85205 of 85222 (99.980%)
 Error: 17 of 85222 elements (0.020%) of mm10.seqNcbiRefSeq.acc are not in key ncbiRefSeq.name line 6047 of all.joiner
 Example miss: NR_015480.1
 
     # eliminate items from PSL file,
     # compare name lists:
     hgsql -N -e 'select qName from ncbiRefSeqPsl;' mm10 \
       | sort -u > ncbiRefSeqPsl.qName
     hgsql -N -e 'select name from ncbiRefSeq;' mm10 \
       | sort -u > ncbiRefSeq.name
     wc -l ncbiRefSeqPsl.qName ncbiRefSeq.name
 #   85220 ncbiRefSeqPsl.qName
 #  107479 ncbiRefSeq.name
     comm -12 ncbiRefSeqPsl.qName ncbiRefSeq.name | wc -l
 #   85203
     # need to eliminate 17 items from the PSL track:
     comm -23 ncbiRefSeqPsl.qName ncbiRefSeq.name | wc -l
 #       17
     comm -23 ncbiRefSeqPsl.qName ncbiRefSeq.name | while read N
 do
    hgsql -e "select * from ncbiRefSeqPsl where qName=\"$N\";" mm10
 done | wc -l
 #         35
     comm -23 ncbiRefSeqPsl.qName ncbiRefSeq.name | while read N
 do
    hgsql -e "delete from ncbiRefSeqPsl where qName=\"$N\";" mm10
 done
 
     hgsql -N -e 'select qName from ncbiRefSeqPsl;' mm10 \
       | sort -u > ncbiRefSeqPsl.clean.qName
     wc -l ncbiRefSeqPsl.clean.qName ncbiRefSeq.name
 #   85203 ncbiRefSeqPsl.clean.qName
 #  107479 ncbiRefSeq.name
 
     comm -12 ncbiRefSeqPsl.clean.qName ncbiRefSeq.name | wc -l
 #   85203
 
     # joinerCheck is now clean
     joinerCheck -identifier=ncbiRefSeq -keys -database=mm10 all.joiner
 Checking keys on database mm10
  mm10.ncbiRefSeqLink.id - hits 107479 of 107479 (100.000%) ok
  mm10.ncbiRefSeqCurated.name - hits 32217 of 32217 (100.000%) ok
  mm10.ncbiRefSeqPredicted.name - hits 52989 of 52989 (100.000%) ok
  mm10.ncbiRefSeqPsl.qName - hits 85206 of 85206 (100.000%) ok
  mm10.ncbiRefSeqCds.id - hits 76076 of 76076 (100.000%) ok
  mm10.seqNcbiRefSeq.acc - hits 85205 of 85205 (100.000%) ok
 
     # and pslCheck is now clean:
     faSize -detailed /gbdb/mm10/ncbiRefSeq/seqNcbiRefSeq.rna.fa \
        | pslCheck -querySizes=stdin -targetSizes=../../../chrom.sizes \
            -db=mm10 ncbiRefSeqPsl
     # checked: 85206 failed: 0 errors: 0
 
     hgsql -N -e 'select acc,size from seqNcbiRefSeq;' mm10 \
       | pslCheck -querySizes=stdin -targetSizes=../../../chrom.sizes \
          -db=mm10 ncbiRefSeqPsl
     # checked: 85206 failed: 0 errors: 0
 
     ### update hgFixed.trackVersion
     hgsql -e 'update trackVersion set version="2016-12-16" where ix=1706;' hgFixed
 
     ### XXX obsolete procedure that does not fix the problem
     mkdir /hive/data/genomes/mm10/bed/ncbiRefSeq.p5/missingRna
     cd  /hive/data/genomes/mm10/bed/ncbiRefSeq.p5/missingRna
 
     # determine missing sequences ids
 
     faSize -detailed ../mm10.rna.fa \
       | pslCheck -querySizes=stdin -targetSizes=../../../chrom.sizes \
        -db=mm10 ncbiRefSeqPsl > pslCheck.ncbiRefSeq.rna.fa.txt 2>&1
 
     egrep -v "does not exist|errors:" pslCheck.ncbiRefSeq.rna.fa.txt \
       | awk '{printf "%s\t%s\n", $5,$4}' | sort -u > idWithRange.seqListFile.tab
 
     # fetch RNA sequences from entrez:
     mkdir ncbiRna
 
 cut -f2 idWithRange.seqListFile.tab | sed -e 's#:[0-9]\+-[0-9]\+##;' \
   | while read id
 do
    wget -O /dev/stdout \
   "http://www.ncbi.nlm.nih.gov/sviewer/viewer.fcgi?db=nuccore&dopt=fasta&sendto=on&id=$id" \
      | sed -e 's/ Mus musculus .*//;' | sed -e '/^$/d' > ncbiRna/$id.fa
 done
 
 fi
 
     cat ../mm10.rna.fa ncbiRna > mm10.seqNcbiRefSeq.rna.fa
 
     rm -f /gbdb/mm10/ncbiRefSeq/seqNcbiRefSeq.rna.fa
     ln -s `pwd`/mm10.seqNcbiRefSeq.rna.fa \
         /gbdb/mm10/ncbiRefSeq/seqNcbiRefSeq.rna.fa
     hgLoadSeq -drop -seqTbl=seqNcbiRefSeq -extFileTbl=extNcbiRefSeq mm10 \
         /gbdb/mm10/ncbiRefSeq/seqNcbiRefSeq.rna.fa
 
     # now have clean pslCheck, verify both with the file and the seq table:
     faSize -detailed /gbdb/mm10/ncbiRefSeq/seqNcbiRefSeq.rna.fa \
        | pslCheck -querySizes=stdin -targetSizes=../../../chrom.sizes \
            -db=mm10 ncbiRefSeqPsl
     #  checked: 85224 failed: 0 errors: 0
 
     hgsql -N -e 'select acc,size from seqNcbiRefSeq;' mm10 \
       | pslCheck -querySizes=stdin -targetSizes=../../../chrom.sizes \
          -db=mm10 ncbiRefSeqPsl
 
     #  checked: 85224 failed: 0 errors: 0
 
 ##############################################################################
 # 2017-05-31: import of UCSC GENCODE group processing of GENCODE VM14 (markd)
 
     # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions
 
     # download, build and load tables
     mkdir -p /hive/data/genomes/mm10/bed/gencodeVM14
     pushd /hive/data/genomes/mm10/bed/gencodeVM14
     (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
 
     ##  gencode-cmp.tsv check to see if sizes make sense
 
     # generate trackDb and joiner blurb
     pushd kent/src/hg/makeDb/trackDb
     ../../makeDb/outside/gencode/gencodeGenerateTrackDbs mm10 M14 89 'May 2017'
 
     # edit mouse/mm10/trackDb.wgEncode.ra to add new .ra file include
     make DBS=mm10
 
     # Update mouse/mm10/wgEncodeGencodeSuper.html and update 'Release Notes'
     # to describe new release. [ONLY if it's going to be pushed]
 
     # edit  all.joiner to add ~/tmp/gencodeVM14.joiner
     # verify with:
     pushd /hive/data/genomes/mm10/bed/gencodeVM14
     make  -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck db=mm10
 
     # commit all
 ##############################################################################
 # LASTZ zebrafish danRer11 (DONE - 2017-06-12 - Chris)
     #	establish a screen to control this job
     screen -S mm10danRer11
     mkdir /hive/data/genomes/mm10/bed/lastzDanRer11.2017-06-12
     cd /hive/data/genomes/mm10/bed/lastzDanRer11.2017-06-12
 
     printf '# mouse vs. zebrafish
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 SEQ1_LIMIT=40
 
 # QUERY: zebrafish danRer11
 SEQ2_DIR=/hive/data/genomes/danRer11/danRer11.2bit
 SEQ2_LEN=/hive/data/genomes/danRer11/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LIMIT=100
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzDanRer11.2017-06-12
 TMPDIR=/dev/shm
 ' > DEF
 
     time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
 	-syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
     #	real    289m42.628s
     cat fb.mm10.chainDanRer11Link.txt
     # 36448414 bases of 2652783500 (1.374%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev mm10 danRer11 \
       -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
 
     mkdir /hive/data/genomes/danRer11/bed/blastz.mm10.swap
     cd /hive/data/genomes/danRer11/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzDanRer11.2017-06-12/DEF \
 	-noDbNameCheck -swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
     #	real    172m50.552s
 
     cat fb.danRer11.chainMm10Link.txt
     #	45558857 bases of 1674677181 (2.720%) in intersection
     #	(stale copy from the criGri1 swap, not a danRer11 result: 1589449878 bases of 2301325917 (69.067%) in intersection)
 
     time (doRecipBest.pl -workhorse=hgwdev danRer11 mm10 \
       -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
     # real    846m34.982s
 
 ##############################################################################
 # LASTZ Killer whale orcOrc1 (DONE - 2017-06-15 - Hiram)
     #	establish a screen to control this job
     screen -S mm10orcOrc1
     mkdir /hive/data/genomes/mm10/bed/lastzOrcOrc1.2017-06-15
     cd /hive/data/genomes/mm10/bed/lastzOrcOrc1.2017-06-15
 
     printf '# killer whale vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Killer whale orcOrc1
 SEQ2_DIR=/hive/data/genomes/orcOrc1/orcOrc1.2bit
 SEQ2_LEN=/hive/data/genomes/orcOrc1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=30
 
 BASE=/hive/data/genomes/mm10/bed/lastzOrcOrc1.2017-06-15
 TMPDIR=/dev/shm
 ' > DEF
 
     time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
 	-syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
     #	real    192m26.791s
 
     cat fb.mm10.chainOrcOrc1Link.txt
     # 832909116 bases of 2652783500 (31.398%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev mm10 orcOrc1 \
       -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
     # real    276m44.875s
 
     mkdir /hive/data/genomes/orcOrc1/bed/blastz.mm10.swap
     cd /hive/data/genomes/orcOrc1/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzOrcOrc1.2017-06-15/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
     #	real    72m53.064s
 
     cat fb.orcOrc1.chainMm10Link.txt
     #	809350350 bases of 2249582125 (35.978%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev orcOrc1 mm10 \
       -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1
     # real    214m50.810s
 
 ##############################################################################
 # LASTZ Baboon papAnu3 (DONE - 2017-06-21 - Hiram)
     #	establish a screen to control this job
     screen -S mm10papAnu3
     mkdir /hive/data/genomes/mm10/bed/lastzPapAnu3.2017-06-21
     cd /hive/data/genomes/mm10/bed/lastzPapAnu3.2017-06-21
 
     printf '# mouse vs. baboon
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 SEQ1_LIMIT=40
 
 # QUERY: baboon papAnu3
 SEQ2_DIR=/hive/data/genomes/papAnu3/papAnu3.2bit
 SEQ2_LEN=/hive/data/genomes/papAnu3/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LIMIT=180
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzPapAnu3.2017-06-21
 TMPDIR=/dev/shm
 ' > DEF
 
     time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
 	-syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
     #	real    474m39.013s
 
     cat fb.mm10.chainPapAnu3Link.txt
     #	910628118 bases of 2652783500 (34.327%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev mm10 papAnu3 \
       -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
     # real    644m20.659s
 
     mkdir /hive/data/genomes/papAnu3/bed/blastz.mm10.swap
     cd /hive/data/genomes/papAnu3/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzPapAnu3.2017-06-21/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
     #	real    66m35.501s
 
     cat fb.papAnu3.chainMm10Link.txt
     #	897929517 bases of 2893270787 (31.035%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev papAnu3 mm10 \
       -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
     # real    578m46.893s
 
 ##############################################################################
 # LASTZ pig susScr11 (DONE - 2017-07-31 - Hiram)
     #	establish a screen to control this job
     screen -S mm10susScr11
     mkdir /hive/data/genomes/mm10/bed/lastzSusScr11.2017-07-31
     cd /hive/data/genomes/mm10/bed/lastzSusScr11.2017-07-31
 
     printf '# mouse vs. pig
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=40000000
 SEQ1_LAP=10000
 SEQ1_LIMIT=1
 
 # QUERY: pig susScr11
 SEQ2_DIR=/hive/data/genomes/susScr11/susScr11.2bit
 SEQ2_LEN=/hive/data/genomes/susScr11/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LIMIT=1
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzSusScr11.2017-07-31
 TMPDIR=/dev/shm
 ' > DEF
 
     time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
 	-syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
     #	real    567m0.166s
 
     cat fb.mm10.chainSusScr11Link.txt
     #	731012356 bases of 2652783500 (27.556%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev mm10 susScr11 \
       -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
     # real    455m39.565s
 
     mkdir /hive/data/genomes/susScr11/bed/blastz.mm10.swap
     cd /hive/data/genomes/susScr11/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzSusScr11.2017-07-31/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
     #	real    61m6.153s
 
     cat fb.susScr11.chainMm10Link.txt
     #	715277290 bases of 2472073034 (28.934%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev susScr11 mm10 \
       -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
     # real    358m15.340s
 
 ##############################################################################
 # lastz nile tilapia oreNil3 (DONE - 2017-07-31 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10OreNil3
     mkdir /hive/data/genomes/mm10/bed/lastzOreNil3.2017-07-31
     cd /hive/data/genomes/mm10/bed/lastzOreNil3.2017-07-31
 
     printf '# Mouse vs. nile tilapia
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 SEQ1_LIMIT=5
 
 # QUERY: nile tilapia oreNil3
 SEQ2_DIR=/hive/data/genomes/oreNil3/oreNil3.2bit
 SEQ2_LEN=/hive/data/genomes/oreNil3/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=10
 
 BASE=/hive/data/genomes/mm10/bed/lastzOreNil3.2017-07-31
 TMPDIR=/scratch/tmp
 ' > DEF
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # when not present, SEQ2_LIMIT is a default 100
     time (doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
         -syntenicNet -chainMinScore=5000 -chainLinearGap=loose) > do.log 2>&1 &
     #   real    307m32.926s
 
     cat fb.mm10.chainOreNil3Link.txt
     #   54152663 bases of 2652783500 (2.041%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev mm10 oreNil3 \
       -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
     # real    243m27.139s
 
     #	and for the swap
     mkdir /hive/data/genomes/oreNil3/bed/blastz.mm10.swap
     cd /hive/data/genomes/oreNil3/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 -syntenicNet \
 	/hive/data/genomes/mm10/bed/lastzOreNil3.2017-07-31/DEF \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
         -swap -chainMinScore=5000 -chainLinearGap=loose) > swap.log 2>&1 &
     #   real    8m5.590s
 
     cat  fb.oreNil3.chainMm10Link.txt
     #   55291586 bases of 1009856516 (5.475%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev oreNil3 mm10 \
       -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
     # real    230m56.580s
 
 #########################################################################
 # crispr 10K track (DONE - Hiram - 2017-07-28)
     # the doCrispr.pl script was still being developed during this procedure,
     # hence the step-by-step invocations below:
     mkdir /hive/data/genomes/mm10/bed/crispr.10K
     cd /hive/data/genomes/mm10/bed/crispr.10K
 
     time (~/kent/src/hg/utils/automation/doCrispr.pl \
       -stop=guides -buildDir=`pwd` mm10 ensGene) > guides.log 2>&1
     # real    78m39.898s
 # Completed: 99 of 99 jobs
 # CPU time in finished jobs:      12182s     203.04m     3.38h    0.14d  0.000 y
 # IO & Wait Time:                  1076s      17.93m     0.30h    0.01d  0.000 y
 # Average job time:                 134s       2.23m     0.04h    0.00d
 # Longest finished job:             181s       3.02m     0.05h    0.00d
 # Submission to last job:          4567s      76.12m     1.27h    0.05d
 
     ~/kent/src/hg/utils/automation/doCrispr.pl -continue=specScores \
       -stop=specScores -buildDir=`pwd` mm10 ensGene
 # Completed: 945820 of 1558824 jobs
 # CPU time in finished jobs:  352722192s 5878703.20m 97978.39h 4082.43d 11.185 y
 # IO & Wait Time:             1367298315s 22788305.25m 379805.09h 15825.21d 43.357 y
 # Average job time:                1819s      30.31m     0.51h    0.02d
 # Longest finished job:            8656s     144.27m     2.40h    0.10d
 # Submission to last job:       2172942s   36215.70m   603.60h   25.15d
 
     # after ku reboot, finishing:
 # Completed: 613973 of 613973 jobs
 # CPU time in finished jobs:  155165030s 2586083.83m 43101.40h 1795.89d  4.920 y
 # IO & Wait Time:             584008656s 9733477.60m 162224.63h 6759.36d 18.519 y
 # Average job time:                1204s      20.07m     0.33h    0.01d
 # Longest finished job:            8978s     149.63m     2.49h    0.10d
 # Submission to last job:       1137188s   18953.13m   315.89h   13.16d
 
 
     ~/kent/src/hg/utils/automation/doCrispr.pl -continue=effScores \
       -stop=effScores -buildDir=`pwd` mm10 ensGene
 # Completed: 13518 of 13518 jobs
 # CPU time in finished jobs:    6244711s  104078.52m  1734.64h   72.28d  0.198 y
 # IO & Wait Time:                 32457s     540.95m     9.02h    0.38d  0.001 y
 # Average job time:                 464s       7.74m     0.13h    0.01d
 # Longest finished job:            2373s      39.55m     0.66h    0.03d
 # Submission to last job:         15145s     252.42m     4.21h    0.18d
 
     ~/kent/src/hg/utils/automation/doCrispr.pl -continue=offTargets \
       -stop=offTargets -buildDir=`pwd` mm10 ensGene
 # Completed: 77942 of 77942 jobs
 # CPU time in finished jobs:    1397706s   23295.10m   388.25h   16.18d  0.044 y
 # IO & Wait Time:                313616s    5226.94m    87.12h    3.63d  0.010 y
 # Average job time:                  22s       0.37m     0.01h    0.00d
 # Longest finished job:              35s       0.58m     0.01h    0.00d
 # Submission to last job:          9239s     153.98m     2.57h    0.11d
 
 
     ~/kent/src/hg/utils/automation/doCrispr.pl -continue=load \
       -stop=load -buildDir=`pwd` mm10 ensGene
     # real    235m41.378s
 
     ##########################################################################
     # FIXUP broken files (working - Max and Hiram - 2018-04,05)
 
     # Max generated a new specScores.tab, add in the chrM specScores
     # and make a unique set in a new specScores.tab file
 
     cd /hive/data/genomes/mm10/bed/crispr.10K/uniqSpecScores
 
     printf "targetSeq\tmitSpecScore\tofftargetCount\ttargetGenomeGeneLocus\n" \
 	> max.withChrM.specScores.tab
 
     grep -h -v targetSeq ../specScores.max.tab ../addChrM/specScores.tab \
 	| $HOME/bin/x86_64/gnusort -S100G --parallel=32 -u \
 	>> max.withChrM.specScores.tab
     # real    1m39.468s
 
     # this new file is much larger than before:
 # -rw-rw-r-- 1 3616703851 Jul 31  2017 withChrM.specScores.tab
 # -rw-rw-r-- 1 5580638498 May 15 14:55 max.withChrM.specScores.tab
 
     # Now generate a new crispr.bed and crispr.bb file
 
     mkdir  /hive/data/genomes/mm10/bed/crispr.10K/maxBed
     cd  /hive/data/genomes/mm10/bed/crispr.10K/maxBed
     # setup new inputs:
     ln -s ../addChrM/withChrM.allGuides.bed withChrM.allGuides.bed
     ln -s ../uniqSpecScores/max.withChrM.specScores.tab max.withChrM.specScores.tab
     ln -s ../addChrM/withChrM.effScores.tab withChrM.effScores.tab
     ln -s ../addChrM/withChrM.offtargets.offsets.tab withChrM.offtargets.offsets.tab
     ln -s ../addChrM/offTargets ./offTargets
 
     time (/cluster/software/bin/python \
       /hive/data/outside/crisprTrack/scripts/createBigBed.py mm10 \
 	withChrM.allGuides.bed max.withChrM.specScores.tab \
      withChrM.effScores.tab withChrM.offtargets.offsets.tab) > newBed.log 2>&1
     # real    232m5.379s
 
 # -rw-rw-r-- 1 27947769791 May 15 17:55 crispr.bed
 # -rw-rw-r-- 1  6911180170 May 15 18:42 crispr.bb
 
 ##############################################################################
 # LASTZ Gorilla gorGor5 (DONE - 2017-08-04 - Hiram)
     #	establish a screen to control this job
     screen -S mm10gorGor5
     mkdir /hive/data/genomes/mm10/bed/lastzGorGor5.2017-08-04
     cd /hive/data/genomes/mm10/bed/lastzGorGor5.2017-08-04
 
     printf '# mouse vs. gorilla
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 SEQ1_LIMIT=1
 
 # QUERY: gorilla gorGor5
 SEQ2_DIR=/hive/data/genomes/gorGor5/gorGor5.2bit
 SEQ2_LEN=/hive/data/genomes/gorGor5/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LIMIT=130
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzGorGor5.2017-08-04
 TMPDIR=/dev/shm
 ' > DEF
 
     time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
 	-syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
     #	real    170m18.102s
 
     cat fb.mm10.chainGorGor5Link.txt
     #	934147601 bases of 2652783500 (35.214%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev mm10 gorGor5 \
       -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
     # real    327m34.879s
 
     mkdir /hive/data/genomes/gorGor5/bed/blastz.mm10.swap
     cd /hive/data/genomes/gorGor5/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzGorGor5.2017-08-04/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
     #	real    72m34.088s
 
     cat fb.gorGor5.chainMm10Link.txt
     #	990002546 bases of 3080431298 (32.138%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev gorGor5 mm10 \
       -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
     # real    297m3.002s
 
 ##############################################################################
 # refSeqFuncElems NCBI refSeq functional elements, REDONE 2017-11-29 Angie
 # previously done 2017-08-08 by Chris E
 
 mkdir /hive/data/genomes/mm10/bed/refSeqFuncElems.2017-11-29
 cd /hive/data/genomes/mm10/bed/refSeqFuncElems.2017-11-29
 
 # NOTE FOR NEXT TIME: instead of using interim GFF, in the future these annotations might be
 # folded into the same main release GFF3 from which the ncbiRefSeq* tables are extracted by
 # doNcbiRefSeq.pl.
 wget ftp://ftp.ncbi.nlm.nih.gov/genomes/Mus_musculus/GFF_interim/interim_GRCm38.p6_top_level_2017-09-26.gff3.gz
 
 # Get mapping of RefSeq NC_* chromosome accs (and NT_*, NW_*) to mm10 chrom names
 hgsql mm10 -NBe 'select alias, chrom from chromAlias where source = "refseq" order by alias' \
 > refSeqToChrom.tab
 cut -f 2 refSeqToChrom.tab | sed -e 's/^/^/' > chrom.tab
 
 # Use Terence Murphy's list of feature types (and the multi-type attribute regulatory_class)
 # to identify Functional Elements and swap in mm10 chrom names.
 # Use subColumn -miss so it doesn't quit when it sees a patch contig that doesn't map to an
 # mm10 chrom.  Use grep -f chrom.tab to filter out patch contig annotations.
 zcat interim_GRCm38.p6_top_level_2017-09-26.gff3.gz \
 | grep -P "(\t(CAAT_signal|GC_rich_promoter_region|TATA_box|enhancer|insulator|locus_control_region|mobile_genetic_element|origin_of_replication|promoter|protein_binding_site|recombination_feature|regulatory_region|repeat_region|sequence_feature|sequence_secondary_structure|silencer|stem_loop)\t|regulatory_class=)" \
 | subColumn -miss=/dev/null 1 stdin refSeqToChrom.tab stdout \
 | grep -f chrom.tab > funcElems.gff
 wc -l funcElems.gff
 #1968 funcElems.gff
 
 # Transform GFF to BED+
 ~/kent/src/hg/utils/automation/parseRefSeqFuncElems funcElems.gff /dev/stdout \
 | sort -k1,1 -k2n,2n > refSeqFuncElems.bed
 wc -l refSeqFuncElems.bed
 #1968 refSeqFuncElems.bed
 
 # Make bigBed and link from /gbdb
 bedToBigBed -tab -type=bed9+7 -as=$HOME/kent/src/hg/lib/refSeqFuncElems.as \
   refSeqFuncElems.bed /hive/data/genomes/mm10/chrom.sizes refSeqFuncElems.bb
 rm -f /gbdb/mm10/ncbiRefSeq/refSeqFuncElems.bb
 ln -s `pwd`/refSeqFuncElems.bb /gbdb/mm10/ncbiRefSeq/
 
 ##############################################################################
 # 2017-09-15: import of UCSC GENCODE group processing of GENCODE VM15 (markd)
     # not to push to the RR
 
     # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions
 
     # download, build and load tables
     mkdir -p /hive/data/genomes/mm10/bed/gencodeVM15
     pushd /hive/data/genomes/mm10/bed/gencodeVM15
     (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
 
     ##  gencode-cmp.tsv check to see if sizes make sense
 
     # generate trackDb and joiner blurb
     pushd kent/src/hg/makeDb/trackDb
     ../../makeDb/outside/gencode/gencodeGenerateTrackDbs mm10 M15 90 'Aug 2017'
 
     # Update mouse/mm10/wgEncodeGencodeSuper.html and update 'Release Notes'
     # to describe new release. [ONLY if it's going to be pushed]
 
     # edit mouse/mm10/trackDb.gencode.ra to add new .ra file include
     make DBS=mm10
 
     ## only if being pushed to RR:
     # edit  all.joiner to add ~/tmp/gencodeVM15.joiner
     # verify with:
     pushd /hive/data/genomes/mm10/bed/gencodeVM15
     make  -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck db=mm10
 
     # commit all
 
 ##############################################################################
 # ncbiRefSeq composite (DONE - 2017-11-16 - Angie)
 # Previously done 2017-09-28; redone 11-16 to include mito "rna" from chrM genomic seq
 
     mkdir /hive/data/genomes/mm10/bed/ncbiRefSeq.p5.2017-11-16
     cd /hive/data/genomes/mm10/bed/ncbiRefSeq.p5.2017-11-16
 
     time (~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \
       refseq vertebrate_mammalian Mus_musculus \
       GCF_000001635.25_GRCm38.p5 mm10) > do.log 2>&1 & tail -f do.log
     #  *** All done !  Elapsed time: 17m36s
     # real    17m35.651s
 
     cat fb.ncbiRefSeq.mm10.txt
     # 105516336 bases of 2652783500 (3.978%) in intersection
 
 ##############################################################################
 # LASTZ Drill manLeu1 (DONE - 2017-09-25 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzManLeu1.2017-09-25
     cd /hive/data/genomes/mm10/bed/lastzManLeu1.2017-09-25
 
     printf '# drill vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Drill ManLeu1
 SEQ2_DIR=/hive/data/genomes/manLeu1/manLeu1.2bit
 SEQ2_LEN=/hive/data/genomes/manLeu1/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LAP=0
 SEQ2_LIMIT=30
 
 BASE=/hive/data/genomes/mm10/bed/lastzManLeu1.2017-09-25
 TMPDIR=/dev/shm
 ' > DEF
 
     #	establish a screen to control this job
     screen -S mm10ManLeu1
     time (doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
     #   real    233m12.288s
 
     cat fb.mm10.chainManLeu1Link.txt
     #	905203366 bases of 2652783500 (34.123%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 manLeu1) \
 	> rbest.log 2>&1 &
     # real    362m58.840s
 
     mkdir /hive/data/genomes/manLeu1/bed/blastz.mm10.swap
     cd /hive/data/genomes/manLeu1/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzManLeu1.2017-09-25/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
     #	real    64m55.226s
 
     cat fb.manLeu1.chainMm10Link.txt
     #	895668222 bases of 2721424086 (32.912%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` manLeu1 mm10) \
 	> rbest.log 2>&1
     # real    338m57.422s
 
 ##############################################################################
 # LASTZ Ma's night monkey aotNan1 (DONE - 2017-09-25 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzAotNan1.2017-09-25
     cd /hive/data/genomes/mm10/bed/lastzAotNan1.2017-09-25
 
     printf '# Ma_s night monkey vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Ma_s night monkey AotNan1
 SEQ2_DIR=/hive/data/genomes/aotNan1/aotNan1.2bit
 SEQ2_LEN=/hive/data/genomes/aotNan1/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LAP=0
 SEQ2_LIMIT=30
 
 BASE=/hive/data/genomes/mm10/bed/lastzAotNan1.2017-09-25
 TMPDIR=/dev/shm
 ' > DEF
 
     #	establish a screen to control this job
     screen -S mm10AotNan1
     time (doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
     #   real    400m13.309s
 
     cat fb.mm10.chainAotNan1Link.txt
     #	889500682 bases of 2652783500 (33.531%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 aotNan1) \
 	> rbest.log 2>&1 &
     # real    352m12.077s
 
     mkdir /hive/data/genomes/aotNan1/bed/blastz.mm10.swap
     cd /hive/data/genomes/aotNan1/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzAotNan1.2017-09-25/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
     #	real    68m48.755s
 
     cat fb.aotNan1.chainMm10Link.txt
     #	893851318 bases of 2714439490 (32.929%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` aotNan1 mm10) \
 	> rbest.log 2>&1
     # real    383m10.761s
 
 ##############################################################################
 # LASTZ Hawaiian monk seal neoSch1 (DONE - 2017-09-25 - Hiram)
     #	establish a screen to control this job
     screen -S mm10neoSch1
     mkdir /hive/data/genomes/mm10/bed/lastzNeoSch1.2017-09-25
     cd /hive/data/genomes/mm10/bed/lastzNeoSch1.2017-09-25
 
     printf '# mouse vs. Hawaiian monk seal
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=40000000
 SEQ1_LAP=10000
 SEQ1_LIMIT=1
 
 # QUERY: Hawaiian monk seal neoSch1
 SEQ2_DIR=/hive/data/genomes/neoSch1/neoSch1.2bit
 SEQ2_LEN=/hive/data/genomes/neoSch1/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LIMIT=20
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzNeoSch1.2017-09-25
 TMPDIR=/dev/shm
 ' > DEF
 
     time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
 	-syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
     #	real    324m0.457s
 
     cat fb.mm10.chainNeoSch1Link.txt
     #	827926012 bases of 2652783500 (31.210%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev mm10 neoSch1 \
       -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
     # real    307m18.396s
 
     cat fb.mm10.chainRBestNeoSch1Link.txt
     #   788489846 bases of 2652783500 (29.723%) in intersection
 
     mkdir /hive/data/genomes/neoSch1/bed/blastz.mm10.swap
     cd /hive/data/genomes/neoSch1/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzNeoSch1.2017-09-25/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
     #	real    59m27.809s
 
     cat fb.neoSch1.chainMm10Link.txt
     #	804021579 bases of 2400839308 (33.489%) in intersection
     cat fb.neoSch1.chainSynMm10Link.txt
     #   776155245 bases of 2400839308 (32.328%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev neoSch1 mm10 \
       -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
     # real    242m31.157s
 
     cat fb.neoSch1.chainRBestMm10Link.txt
     # 787537751 bases of 2400839308 (32.803%) in intersection
 
 ##############################################################################
 # LASTZ Sooty mangabey cerAty1 (DONE - 2017-09-27 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzCerAty1.2017-09-27
     cd /hive/data/genomes/mm10/bed/lastzCerAty1.2017-09-27
 
     printf '# Sooty mangabey vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Sooty mangabey CerAty1
 SEQ2_DIR=/hive/data/genomes/cerAty1/cerAty1.2bit
 SEQ2_LEN=/hive/data/genomes/cerAty1/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LAP=0
 SEQ2_LIMIT=30
 
 BASE=/hive/data/genomes/mm10/bed/lastzCerAty1.2017-09-27
 TMPDIR=/dev/shm
 ' > DEF
 
     #	establish a screen to control this job
     screen -S mm10CerAty1
     time (doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
     #   real    371m15.075s
 
     cat fb.mm10.chainCerAty1Link.txt
     #	917680202 bases of 2652783500 (34.593%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 cerAty1) \
 	> rbest.log 2>&1 &
     # real    345m49.786s
 
     mkdir /hive/data/genomes/cerAty1/bed/blastz.mm10.swap
     cd /hive/data/genomes/cerAty1/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzCerAty1.2017-09-27/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
     #	real    68m6.225s
 
     cat fb.cerAty1.chainMm10Link.txt
     #	903892923 bases of 2787289397 (32.429%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` cerAty1 mm10) \
 	> rbest.log 2>&1
     # real    305m14.804s
 
 ##############################################################################
 # LASTZ Coquerel's sifaka to mouse/Mm10 (DONE - 2017-09-28 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzProCoq1.2017-09-28
     cd /hive/data/genomes/mm10/bed/lastzProCoq1.2017-09-28
 
     printf '# Coquerel_s sifaka vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: proCoq1 - Coquerel_s sifaka - Propithecus coquereli
 SEQ2_DIR=/hive/data/genomes/proCoq1/proCoq1.2bit
 SEQ2_LEN=/hive/data/genomes/proCoq1/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LAP=0
 SEQ2_LIMIT=50
 
 BASE=/hive/data/genomes/mm10/bed/lastzProCoq1.2017-09-28
 TMPDIR=/dev/shm
 ' > DEF
 
     #	establish a screen to control this job
     screen -S mm10ProCoq1
     time (doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
     #   real    294m43.931s
 
     cat fb.mm10.chainProCoq1Link.txt
     #	882327683 bases of 2652783500 (33.260%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 proCoq1) \
 	> rbest.log 2>&1 &
     # real    411m5.774s
 
     mkdir /hive/data/genomes/proCoq1/bed/blastz.mm10.swap
     cd /hive/data/genomes/proCoq1/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzProCoq1.2017-09-28/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
     #	real    62m48.333s
 
     cat fb.proCoq1.chainMm10Link.txt
     #	863635783 bases of 2083764538 (41.446%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` proCoq1 mm10) \
 	> rbest.log 2>&1
     # real    357m54.198s
 
 ##############################################################################
 # LASTZ White-faced sapajou to mouse/Mm10 (DONE - 2017-09-28 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzCebCap1.2017-09-28
     cd /hive/data/genomes/mm10/bed/lastzCebCap1.2017-09-28
 
     printf '# White-faced sapajou vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: cebCap1 - White-faced sapajou -  Cebus capucinus imitator
 SEQ2_DIR=/hive/data/genomes/cebCap1/cebCap1.2bit
 SEQ2_LEN=/hive/data/genomes/cebCap1/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LAP=0
 SEQ2_LIMIT=20
 
 BASE=/hive/data/genomes/mm10/bed/lastzCebCap1.2017-09-28
 TMPDIR=/dev/shm
 ' > DEF
 
     #	establish a screen to control this job
     screen -S mm10CebCap1
     time (doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
     #   real    293m40.906s
 
     cat fb.mm10.chainCebCap1Link.txt
     #	882776669 bases of 2652783500 (33.277%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 cebCap1) \
 	> rbest.log 2>&1 &
     # real    334m0.458s
 
     mkdir /hive/data/genomes/cebCap1/bed/blastz.mm10.swap
     cd /hive/data/genomes/cebCap1/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzCebCap1.2017-09-28/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
     #	real    63m12.596s
 
     cat fb.cebCap1.chainMm10Link.txt
     #	871126707 bases of 2610518382 (33.370%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` cebCap1 mm10) \
 	> rbest.log 2>&1
     # real    299m3.923s
 
 ##############################################################################
 # LASTZ White-faced sapajou/cebCap1 vs. mouse/Mm10 (DONE - 2017-10-03 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzCebCap1.2017-10-03
     cd /hive/data/genomes/mm10/bed/lastzCebCap1.2017-10-03
 
     printf '# White-faced sapajou vs. mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: cebCap1 - White-faced sapajou - Cebus capucinus imitator
 SEQ2_DIR=/hive/data/genomes/cebCap1/cebCap1.2bit
 SEQ2_LEN=/hive/data/genomes/cebCap1/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LAP=0
 SEQ2_LIMIT=18
 
 BASE=/hive/data/genomes/mm10/bed/lastzCebCap1.2017-10-03
 TMPDIR=/dev/shm
 ' > DEF
 
     #	establish a screen to control this job
     screen -S mm10CebCap1
     time (doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
     #   real    206m12.413s
 
     cat fb.mm10.chainCebCap1Link.txt
     #	882776669 bases of 2652783500 (33.277%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 cebCap1) \
 	> rbest.log 2>&1 &
     # real    331m49.541s
 
     mkdir /hive/data/genomes/cebCap1/bed/blastz.mm10.swap
     cd /hive/data/genomes/cebCap1/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzCebCap1.2017-10-03/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
     #	real    63m12.596s
 
     cat fb.cebCap1.chainMm10Link.txt
     #	871126707 bases of 2610518382 (33.370%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` cebCap1 mm10) \
 	> rbest.log 2>&1
     # real    299m3.923s
 
 ##############################################################################
 # LASTZ Sclater's lemur vs. mouse/Mm10 (DONE - 2017-10-04 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzEulFla1.2017-10-04
     cd /hive/data/genomes/mm10/bed/lastzEulFla1.2017-10-04
 
     printf '# Sclater_s lemur vs. mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: eulFla1 - Sclater_s lemur - Eulemur flavifrons
 SEQ2_DIR=/hive/data/genomes/eulFla1/eulFla1.2bit
 SEQ2_LEN=/hive/data/genomes/eulFla1/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LAP=0
 SEQ2_LIMIT=18
 
 BASE=/hive/data/genomes/mm10/bed/lastzEulFla1.2017-10-04
 TMPDIR=/dev/shm
 ' > DEF
 
     #	establish a screen to control this job
     screen -S mm10EulFla1
     time (doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
     #   real    144m17.701s
 
     cat fb.mm10.chainEulFla1Link.txt
     #	916687191 bases of 2652783500 (34.556%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 eulFla1) \
 	> rbest.log 2>&1 &
     # real    330m53.327s
 
     mkdir /hive/data/genomes/eulFla1/bed/blastz.mm10.swap
     cd /hive/data/genomes/eulFla1/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzEulFla1.2017-10-04/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
     #	real    65m26.113s
 
     cat fb.eulFla1.chainMm10Link.txt
     #	887070088 bases of 2094103399 (42.360%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` eulFla1 mm10) \
 	> rbest.log 2>&1
     # real    270m35.579s
 
 ##############################################################################
 # LASTZ Black lemur vs. mouse/Mm10 (DONE - 2017-10-05 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzEulMac1.2017-10-05
     cd /hive/data/genomes/mm10/bed/lastzEulMac1.2017-10-05
 
     printf '# Black lemur vs. mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: eulMac1 - Black lemur - Eulemur macaco
 SEQ2_DIR=/hive/data/genomes/eulMac1/eulMac1.2bit
 SEQ2_LEN=/hive/data/genomes/eulMac1/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LAP=0
 SEQ2_LIMIT=50
 
 BASE=/hive/data/genomes/mm10/bed/lastzEulMac1.2017-10-05
 TMPDIR=/dev/shm
 ' > DEF
 
     #	establish a screen to control this job
     screen -S mm10EulMac1
     time (doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
     #   real    167m31.736s
 
     cat fb.mm10.chainEulMac1Link.txt
     #	925968814 bases of 2652783500 (34.906%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` mm10 eulMac1) \
 	> rbest.log 2>&1 &
     # real    334m49.287s
 
     mkdir /hive/data/genomes/eulMac1/bed/blastz.mm10.swap
     cd /hive/data/genomes/eulMac1/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzEulMac1.2017-10-05/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
     #	real    64m52.738s
 
     cat fb.eulMac1.chainMm10Link.txt
     #	895308387 bases of 2101039320 (42.613%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` eulMac1 mm10) \
 	> rbest.log 2>&1
     # real    267m17.552s
 
 ##############################################################################
 # 2017-12-17: import of UCSC GENCODE group processing of GENCODE VM16 (markd)
     # being pushed to the RR
 
     # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions
 
     # download, build and load tables
     mkdir -p /hive/data/genomes/mm10/bed/gencodeVM16
     pushd /hive/data/genomes/mm10/bed/gencodeVM16
     (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
 
     ## check gencode-cmp.tsv to see if sizes make sense
 
     # generate trackDb and joiner blurb
     pushd ~/kent/src/hg/makeDb/trackDb
     ../../makeDb/outside/gencode/gencodeGenerateTrackDbs mm10 M16 91 'Dec 2017'
 
     # Update mouse/mm10/wgEncodeGencodeSuper.html and update 'Release Notes'
     # to describe new release. [ONLY if it's going to be pushed]
 
     # edit mouse/mm10/trackDb.gencode.ra to add new .ra file include
     make DBS=mm10
 
     ## only if being pushed to RR:
     # edit  all.joiner to add ~/tmp/gencodeVM16.joiner
     # verify with:
     pushd /hive/data/genomes/mm10/bed/gencodeVM16
     make  -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck db=mm10
 
     # commit all and make push request, the file tables.lst will have the
     # list of tables for the push request.
 
 ##############################################################################
 # LASTZ Damara mole rat vs. mouse/Mm10 (DONE - 2018-01-01 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzFukDam1.2018-01-01
     cd /hive/data/genomes/mm10/bed/lastzFukDam1.2018-01-01
 
     printf '# Damara mole rat vs. mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Damara mole rat
 SEQ2_DIR=/hive/data/genomes/fukDam1/fukDam1.2bit
 SEQ2_LEN=/hive/data/genomes/fukDam1/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LIMIT=200
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzFukDam1.2018-01-01
 TMPDIR=/dev/shm
 ' > DEF
 
     #	establish a screen to control this job
     time (doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
     #   real    403m29.477s
 
     cat fb.mm10.chainFukDam1Link.txt
     # 803448015 bases of 2652783500 (30.287%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` mm10 fukDam1) \
 	> rbest.log 2>&1 &
     # real    391m52.435s
 
     cat fb.mm10.chainRBestFukDam1Link.txt
     # 760138280 bases of 2652783500 (28.654%) in intersection
 
     mkdir /hive/data/genomes/fukDam1/bed/blastz.mm10.swap
     cd /hive/data/genomes/fukDam1/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzFukDam1.2018-01-01/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
     #	real    79m46.564s
 
     cat fb.fukDam1.chainMm10Link.txt
     # 803988546 bases of 2285984782 (35.170%) in intersection
     cat fb.fukDam1.chainSynMm10Link.txt
     # 741604346 bases of 2285984782 (32.441%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` fukDam1 mm10) \
 	> rbest.log 2>&1
     # real    417m52.847s
 
     cat fb.fukDam1.chainRBestMm10Link.txt
     # 760190877 bases of 2285984782 (33.254%) in intersection
 
 ##############################################################################
 # LASTZ Kangaroo rat vs. mouse/Mm10 (DONE - 2018-01-01 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzDipOrd2.2018-01-01
     cd /hive/data/genomes/mm10/bed/lastzDipOrd2.2018-01-01
 
     printf '# Kangaroo rat vs. mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Kangaroo rat
 SEQ2_DIR=/hive/data/genomes/dipOrd2/dipOrd2.2bit
 SEQ2_LEN=/hive/data/genomes/dipOrd2/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LIMIT=200
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzDipOrd2.2018-01-01
 TMPDIR=/dev/shm
 ' > DEF
 
     #	establish a screen to control this job
     time (doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
     #   real    351m30.983s
 
     cat fb.mm10.chainDipOrd2Link.txt
     #	645178768 bases of 2652783500 (24.321%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` mm10 dipOrd2) \
 	> rbest.log 2>&1 &
     # real    439m56.601s
 
     cat fb.mm10.chainRBestDipOrd2Link.txt
     # 605074450 bases of 2652783500 (22.809%) in intersection
 
     mkdir /hive/data/genomes/dipOrd2/bed/blastz.mm10.swap
     cd /hive/data/genomes/dipOrd2/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzDipOrd2.2018-01-01/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
     #	real    79m46.564s
 
     cat fb.dipOrd2.chainMm10Link.txt
     # 631879699 bases of 2065314047 (30.595%) in intersection
     cat fb.dipOrd2.chainSynMm10Link.txt
     # 581661824 bases of 2065314047 (28.163%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` dipOrd2 mm10) \
 	> rbest.log 2>&1
     # real    412m53.879s
 
     cat fb.dipOrd2.chainRBestMm10Link.txt
     # 605056621 bases of 2065314047 (29.296%) in intersection
 
 ##############################################################################
 # LASTZ Chinese hamster ovary cell line CHO-K1  criGriChoV2
 #	(DONE - 2018-01-05 - Hiram)
     #	establish a screen to control this job
     screen -S mm10criGriChoV2
     mkdir /hive/data/genomes/mm10/bed/lastzCriGriChoV2.2018-01-05
     cd /hive/data/genomes/mm10/bed/lastzCriGriChoV2.2018-01-05
 
     printf '# Chinese hamster ovary cell line vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 SEQ1_LIMIT=40
 
 # QUERY: Chinese hamster ovary cell line CHO-K1  criGriChoV2
 SEQ2_DIR=/hive/data/genomes/criGriChoV2/criGriChoV2.2bit
 SEQ2_LEN=/hive/data/genomes/criGriChoV2/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LIMIT=20
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzCriGriChoV2.2018-01-05
 TMPDIR=/dev/shm
 ' > DEF
 
     time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
 	-noDbNameCheck -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
     #	real    575m28.254s
 
     cat fb.mm10.chainCriGriChoV2Link.txt
     #	1583859515 bases of 2652783500 (59.706%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev mm10 criGriChoV2 \
       -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
     #	real    1098m32.629s
 
     cat fb.mm10.chainRBestCriGriChoV2Link.txt
     # 1451345011 bases of 2652783500 (54.710%) in intersection
 
     mkdir /hive/data/genomes/criGriChoV2/bed/blastz.mm10.swap
     cd /hive/data/genomes/criGriChoV2/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzCriGriChoV2.2018-01-05/DEF \
 	-noDbNameCheck -swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1 &
     #	real    196m59.409s
 
     cat fb.criGriChoV2.chainMm10Link.txt
     #	1605002950 bases of 2323924942 (69.064%) in intersection
     cat fb.criGriChoV2.chainSynMm10Link.txt
     #   1443603212 bases of 2323924942 (62.119%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev criGriChoV2 mm10 \
       -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
     # real    1187m10.728s
 
     cat fb.criGriChoV2.chainRBestMm10Link.txt
     # 1452526554 bases of 2323924942 (62.503%) in intersection
 
 ##############################################################################
 # LASTZ Baboon papAnu4 (DONE - 2018-01-08 - Hiram)
     #	establish a screen to control this job
     screen -S mm10papAnu4
     mkdir /hive/data/genomes/mm10/bed/lastzPapAnu4.2018-01-08
     cd /hive/data/genomes/mm10/bed/lastzPapAnu4.2018-01-08
 
     printf '# mouse vs. baboon
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 SEQ1_LIMIT=40
 
 # QUERY: baboon papAnu4
 SEQ2_DIR=/hive/data/genomes/papAnu4/papAnu4.2bit
 SEQ2_LEN=/hive/data/genomes/papAnu4/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LIMIT=180
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzPapAnu4.2018-01-08
 TMPDIR=/dev/shm
 ' > DEF
 
     time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
 	-syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
     #	real    783m49.438s
 
     cat fb.mm10.chainPapAnu4Link.txt
     #	919405716 bases of 2652783500 (34.658%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev mm10 papAnu4 \
       -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
     # real    582m15.183s
 
     cat fb.mm10.chainRBestPapAnu4Link.txt
     # 875366631 bases of 2652783500 (32.998%) in intersection
 
     mkdir /hive/data/genomes/papAnu4/bed/blastz.mm10.swap
     cd /hive/data/genomes/papAnu4/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzPapAnu4.2018-01-08/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
     #	real    80m51.648s
 
     cat fb.papAnu4.chainMm10Link.txt
     #	907806517 bases of 2937004939 (30.909%) in intersection
     cat fb.papAnu4.chainSynMm10Link.txt
     #	866781916 bases of 2937004939 (29.512%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev papAnu4 mm10 \
       -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
     # real    521m7.590s
 
     cat fb.papAnu4.chainRBestMm10Link.txt
     # 874097827 bases of 2937004939 (29.762%) in intersection
 
 ##############################################################################
 # LASTZ guinea pig cavApe1 (DONE - 2018-01-08 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10CavApe1
     mkdir /hive/data/genomes/mm10/bed/lastzCavApe1.2018-01-08
     cd /hive/data/genomes/mm10/bed/lastzCavApe1.2018-01-08
 
     printf '# guinea pig vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: guinea pig CavApe1
 SEQ2_DIR=/hive/data/genomes/cavApe1/cavApe1.2bit
 SEQ2_LEN=/hive/data/genomes/cavApe1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=10
 
 BASE=/hive/data/genomes/mm10/bed/lastzCavApe1.2018-01-08
 TMPDIR=/dev/shm
 ' > DEF
 
     time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
 	-syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
     #	real    514m28.099s
 
     cat fb.mm10.chainCavApe1Link.txt
     #	424603451 bases of 2652783500 (16.006%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev mm10 cavApe1 \
       -buildDir=`pwd`) > rbest.log 2>&1 &
     # real    481m13.804s
 
     cat fb.mm10.chainRBestCavApe1Link.txt
     # 394844156 bases of 2652783500 (14.884%) in intersection
 
     # and for the swap
     mkdir /hive/data/genomes/cavApe1/bed/blastz.mm10.swap
     cd /hive/data/genomes/cavApe1/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzCavApe1.2018-01-08/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1 &
     #	real    38m53.866s
 
     cat fb.cavApe1.chainMm10Link.txt
     #	420563721 bases of 1749140834 (24.044%) in intersection
     cat fb.cavApe1.chainSynMm10Link.txt
     # 364825271 bases of 1749140834 (20.857%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev cavApe1 mm10 \
       -buildDir=`pwd`) > rbest.log 2>&1 &
     # real    438m45.544s
 
     cat fb.cavApe1.chainRBestMm10Link.txt
     # 395976886 bases of 1749140834 (22.638%) in intersection
 
 ##############################################################################
 # lastz Collared flycatcher ficAlb1 (DONE - 2018-01-09 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10
     mkdir /hive/data/genomes/mm10/bed/lastzFicAlb1.2018-01-09
     cd /hive/data/genomes/mm10/bed/lastzFicAlb1.2018-01-09
 
     printf '# Mouse vs.  Collared flycatcher
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Collard flycatcher/FicAlb1
 SEQ2_DIR=/hive/data/genomes/ficAlb1/ficAlb1.2bit
 SEQ2_LEN=/hive/data/genomes/ficAlb1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=50
 
 BASE=/hive/data/genomes/mm10/bed/lastzFicAlb1.2018-01-09
 TMPDIR=/dev/shm
 ' > DEF
 
     time (doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
         -syntenicNet -chainMinScore=5000 -chainLinearGap=loose) > do.log 2>&1 &
     #   real    167m34.472s
 
     cat fb.mm10.chainFicAlb1Link.txt
     #   98177848 bases of 2652783500 (3.701%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev mm10 ficAlb1 \
       -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
     # real    246m1.019s
 
     cat fb.mm10.chainRBestFicAlb1Link.txt
     # 76370866 bases of 2652783500 (2.879%) in intersection
 
     #	and for the swap
     mkdir /hive/data/genomes/ficAlb1/bed/blastz.mm10.swap
     cd /hive/data/genomes/ficAlb1/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzFicAlb1.2018-01-09/DEF \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
         -swap -chainMinScore=5000 -chainLinearGap=loose) > swap.log 2>&1 &
     # real    8m5.637s
 
     cat  fb.ficAlb1.chainMm10Link.txt
     #   85384367 bases of 1102325870 (7.746%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev ficAlb1 mm10 \
       -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
     # real    209m22.159s
 
     cat fb.ficAlb1.chainRBestMm10Link.txt
     # 76183235 bases of 1102325870 (6.911%) in intersection
 
 ##########################################################################
 # lastz Lamprey petMar3 (DONE - 2018-01-25 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S petMar3
     mkdir /hive/data/genomes/mm10/bed/lastzPetMar3.2018-01-25
     cd /hive/data/genomes/mm10/bed/lastzPetMar3.2018-01-25
 
     printf '# Mouse vs. Lamprey
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_M=50
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Lamprey PetMar3
 SEQ2_DIR=/hive/data/genomes/petMar3/petMar3.2bit
 SEQ2_LEN=/hive/data/genomes/petMar3/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=60
 
 BASE=/hive/data/genomes/mm10/bed/lastzPetMar3.2018-01-25
 TMPDIR=/dev/shm
 ' > DEF
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # when not present, SEQ2_LIMIT is a default 100
     time (doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
         -syntenicNet -chainMinScore=5000 -chainLinearGap=loose) > do.log 2>&1 &
     #   real    119m5.528s
 
     cat fb.mm10.chainPetMar3Link.txt
     #   36835173 bases of 2652783500 (1.389%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -load -buildDir=`pwd` mm10 petMar3) \
 	> rbest.log 2>&1 &
     # real    201m40.789s
 
     cat fb.mm10.chainRBestPetMar3Link.txt
     # 21623456 bases of 2652783500 (0.815%) in intersection
 
     #	and for the swap
     mkdir /hive/data/genomes/petMar3/bed/blastz.mm10.swap
     cd /hive/data/genomes/petMar3/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzPetMar3.2018-01-25/DEF \
         -syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
         -swap -chainMinScore=5000 -chainLinearGap=loose) > swap.log 2>&1 &
     # real    7m57.582s
 
     #   real    7m2.754s
     cat  fb.petMar3.chainMm10Link.txt
     #	39217857 bases of 1043181598 (3.759%) in intersection
 
     cat fb.petMar3.chainSynMm10Link.txt
     # 1381239 bases of 1043181598 (0.132%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -load -buildDir=`pwd` petMar3 mm10) \
 	> rbest.log 2>&1 &
     # real    206m59.727s
 
     cat fb.petMar3.chainRBestMm10Link.txt
     # 21335101 bases of 1043181598 (2.045%) in intersection
 
 #########################################################################
 # 2018-03-08: update UCSC GENCODE VM16 to include protein id (for VAI) and fix PAR tag
 
 cd /hive/data/genomes/mm10/bed/gencodeVM16
 
 # save existing data
 mkdir -p prev/pre-proteinId
 mv tables/wgEncodeGencodeAttrsVM16.tab tables/wgEncodeGencodeTagVM16.tab prev/pre-proteinId/
 mv loaded/wgEncodeGencodeAttrsVM16.tab.loaded loaded/wgEncodeGencodeTagVM16.tab.loaded prev/pre-proteinId/
 mv data/gencode.tsv prev/pre-proteinId/
 cp -p data/gencode.vM16.transcriptionSupportLevel.tab prev/pre-proteinId/
 
 # edit gencodeLoad.mk to set mm10 as target
 
 # get gencode.tsv without rebuilding the TSL file or loading tables that don't change
 ~markd/compbio/ccds/ccds2/output/bin/x86_64/opt/gencodeGxfToAttrs --keepGoing data/release_M16/gencode.vM16.chr_patch_hapl_scaff.annotation.gtf.gz data/gencode.tsv
 make  -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk   loaded/wgEncodeGencodeAttrsVM16.tab.loaded  loaded/wgEncodeGencodeTagVM16.tab.loaded
 
 # 2018-03-19: update search to include protein id
 cd kent/src/hg/makeDb/trackDb
 ../../makeDb/outside/gencode/gencodeGenerateTrackDbs mm10 M16 91 'Dec 2017'
 
 #########################################################################
 # lastz garter snake/thaSir1 (DONE - 2018-03-13 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10ThaSir1
     mkdir /hive/data/genomes/mm10/bed/lastzThaSir1.2018-03-13
     cd /hive/data/genomes/mm10/bed/lastzThaSir1.2018-03-13
 
     # note: first time with this new 1.04.00 version of lastz
 
     printf '# Mouse vs. garter snake
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_M=50
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 #      A    C    G    T
 #     91  -90  -25 -100
 #    -90  100 -100  -25
 #    -25 -100  100  -90
 #   -100  -25  -90  91
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: garter snake thaSir1
 SEQ2_DIR=/hive/data/genomes/thaSir1/thaSir1.2bit
 SEQ2_LEN=/hive/data/genomes/thaSir1/chrom.sizes
 SEQ2_CHUNK=10000000
 SEQ2_LAP=0
 SEQ2_LIMIT=15
 
 BASE=/hive/data/genomes/mm10/bed/lastzThaSir1.2018-03-13
 TMPDIR=/dev/shm
 ' > DEF
 
     # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable
     #	number of jobs, 50,000 to something under 100,000
     # when not present, SEQ2_LIMIT defaults to 100
     time (doBlastzChainNet.pl -verbose=2 \
         `pwd`/DEF -syntenicNet \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
         -chainMinScore=5000 -chainLinearGap=loose) > do.log 2>&1 &
     #	real    112m40.572s
 
     cat fb.mm10.chainThaSir1Link.txt
     #	78464036 bases of 2652783500 (2.958%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -load -buildDir=`pwd` mm10 thaSir1) \
 	> rbest.log 2>&1 &
     # real    266m17.520s
 
     cat fb.mm10.chainRBestThaSir1Link.txt
     # 54099233 bases of 2652783500 (2.039%) in intersection
 
     #	and for the swap
     mkdir /hive/data/genomes/thaSir1/bed/blastz.mm10.swap
     cd /hive/data/genomes/thaSir1/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 -syntenicNet \
 	/hive/data/genomes/mm10/bed/lastzThaSir1.2018-03-13/DEF \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
            -swap -chainMinScore=5000 -chainLinearGap=loose) > swap.log 2>&1 &
     #	real    11m28.892s
 
     cat  fb.thaSir1.chainMm10Link.txt
     #	63814138 bases of 1122701795 (5.684%) in intersection
     cat fb.thaSir1.chainSynMm10Link.txt
     # 20728394 bases of 1122701795 (1.846%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -load -buildDir=`pwd` thaSir1 mm10) \
 	> rbest.log 2>&1 &
     # real    234m31.934s
 
     cat fb.thaSir1.chainRBestMm10Link.txt
     # 54778217 bases of 1122701795 (4.879%) in intersection
 
 ##############################################################################
 # LASTZ cat felCat9 (DONE - 2018-03-14 - Hiram)
     # establish a screen to control this job with a name to indicate what it is
     screen -S mm10FelCat9
     mkdir /hive/data/genomes/mm10/bed/lastzFelCat9.2018-03-14
     cd /hive/data/genomes/mm10/bed/lastzFelCat9.2018-03-14
 
     printf '# cat vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: cat FelCat9
 SEQ2_DIR=/hive/data/genomes/felCat9/felCat9.2bit
 SEQ2_LEN=/hive/data/genomes/felCat9/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LAP=0
 SEQ2_LIMIT=50
 
 BASE=/hive/data/genomes/mm10/bed/lastzFelCat9.2018-03-14
 TMPDIR=/dev/shm
 ' > DEF
 
     time (doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
     #   real    395m23.091s
 
     cat fb.mm10.chainFelCat9Link.txt
     #   801023018 bases of 2652783500 (30.196%) in intersection
 
     time (doRecipBest.pl -load  mm10 felCat9 -buildDir=`pwd` \
 	-workhorse=hgwdev) > rbest.log 2>&1 &
     #	real    486m55.606s
 
     cat fb.mm10.chainRBestFelCat9Link.txt
     # 761411281 bases of 2652783500 (28.702%) in intersection
 
     mkdir /hive/data/genomes/felCat9/bed/blastz.mm10.swap
     cd /hive/data/genomes/felCat9/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzFelCat9.2018-03-14/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1 &
     #	 real    70m51.860s
 
     cat fb.felCat9.chainMm10Link.txt
     #   779862191 bases of 2476453204 (31.491%) in intersection
     cat fb.felCat9.chainSynMm10Link.txt
     # 754481540 bases of 2476453204 (30.466%) in intersection
 
     time (doRecipBest.pl -load  felCat9 mm10 -buildDir=`pwd` \
 	-workhorse=hgwdev) > rbest.log 2>&1 &
     # real    375m4.937s
 
     cat fb.felCat9.chainRBestMm10Link.txt
     # 760753851 bases of 2476453204 (30.719%) in intersection
 
 ##############################################################################
 # LASTZ Beaver casCan1 vs. mouse/Mm10 (DONE - 2018-03-19 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzCasCan1.2018-03-19
     cd /hive/data/genomes/mm10/bed/lastzCasCan1.2018-03-19
 
     # note: using the 1.04.00 version of lastz
 
     printf '# Beaver vs. mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LIMIT=50
 SEQ1_LAP=10000
 
 # QUERY: Beaver
 SEQ2_DIR=/hive/data/genomes/casCan1/casCan1.2bit
 SEQ2_LEN=/hive/data/genomes/casCan1/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LIMIT=50
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzCasCan1.2018-03-19
 TMPDIR=/dev/shm
 ' > DEF
 
     #	establish a screen to control this job
     time (doBlastzChainNet.pl -verbose=2 \
 	`pwd`/DEF \
 	-syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1 &
     #   real    455m47.982s
 
     cat fb.mm10.chainCasCan1Link.txt
     #	969752969 bases of 2652783500 (36.556%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` mm10 casCan1) \
 	> rbest.log 2>&1 &
     # real    981m12.451s
 
     cat fb.mm10.chainRBestCasCan1Link.txt
     # 912108399 bases of 2652783500 (34.383%) in intersection
 
     mkdir /hive/data/genomes/casCan1/bed/blastz.mm10.swap
     cd /hive/data/genomes/casCan1/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzCasCan1.2018-03-19/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
     #	real    100m12.450s
 
     cat fb.casCan1.chainMm10Link.txt
     # 1027587643 bases of 2517974654 (40.810%) in intersection
     cat fb.casCan1.chainSynMm10Link.txt
     # 876969229 bases of 2517974654 (34.828%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` casCan1 mm10) \
 	> rbest.log 2>&1
     # real    1280m7.127s
 
     cat fb.casCan1.chainRBestMm10Link.txt
     # 911437520 bases of 2517974654 (36.197%) in intersection
 
 ##############################################################################
 # LASTZ mouse/mm10 Chimp/panTro6 - (DONE - 2018-03-24 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzPanTro6.2018-03-24
     cd /hive/data/genomes/mm10/bed/lastzPanTro6.2018-03-24
 
     printf '# mouse vs chimp
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
 BLASTZ_M=254
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 SEQ1_LIMIT=10
 # the default matrix is:
 #       A     C     G     T
 # A    91  -114   -31  -123
 # C  -114   100  -125   -31
 # G   -31  -125   100  -114
 # T  -123   -31  -114    91
 
 # QUERY: chimp panTro6
 SEQ2_DIR=/hive/data/genomes/panTro6/panTro6.2bit
 SEQ2_LEN=/hive/data/genomes/panTro6/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LIMIT=40
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzPanTro6.2018-03-24
 TMPDIR=/dev/shm
 ' > DEF
 
     time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
         -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > do.log 2>&1 &
     #  real    347m21.874s
 
     cat fb.mm10.chainPanTro6Link.txt
     # 935720585 bases of 2652783500 (35.273%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` \
 	mm10 panTro6) > rbest.log 2>&1 &
     # real    565m15.871s
 
     cat fb.mm10.chainRBestPanTro6Link.txt
     # 891553355 bases of 2652783500 (33.608%) in intersection
 
     # and for the swap:
     mkdir /hive/data/genomes/panTro6/bed/blastz.mm10.swap
     cd /hive/data/genomes/panTro6/bed/blastz.mm10.swap
 
     time (doBlastzChainNet.pl -verbose=2 \
       /hive/data/genomes/mm10/bed/lastzPanTro6.2018-03-24/DEF \
         -swap -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > swap.log 2>&1
     #  real    78m57.631s
 
     cat fb.panTro6.chainMm10Link.txt
     # 934668641 bases of 3018592990 (30.964%) in intersection
     cat fb.panTro6.chainSynMm10Link.txt
     # 889944141 bases of 3018592990 (29.482%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` \
 	panTro6 mm10) > rbest.log 2>&1 &
     # real    504m47.811s
 
     cat fb.panTro6.chainRBestMm10Link.txt
     # 890065520 bases of 3018592990 (29.486%) in intersection
 
 ##############################################################################
 # LASTZ mouse/mm10 Orangutan/ponAbe3 - (DONE - 2018-03-26 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzPonAbe3.2018-03-26
     cd /hive/data/genomes/mm10/bed/lastzPonAbe3.2018-03-26
 
     printf '# mouse vs orangutan
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
 BLASTZ_M=254
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 SEQ1_LIMIT=5
 # the default matrix is:
 #       A     C     G     T
 # A    91  -114   -31  -123
 # C  -114   100  -125   -31
 # G   -31  -125   100  -114
 # T  -123   -31  -114    91
 
 # QUERY: orangutan ponAbe3
 SEQ2_DIR=/hive/data/genomes/ponAbe3/ponAbe3.2bit
 SEQ2_LEN=/hive/data/genomes/ponAbe3/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LIMIT=20
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzPonAbe3.2018-03-26
 TMPDIR=/dev/shm
 ' > DEF
 
     time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
         -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > do.log 2>&1 &
     #  real    461m46.426s
 
     cat fb.mm10.chainPonAbe3Link.txt
     # 936755064 bases of 2652783500 (35.312%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` \
 	mm10 ponAbe3) > rbest.log 2>&1 &
     # real    554m41.676s
 
     cat fb.mm10.chainRBestPonAbe3Link.txt
     # 892145302 bases of 2652783500 (33.631%) in intersection
 
     # and for the swap:
     mkdir /hive/data/genomes/ponAbe3/bed/blastz.mm10.swap
     cd /hive/data/genomes/ponAbe3/bed/blastz.mm10.swap
 
     time (doBlastzChainNet.pl -verbose=2 \
       /hive/data/genomes/mm10/bed/lastzPonAbe3.2018-03-26/DEF \
         -swap -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > swap.log 2>&1
     #  real    78m29.160s
 
     cat fb.ponAbe3.chainMm10Link.txt
     # 929970181 bases of 3043444524 (30.557%) in intersection
     cat fb.ponAbe3.chainSynMm10Link.txt
     # 890801507 bases of 3043444524 (29.270%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` \
 	ponAbe3 mm10) > rbest.log 2>&1 &
     # real    496m49.168s
 
     cat fb.ponAbe3.chainRBestMm10Link.txt
     # 890774155 bases of 3043444524 (29.269%) in intersection
 
 #########################################################################
 # LASTZ mouse/mm10 sheep/oviAri4 - (DONE - 2018-04-25 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzOviAri4.2018-04-25
     cd /hive/data/genomes/mm10/bed/lastzOviAri4.2018-04-25
 
     printf '# mouse vs sheep
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
 BLASTZ_M=254
 # default BLASTZ_Q score matrix:
 #       A     C     G     T
 # A    91  -114   -31  -123
 # C  -114   100  -125   -31
 # G   -31  -125   100  -114
 # T  -123   -31  -114    91
 
 # TARGET: mouse mm10
 SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
 SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
 SEQ1_CHUNK=40000000
 SEQ1_LIMIT=2
 SEQ1_LAP=10000
 
 # QUERY: sheep oviAri4
 SEQ2_DIR=/hive/data/genomes/oviAri4/oviAri4.2bit
 SEQ2_LEN=/hive/data/genomes/oviAri4/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LIMIT=10
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzOviAri4.2018-04-25
 TMPDIR=/dev/shm
 ' > DEF
     # << happy emacs
 
     time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
         -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > do.log 2>&1 &
     # Command failed:
     # ssh -x -o 'StrictHostKeyChecking = no' -o 'BatchMode = yes' hgwdev \
     #   nice /hive/data/genomes/mm10/bed/lastzOviAri4.2018-04-25/axtChain/netSynteny.csh
     #
     # real    237m24.916s
 
     # used the wrong version of doBlastzChainNet.pl which failed at the
     # syntenic net step. Clean up and re-try with the fixed up script:
     rm mm10.oviAri4.syn.chain.gz
     rm mm10.oviAri4.syn.net.gz
 
     time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
         -continue=syntenicNet \
         -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) >> do.log 2>&1 &
     # real  18m40.051s
 
     cat fb.mm10.chainOviAri4Link.txt
     # 693504453 bases of 2652783500 (26.143%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` mm10 oviAri4) > rbest.log 2>&1 &
     # real    485m29.546s
 
     # and for the swap:
     mkdir /hive/data/genomes/oviAri4/bed/blastz.mm10.swap
     cd /hive/data/genomes/oviAri4/bed/blastz.mm10.swap
 
     time (doBlastzChainNet.pl -verbose=2 \
       /hive/data/genomes/mm10/bed/lastzOviAri4.2018-04-25/DEF \
         -swap -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > swap.log 2>&1 &
     #  real    63m12.935s
 
     cat fb.oviAri4.chainMm10Link.txt
     # 680117358 bases of 2587515673 (26.285%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` oviAri4 mm10) > rbest.log 2>&1 &
     # real    437m1.637s
 
 #########################################################################
 # RepeatMasker Visualization track update (TBD - 2018-05-15 - ChrisL)
 
     screen -S rmskJoined.2018-05-15
 
     # if this is an update to an already existing rmsk build, re-run
     # masking with new libraries. Otherwise skip to rmskJoined below
     mkdir /hive/data/genomes/mm10/bed/repeatMasker.2018-05-15
     cd /hive/data/genomes/mm10/bed/repeatMasker.2018-05-15
 
     time (doRepeatMasker.pl -stop=mask -bigClusterHub=ku \
        -workhorse=hgwdev -dbHost=hgwdev -buildDir=`pwd` mm10) > mask.log 2>&1 &
     # real    705m12.538s
 
     # fill in grep to get rid of the missing id items (not necessary this run):
     # grep -v "" \
     #     mm10.fa.out > clean.mm10.fa.out
     # mv clean.mm10.fa.out mm10.fa.out
 
     # finish the last step of doCat.csh, if necessary:
     # /cluster/bin/scripts/extractNestedRepeats.pl mm10.fa.out | sort -k1,1 -k2,2n > mm10.nestedRepeats.bed
 
     # rmskJoinedCurrent steps
     mkdir /hive/data/genomes/mm10/bed/rmskJoined.2018-05-15
     cd /hive/data/genomes/mm10/bed/rmskJoined.2018-05-15
 
     ln -s ../repeatMasker.2018-05-15/mm10.sorted.fa.out .
     ln -s ../repeatMasker.2018-05-15/mm10.fa.align .
 
     time (/scratch/data/RepeatMasker/util/rmToUCSCTables.pl \
         -out mm10.sorted.fa.out -align mm10.fa.align.gz) > rerun.log 2>&1 &
     # real    102m53.576s
 
     # confirm the counts are different from the previous version:
     # wc -l ../rmskJoined/mm10.fa.align.tsv ../rmskJoined/mm10.sorted.fa.join.bed ../rmskJoined/mm10.sorted.fa.out.tsv
     #  5918456 ../rmskJoined/mm10.fa.align.tsv
     #  4657599 ../rmskJoined/mm10.sorted.fa.join.bed
     #  5249545 ../rmskJoined/mm10.sorted.fa.out.tsv
     # 15825600 total
     # wc -l *.tsv
     #  5888031 mm10.fa.align.tsv
     #  4646880 mm10.sorted.fa.join.tsv
     #  5235053 mm10.sorted.fa.out.tsv
     # 15769964 total
 
     # sub rmskJoinedBaseline for rmskJoinedCurrent if this is the first version for this assembly
     hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/rmskJoined.sql \
         -renameSqlTable -verbose=4 -tab \
             -type=bed4+9 -as=$HOME/kent/src/hg/lib/rmskJoined.as mm10 \
                 rmskJoinedCurrent mm10.sorted.fa.join.tsv \
                     > loadJoined.log 2>&1
     # Error line 1028733 of mm10.sorted.fa.join.tsv:
     # chromStart after chromEnd (21000277 > 21000266)
     # is it the only one?
     awk -F'\t' '{if ($2 > $3) sum+=1} END {print sum}' mm10.sorted.fa.join.tsv
     # 1
 
     # remove it and run above hgLoadBed again:
     awk -F'\t' '{if ($2 < $3) print;}' mm10.sorted.fa.join.tsv  > mm10.sorted.fa.join.cleaned
     mv mm10.sorted.fa.join.cleaned mm10.sorted.fa.join.tsv
 
     # sub rmskAlignBaseline for rmskAlignCurrent if this is the first version for this assembly
     hgLoadSqlTab mm10 rmskAlignCurrent \
         /cluster/home/chmalee/kent/src/hg/lib/rmskAlign.sql \
             mm10.fa.align.tsv > loadAlign.log 2>&1
 
     # sub rmskOutBaseline for rmskOutCurrent if this is the first version for this assembly
     hgLoadOutJoined -verbose=2 -table=rmskOutCurrent mm10 mm10.sorted.fa.out > loadOut.log 2>&1
 
     featureBits -countGaps mm10 rmskJoinedBaseline
     # 2243948952 bases of 2730871774 (82.170%) in intersection
     featureBits -countGaps mm10 rmskJoinedCurrent
     # 2249729653 bases of 2730871774 (82.381%) in intersection
 
 #########################################################################
 # LASTZ mouse/mm10 horse/equCab3 - (DONE - 2018-05-25 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzEquCab3.2018-05-25
     cd /hive/data/genomes/mm10/bed/lastzEquCab3.2018-05-25
 
     printf '# mouse vs horse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
 BLASTZ_M=254
 # default BLASTZ_Q score matrix:
 #       A     C     G     T
 # A    91  -114   -31  -123
 # C  -114   100  -125   -31
 # G   -31  -125   100  -114
 # T  -123   -31  -114    91
 
 # TARGET: mouse mm10
 SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
 SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
 SEQ1_CHUNK=40000000
 SEQ1_LIMIT=2
 SEQ1_LAP=10000
 
 # QUERY: horse equCab3
 SEQ2_DIR=/hive/data/genomes/equCab3/equCab3.2bit
 SEQ2_LEN=/hive/data/genomes/equCab3/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LIMIT=10
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzEquCab3.2018-05-25
 TMPDIR=/dev/shm
 ' > DEF
     # << happy emacs
 
     time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
         -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > do.log 2>&1 &
     # real    605m50.368s
 
     cat fb.mm10.chainEquCab3Link.txt
     # 921489718 bases of 2652783500 (34.737%) in intersection
 
     cat fb.mm10.chainSynEquCab3Link.txt
     # 876836391 bases of 2652783500 (33.053%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` mm10 equCab3) > rbest.log 2>&1 &
     # real    398m20.685s
 
     cat fb.mm10.chainRBest.EquCab3.txt
     # 876785778 bases of 2652783500 (33.052%) in intersection
 
     # and for the swap:
     mkdir /hive/data/genomes/equCab3/bed/blastz.mm10.swap
     cd /hive/data/genomes/equCab3/bed/blastz.mm10.swap
 
     time (doBlastzChainNet.pl -verbose=2 \
       /hive/data/genomes/mm10/bed/lastzEquCab3.2018-05-25/DEF \
         -swap -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > swap.log 2>&1 &
     #  real    83m14.250s
 
     cat fb.equCab3.chainMm10Link.txt
     # 930516778 bases of 2497530654 (37.257%) in intersection
     cat fb.equCab3.chainSynMm10Link.txt
     # 897238830 bases of 2497530654 (35.925%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` equCab3 mm10) > rbest.log 2>&1 &
     # real    318m40.520s
 
     cat fb.equCab3.chainRBest.Mm10.txt
     # 875954606 bases of 2497530654 (35.073%) in intersection
 
 #########################################################################
 # LASTZ mouse/mm10 Minke whale/balAcu1 - (DONE - 2018-06-13 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzBalAcu1.2018-06-13
     cd /hive/data/genomes/mm10/bed/lastzBalAcu1.2018-06-13
 
     printf '# mouse vs Minke whale
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
 BLASTZ_M=254
 # default BLASTZ_Q score matrix:
 #       A     C     G     T
 # A    91  -114   -31  -123
 # C  -114   100  -125   -31
 # G   -31  -125   100  -114
 # T  -123   -31  -114    91
 
 # TARGET: mouse mm10
 SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
 SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
 SEQ1_CHUNK=40000000
 SEQ1_LIMIT=2
 SEQ1_LAP=10000
 
 # QUERY: Minke whale balAcu1
 SEQ2_DIR=/hive/data/genomes/balAcu1/balAcu1.2bit
 SEQ2_LEN=/hive/data/genomes/balAcu1/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LIMIT=40
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzBalAcu1.2018-06-13
 TMPDIR=/dev/shm
 ' > DEF
     # << happy emacs
 
     time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
         -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > do.log 2>&1 &
     # real    190m45.265s
 
     cat fb.mm10.chainBalAcu1Link.txt
     # 851790136 bases of 2652783500 (32.109%) in intersection
 
     cat fb.mm10.chainSynBalAcu1Link.txt
     # 806407823 bases of 2652783500 (30.399%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` mm10 balAcu1) > rbest.log 2>&1 &
     # real    287m58.329s
 
     cat fb.mm10.chainRBest.BalAcu1.txt
     # 811435554 bases of 2652783500 (30.588%) in intersection
 
     # and for the swap:
     mkdir /hive/data/genomes/balAcu1/bed/blastz.mm10.swap
     cd /hive/data/genomes/balAcu1/bed/blastz.mm10.swap
 
     time (doBlastzChainNet.pl -verbose=2 \
       /hive/data/genomes/mm10/bed/lastzBalAcu1.2018-06-13/DEF \
         -swap -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > swap.log 2>&1 &
     #  real    67m0.560s
 
     cat fb.balAcu1.chainMm10Link.txt
     # 832845143 bases of 2286657046 (36.422%) in intersection
 
     cat fb.balAcu1.chainSynMm10Link.txt
     # 802734600 bases of 2286657046 (35.105%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` balAcu1 mm10) > rbest.log 2>&1 &
     # real    241m51.110s
 
     cat fb.balAcu1.chainRBest.Mm10.txt
     # 810427625 bases of 2286657046 (35.442%) in intersection
 
 #########################################################################
 2018-07-01: import of UCSC GENCODE group processing of GENCODE VM17 (markd)
     # not being pushed to the RR
 
     # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions
 
     # download, build and load tables
     mkdir -p /hive/data/genomes/mm10/bed/gencodeVM17
     pushd /hive/data/genomes/mm10/bed/gencodeVM17
     (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
 
     ##  check gencode-cmp.tsv to see if sizes make sense
 
     # generate trackDb and joiner blurb
     pushd ~/kent/src/hg/makeDb/trackDb
     ../../makeDb/outside/gencode/gencodeGenerateTrackDbs mm10 M17 92 'Mar 2018'
 
     ## only if being pushed to RR:
     # Update mouse/mm10/wgEncodeGencodeSuper.html
     # Update 'Release Notes' to describe new release.
 
     # edit mouse/mm10/trackDb.gencode.ra to add new .ra file include
     make DBS=mm10
 
     ## only if being pushed to RR:
     # edit  all.joiner to add ~/tmp/gencodeVM17.joiner
     # verify with:
     pushd /hive/data/genomes/mm10/bed/gencodeVM17
     make  -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck db=mm10
 
     # commit all and make push request, the file tables.lst will have the
     # list of tables for the push request.
 
 ##############################################################################
 # LASTZ mouse/mm10 Axolotl/ambMex1 - (DONE - 2018-07-09 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzAmbMex1.2018-07-09
     cd /hive/data/genomes/mm10/bed/lastzAmbMex1.2018-07-09
 
     printf '# mouse vs Axolotl
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
 BLASTZ_M=254
 # default BLASTZ_Q score matrix:
 #       A     C     G     T
 # A    91  -114   -31  -123
 # C  -114   100  -125   -31
 # G   -31  -125   100  -114
 # T  -123   -31  -114    91
 
 # TARGET: mouse mm10
 SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
 SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
 SEQ1_CHUNK=40000000
 SEQ1_LIMIT=2
 SEQ1_LAP=10000
 
 # QUERY: Axolotl ambMex1
 SEQ2_DIR=/hive/data/genomes/ambMex1/ambMex1.2bit
 SEQ2_LEN=/hive/data/genomes/ambMex1/chrom.sizes
 SEQ2_CHUNK=80000000
 SEQ2_LIMIT=800
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzAmbMex1.2018-07-09
 TMPDIR=/dev/shm
 ' > DEF
     # << happy emacs
 
     time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
         -chainMinScore=5000 -chainLinearGap=loose \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > do.log 2>&1 &
     # real    881m7.910s
 
     cat fb.mm10.chainAmbMex1Link.txt
     # 52143617 bases of 2652783500 (1.966%) in intersection
 
     cat fb.mm10.chainSynAmbMex1Link.txt
     # 2686570 bases of 2652783500 (0.101%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` mm10 ambMex1) > rbest.log 2>&1 &
     # real    478m39.331s
 
     # something odd went haywire at the download step
     time (doRecipBest.pl -load -continue=download -workhorse=hgwdev -buildDir=`pwd` mm10 ambMex1) > download.log 2>&1 &
     # real    1m42.883s
 
     cat fb.mm10.chainRBest.AmbMex1.txt
     # 36938030 bases of 2652783500 (1.392%) in intersection
 
     # and for the swap:
     mkdir /hive/data/genomes/ambMex1/bed/blastz.mm10.swap
     cd /hive/data/genomes/ambMex1/bed/blastz.mm10.swap
 
     time (doBlastzChainNet.pl -verbose=2 \
       /hive/data/genomes/mm10/bed/lastzAmbMex1.2018-07-09/DEF \
         -swap -chainMinScore=5000 -chainLinearGap=loose \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > swap.log 2>&1 &
     #  real    39m28.757s
 
     cat fb.ambMex1.chainMm10Link.txt
     # 87124587 bases of 28366694468 (0.307%) in intersection
 
     cat fb.ambMex1.chainSynMm10Link.txt
     # 2893381 bases of 28366694468 (0.010%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` ambMex1 mm10) > rbest.log 2>&1 &
     # real    568m10.621s
 
     # something odd went haywire at the download step
     time (doRecipBest.pl -load -continue=download -workhorse=hgwdev -buildDir=`pwd` ambMex1 mm10) > download.log 2>&1 &
     # real    3m16.404s
 
     cat fb.ambMex1.chainRBest.Mm10.txt
     # 38584422 bases of 28366694468 (0.136%) in intersection
 
 ##############################################################################
 2018-08-03: import of UCSC GENCODE group processing of GENCODE VM18 (markd)
     # being pushed to the RR
 
     # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions
 
     # download, build and load tables
     mkdir -p /hive/data/genomes/mm10/bed/gencodeVM18
     pushd /hive/data/genomes/mm10/bed/gencodeVM18
     (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
 
     ##  check gencode-cmp.tsv to see if sizes make sense
 
     # generate trackDb and joiner blurb
     pushd ~/kent/src/hg/makeDb/trackDb
     ../../makeDb/outside/gencode/gencodeGenerateTrackDbs mm10 M18 93 'July 2018'
 
     ## only if being pushed to RR:
     # Update mouse/mm10/wgEncodeGencodeSuper.html
     # Update 'Release Notes' to describe new release.
 
     # edit mouse/mm10/trackDb.gencode.ra to add new .ra file include
     make DBS=mm10
 
     ## only if being pushed to RR:
     # edit  all.joiner to add ~/tmp/gencodeVM18.joiner
     # verify with:
     pushd /hive/data/genomes/mm10/bed/gencodeVM18
     make  -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck db=mm10
 
     # commit all and make push request, the file tables.lst will have the
     # list of tables for the push request.
 
     cd ~/kent/src/hg/makeDb/trackDb
     make alpha DBS=mm10
 
 ##############################################################################
 # LASTZ mouse/mm10 vs. chicken/galGal6 - (DONE - 2018-10-12 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzGalGal6.2018-10-12
     cd /hive/data/genomes/mm10/bed/lastzGalGal6.2018-10-12
 
     printf "# Mouse vs. chicken
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 #      A    C    G    T
 #     91  -90  -25 -100
 #    -90  100 -100  -25
 #    -25 -100  100  -90
 #   -100  -25  -90  91
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: chicken galGal6
 SEQ2_DIR=/hive/data/genomes/galGal6/galGal6.2bit
 SEQ2_LEN=/hive/data/genomes/galGal6/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LAP=0
 SEQ2_LIMIT=50
 
 BASE=/hive/data/genomes/mm10/bed/lastzGalGal6.2018-10-12
 TMPDIR=/dev/shm
 " > DEF
 
     time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
         -chainMinScore=5000 -chainLinearGap=loose \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > do.log 2>&1
     # real    84m14.188s
 
     cat fb.mm10.chainGalGal6Link.txt
     # 101151132 bases of 2652783500 (3.813%) in intersection
     cat fb.mm10.chainSynGalGal6Link.txt
     # 70707720 bases of 2652783500 (2.665%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` mm10 galGal6) > rbest.log 2>&1 &
     # real    116m19.316s
 
     cat fb.mm10.chainRBest.GalGal6.txt 
     # 79649474 bases of 2652783500 (3.002%) in intersection
 
     # and for the swap:
     mkdir /hive/data/genomes/galGal6/bed/blastz.mm10.swap
     cd /hive/data/genomes/galGal6/bed/blastz.mm10.swap
 
     time (doBlastzChainNet.pl -verbose=2 \
       /hive/data/genomes/mm10/bed/lastzGalGal6.2018-10-12/DEF \
         -swap -chainMinScore=5000 -chainLinearGap=loose \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > swap.log 2>&1
     #  real    6m41.043s
 
     cat fb.galGal6.chainMm10Link.txt
     # 88539346 bases of 1055588482 (8.388%) in intersection
 
 
     time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` galGal6 mm10) > rbest.log 2>&1 &
     # real    94m11.007s
 
     cat fb.galGal6.chainRBest.Mm10.txt
     # 79474812 bases of 1055588482 (7.529%) in intersection
 
 #########################################################################
 # LASTZ mouse/mm10 vs. cow/bosTau9 - (DONE - 2018-11-08 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzBosTau9.2018-11-08
     cd /hive/data/genomes/mm10/bed/lastzBosTau9.2018-11-08
 
     printf '# mouse vs cow
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
 BLASTZ_M=254
 # default BLASTZ_Q score matrix:
 #       A     C     G     T
 # A    91  -114   -31  -123
 # C  -114   100  -125   -31
 # G   -31  -125   100  -114
 # T  -123   -31  -114    91
 
 # TARGET: mouse mm10
 SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
 SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
 SEQ1_CHUNK=40000000
 SEQ1_LIMIT=2
 SEQ1_LAP=10000
 
 # QUERY: cow bosTau9
 SEQ2_DIR=/hive/data/genomes/bosTau9/bosTau9.2bit
 SEQ2_LEN=/hive/data/genomes/bosTau9/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LIMIT=10
 SEQ2_LAP=0
 
 BASE=/hive/data/genomes/mm10/bed/lastzBosTau9.2018-11-08
 TMPDIR=/dev/shm
 ' > DEF
     # << happy emacs
 
     time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
         -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > do.log 2>&1 &
     # real    211m46.258s
 
     cat fb.mm10.chainBosTau9Link.txt
     # 703580224 bases of 2652783500 (26.522%) in intersection
     cat fb.mm10.chainSynBosTau9Link.txt
     # 659095603 bases of 2652783500 (24.845%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` mm10 bosTau9) > rbest.log 2>&1 &
     # real    214m24.819s
 
     cat fb.mm10.chainRBest.BosTau9.txt
     # 667950653 bases of 2652783500 (25.179%) in intersection
 
     # and for the swap:
     mkdir /hive/data/genomes/bosTau9/bed/blastz.mm10.swap
     cd /hive/data/genomes/bosTau9/bed/blastz.mm10.swap
 
     time (doBlastzChainNet.pl -verbose=2 \
       /hive/data/genomes/mm10/bed/lastzBosTau9.2018-11-08/DEF \
         -swap -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > swap.log 2>&1 &
     #  real    41m22.962s
 
     cat fb.bosTau9.chainMm10Link.txt
     # 695248613 bases of 2715853792 (25.600%) in intersection
     cat fb.bosTau9.chainSynMm10Link.txt
     # 660591041 bases of 2715853792 (24.324%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` bosTau9 mm10) > rbest.log 2>&1 &
     # real    204m36.465s
 
     cat fb.bosTau9.chainRBest.Mm10.txt
     # 667305554 bases of 2715853792 (24.571%) in intersection
 
 #########################################################################
 2018-11-10: import of UCSC GENCODE group processing of GENCODE VM19 (markd)
     # not being pushed to the RR
 
     # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions
 
     # download, build and load tables
     mkdir -p /hive/data/genomes/mm10/bed/gencodeVM19
     pushd /hive/data/genomes/mm10/bed/gencodeVM19
     (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
 
     ##  gencode-cmp.tsv check to see if sizes make sense
 
     # generate trackDb and joiner blurb
     pushd ~/kent/src/hg/makeDb/trackDb
     ../../makeDb/outside/gencode/gencodeGenerateTrackDbs mm10 M19 94 'Oct 2018'
 
     ## only if being pushed to RR:
     (skipped)
     # Update mouse/mm10/wgEncodeGencodeSuper.html
     # Update 'Release Notes' to describe new release.
 
     # edit mouse/mm10/trackDb.gencode.ra to add new .ra file include
     make DBS=mm10
 
     ## only if being pushed to RR: (SKIPPED)
     # edit  all.joiner to add ~/tmp/gencodeVM19.joiner
     # verify with:
     pushd /hive/data/genomes/mm10/bed/gencodeVM19
     make  -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck db=mm10
 
     # commit all and make push request, the file tables.lst will have the
     # list of tables for the push request.
 
     cd ~/kent/src/hg/makeDb/trackDb
     make alpha DBS=mm10
 
 ##############################################################################
 # LASTZ mouse/mm10 vs. Japanese quail/cotJap2 - (DONE - 2018-11-15 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzCotJap2.2018-11-15
     cd /hive/data/genomes/mm10/bed/lastzCotJap2.2018-11-15
 
     printf "# Mouse vs. Japanese quail
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
 BLASTZ_Y=3400
 BLASTZ_L=6000
 BLASTZ_K=2200
 BLASTZ_Q=/scratch/data/blastz/HoxD55.q
 #      A    C    G    T
 #     91  -90  -25 -100
 #    -90  100 -100  -25
 #    -25 -100  100  -90
 #   -100  -25  -90  91
 
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Japanese quail cotJap2
 SEQ2_DIR=/hive/data/genomes/cotJap2/cotJap2.2bit
 SEQ2_LEN=/hive/data/genomes/cotJap2/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LAP=0
 SEQ2_LIMIT=50
 
 BASE=/hive/data/genomes/mm10/bed/lastzCotJap2.2018-11-15
 TMPDIR=/dev/shm
 " > DEF
 
     time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
         -chainMinScore=5000 -chainLinearGap=loose \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > do.log 2>&1
     # real    82m16.032s
 
     cat fb.mm10.chainCotJap2Link.txt
     # 97251364 bases of 2652783500 (3.666%) in intersection
     cat fb.mm10.chainSynCotJap2Link.txt
     # 67653818 bases of 2652783500 (2.550%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` mm10 cotJap2) > rbest.log 2>&1 &
     # real    104m58.905s
 
     cat fb.mm10.chainRBest.CotJap2.txt 
     # 76298136 bases of 2652783500 (2.876%) in intersection
 
     # and for the swap:
     mkdir /hive/data/genomes/cotJap2/bed/blastz.mm10.swap
     cd /hive/data/genomes/cotJap2/bed/blastz.mm10.swap
 
     time (doBlastzChainNet.pl -verbose=2 \
       /hive/data/genomes/mm10/bed/lastzCotJap2.2018-11-15/DEF \
         -swap -chainMinScore=5000 -chainLinearGap=loose \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > swap.log 2>&1
     #  real    6m37.873s
 
     cat fb.cotJap2.chainMm10Link.txt
     # 82592561 bases of 917263224 (9.004%) in intersection
     cat fb.cotJap2.chainSynMm10Link.txt
     # 66583746 bases of 917263224 (7.259%) in intersection
 
     # mistakenly started this on ku, it failed at the download step since
     # it could not see the /gbdb/mm10/ hierarchy:
     time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` cotJap2 mm10) > rbest.log 2>&1 &
     # real    79m48.767s
 
     # continue on hgwdev
     time (doRecipBest.pl -load -workhorse=hgwdev -continue=download -buildDir=`pwd` cotJap2 mm10) > rbest.download.log 2>&1 &
     # real    1m40.970s
 
     cat fb.cotJap2.chainRBest.Mm10.txt
     # 76078816 bases of 917263224 (8.294%) in intersection
 
 ##############################################################################
 2018-11-30: import of UCSC GENCODE group processing of GENCODE VM20 prerelease (markd)
     # This is a prerelease for testing and is *not* to be pushed until the final release.
 
     # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions
 
     # download, build and load tables
     mkdir -p /hive/data/genomes/mm10/bed/gencodeVM20
     pushd /hive/data/genomes/mm10/bed/gencodeVM20
     (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
 
     ##  gencode-cmp.tsv check to see if sizes make sense
 
     # generate trackDb and joiner blurb
     pushd ~/kent/src/hg/makeDb/trackDb
     ../../makeDb/outside/gencode/gencodeGenerateTrackDbs mm10 M20 95 'Dec 2018'
 
     ## only if being pushed to RR:
     (skipped)
     # Update mouse/mm10/wgEncodeGencodeSuper.html
     # Update 'Release Notes' to describe new release.
 
     # edit mouse/mm10/trackDb.gencode.ra to add new .ra file include
     make DBS=mm10
 
     ## only if being pushed to RR: (SKIPPED)
     # edit  all.joiner to add ~/tmp/gencodeVM20.joiner
     # verify with:
     pushd /hive/data/genomes/mm10/bed/gencodeVM20
     make  -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck db=mm10
 
     # commit all and make push request, the file tables.lst will have the
     # list of tables for the push request.
 
     cd ~/kent/src/hg/makeDb/trackDb
     make alpha DBS=mm10
 
 #########################################################################
 2019-01-17: tabula muris track (max)
 # download 7Tb of data from Amazon, using token, CZI pays (got token by email, via Angela Pisco, James Webber)
 export AWS_ACCESS_KEY_ID=xxxxx
 export AWS_SESSION_TOKEN=xxxxx
 aws s3 sync s3://czbiohub-tabula-muris/tabula_muris_bam_files/ . --delete
 cd ~/projects/czi/cbData/ucsc/tabulaMuris
 csvToTab TM_facs_metadata.csv > TM_facs_metadata.tsv
 cat TM_facs_metadata.csv | tr '.' '-' | csvToTab > TM_facs_metadata.fix.tsv
 # this is not necessary anymore, the new mm10.sizes file comes with cbTrackHub and 
 # includes the ERCCs
 hgsql -N -e 'select alias,chrom from chromAlias;' mm10 > mm10.chromAlias.tab
 faSize ERCC92.fa -detailed > ERCC.sizes
 cat /hive/data/genomes/mm10/chrom.sizes ERCC.sizes > mm10ercc.sizes 
 
 # the next one requires single cell browser, from https://github.com/maximilianh/cellBrowser
 cbTrackHub mm10 bam/ TM_facs_metadata.fix.tsv cell_ontology_class hub/ --name "TabulaMuris"
 
 #########################################################################
 # LIFTOVER TO GRCm38B (DONE - 2019-03-01 - Hiram)
     ssh hgwdev
     mkdir /hive/data/genomes/mm10/bed/blat.GRCm38B.2019-03-01
     cd /hive/data/genomes/mm10/bed/blat.GRCm38B.2019-03-01
     doSameSpeciesLiftOver.pl -verbose=2 \
 	-fileServer=hgwdev \
         -debug -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
         -ooc=/hive/data/genomes/mm10/jkStuff/mm10.11.ooc \
          mm10 GRCm38B
     doSameSpeciesLiftOver.pl -verbose=2 \
 	-debug -fileServer=hgwdev \
 	-query2Bit=/hive/data/genomes/mm10/mm10.2bit \
 	-querySizes=/hive/data/genomes/mm10/chrom.sizes \
 	-target2Bit=/hive/data/genomes/GRCm38B/GRCm38B.2bit \
 	-targetSizes=/hive/data/genomes/GRCm38B/chrom.sizes \
         -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
         -ooc=/hive/data/genomes/mm10/mm10.11.ooc mm10 GRCm38B
 
     time (doSameSpeciesLiftOver.pl -verbose=2 \
 	-fileServer=hgwdev \
 	-query2Bit=/hive/data/genomes/mm10/mm10.2bit \
 	-querySizes=/hive/data/genomes/mm10/chrom.sizes \
 	-target2Bit=/hive/data/genomes/GRCm38B/GRCm38B.2bit \
 	-targetSizes=/hive/data/genomes/GRCm38B/chrom.sizes \
         -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
         -ooc=/hive/data/genomes/mm10/mm10.11.ooc \
          mm10 GRCm38B) > doLiftOverToGRCm38B.log 2>&1
     # real    156m50.777s
 
     # see if the liftOver menus function in the browser from mm10 to GRCm38B
 
 #########################################################################
 #############################################################################
 # hgPal downloads (rebuilt knownGene and knownCanonical 2019-04-01 braney )
 
     ssh hgwdev
     mkdir /hive/data/genomes/mm10/bed/multiz60way/pal.ucsc18
     cd /hive/data/genomes/mm10/bed/multiz60way/pal.ucsc18
     cat ../species.list | tr '[ ]' '[\n]' > order.list
 
     export mz=multiz60way
     export gp=knownGene
     export db=mm10
     export I=0
     mkdir exonAA exonNuc
     for C in `sort -nk2 ../../../chrom.sizes | cut -f1`
     do
         I=`echo $I | awk '{print $1+1}'`
 	echo "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.list stdout | gzip -c > exonNuc/$C.exonNuc.fa.gz &"
 	echo "mafGene -chrom=$C -exons $db $mz $gp order.list stdout | gzip -c > exonAA/$C.exonAA.fa.gz &"
         if [ $I -gt 6 ]; then
             echo "date"
             echo "wait"
             I=0
         fi
     done > $gp.jobs
     echo "date" >> $gp.jobs
     echo "wait" >> $gp.jobs
 
     time sh -x ./$gp.jobs > $gp.jobs.log 2>&1 &
     # real    59m23.279s
 
     time zcat exonAA/*.gz | gzip -c > $gp.$mz.exonAA.fa.gz
     # real    1m35.590s
     time zcat exonNuc/*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz
     # real    7m46.538s
 
     export mz=multiz60way
     export gp=knownGene
     export db=mm10
     export pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
     rm -rf $pd
     mkdir -p $pd
     ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
     ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
 
     rm -rf exonAA exonNuc
 
     cd /hive/data/genomes/mm10/bed/multiz60way/pal
     export mz=multiz60way
     export gp=ncbiRefSeq
     export db=mm10
     export I=0
     mkdir exonAA exonNuc
     for C in `sort -nk2 ../../../chrom.sizes | cut -f1`
     do
         I=`echo $I | awk '{print $1+1}'`
 	echo "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.list stdout | gzip -c > exonNuc/$C.exonNuc.fa.gz &"
 	echo "mafGene -chrom=$C -exons $db $mz $gp order.list stdout | gzip -c > exonAA/$C.exonAA.fa.gz &"
         if [ $I -gt 6 ]; then
             echo "date"
             echo "wait"
             I=0
         fi
     done > $gp.jobs
     echo "date" >> $gp.jobs
     echo "wait" >> $gp.jobs
 
     time sh -x $gp.jobs > $gp.jobs.log 2>&1
     # real    126m0.688s
 
     export mz=multiz60way
     export gp=ncbiRefSeq
     export db=mm10
     time zcat exonAA/*.gz | gzip -c > $gp.$mz.exonAA.fa.gz
     # real    2m56.817s
     time zcat exonNuc/*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz
     # real    14m8.080s
 
     rm -rf exonAA exonNuc
 
     # we're only distributing exons at the moment
     export mz=multiz60way
     export gp=ncbiRefSeq
     export db=mm10
     export pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
     ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
     ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
 
     ### And knownCanonical
     cd /hive/data/genomes/mm10/bed/multiz60way/pal
     export mz=multiz60way
     export gp=knownCanonical
     export db=mm10
     mkdir exonAA exonNuc knownCanonical
 
     time cut -f1 ../../../chrom.sizes | while read C
     do
         echo $C 1>&2
 	hgsql mm10 -N -e "select chrom, chromStart, chromEnd, transcript from knownCanonical where chrom='$C'" > knownCanonical/$C.known.bed
     done
     #   real    0m15.897s
 
     ls knownCanonical/*.known.bed | while read F
     do
       if [ -s $F ]; then
          echo $F | sed -e 's#knownCanonical/##; s/.known.bed//'
       fi
     done | while read C
     do
 	echo "date"
 	echo "mafGene -geneBeds=knownCanonical/$C.known.bed -exons -noTrans $db $mz knownGene order.list stdout | \
 	    gzip -c > exonNuc/$C.exonNuc.fa.gz"
 	echo "mafGene -geneBeds=knownCanonical/$C.known.bed -exons $db $mz knownGene order.list stdout | \
 	    gzip -c > exonAA/$C.exonAA.fa.gz"
     done > $gp.$mz.jobs
 
     time sh -x $gp.$mz.jobs > $gp.$mz.job.log 2>&1 
     # 267m58.813s
 
     rm *.known.bed
     export mz=multiz60way
     export gp=knownCanonical
     export db=mm10
     zcat exonAA/c*.gz | gzip -c > $gp.$mz.exonAA.fa.gz &
     zcat exonNuc/c*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz &
     # about 6 minutes
 
     rm -rf exonAA exonNuc
 
     export mz=multiz60way
     export gp=knownCanonical
     export db=mm10
     export pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
     mkdir -p $pd
     ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
     ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
 
     cd  $pd
     md5sum *.fa.gz > md5sum.txt
 ##############################################################################
 2019-04-08: import of UCSC GENCODE group processing of GENCODE VM21 (markd)
     # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions
 
     # download, build and load tables
     mkdir -p /hive/data/genomes/mm10/bed/gencodeVM21
     pushd /hive/data/genomes/mm10/bed/gencodeVM21
     (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
 
     ##  gencode-cmp.tsv check to see if sizes make sense
 
     # generate trackDb and joiner blurb
     pushd ~/kent/src/hg/makeDb/trackDb
     ../../makeDb/outside/gencode/gencodeGenerateTrackDbs mm10 M21 96 'Apr 2019'
 
     ## only if being pushed to RR:
     # Update mouse/mm10/wgEncodeGencodeSuper.html
     # Update 'Release Notes' to describe new release.
 
     # edit mouse/mm10/trackDb.gencode.ra to add new .ra file include
     make DBS=mm10
 
     ## only if being pushed to RR:
     # edit  all.joiner to add ~/tmp/gencodeVM21.joiner
     # verify with:
     pushd /hive/data/genomes/mm10/bed/gencodeVM21
     make  -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck db=mm10
 
     # commit all and make push request, the file tables.lst will have the
     # list of tables for the push request.
 
     cd ~/kent/src/hg/makeDb/trackDb
     make alpha DBS=mm10
 
     # commit all
     # if pushing public, add ticket and MARK QA READY
 
 #########################################################################
 2019-07-03: import of UCSC GENCODE group processing of GENCODE VM22 (markd)
     # Replaced import of pre-release
     # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions
 
     # download, build and load tables
     mkdir -p /hive/data/genomes/mm10/bed/gencodeVM22
     pushd /hive/data/genomes/mm10/bed/gencodeVM22
     (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
 
     ##  gencode-cmp.tsv check to see if sizes make sense
 
     # generate trackDb and joiner blurb
     pushd ~/kent/src/hg/makeDb/trackDb
     ../../makeDb/outside/gencode/gencodeGenerateTrackDbs mm10 M22 97 'June 2019'
 
     ## only if being pushed to RR:
     # Update mouse/mm10/wgEncodeGencodeSuper.html
     # Update 'Release Notes' to describe new release.
 
     # edit mouse/mm10/trackDb.gencode.ra to add new .ra file include
     make DBS=mm10
 
     ## only if being pushed to RR:
     # edit  all.joiner to add ~/tmp/gencodeVM22.joiner
     # verify with:
     pushd /hive/data/genomes/mm10/bed/gencodeVM22
     make  -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck db=mm10
 
     # commit all and make push request, the file tables.lst will have the
     # list of tables for the push request.
 
     cd ~/kent/src/hg/makeDb/trackDb
     make alpha DBS=mm10
 
     # commit all
     # if pushing public, add ticket and MARK QA READY
 
 ##############################################################################
 # LASTZ Rat regenRn0 (DONE - 2019-07-01 - Jonathan)
     mkdir /hive/data/genomes/mm10/bed/lastzRegenRn0.2019-07-01
     cd /hive/data/genomes/mm10/bed/lastzRegenRn0.2019-07-01
 
     printf '# rat vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/scratch/data/mm10/mm10.2bit
 SEQ1_LEN=/scratch/data/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Rat RegenRn0
 SEQ2_DIR=/hive/data/genomes/regenRn0/regenRn0.2bit
 SEQ2_LEN=/hive/data/genomes/regenRn0/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LAP=0
 SEQ2_LIMIT=500
 
 BASE=/hive/data/genomes/mm10/bed/lastzRegenRn0.2019-07-01
 TMPDIR=/dev/shm
 ' > DEF
 
     #	establish a screen to control this job
     screen -S mm10RegenRn0
     time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
 	-noDbNameCheck -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-syntenicNet -chainMinScore=5000 -chainLinearGap=medium) > do.log 2>&1
     #   real    196m22.733s
 
     cat fb.mm10.chainRegenRn0Link.txt
     #	1843678500 bases of 2652783500 (69.500%) in intersection
     cat fb.mm10.chainSynRegenRn0Link.txt
     #   1720395177 bases of 2652783500 (64.852%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -load -buildDir=`pwd` mm10 regenRn0) > rbest.log 2>&1 &
     # real    494m43.241s
 
     cat fb.mm10.chainRBest.RegenRn0.txt
     # 1694384084 bases of 2652783500 (63.872%) in intersection
 
     mkdir /hive/data/genomes/regenRn0/bed/blastz.mm10.swap
     cd /hive/data/genomes/regenRn0/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzRegenRn0.2019-07-01/DEF \
 	-swap -syntenicNet -noDbNameCheck \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=5000 -chainLinearGap=medium) > swap.log 2>&1
     #	real    106m31.449s
 
     cat fb.regenRn0.chainMm10Link.txt
     #   1803664991 bases of 2534810853 (71.156%) in intersection
     cat fb.regenRn0.chainSynMm10Link.txt
     #   1712372147 bases of 2534810853 (67.554%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -load -buildDir=`pwd` regenRn0 mm10) > rbest.log 2>&1
     # real    536m51.292s
 
     cat fb.regenRn0.chainRBest.Mm10.txt
     # 1695272967 bases of 2534810853 (66.880%) in intersection
 
 ##############################################################################
 # LASTZ Rhesus rheMac10 (DONE - 2019-07-03 - Hiram)
     mkdir /hive/data/genomes/mm10/bed/lastzRheMac10.2019-07-03
     cd /hive/data/genomes/mm10/bed/lastzRheMac10.2019-07-03
 
     printf '# rhesus vs mouse
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
 # TARGET: Mouse Mm10
 SEQ1_DIR=/hive/data/genomes/mm10/mm10.2bit
 SEQ1_LEN=/hive/data/genomes/mm10/chrom.sizes
 SEQ1_CHUNK=20000000
 SEQ1_LAP=10000
 
 # QUERY: Rhesus RheMac10
 SEQ2_DIR=/hive/data/genomes/rheMac10/rheMac10.2bit
 SEQ2_LEN=/hive/data/genomes/rheMac10/chrom.sizes
 SEQ2_CHUNK=20000000
 SEQ2_LAP=0
 SEQ2_LIMIT=500
 
 BASE=/hive/data/genomes/mm10/bed/lastzRheMac10.2019-07-03
 TMPDIR=/dev/shm
 ' > DEF
 
     #	establish a screen to control this job
     screen -S mm10RheMac10
     time (doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
 	-syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
     #   real    211m21.922s
 
     cat fb.mm10.chainRheMac10Link.txt
     #	923559693 bases of 2652783500 (34.815%) in intersection
     cat fb.mm10.chainSynRheMac10Link.txt
     #   878479553 bases of 2652783500 (33.115%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -load -buildDir=`pwd` mm10 rheMac10) > rbest.log 2>&1 &
     # real    315m43.465s
 
     cat fb.mm10.chainRBest.RheMac10.txt
     # 879885863 bases of 2652783500 (33.168%) in intersection
 
     mkdir /hive/data/genomes/rheMac10/bed/blastz.mm10.swap
     cd /hive/data/genomes/rheMac10/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
 	/hive/data/genomes/mm10/bed/lastzRheMac10.2019-07-03/DEF \
 	-swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
 	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
     #	real    52m48.045s
 
     cat fb.rheMac10.chainMm10Link.txt
     #	918551088 bases of 2936892733 (31.276%) in intersection
     cat fb.rheMac10.chainSynMm10Link.txt
     #   876230433 bases of 2936892733 (29.835%) in intersection
 
     time (doRecipBest.pl -workhorse=hgwdev -load -buildDir=`pwd` rheMac10 mm10) > rbest.log 2>&1
     # real    303m40.303s
 
     cat fb.rheMac10.chainRBest.Mm10.txt
     # 878542993 bases of 2936892733 (29.914%) in intersection
 
 ##############################################################################
 2019-08-30: import of UCSC GENCODE group processing of GENCODE VM23 (markd)
     # PRE-RELEASE
 
     # edit hg/makeDb/outside/gencode/gencodeLoad.mk to set release and ensembl versions
 
     # download, build and load tables
     mkdir -p /hive/data/genomes/mm10/bed/gencodeVM23
     pushd /hive/data/genomes/mm10/bed/gencodeVM23
 
     (time nice make -j 10 -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk) >&build.1.out&
 
     # compare tables from previous release to see if number changed makes
     # sense.  Results are in gencode-cmp.tsv
 
     # generate trackDb and joiner blurb
     pushd ~/kent/src/hg/makeDb/trackDb
     ../../makeDb/outside/gencode/gencodeGenerateTrackDbs mm10 M23 98 'Sept 2019'
 
     # If being pushed public, update 'Release Notes' in
     # mouse/mm10/wgEncodeGencodeSuper.html
 
     # edit mouse/mm10/trackDb.gencode.ra to add new .ra file include
     make DBS=mm10
 
     ## only if being pushed to RR:
     # edit all.joiner to add ~/tmp/gencodeVM23.joiner
     # verify with:
     pushd /hive/data/genomes/mm10/bed/gencodeVM23
     make  -f ~/kent/src/hg/makeDb/outside/gencode/gencodeLoad.mk joinerCheck
 
     # commit all
     # if pushing public: add ticket and MARK QA READY
 
+    # 
+
 #########################################################################