6b1648806fc1de0da4b16bb23f51e0fd52065ef7
gperez2
  Wed Apr 13 13:12:33 2022 -0700
making ReMap hub into native track, refs #28960

diff --git src/hg/makeDb/doc/dm6.txt src/hg/makeDb/doc/dm6.txt
index f04ebd8..9e5655e 100644
--- src/hg/makeDb/doc/dm6.txt
+++ src/hg/makeDb/doc/dm6.txt
@@ -1,3835 +1,3857 @@
 # for emacs: -*- mode: sh; -*-
 
 # This file describes browser build for the dm6
 # Drosophila melanogaster -- BDGP Release 6.0 + ISO1 MT (Aug, 2014)
 #  The FlyBase Consortium/Berkeley Drosophila Genome Project/Celera Genomics
 #  BDGP Release 6 + ISO1 MT
 
 
 #############################################################################
 # fetch sequence from new style download directory (DONE - 2014-08-27 - Hiram)
     # NCBI has redesigned their FTP download site, new type of address
     #      and naming scheme
     mkdir -p /hive/data/genomes/dm6/genbank
     cd /hive/data/genomes/dm6/genbank
 
     rsync -L -a -P rsync://ftp.ncbi.nlm.nih.gov/genomes/genbank/invertebrate/Drosophila_melanogaster/latest_assembly_versions/GCA_000001215.4_Release_6_plus_ISO1_MT/ ./
 
     # measure what we have here:
     faSize GCA_000001215.4_Release_6_plus_ISO1_MT_assembly_structure/Primary_Assembly/assembled_chromosomes/FASTA/*.fna.gz \
      GCA_000001215.4_Release_6_plus_ISO1_MT_assembly_structure/non-nuclear/assembled_chromosomes/FASTA/chrMT.fna.gz \
      GCA_000001215.4_Release_6_plus_ISO1_MT_assembly_structure/Primary_Assembly/unplaced_scaffolds/FASTA/*.fna.gz \
 GCA_000001215.4_Release_6_plus_ISO1_MT_assembly_structure/Primary_Assembly/unlocalized_scaffolds/FASTA/*.fna.gz
 
     # 143726002 bases (1152978 N's 142573024 real 142573024 upper 0 lower)
     #  in 1870 sequences in 11 files
     # Total size: mean 76858.8 sd 1382100.2 min 544 (DS485566.2)
     #   max 32079331 (AE014297.3) median 1577
 
 #############################################################################
 # fixup to UCSC naming scheme (DONE - 2014-08-27 - Hiram)
     mkdir /hive/data/genomes/dm6/ucsc
     cd /hive/data/genomes/dm6/ucsc
     # evolving these scripts over time, could become source tree scripts
 
     cat << '_EOF_' > ucscCompositeAgp.pl
 #!/bin/env perl
 
 use strict;
 use warnings;
 
 my %accToChr;
 
 my $primary="../genbank/GCA_000001215.4_Release_6_plus_ISO1_MT_assembly_structure/Primary_Assembly";
 
 open (FH, "<$primary/assembled_chromosomes/chr2acc") or
         die "can not read Primary_Assembly/assembled_chromosomes/chr2acc";
 while (my $line = <FH>) {
     next if ($line =~ m/^#/);
     chomp $line;
     my ($chrN, $acc) = split('\s+', $line);
     $accToChr{$acc} = $chrN;
 }
 close (FH);
 
 foreach my $acc (keys %accToChr) {
     my $chrN =  $accToChr{$acc};
     print "$acc $accToChr{$acc}\n";
     open (FH, "zcat $primary/assembled_chromosomes/AGP/chr${chrN}.comp.agp.gz|") or die "can not read chr${chrN}.comp.agp.gz";
     open (UC, ">chr${chrN}.agp") or die "can not write to chr${chrN}.agp";
     while (my $line = <FH>) {
         if ($line =~ m/^#/) {
             print UC $line;
         } else {
             $line =~ s/^$acc/chr${chrN}/;
             print UC $line;
         }
     }
     close (FH);
     close (UC);
     open (FH, "zcat $primary/assembled_chromosomes/FASTA/chr${chrN}.fna.gz|") or die "can not read chr${chrN}.fna.gz";
     open (UC, ">chr${chrN}.fa") or die "can not write to chr${chrN}.fa";
     while (my $line = <FH>) {
         if ($line =~ m/^>/) {
             printf UC ">chr${chrN}\n";
         } else {
             print UC $line;
         }
     }
     close (FH);
     close (UC);
 }
 '_EOF_'
     # << happy emacs
 
     cat << '_EOF_' > unlocalized.pl
 #!/bin/env perl
 
 use strict;
 use warnings;
 
 my %accToChr;
 my %chrNames;
 
 my $primary="../genbank/GCA_000001215.4_Release_6_plus_ISO1_MT_assembly_structure/Primary_Assembly";
 
 open (FH, "<$primary/unlocalized_scaffolds/unlocalized.chr2scaf") or
         die "can not read Primary_Assembly/unlocalized_scaffolds/unlocalized.chr2scaf";
 while (my $line = <FH>) {
     next if ($line =~ m/^#/);
     chomp $line;
     my ($chrN, $acc) = split('\s+', $line);
     $acc =~ s/\./v/;
     $accToChr{$acc} = $chrN;
     $chrNames{$chrN} += 1;
 }
 close (FH);
 
 foreach my $chrN (keys %chrNames) {
     my $agpFile =  "$primary/unlocalized_scaffolds/AGP/chr$chrN.unlocalized.scaf.agp.gz";
     my $fastaFile =  "$primary/unlocalized_scaffolds/FASTA/chr$chrN.unlocalized.scaf.fna.gz";
     open (FH, "zcat $agpFile|") or die "can not read $agpFile";
     open (UC, ">chr${chrN}_random.agp") or die "can not write to chr${chrN}_random.agp";
     while (my $line = <FH>) {
         if ($line =~ m/^#/) {
             print UC $line;
         } else {
             chomp $line;
             my (@a) = split('\t', $line);
             my $acc = $a[0];
             $acc =~ s/\./v/;
             die "ERROR: chrN $chrN not correct for $acc"
                 if ($accToChr{$acc} ne $chrN);
             my $ucscName = "chr${chrN}_${acc}_random";
             printf UC "%s", $ucscName;
             for (my $i = 1; $i < scalar(@a); ++$i) {
                 printf UC "\t%s", $a[$i];
             }
             printf UC "\n";
         }
     }
     close (FH);
     close (UC);
     printf "chr%s\n", $chrN;
     open (FH, "zcat $fastaFile|") or die "can not read $fastaFile";
     open (UC, ">chr${chrN}_random.fa") or die "can not write to chr${chrN}_random.fa";
     while (my $line = <FH>) {
         if ($line =~ m/^>/) {
             chomp $line;
             my $acc = $line;
             $acc =~ s/ .*//;
             $acc =~ s/\./v/;
             $acc =~ s/>//;
             die "ERROR: chrN $chrN not correct for $acc"
                 if ($accToChr{$acc} ne $chrN);
             my $ucscName = "chr${chrN}_${acc}_random";
             printf UC ">$ucscName\n";
         } else {
             print UC $line;
         }
     }
     close (FH);
     close (UC);
 }
 '_EOF_'
     # << happy emacs
 
     cat << '_EOF_' > unplaced.pl
 #!/bin/env perl
 
 use strict;
 use warnings;
 
 my $primary="../genbank/GCA_000001215.4_Release_6_plus_ISO1_MT_assembly_structure/Primary_Assembly";
 
 my $agpFile =  "$primary/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz";
 my $fastaFile =  "$primary/unplaced_scaffolds/FASTA/unplaced.scaf.fna.gz";
 open (FH, "zcat $agpFile|") or die "can not read $agpFile";
 open (UC, ">chrUn.agp") or die "can not write to chrUn.agp";
 while (my $line = <FH>) {
     if ($line =~ m/^#/) {
         print UC $line;
     } else {
         $line =~ s/\./v/;
         printf UC "chrUn_%s", $line;
     }
 }
 close (FH);
 close (UC);
 
 open (FH, "zcat $fastaFile|") or die "can not read $fastaFile";
 open (UC, ">chrUn.fa") or die "can not write to chrUn.fa";
 while (my $line = <FH>) {
     if ($line =~ m/^>/) {
         chomp $line;
         $line =~ s/ .*//;
         $line =~ s/\./v/;
         $line =~ s/>//;
         printf UC ">chrUn_$line\n";
     } else {
         print UC $line;
     }
 }
 close (FH);
 close (UC);
 '_EOF_'
     # << happy emacs
 
     chmod +x ucscCompositeAgp.pl unlocalized.pl unplaced.pl
     ./ucscCompositeAgp.pl
     ./unlocalized.pl
     ./unplaced.pl
 
     zcat ../genbank/GCA_000001215.4_Release_6_plus_ISO1_MT_assembly_structure/non-nuclear/assembled_chromosomes/AGP/chrMT.comp.agp.gz \
         | grep -v "^#" | sed -e 's/^KJ947872.2/chrM/' > chrM.agp
 
     zcat ../genbank/GCA_000001215.4_Release_6_plus_ISO1_MT_assembly_structure/non-nuclear/assembled_chromosomes/FASTA/chrMT.fna.gz \
         | sed -e 's/^>.*/>chrM/' > chrM.fa
 
     # verify this sequence is still the same as it was before
     #  these translations
     faSize *.fa
 
     # 143726002 bases (1152978 N's 142573024 real 142573024 upper 0 lower)
     #   in 1870 sequences in 11 files
     # Total size: mean 76858.8 sd 1382100.2 min 544 (chrUn_DS485566v2)
     #   max 32079331 (chr3R) median 1577
 
 #############################################################################
 # pick up photo (DONE - 2014-06-06 - Chin)
     mkdir /hive/data/genomes/dm6/photo
     cd /hive/data/genomes/dm6/photo
     wget --timestamping \
 http://upload.wikimedia.org/wikipedia/commons/1/1d/Drosophila_melanogaster.jpg
 
     convert -geometry "190x300" Drosophila_melanogaster.jpg New-Drosophila_melanogaster.jpg
 
     # check in the New-Drosophila_melanogaster.jpg file into
     # the source tree:
 # -rw-rw-r-- 1 9842 Jun  6 14:55 Drosophila_melanogaster.jpg
 
     # and get that installed in /usr/local/apache/htdocs/images/
 
 #############################################################################
 #  Initial database build (DONE - 2014-08-28 - Hiram)
 
     cd /hive/data/genomes/dm6
     cat << '_EOF_' > dm6.config.ra
 # Config parameters for makeGenomeDb.pl:
 db dm6
 clade insect
 genomeCladePriority 10
 scientificName Drosophila melanogaster
 commonName D. melanogaster
 assemblyDate Aug. 2014
 assemblyLabel The FlyBase Consortium/Berkeley Drosophila Genome Project/Celera Genomics
 assemblyShortLabel BDGP Release 6 + ISO1 MT
 orderKey 9060
 # chrM mito sequence KJ947872.2 included
 mitoAcc none
 fastaFiles /hive/data/genomes/dm6/ucsc/chr*.fa
 agpFiles /hive/data/genomes/dm6/ucsc/chr*.agp
 # qualFiles /dev/null
 dbDbSpeciesDir drosophila
 photoCreditURL http://de.wikipedia.org/wiki/Benutzer:Mr.checker
 photoCreditName Wikimedia Commons/Mr.checker
 ncbiGenomeId 47
 ncbiAssemblyId 202931
 ncbiAssemblyName BDGP Release 6 + ISO1 MT
 ncbiBioProject 164
 genBankAccessionID GCA_000001215.4
 taxId 7227
 '_EOF_'
     # << happy emacs
 
     # stepwise to verify sequence and AGP file
     time makeGenomeDb.pl -workhorse=hgwdev -dbHost=hgwdev -fileServer=hgwdev \
 	-stop=seq dm6.config.ra > seq.log 2>&1
     # real    0m9.989s
 
     # verify sequence and AGP are OK:
     time makeGenomeDb.pl -workhorse=hgwdev -dbHost=hgwdev -fileServer=hgwdev \
 	-continue=agp -stop=agp dm6.config.ra > agp.log 2>&1
     # real    0m4.901s
 
     # then finish it off:
     time nice -n +19 makeGenomeDb.pl -workhorse=hgwdev -dbHost=hgwdev \
        -fileServer=hgwdev -continue=db dm6.config.ra > db.log 2>&1
     # real    1m27.449s
 
 ##########################################################################
 # running repeat masker (DONE - 2014-08-28 - Hiram)
     mkdir /hive/data/genomes/dm6/bed/repeatMasker
     cd /hive/data/genomes/dm6/bed/repeatMasker
     time  doRepeatMasker.pl -buildDir=`pwd` \
 	-bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
 	-smallClusterHub=ku dm6 > do.log 2>&1 &
     # real     44m51.83
 
     cat faSize.rmsk.txt
     # 143726002 bases (1152978 N's 142573024 real 112610723 upper
     #    29962301 lower) in 1870 sequences in 1 files
     # Total size: mean 76858.8 sd 1382100.2 min 544 (chrUn_DS485566v2)
     #   max 32079331 (chr3R) median 1577
     # %20.85 masked total, %21.02 masked real
 
     egrep -i "versi|relea" do.log
     # RepeatMasker version open-4.0.5
     #    January 31 2015 (open-4-0-5) version of RepeatMasker
     # CC   RELEASE 20140131;
 
     featureBits -countGaps dm6 rmsk
     # 29968013 bases of 143726002 (20.851%) in intersection
     # why is it different than the faSize above ?
     # because rmsk masks out some N's as well as bases, the count above
     #	separates out the N's from the bases, it doesn't show lower case N's
 
 ##########################################################################
 # running simple repeat (DONE 2014-08-28 - Hiram)
     mkdir /hive/data/genomes/dm6/bed/simpleRepeat
     cd /hive/data/genomes/dm6/bed/simpleRepeat
     time doSimpleRepeat.pl -buildDir=`pwd` -bigClusterHub=ku \
 	-dbHost=hgwdev -workhorse=hgwdev -smallClusterHub=ku \
 	dm6 > do.log 2>&1 &
     #  real    23m17.471s
 
     cat fb.simpleRepeat
     #  3925949 bases of 142573024 (2.754%) in intersection
 
     # add to rmsk after it is done:
     cd /hive/data/genomes/dm6
     twoBitMask dm6.rmsk.2bit \
 	-add bed/simpleRepeat/trfMask.bed dm6.2bit
     #	you can safely ignore the warning about fields >= 13
 
     twoBitToFa dm6.2bit stdout | faSize stdin > faSize.dm6.2bit.txt
     cat faSize.dm6.2bit.txt
     # 143726002 bases (1152978 N's 142573024 real 112531045 upper
     #   30041979 # lower) in 1870 sequences in 1 files
     # Total size: mean 76858.8 sd 1382100.2 min 544 (chrUn_DS485566v2)
     #   max 32079331 (chr3R) median 1577
     # %20.90 masked total, %21.07 masked real
 
     rm /gbdb/dm6/dm6.2bit
     ln -s `pwd`/dm6.2bit /gbdb/dm6/dm6.2bit
 
 #########################################################################
 # Verify all gaps are marked, add any N's not in gap as type 'other'
 #	(DONE 2014-08-28 - Hiram)
     mkdir /hive/data/genomes/dm6/bed/gap
     cd /hive/data/genomes/dm6/bed/gap
     time nice -n +19 findMotif -motif=gattaca -verbose=4 \
 	-strand=+ ../../dm6.unmasked.2bit > findMotif.txt 2>&1
     #	real    0m1.290s
     grep "^#GAP " findMotif.txt | sed -e "s/^#GAP //" > allGaps.bed
     time featureBits -countGaps dm6 -not gap -bed=notGap.bed
     # 143726002 bases of 143726002 (100.000%) in intersection
 
 
     featureBits dm6 -not gap -bed=notGap.bed allGaps.bed
     time featureBits -countGaps dm6 allGaps.bed notGap.bed -bed=new.gaps.bed
     # 143726002 bases of 143726002 (100.000%) in intersection
     time featureBits -countGaps dm6 allGaps.bed notGap.bed -bed=new.gaps.bed
     # 1152978 bases of 143726002 (0.802%) in intersection
 
     # there are some very large gaps that are unannotated:
     awk '{print $3-$2,$0}' new.gaps.bed  | sort -rn | head
     # 53860 chr3L     26952525        27006385        chr3L.7
     # 53800 chr3L     26882321        26936121        chr3L.6
     # 32000 chrX      22317804        22349804        chrX.5
 
     # the original 'component' AGP files have zero annotations in
     # the chromosomes.  In fact, there are no gaps annotated:
     hgsql -e 'select count(*) from gap;' dm6
 #   +----------+
 #   | count(*) |
 #   +----------+
 #   |        0 |
 #   +----------+
 
     #	what is the highest index in the existing gap table:
     hgsql -N -e "select ix from gap;" dm6 | sort -n | tail -1
     #	(no output, assume 0)
     cat << '_EOF_' > mkGap.pl
 #!/bin/env perl
 
 use strict;
 use warnings;
 
 my $ix=0;
 
 open (FH,"<new.gaps.bed") or die "can not read new.gaps.bed";
 while (my $line = <FH>) {
     my ($chrom, $chromStart, $chromEnd, $rest) = split('\s+', $line);
     ++$ix;
     printf "%s\t%d\t%d\t%d\tN\t%d\tother\tyes\n", $chrom, $chromStart,
         $chromEnd, $ix, $chromEnd-$chromStart;
 }
 close (FH);
 '_EOF_'
     # << happy emacs
     chmod +x ./mkGap.pl
     ./mkGap.pl > other.bed
     featureBits -countGaps dm6 other.bed
     #  1152978 bases of 143726002 (0.802%) in intersection
 
     wc -l other.bed
     #	572
     hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/gap.sql \
 	-noLoad dm6 otherGap other.bed
     #   Read 572 elements of size 8 from other.bed
     #	starting with this many
     hgsql -e "select count(*) from gap;" dm6
     #	 0
     hgsql dm6 -e 'load data local infile "bed.tab" into table gap;'
     #	result count:
     hgsql -e "select count(*) from gap;" dm6
     #	572
     # == 0 + 572
     # verify we aren't adding gaps where gaps already exist
     # this would output errors if that were true:
     gapToLift -minGap=1 dm6 nonBridged.lift -bedFile=nonBridged.bed
     # see example in danRer7.txt
     # there are no non-bridged gaps here:
     hgsql -N -e "select bridge from gap;" dm6 | sort | uniq -c
     #    572 yes
 
 ##########################################################################
 ## WINDOWMASKER (DONE - 2014-08-28 - Hiram)
     mkdir /hive/data/genomes/dm6/bed/windowMasker
     cd /hive/data/genomes/dm6/bed/windowMasker
     time nice -n +19 doWindowMasker.pl -buildDir=`pwd` -workhorse=hgwdev \
 	-dbHost=hgwdev dm6 > do.log 2>&1 &
     #  real    6m13.489s
 
     # Masking statistics
     cat faSize.dm6.cleanWMSdust.txt
     #  143726002 bases (1152978 N's 142573024 real 108961589 upper
     #    33611435  lower) in 1870 sequences in 1 files
     # Total size: mean 76858.8 sd 1382100.2 min 544 (chrUn_DS485566v2)
     #   max 32079331 (chr3R) median 1577
     # %23.39 masked total, %23.57 masked real
 
     # how much does this window masker and repeat masker overlap:
     # if RM finished before this got here, the answer is in:
     cat fb.dm6.rmsk.windowmaskerSdust.txt
     # 16809665 bases of 143726002 (11.696%) in intersection
 
 #############################################################################
 # cytoBandIdeo - (OBSOLETE, redone below 2014-08-28 - Hiram)
     mkdir /hive/data/genomes/dm6/bed/cytoBand
     cd /hive/data/genomes/dm6/bed/cytoBand
     makeCytoBandIdeo.csh dm6
 
 #########################################################################
 # CHROMOSOME BANDS (DONE - 2014-10-09 - Hiram)
     ssh hgwdev
     cd /cluster/data/dm6/bed/cytoBand
     # As of 5.3, the GFF now includes what we need to make cytoBandIdeo
     # directly:
 
     zgrep chromosome_band ../../flyBase/dmel-all-r6.02.gff.gz > chromBand.gff
 
     cat << '_EOF_' > gffToBand.pl
 #!/usr/bin/env perl
 
 use warnings;
 use strict;
 
 open (FH, "<chromBand.gff") or die "can not read chromBand.gff";
 while (my $line = <FH>) {
   chomp $line;
   my @w = split('\t', $line);
   $w[3]--;
   $w[3] = 0 if ($w[3] < 0);
   $w[8] =~ s/^.*Name=band-//;
   $w[8] =~ s/[;].*$//;
   if ($w[8] !~ /^\d+[A-Z]\d+$/) { next; }
   if ($w[4] <= 0) { next; }
   printf "chr%s\t%d\t%d\t%s\tn/a\n", $w[0], $w[3], $w[4], $w[8];
 }
 close (FH);
 '_EOF_'
     # << happy emacs
     chmod +x gffToBand.pl
     ./gffToBand.pl > dm6.cytoBand.tab
 
     # re-use items from the previously loaded cytoBand data to add
     # to the ones defined for the 'real' cytoBand data:
     egrep -v -w "chr2L|chr2R|chr3L|chr3R|chr4|chrX" dm6.cytoBand \
        > dm6.combined.bed
     cat dm6.cytoBand.tab >> dm6.combined.bed
 
     featureBits -bed=notCovered.bed -not -countGaps dm6 dm6.combined.bed
     awk '{printf "%s\t%d\t%d\tn/a\tgneg\n", $1, $2, $3}' notCovered.bed \
        >> dm6.combined.bed
 
     featureBits -countGaps dm6 dm6.combined.bed
 
     maxLen=`awk '{print length($1)}' dm6.combined.bed | sort -rn | head -1`
 
     sed -e "s/12/${maxLen}/;" ~/kent/src/hg/lib/cytoBand.sql > dm6.cytoBand.sql
 
     hgLoadSqlTab dm6 cytoBand dm6.cytoBand.sql dm6.combined.bed
 
     checkTableCoords -verbose=2  dm6 cytoBand
 
     hgLoadSqlTab dm6 cytoBandIdeo dm6.cytoBand.sql dm6.combined.bed
 
     checkTableCoords -verbose=2  dm6 cytoBandIdeo
 
 ##########################################################################
 # cpgIslands - (DONE - 2014-08-28 - Hiram)
     mkdir /hive/data/genomes/dm6/bed/cpgIslands
     cd /hive/data/genomes/dm6/bed/cpgIslands
     time doCpgIslands.pl -dbHost=hgwdev -bigClusterHub=ku \
       -workhorse=hgwdev -smallClusterHub=ku dm6 > do.log 2>&1 &
     #  real    35m11.446s
 
     cat fb.dm6.cpgIslandExt.txt
     #   15067148 bases of 142573024 (10.568%) in intersection
 
 ##############################################################################
 # cpgIslands on UNMASKED sequence (DONE - 2014-08-28 - Hiram)
     mkdir /hive/data/genomes/dm6/bed/cpgIslandsUnmasked
     cd /hive/data/genomes/dm6/bed/cpgIslandsUnmasked
 
     # run stepwise so the loading can be done in a different table
     time doCpgIslands.pl -dbHost=hgwdev -bigClusterHub=ku -buildDir=`pwd` \
        -tableName=cpgIslandExtUnmasked \
           -maskedSeq=/hive/data/genomes/dm6/dm6.unmasked.2bit \
              -workhorse=hgwdev -smallClusterHub=ku dm6 > do.log 2>&1 &
     #  real    40m18.250s
 
     cat fb.dm6.cpgIslandExtUnmasked.txt
     # 21347147 bases of 142573024 (14.973%) in intersection
 
 #########################################################################
 # genscan - (DONE 2014-08-28 - Hiram)
     mkdir /hive/data/genomes/dm6/bed/genscan
     cd /hive/data/genomes/dm6/bed/genscan
     time doGenscan.pl -buildDir=`pwd` -workhorse=hgwdev -dbHost=hgwdev \
       -bigClusterHub=ku dm6 > do.log 2>&1  &
     # real    35m23.353s
 
     cat fb.dm6.genscan.txt
     #  24277318 bases of 142573024 (17.028%) in intersection
 
     cat fb.dm6.genscanSubopt.txt
     #  676185 bases of 142573024 (0.474%) in intersection
 
 ########################################################################
 # MAKE 11.OOC FILE FOR BLAT/GENBANK (DONE 2014-08-28 - Hiram)
     # Use -repMatch=100 (based on size -- for human we use 1024, and
     # fly size is ~4.96% of human judging by gapless dm1 genome size from
     # featureBits -- we would use 50, but bump that up to 100
     # a bit to be more  conservative).
     #  143726002 bases (1152978 N's 142573024 real 108961589 upper
     calc \( 142573024 / 2897316137 \) \* 1024
     # ( 142573024 / 2897316137 ) * 1024 = 50.389661
     # round up to 100
 
     cd /hive/data/genomes/dm6
     blat dm6.2bit /dev/null /dev/null -tileSize=11 \
       -makeOoc=jkStuff/dm6.11.ooc -repMatch=100
     #	Wrote 4574 overused 11-mers to jkStuff/dm6.11.ooc
 
     # there are *only* bridged gaps, no lift file needed for genbank
     # Ignore the 2 yes non-bridged gap
     hgsql -N -e "select bridge from gap;" dm6 | sort | uniq -c
     #    572 yes
 
 #########################################################################
 # MAKE 11.OOC FILE FOR BLAT (DONE 2014-06-07 - Chin)
 # dm3 11.ooc is needed to lift flyBase from dm3 to dm6 for encode3 submission
 
     # Use -repMatch=100 (based on size -- for human we use 1024, and
     # fly size is ~4.4% of human judging by gapless dm1 genome size from
     # featureBits -- we would use 45, but bump that up a bit to be more
     # conservative).
     cd /hive/data/genomes/dm3
     blat dm3.2bit /dev/null /dev/null -tileSize=11 \
       -makeOoc=jkStuff/dm3.11.ooc -repMatch=100
     # Wrote 7657 overused 11-mers to /hive/data/genomes/dm3/11.ooc
 
 #############################################################################
 # dm6 LIFTOVER to dm3 (DONE - 2015-01-20 - Hiram)
     sh hgwdev
     mkdir /hive/data/genomes/dm6/bed/blat.dm3.2015-01-20
     cd /hive/data/genomes/dm6/bed/blat.dm3.2015-01-20
 
     # -debug run to create run dir, preview scripts...
     doSameSpeciesLiftOver.pl -verbose=2 \
         -debug -ooc=/hive/data/genomes/dm6/jkStuff/dm6.11.ooc \
           -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
          dm6 dm3
 
     # if that appears OK, then running:
     time doSameSpeciesLiftOver.pl -verbose=2 \
         -ooc=/hive/data/genomes/dm6/jkStuff/dm6.11.ooc \
           -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
          dm6 dm3 > do.log 2>&1
     # about 36 minutes
 
     # verify the convert link on the browser is now active from hg19 to hg38
 
 #############################################################################
 # dm3 LIFTOVER to dm6 (DONE - 2014-08-28 - Chin)
 # will move steps to dm3.txt when dm6 is released.
     sh hgwdev
     cd /hive/data/genomes/dm3
     ln -s jkStuff/dm3.11.ooc 11.ooc
 
     # -debug run to create run dir, preview scripts...
     doSameSpeciesLiftOver.pl -debug dm3 dm6
     cd /hive/data/genomes/dm3/bed/blat.dm6.2014-08-28
 
     time nice -n +19 doSameSpeciesLiftOver.pl -verbose=2 \
         -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
          dm3 dm6 > do.log 2>&1 &
     # real    22m18.988s
 
 #########################################################################
 # LIFTOVER FLYBASE 5.12 ANNOTATIONS FROM DM3 (working 2014-06-09 - Chin)
 # NOTE:
 # In order to make flyBase gene ui to work, more steps are needed, other wise,
 # hgc will shows errors like "Doh, no go for CG3727-RB from flyBasePep"
 # all the time. Major objevtive here is to create flyBaseGene tables
 # so that we can do featureBits on it for encode3 submission.
     ssh hgwdev
     mkdir /hive/data/genomes/dm6/bed/flybase5.12.liftOver
     cd /hive/data/genomes/dm6/bed/flybase5.12.liftOver
     hgsql dm3 -N -e 'select * from flyBaseGene' | cut -f2- > dm3.flyBaseGene.tab
     liftOver -genePred dm3.flyBaseGene.tab \
       /hive/data/genomes/dm3/bed/liftOver/dm3ToDm6.over.chain.gz \
       flyBaseLiftGene.tab flyBaseLiftGene.unmapped.tab
     wc -l *.tab
     #  21243 dm3.flyBaseGene.tab
     #  21216 flyBaseLiftGene.tab
     #     54 flyBaseLiftGene.unmapped.tab
 
     ldHgGene -predTab dm6 flyBaseGene flyBaseLiftGene.tab
 
 ############################################################################
 # set same default position asm dm3 (DONE - 2014-08-28 - Hiram)
     hgsql -e \
 'update dbDb set defaultPos="chr2L:826,001-851,000" where name="dm6";' \
      hgcentraltest
 
 ############################################################################
 # create ucscToINSDC name mapping (DONE - 2014-08-29 - Hiram)
     mkdir /hive/data/genomes/dm6/bed/ucscToINSDC
     cd /hive/data/genomes/dm6/bed/ucscToINSDC
 
     # this script has been maturing over time, it is close to complete.
     # to find a latest copy of it:
     # ls -ogrt /hive/data/genomes/*/bed/ucscToINSDC/translateNames.sh
 
     cp /hive/data/genomes/rn6/bed/ucscToINSDC/translateNames.sh .
     # needed to be fixed up here for new genbank organization
 
     # to see chrM accession:
     hgsql -e 'select * from gold;' dm6 | grep chrM
     # use that accession here:
     ./translateNames.sh KJ947872.2
 
     awk '{printf "%s\t0\t%d\n", $1,$2}' ../../chrom.sizes | sort \
         > name.coordinate.tab
 
     join name.coordinate.tab ucscToINSDC.txt | tr '[ ]' '[\t]' > ucscToINSDC.bed
 
     cut -f1 ucscToINSDC.bed | awk '{print length($0)}' | sort -n | tail -1
 # 22
 
     # use the 22 in this sed:
     sed -e "s/21/22/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \
         | hgLoadSqlTab dm6 ucscToINSDC stdin ucscToINSDC.bed
     checkTableCoords dm6 ucscToINSDC
     # should cover all bases
     featureBits -countGaps dm6 ucscToINSDC
     # 143726002 bases of 143726002 (100.000%) in intersection
 
 #########################################################################
 # fixup search rule for assembly track/gold table (DONE - 2014-07-09 - Hiram)
    export maxLen=`hgsql -N -e 'select frag from gold;' dm6 | awk '{print length($0)}' | sort -run | head -1`
 
    echo $maxLen
    #  10
 
 export C=1
 while [ $C -le $maxLen ];
 do
   echo -n " $C: "
   hgsql -N -e 'select frag from gold;' dm6 | sort -u \
     | awk '{ print substr($0,'$C',1) }' | sort -u | xargs echo | sed -e 's/ //g'
   C=`echo $C | awk '{print $1+1}'`
 done
 #  1: ACDK
 #  2: EJPS
 #  3: 049
 #  4: 0148
 #  5: 34567
 #  6: 0123456789
 #  7: 0123456789
 #  8: 0123456789
 #  9: .
 # 10: 123456
 
     # since we have the composite fragment IDs, there are a variety of
     # dot versions:
     hgsql -N -e 'select frag from gold;' dm6 | sed -e 's/.*\.//' \
        | sort | uniq -c  | sort -rn
 #  1861 1
 #     3 5
 #     3 2
 #     1 6
 #     1 4
 #     1 3
 
     # hence, add to trackDb/drosophila/dm6/trackDb.ra
 
 searchTable gold
 searchMethod prefix
 searchType bed
 shortCircuit 1
 termRegex [ACDK][EJPS][0-9]+(\.[0-9]+)*
 query select chrom,chromStart,chromEnd,frag from %s where frag like '%s%%'
 searchPriority 8
 
   # test pattern:
    hgsql -N -e 'select frag from gold;' dm6 | wc -l
    # 1870
    hgsql -N -e 'select frag from gold;' dm6 \
      | egrep -e '[ACDK][EJPS][0-9]+(\.[0-9]+)*' | wc -l
    # 1870
    hgsql -N -e 'select frag from gold;' dm6 \
      | egrep -v -e '[ACDK][EJPS][0-9]+(\.[0-9]+)*' | wc -l
    # 0
 
 ############################################################################
 # AUTO UPDATE GENBANK (DONE - 2012-05-04 - Hiram)
     # examine the file:
     /cluster/data/genbank/data/organism.lst
     # for your species to see what counts it has for:
 # organism       mrnaCnt estCnt  refSeqCnt
 # Drosophila melanogaster	61221	821011	30813
 
     # to decide which "native" mrna or ests you want to specify in genbank.conf
 
     ssh hgwdev
     cd $HOME/kent/src/hg/makeDb/genbank
     git pull
     # edit etc/genbank.conf to add before dm3 section::
 
 # dm6 (D. melanogaster)
 dm6.serverGenome = /hive/data/genomes/dm6/dm6.2bit
 dm6.clusterGenome = /hive/data/genomes/dm6/dm6.2bit
 dm6.ooc = /hive/data/genomes/dm6/jkStuff/dm6.11.ooc
 dm6.lift = no
 dm6.refseq.mrna.native.pslCDnaFilter  = ${finished.refseq.mrna.native.pslCDnaFilter}
 dm6.refseq.mrna.xeno.pslCDnaFilter    = ${finished.refseq.mrna.xeno.pslCDnaFilter}
 dm6.genbank.mrna.native.pslCDnaFilter = ${finished.genbank.mrna.native.pslCDnaFilter}
 dm6.genbank.mrna.xeno.pslCDnaFilter   = ${finished.genbank.mrna.xeno.pslCDnaFilter}
 dm6.genbank.est.native.pslCDnaFilter  = ${finished.genbank.est.native.pslCDnaFilter}
 dm6.genbank.mrna.xeno.load = yes
 # dm6.upstreamGeneTbl = refGene
 # dm6.upstreamMaf = multiz15way
 # /hive/data/genomes/dm3/bed/multiz15way/species.lst
 
     git commit -m "adding dm6 fly refs #4049" etc/genbank.conf
     git push
     make etc-update
 
     ssh hgwdev			# used to do this on "genbank" machine
     screen -S dm6           # long running job managed in screen
     cd /cluster/data/genbank
     time nice -n +19 ./bin/gbAlignStep -initial dm6
     #	logFile: var/build/logs/2014.08.29-10:41:04.dm6.initalign.log
     #   real    199m22.304s
 
     # load database when finished
     ssh hgwdev
     cd /cluster/data/genbank
     time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad dm6 &
     #	logFile: var/dbload/hgwdev/logs/2014.09.03-15:51:49.dm6.dbload.log
     #   real    19m24.163s
 
     # enable daily alignment and update of hgwdev (DONE - 2012-02-09 - Hiram)
     cd ~/kent/src/hg/makeDb/genbank
     git pull
     # add dm6 to:
         etc/align.dbs etc/hgwdev.dbs
     git commit -m "Added dm6 refs #4049" etc/align.dbs etc/hgwdev.dbs
     git push
     make etc-update
 
 #########################################################################
 #  BLATSERVERS ENTRY (DONE - 2014-08-29 - Hiram)
 #	After getting a blat server assigned by the Blat Server Gods,
     ssh hgwdev
 
     hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
 	VALUES ("dm6", "blat4c", "17858", "1", "0"); \
 	INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
 	VALUES ("dm6", "blat4c", "17859", "0", "1");' \
 	    hgcentraltest
     #	test it with some sequence
 
 ##############################################################################
 # LASTZ D. ananassae DroAna3 (DONE 2014-08-29 - Hiram)
     screen -S droAna3-dm6
     mkdir /hive/data/genomes/dm6/bed/lastzDroAna3.2014-08-29
     cd /hive/data/genomes/dm6/bed/lastzDroAna3.2014-08-29
 
     cat << '_EOF_' > DEF
 # D. melanogaster vs. D. ananassae
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=4000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET - D. melanogaster
 SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
 SEQ1_CHUNK=5000000
 SEQ1_LAP=10000
 SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes
 
 # QUERY - D. ananassae
 SEQ2_DIR=/hive/data/genomes/droAna3/droAna3.2bit
 SEQ2_CHUNK=5000000
 SEQ2_LAP=10000
 SEQ2_LEN=/hive/data/genomes/droAna3/chrom.sizes
 
 BASE=/hive/data/genomes/dm6/bed/lastzDroAna3.2014-08-29
 TMPDIR=/dev/shm
 '_EOF_'
     # << this line keeps emacs coloring happy
     time doBlastzChainNet.pl `pwd`/DEF \
         -syntenicNet -fileServer=hgwdev \
         -chainMinScore=3000  -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
     # real    64m19.065s
 
     cat fb.dm6.chainDroAna3Link.txt
     # 95925289 bases of 142573024 (67.282%) in intersection
 
     #   running the swap
     mkdir /hive/data/genomes/droAna3/bed/blastz.dm6.swap
     cd /hive/data/genomes/droAna3/bed/blastz.dm6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/dm6/bed/lastzDroAna3.2014-08-29/DEF \
         -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
     # real    10m43.121s
     cat fb.droAna3.chainDm6Link.txt
     # 125698535 bases of 213918817 (58.760%) in intersection
 
 ###########################################################################
 # LASTZ D. sechellia DroSec1 (DONE 2014-08-29 - Hiram)
     screen -S droSec1-dm6
     mkdir /hive/data/genomes/dm6/bed/lastzDroSec1.2014-08-29
     cd /hive/data/genomes/dm6/bed/lastzDroSec1.2014-08-29
 
     cat << '_EOF_' > DEF
 # D. melanogaster vs. D. sechellia
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=4000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET - D. melanogaster
 SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
 SEQ1_CHUNK=5000000
 SEQ1_LAP=10000
 SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes
 
 # QUERY - D. sechellia
 SEQ2_DIR=/hive/data/genomes/droSec1/droSec1.2bit
 SEQ2_CHUNK=5000000
 SEQ2_LAP=10000
 SEQ2_LEN=/hive/data/genomes/droSec1/chrom.sizes
 
 BASE=/hive/data/genomes/dm6/bed/lastzDroSec1.2014-08-29
 TMPDIR=/dev/shm
 '_EOF_'
     # << happy emacs
 
     time doBlastzChainNet.pl `pwd`/DEF \
         -syntenicNet -fileServer=hgwdev \
         -chainMinScore=3000  -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
     # real    47m43.186s
     cat fb.dm6.chainDroSec1Link.txt
     # 117709624 bases of 142573024 (82.561%) in intersection
 
     #   running the swap
     mkdir /hive/data/genomes/droSec1/bed/blastz.dm6.swap
     cd /hive/data/genomes/droSec1/bed/blastz.dm6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/dm6/bed/lastzDroSec1.2014-08-29/DEF \
         -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
     # real    9m56.239s
     cat fb.droSec1.chainDm6Link.txt
     # 128262529 bases of 157238575 (81.572%) in intersection
 
 ###########################################################################
 # LASTZ D. simulans DroSim1 (DONE 2014-08-29 - Hiram)
     screen -S droSim1-dm6
     mkdir /hive/data/genomes/dm6/bed/lastzDroSim1.2014-08-29
     cd /hive/data/genomes/dm6/bed/lastzDroSim1.2014-08-29
 
     cat << '_EOF_' > DEF
 # D. melanogaster vs. D. simulans
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=4000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET - D. melanogaster
 SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
 SEQ1_CHUNK=5000000
 SEQ1_LAP=10000
 SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes
 
 # QUERY - D. simulans
 SEQ2_DIR=/hive/data/genomes/droSim1/droSim1.2bit
 SEQ2_CHUNK=5000000
 SEQ2_LAP=10000
 SEQ2_LEN=/hive/data/genomes/droSim1/chrom.sizes
 
 BASE=/hive/data/genomes/dm6/bed/lastzDroSim1.2014-08-29
 TMPDIR=/dev/shm
 '_EOF_'
     # << happy emacs
 
     time doBlastzChainNet.pl `pwd`/DEF \
         -syntenicNet -fileServer=hgwdev \
         -chainMinScore=3000  -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
     # real    39m35.348s
     cat fb.dm6.chainDroSim1Link.txt
     # 114117667 bases of 142573024 (80.042%) in intersection
 
     #   running the swap
     mkdir /hive/data/genomes/droSim1/bed/blastz.dm6.swap
     cd /hive/data/genomes/droSim1/bed/blastz.dm6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/dm6/bed/lastzDroSim1.2014-08-29/DEF \
         -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
     # real    6m50.918s
     cat fb.droSim1.chainDm6Link.txt
     # 113676741 bases of 127256433 (89.329%) in intersection
 
 ###########################################################################
 # LASTZ D. erecta DroEre2 (DONE 2014-08-29 - Hiram)
     screen -S droEre2-dm6
     mkdir /hive/data/genomes/dm6/bed/lastzDroEre2.2014-08-29
     cd /hive/data/genomes/dm6/bed/lastzDroEre2.2014-08-29
 
     cat << '_EOF_' > DEF
 # D. melanogaster vs. D. erecta
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=4000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET - D. melanogaster
 SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
 SEQ1_CHUNK=5000000
 SEQ1_LAP=10000
 SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes
 
 # QUERY - D. erecta
 SEQ2_DIR=/hive/data/genomes/droEre2/droEre2.2bit
 SEQ2_CHUNK=5000000
 SEQ2_LAP=10000
 SEQ2_LEN=/hive/data/genomes/droEre2/chrom.sizes
 
 BASE=/hive/data/genomes/dm6/bed/lastzDroEre2.2014-08-29
 TMPDIR=/dev/shm
 '_EOF_'
     # << happy emacs
 
     time doBlastzChainNet.pl `pwd`/DEF \
         -syntenicNet -fileServer=hgwdev \
         -chainMinScore=3000  -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
     # real    19m31.724s
     cat fb.dm6.chainDroEre2Link.txt
     # 114091585 bases of 142573024 (80.023%) in intersection
 
     #   running the swap
     mkdir /hive/data/genomes/droEre2/bed/blastz.dm6.swap
     cd /hive/data/genomes/droEre2/bed/blastz.dm6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/dm6/bed/lastzDroEre2.2014-08-29/DEF \
         -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
     # real    8m35.520s
     cat fb.droEre2.chainDm6Link.txt
     # 122430082 bases of 145084019 (84.386%) in intersection
 
 ###########################################################################
 # LASTZ D. grimshawi DroGri2 (DONE 2014-08-29 - Hiram)
     screen -S droGri2-dm6
     mkdir /hive/data/genomes/dm6/bed/lastzDroGri2.2014-08-29
     cd /hive/data/genomes/dm6/bed/lastzDroGri2.2014-08-29
 
     cat << '_EOF_' > DEF
 # D. melanogaster vs. D. grimshawi
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=4000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET - D. melanogaster
 SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
 SEQ1_CHUNK=5000000
 SEQ1_LAP=10000
 SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes
 
 # QUERY - D. grimshawi
 SEQ2_DIR=/hive/data/genomes/droGri2/droGri2.2bit
 SEQ2_CHUNK=5000000
 SEQ2_LAP=10000
 SEQ2_LEN=/hive/data/genomes/droGri2/chrom.sizes
 
 BASE=/hive/data/genomes/dm6/bed/lastzDroGri2.2014-08-29
 TMPDIR=/dev/shm
 '_EOF_'
     # << happy emacs
 
     time doBlastzChainNet.pl `pwd`/DEF \
         -syntenicNet -fileServer=hgwdev \
         -chainMinScore=3000  -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
     # real    43m54.742s
     cat fb.dm6.chainDroGri2Link.txt
     # 72682996 bases of 142573024 (50.979%) in intersection
 
     #   running the swap
     mkdir /hive/data/genomes/droGri2/bed/blastz.dm6.swap
     cd /hive/data/genomes/droGri2/bed/blastz.dm6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/dm6/bed/lastzDroGri2.2014-08-29/DEF \
         -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
     # real    6m15.091s
     cat fb.droGri2.chainDm6Link.txt
     # 79673484 bases of 186090669 (42.814%) in intersection
 
 ###########################################################################
 # LASTZ D. persimilis DroPer1 (DONE 2014-08-29 - Hiram)
     screen -S droPer1-dm6
     mkdir /hive/data/genomes/dm6/bed/lastzDroPer1.2014-08-29
     cd /hive/data/genomes/dm6/bed/lastzDroPer1.2014-08-29
 
     cat << '_EOF_' > DEF
 # D. melanogaster vs. D. persimilis
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=4000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET - D. melanogaster
 SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
 SEQ1_CHUNK=5000000
 SEQ1_LAP=10000
 SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes
 
 # QUERY - D. persimilis
 SEQ2_DIR=/hive/data/genomes/droPer1/droPer1.2bit
 SEQ2_CHUNK=5000000
 SEQ2_LAP=10000
 SEQ2_LEN=/hive/data/genomes/droPer1/chrom.sizes
 
 BASE=/hive/data/genomes/dm6/bed/lastzDroPer1.2014-08-29
 TMPDIR=/dev/shm
 '_EOF_'
     # << happy emacs
 
     time doBlastzChainNet.pl `pwd`/DEF \
         -syntenicNet -fileServer=hgwdev \
         -chainMinScore=3000  -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
     # real    49m9.697s
     cat fb.dm6.chainDroPer1Link.txt
     # 84187839 bases of 142573024 (59.049%) in intersection
 
     #   running the swap
     mkdir /hive/data/genomes/droPer1/bed/blastz.dm6.swap
     cd /hive/data/genomes/droPer1/bed/blastz.dm6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/dm6/bed/lastzDroPer1.2014-08-29/DEF \
         -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
     # real    13m28.190s
     cat fb.droPer1.chainDm6Link.txt
     # 92967575 bases of 175583556 (52.948%) in intersection
 
 ###########################################################################
 # LASTZ D. virilis DroVir3 (DONE 2014-08-29 - Hiram)
     screen -S droVir3-dm6
     mkdir /hive/data/genomes/dm6/bed/lastzDroVir3.2014-08-29
     cd /hive/data/genomes/dm6/bed/lastzDroVir3.2014-08-29
 
     cat << '_EOF_' > DEF
 # D. melanogaster vs. D. virilis
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=4000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET - D. melanogaster
 SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
 SEQ1_CHUNK=5000000
 SEQ1_LAP=10000
 SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes
 
 # QUERY - D. virilis
 SEQ2_DIR=/hive/data/genomes/droVir3/droVir3.2bit
 SEQ2_CHUNK=5000000
 SEQ2_LAP=10000
 SEQ2_LEN=/hive/data/genomes/droVir3/chrom.sizes
 
 BASE=/hive/data/genomes/dm6/bed/lastzDroVir3.2014-08-29
 TMPDIR=/dev/shm
 '_EOF_'
     # << happy emacs
 
     time doBlastzChainNet.pl `pwd`/DEF \
         -syntenicNet -fileServer=hgwdev \
         -chainMinScore=3000  -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
     # real    53m38.503s
     cat fb.dm6.chainDroVir3Link.txt
     # 74048111 bases of 142573024 (51.937%) in intersection
 
     #   running the swap
     mkdir /hive/data/genomes/droVir3/bed/blastz.dm6.swap
     cd /hive/data/genomes/droVir3/bed/blastz.dm6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/dm6/bed/lastzDroVir3.2014-08-29/DEF \
         -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
     # real    6m56.701s
     cat fb.droVir3.chainDm6Link.txt
     # 82924532 bases of 189205863 (43.828%) in intersection
 
 ###########################################################################
 # LASTZ T. castaneum TriCas2 (DONE 2014-08-29 - Hiram)
     screen -S triCas2-dm6
     mkdir /hive/data/genomes/dm6/bed/lastzTriCas2.2014-08-29
     cd /hive/data/genomes/dm6/bed/lastzTriCas2.2014-08-29
 
     cat << '_EOF_' > DEF
 # D. melanogaster vs. T. castaneum
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=4000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET - D. melanogaster
 SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
 SEQ1_CHUNK=5000000
 SEQ1_LAP=10000
 SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes
 
 # QUERY - T. castaneum
 SEQ2_DIR=/hive/data/genomes/triCas2/triCas2.2bit
 SEQ2_CHUNK=5000000
 SEQ2_LAP=10000
 SEQ2_LEN=/hive/data/genomes/triCas2/chrom.sizes
 
 BASE=/hive/data/genomes/dm6/bed/lastzTriCas2.2014-08-29
 TMPDIR=/dev/shm
 '_EOF_'
     # << happy emacs
 
     time doBlastzChainNet.pl `pwd`/DEF \
         -syntenicNet -fileServer=hgwdev \
         -chainMinScore=3000  -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
     # real    43m49.065s
     cat fb.dm6.chainTriCas2Link.txt
     # 20091664 bases of 142573024 (14.092%) in intersection
 
     #   running the swap
     mkdir /hive/data/genomes/triCas2/bed/blastz.dm6.swap
     cd /hive/data/genomes/triCas2/bed/blastz.dm6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/dm6/bed/lastzTriCas2.2014-08-29/DEF \
         -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
     # real    1m48.420s
     cat fb.triCas2.chainDm6Link.txt
     # 22441942 bases of 151549969 (14.808%) in intersection
 
 ###########################################################################
 # LASTZ A. gambiae AnoGam1 (DONE 2014-08-29 - Hiram)
     screen -S anoGam1-dm6
     mkdir /hive/data/genomes/dm6/bed/lastzAnoGam1.2014-08-29
     cd /hive/data/genomes/dm6/bed/lastzAnoGam1.2014-08-29
 
     cat << '_EOF_' > DEF
 # D. melanogaster vs. D. sechellia
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=4000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET - D. melanogaster
 SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
 SEQ1_CHUNK=5000000
 SEQ1_LAP=10000
 SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes
 
 # QUERY - A. gambiae
 SEQ2_DIR=/hive/data/genomes/anoGam1/anoGam1.2bit
 SEQ2_CHUNK=5000000
 SEQ2_LAP=10000
 SEQ2_LEN=/hive/data/genomes/anoGam1/chrom.sizes
 
 BASE=/hive/data/genomes/dm6/bed/lastzAnoGam1.2014-08-29
 TMPDIR=/dev/shm
 '_EOF_'
     # << happy emacs
 
     time doBlastzChainNet.pl `pwd`/DEF \
         -syntenicNet -fileServer=hgwdev \
         -chainMinScore=3000  -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
     # real    85m43.807s
     cat fb.dm6.chainAnoGam1Link.txt
     # 20604473 bases of 142573024 (14.452%) in intersection
 
     #   running the swap
     mkdir /hive/data/genomes/anoGam1/bed/blastz.dm6.swap
     cd /hive/data/genomes/anoGam1/bed/blastz.dm6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/dm6/bed/lastzAnoGam1.2014-08-29/DEF \
         -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
     # real    2m1.764s
     cat fb.anoGam1.chainDm6Link.txt
     # 22154872 bases of 278268413 (7.962%) in intersection
 
 ###########################################################################
 # LASTZ D. pseudoobscura DroPse3 (DONE 2014-08-29 - Hiram)
     screen -S droPse3-dm6
     mkdir /hive/data/genomes/dm6/bed/lastzDroPse3.2014-08-29
     cd /hive/data/genomes/dm6/bed/lastzDroPse3.2014-08-29
 
     cat << '_EOF_' > DEF
 # D. melanogaster vs. D. pseudoobscura
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=4000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET - D. melanogaster
 SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
 SEQ1_CHUNK=5000000
 SEQ1_LAP=10000
 SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes
 
 # QUERY - D. pseudoobscura
 SEQ2_DIR=/hive/data/genomes/droPse3/droPse3.2bit
 SEQ2_CHUNK=5000000
 SEQ2_LAP=10000
 SEQ2_LEN=/hive/data/genomes/droPse3/chrom.sizes
 
 BASE=/hive/data/genomes/dm6/bed/lastzDroPse3.2014-08-29
 TMPDIR=/dev/shm
 '_EOF_'
     # << happy emacs
 
     time doBlastzChainNet.pl `pwd`/DEF \
         -syntenicNet -fileServer=hgwdev \
         -chainMinScore=3000  -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
     # real    54m34.163s
     cat fb.dm6.chainDroPse3Link.txt
     # 85310450 bases of 142573024 (59.836%) in intersection
 
     #   running the swap
     mkdir /hive/data/genomes/droPse3/bed/blastz.dm6.swap
     cd /hive/data/genomes/droPse3/bed/blastz.dm6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/dm6/bed/lastzDroPse3.2014-08-29/DEF \
         -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
     # real    6m27.317s
     cat fb.droPse3.chainDm6Link.txt
     # 89188686 bases of 149047912 (59.839%) in intersection
 
 ###########################################################################
 # LASTZ D. willistoni DroWil2 (DONE 2014-08-29 - Hiram)
     screen -S droWil2-dm6
     mkdir /hive/data/genomes/dm6/bed/lastzDroWil2.2014-08-29
     cd /hive/data/genomes/dm6/bed/lastzDroWil2.2014-08-29
 
     cat << '_EOF_' > DEF
 # D. melanogaster vs. D. willistoni
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=4000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET - D. melanogaster
 SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
 SEQ1_CHUNK=5000000
 SEQ1_LAP=10000
 SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes
 
 # QUERY - D. willistoni
 SEQ2_DIR=/hive/data/genomes/droWil2/droWil2.2bit
 SEQ2_CHUNK=5000000
 SEQ2_LAP=10000
 SEQ2_LEN=/hive/data/genomes/droWil2/chrom.sizes
 
 BASE=/hive/data/genomes/dm6/bed/lastzDroWil2.2014-08-29
 TMPDIR=/dev/shm
 '_EOF_'
     # << happy emacs
 
     time doBlastzChainNet.pl `pwd`/DEF \
         -syntenicNet -fileServer=hgwdev \
         -chainMinScore=3000  -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
     # real    12m46.450s
     cat fb.dm6.chainDroWil2Link.txt
     # 73328691 bases of 142573024 (51.432%) in intersection
 
     #   running the swap
     mkdir /hive/data/genomes/droWil2/bed/blastz.dm6.swap
     cd /hive/data/genomes/droWil2/bed/blastz.dm6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/dm6/bed/lastzDroWil2.2014-08-29/DEF \
         -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
     # real    7m21.824s
     cat fb.droWil2.chainDm6Link.txt
     # 90409687 bases of 223610028 (40.432%) in intersection
 
 ###########################################################################
 # LASTZ D. yakuba DroYak3 (DONE 2014-08-29 - Hiram)
     screen -S droYak3-dm6
     mkdir /hive/data/genomes/dm6/bed/lastzDroYak3.2014-08-29
     cd /hive/data/genomes/dm6/bed/lastzDroYak3.2014-08-29
 
     cat << '_EOF_' > DEF
 # D. melanogaster vs. D. yakuba
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=4000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET - D. melanogaster
 SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
 SEQ1_CHUNK=5000000
 SEQ1_LAP=10000
 SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes
 
 # QUERY - D. yakuba
 SEQ2_DIR=/hive/data/genomes/droYak3/droYak3.2bit
 SEQ2_CHUNK=5000000
 SEQ2_LAP=10000
 SEQ2_LEN=/hive/data/genomes/droYak3/chrom.sizes
 
 BASE=/hive/data/genomes/dm6/bed/lastzDroYak3.2014-08-29
 TMPDIR=/dev/shm
 '_EOF_'
     # << happy emacs
 
     time doBlastzChainNet.pl `pwd`/DEF \
         -syntenicNet -fileServer=hgwdev \
         -chainMinScore=3000  -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
     # real    68m39.923s
     cat fb.dm6.chainDroYak3Link.txt
     # 114859096 bases of 142573024 (80.562%) in intersection
 
     #   running the swap
     mkdir /hive/data/genomes/droYak3/bed/blastz.dm6.swap
     cd /hive/data/genomes/droYak3/bed/blastz.dm6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/dm6/bed/lastzDroYak3.2014-08-29/DEF \
         -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
     # real    8m13.655s
     cat fb.droYak3.chainDm6Link.txt
     # 131647225 bases of 162598019 (80.965%) in intersection
 
 ###########################################################################
 # LASTZ A. mellifera ApiMel4 (DONE 2014-08-29 - Hiram)
     screen -S apiMel4-dm6
     mkdir /hive/data/genomes/dm6/bed/lastzApiMel4.2014-08-29
     cd /hive/data/genomes/dm6/bed/lastzApiMel4.2014-08-29
 
     cat << '_EOF_' > DEF
 # D. melanogaster vs. A. mellifera
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=4000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET - D. melanogaster
 SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
 SEQ1_CHUNK=5000000
 SEQ1_LAP=10000
 SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes
 
 # QUERY - A. mellifera
 SEQ2_DIR=/hive/data/genomes/apiMel4/apiMel4.2bit
 SEQ2_CHUNK=5000000
 SEQ2_LAP=10000
 SEQ2_LEN=/hive/data/genomes/apiMel4/chrom.sizes
 
 BASE=/hive/data/genomes/dm6/bed/lastzApiMel4.2014-08-29
 TMPDIR=/dev/shm
 '_EOF_'
     # << happy emacs
 
     time doBlastzChainNet.pl `pwd`/DEF \
         -syntenicNet -fileServer=hgwdev \
         -chainMinScore=3000  -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
     # real    42m58.632s
     cat fb.dm6.chainApiMel4Link.txt
     # 11667011 bases of 142573024 (8.183%) in intersection
 
     #   running the swap
     mkdir /hive/data/genomes/apiMel4/bed/blastz.dm6.swap
     cd /hive/data/genomes/apiMel4/bed/blastz.dm6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/dm6/bed/lastzApiMel4.2014-08-29/DEF \
         -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
     # real    1m19.091s
     cat fb.apiMel4.chainDm6Link.txt
     # 12280818 bases of 229123650 (5.360%) in intersection
 
 ###########################################################################
 # LASTZ D. mojavensis DroMoj3 (DONE 2014-08-29 - Hiram)
     screen -S droMoj3-dm6
     mkdir /hive/data/genomes/dm6/bed/lastzDroMoj3.2014-08-29
     cd /hive/data/genomes/dm6/bed/lastzDroMoj3.2014-08-29
 
     cat << '_EOF_' > DEF
 # D. melanogaster vs. D. mojavensis
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=4000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET - D. melanogaster
 SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
 SEQ1_CHUNK=5000000
 SEQ1_LAP=10000
 SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes
 
 # QUERY - D. mojavensis
 SEQ2_DIR=/hive/data/genomes/droMoj3/droMoj3.2bit
 SEQ2_CHUNK=5000000
 SEQ2_LAP=10000
 SEQ2_LEN=/hive/data/genomes/droMoj3/chrom.sizes
 
 BASE=/hive/data/genomes/dm6/bed/lastzDroMoj3.2014-08-29
 TMPDIR=/dev/shm
 '_EOF_'
     # << happy emacs
 
     time doBlastzChainNet.pl `pwd`/DEF \
         -syntenicNet -fileServer=hgwdev \
         -chainMinScore=3000  -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
     # real    76m3.636s
     cat fb.dm6.chainDroMoj3Link.txt
     # 71474162 bases of 142573024 (50.132%) in intersection
 
     #   running the swap
     mkdir /hive/data/genomes/droMoj3/bed/blastz.dm6.swap
     cd /hive/data/genomes/droMoj3/bed/blastz.dm6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/dm6/bed/lastzDroMoj3.2014-08-29/DEF \
         -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
     # real    5m40.109s
     cat fb.droMoj3.chainDm6Link.txt
     # 74578701 bases of 180207831 (41.385%) in intersection
 
 ###########################################################################
 # LASTZ D. pseudoobscura DP4 (Done 2014-08-29 - Hiram)
     screen -S dp4-dm6
     mkdir /hive/data/genomes/dm6/bed/lastzDP4.2014-08-29
     cd /hive/data/genomes/dm6/bed/lastzDP4.2014-08-29
 
     cat << '_EOF_' > DEF
 # D. melanogaster vs. D. pseudoobscura
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=4000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET - D. melanogaster
 SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
 SEQ1_CHUNK=5000000
 SEQ1_LAP=10000
 SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes
 
 # QUERY - D. pseudoobscura
 SEQ2_DIR=/hive/data/genomes/dp4/dp4.2bit
 SEQ2_CHUNK=5000000
 SEQ2_LAP=10000
 SEQ2_LEN=/hive/data/genomes/dp4/chrom.sizes
 
 BASE=/hive/data/genomes/dm6/bed/lastzDP4.2014-08-29
 TMPDIR=/dev/shm
 '_EOF_'
     # << happy emacs
 
     time doBlastzChainNet.pl `pwd`/DEF \
         -syntenicNet -fileServer=hgwdev \
         -chainMinScore=3000  -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
     # real    46m17.831s
     cat fb.dm6.chainDp4Link.txt
     # 85549372 bases of 142573024 (60.004%) in intersection
 
     #   running the swap
     mkdir /hive/data/genomes/dp4/bed/blastz.dm6.swap
     cd /hive/data/genomes/dp4/bed/blastz.dm6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/dm6/bed/lastzDP4.2014-08-29/DEF \
         -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
     # real    6m45.065s
     cat fb.dp4.chainDm6Link.txt
     # 88406867 bases of 146066673 (60.525%) in intersection
 
 ###########################################################################
 # LASTZ D. albomicans DroAlb1 (DONE 2014-08-29 - Hiram)
     screen -S droAlb1-dm6
     mkdir /hive/data/genomes/dm6/bed/lastzDroAlb1.2014-08-29
     cd /hive/data/genomes/dm6/bed/lastzDroAlb1.2014-08-29
 
     cat << '_EOF_' > DEF
 # D. melanogaster vs. D. albomicans
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=4000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET - D. melanogaster
 SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
 SEQ1_CHUNK=5000000
 SEQ1_LAP=10000
 SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes
 
 # QUERY - D. albomicans
 SEQ2_DIR=/hive/data/genomes/droAlb1/droAlb1.2bit
 SEQ2_CHUNK=5000000
 SEQ2_LAP=10000
 SEQ2_LEN=/hive/data/genomes/droAlb1/chrom.sizes
 
 BASE=/hive/data/genomes/dm6/bed/lastzDroAlb1.2014-08-29
 TMPDIR=/dev/shm
 '_EOF_'
     # << this line keeps emacs coloring happy
     time doBlastzChainNet.pl `pwd`/DEF \
         -syntenicNet -fileServer=hgwdev \
         -chainMinScore=3000  -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
     # real    86m58.011s
     cat fb.dm6.chainDroAlb1Link.txt
     # 63507363 bases of 142573024 (44.544%) in intersection
 
     #   running the swap
     mkdir /hive/data/genomes/droAlb1/bed/blastz.dm6.swap
     cd /hive/data/genomes/droAlb1/bed/blastz.dm6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/dm6/bed/lastzDroAlb1.2014-08-29/DEF \
         -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
     # real    7m39.524s
     cat fb.droAlb1.chainDm6Link.txt
     # 82594337 bases of 216120500 (38.217%) in intersection
 
 ###########################################################################
 # LASTZ D. biarmipes DroBia2 (DONE 2014-08-29 - Hiram)
     screen -S droBia2-dm6
     mkdir /hive/data/genomes/dm6/bed/lastzDroBia2.2014-08-29
     cd /hive/data/genomes/dm6/bed/lastzDroBia2.2014-08-29
 
     cat << '_EOF_' > DEF
 # D. melanogaster vs. D. biarmipes
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=4000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET - D. melanogaster
 SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
 SEQ1_CHUNK=5000000
 SEQ1_LAP=10000
 SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes
 
 # QUERY - D. biarmipes
 SEQ2_DIR=/hive/data/genomes/droBia2/droBia2.2bit
 SEQ2_CHUNK=5000000
 SEQ2_LAP=10000
 SEQ2_LEN=/hive/data/genomes/droBia2/chrom.sizes
 
 BASE=/hive/data/genomes/dm6/bed/lastzDroBia2.2014-08-29
 TMPDIR=/dev/shm
 '_EOF_'
     # << happy emacs
 
     time doBlastzChainNet.pl `pwd`/DEF \
         -syntenicNet -fileServer=hgwdev \
         -chainMinScore=3000  -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
     # real    67m19.563s
     cat fb.dm6.chainDroBia2Link.txt
     # 106989297 bases of 142573024 (75.042%) in intersection
 
     #   running the swap
     mkdir /hive/data/genomes/droBia2/bed/blastz.dm6.swap
     cd /hive/data/genomes/droBia2/bed/blastz.dm6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/dm6/bed/lastzDroBia2.2014-08-29/DEF \
         -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
     # real    7m41.031s
     cat fb.droBia2.chainDm6Link.txt
     # 113609151 bases of 168601808 (67.383%) in intersection
 
 ###########################################################################
 # LASTZ D. bipectinata DroBip2 (DONE 2014-08-29 - Hiram)
     screen -S droBip2-dm6
     mkdir /hive/data/genomes/dm6/bed/lastzDroBip2.2014-08-29
     cd /hive/data/genomes/dm6/bed/lastzDroBip2.2014-08-29
 
     cat << '_EOF_' > DEF
 # D. melanogaster vs. D. bipectinata
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=4000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET - D. melanogaster
 SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
 SEQ1_CHUNK=5000000
 SEQ1_LAP=10000
 SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes
 
 # QUERY - D. bipectinata
 SEQ2_DIR=/hive/data/genomes/droBip2/droBip2.2bit
 SEQ2_CHUNK=5000000
 SEQ2_LAP=10000
 SEQ2_LEN=/hive/data/genomes/droBip2/chrom.sizes
 
 BASE=/hive/data/genomes/dm6/bed/lastzDroBip2.2014-08-29
 TMPDIR=/dev/shm
 '_EOF_'
     # << happy emacs
 
     time doBlastzChainNet.pl `pwd`/DEF \
         -syntenicNet -fileServer=hgwdev \
         -chainMinScore=3000  -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
     # real    72m58.502s
 
     cat fb.dm6.chainDroBip2Link.txt
     # 93905122 bases of 142573024 (65.865%) in intersection
 
     #   running the swap
     mkdir /hive/data/genomes/droBip2/bed/blastz.dm6.swap
     cd /hive/data/genomes/droBip2/bed/blastz.dm6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/dm6/bed/lastzDroBip2.2014-08-29/DEF \
         -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
     # real    6m57.737s
     cat fb.droBip2.chainDm6Link.txt
     # 100268467 bases of 166411051 (60.253%) in intersection
 
 ###########################################################################
 # LASTZ D. elegans DroEle2 (DONE 2014-08-29 - Hiram)
     screen -S droEle2-dm6
     mkdir /hive/data/genomes/dm6/bed/lastzDroEle2.2014-08-29
     cd /hive/data/genomes/dm6/bed/lastzDroEle2.2014-08-29
 
     cat << '_EOF_' > DEF
 # D. melanogaster vs. D. elegans
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=4000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET - D. melanogaster
 SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
 SEQ1_CHUNK=5000000
 SEQ1_LAP=10000
 SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes
 
 # QUERY - D. elegans
 SEQ2_DIR=/hive/data/genomes/droEle2/droEle2.2bit
 SEQ2_CHUNK=5000000
 SEQ2_LAP=10000
 SEQ2_LEN=/hive/data/genomes/droEle2/chrom.sizes
 
 BASE=/hive/data/genomes/dm6/bed/lastzDroEle2.2014-08-29
 TMPDIR=/dev/shm
 '_EOF_'
     # << happy emacs
 
     time doBlastzChainNet.pl `pwd`/DEF \
         -syntenicNet -fileServer=hgwdev \
         -chainMinScore=3000  -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
     # real    52m32.697s
     cat fb.dm6.chainDroEle2Link.txt
     # 107563805 bases of 142573024 (75.445%) in intersection
 
     #   running the swap
     mkdir /hive/data/genomes/droEle2/bed/blastz.dm6.swap
     cd /hive/data/genomes/droEle2/bed/blastz.dm6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/dm6/bed/lastzDroEle2.2014-08-29/DEF \
         -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
     # real    7m37.855s
     cat fb.droEle2.chainDm6Link.txt
     # 115222391 bases of 170533638 (67.566%) in intersection
 
 ###########################################################################
 # LASTZ D. eugracilis DroEug2 (DONE  2014-08-29 - Hiram)
     screen -S droEug2-dm6
     mkdir /hive/data/genomes/dm6/bed/lastzDroEug2.2014-08-29
     cd /hive/data/genomes/dm6/bed/lastzDroEug2.2014-08-29
 
     cat << '_EOF_' > DEF
 # D. melanogaster vs. D. eugracilis
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=4000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET - D. melanogaster
 SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
 SEQ1_CHUNK=5000000
 SEQ1_LAP=10000
 SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes
 
 # QUERY - D. eugracilis
 SEQ2_DIR=/hive/data/genomes/droEug2/droEug2.2bit
 SEQ2_CHUNK=5000000
 SEQ2_LAP=10000
 SEQ2_LEN=/hive/data/genomes/droEug2/chrom.sizes
 
 BASE=/hive/data/genomes/dm6/bed/lastzDroEug2.2014-08-29
 TMPDIR=/dev/shm
 '_EOF_'
     # << happy emacs
 
     time doBlastzChainNet.pl `pwd`/DEF \
         -syntenicNet -fileServer=hgwdev \
         -chainMinScore=3000  -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
     # real    81m4.137s
     cat fb.dm6.chainDroEug2Link.txt
     # 107307060 bases of 142573024 (75.265%) in intersection
 
     #   running the swap
     mkdir /hive/data/genomes/droEug2/bed/blastz.dm6.swap
     cd /hive/data/genomes/droEug2/bed/blastz.dm6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/dm6/bed/lastzDroEug2.2014-08-29/DEF \
         -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
     # real    7m6.555s
     cat fb.droEug2.chainDm6Link.txt
     # 114576241 bases of 156328338 (73.292%) in intersection
 
 ###########################################################################
 # LASTZ D. ficusphila DroFic2 (DONE 2014-08-29 - Hiram)
     screen -S droFic2-dm6
     mkdir /hive/data/genomes/dm6/bed/lastzDroFic2.2014-08-29
     cd /hive/data/genomes/dm6/bed/lastzDroFic2.2014-08-29
 
     cat << '_EOF_' > DEF
 # D. melanogaster vs. D. ficusphila
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=4000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET - D. melanogaster
 SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
 SEQ1_CHUNK=5000000
 SEQ1_LAP=10000
 SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes
 
 # QUERY - D. ficusphila
 SEQ2_DIR=/hive/data/genomes/droFic2/droFic2.2bit
 SEQ2_CHUNK=5000000
 SEQ2_LAP=10000
 SEQ2_LEN=/hive/data/genomes/droFic2/chrom.sizes
 
 BASE=/hive/data/genomes/dm6/bed/lastzDroFic2.2014-08-29
 TMPDIR=/dev/shm
 '_EOF_'
     # << happy emacs
 
     time doBlastzChainNet.pl `pwd`/DEF \
         -syntenicNet -fileServer=hgwdev \
         -chainMinScore=3000  -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
     # real    62m0.114s
     cat fb.dm6.chainDroFic2Link.txt
     # 106559757 bases of 142573024 (74.740%) in intersection
 
     #   running the swap
     mkdir /hive/data/genomes/droFic2/bed/blastz.dm6.swap
     cd /hive/data/genomes/droFic2/bed/blastz.dm6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/dm6/bed/lastzDroFic2.2014-08-29/DEF \
         -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
     # real    6m54.593s
 
     cat fb.droFic2.chainDm6Link.txt
     # 112386864 bases of 151050603 (74.403%) in intersection
 
 ###########################################################################
 # LASTZ D. kikkawai DroKik2 (DONE 2014-08-29 - Hiram)
     screen -S droKik2-dm6
     mkdir /hive/data/genomes/dm6/bed/lastzDroKik2.2014-08-29
     cd /hive/data/genomes/dm6/bed/lastzDroKik2.2014-08-29
 
     cat << '_EOF_' > DEF
 # D. melanogaster vs. D. kikkawai
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=4000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET - D. melanogaster
 SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
 SEQ1_CHUNK=5000000
 SEQ1_LAP=10000
 SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes
 
 # QUERY - D. kikkawai
 SEQ2_DIR=/hive/data/genomes/droKik2/droKik2.2bit
 SEQ2_CHUNK=5000000
 SEQ2_LAP=10000
 SEQ2_LEN=/hive/data/genomes/droKik2/chrom.sizes
 
 BASE=/hive/data/genomes/dm6/bed/lastzDroKik2.2014-08-29
 TMPDIR=/dev/shm
 '_EOF_'
     # << happy emacs
 
     time doBlastzChainNet.pl `pwd`/DEF \
         -syntenicNet -fileServer=hgwdev \
         -chainMinScore=3000  -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
     # real    68m49.238s
     cat fb.dm6.chainDroKik2Link.txt
     # 100642052 bases of 142573024 (70.590%) in intersection
 
     #   running the swap
     mkdir /hive/data/genomes/droKik2/bed/blastz.dm6.swap
     cd /hive/data/genomes/droKik2/bed/blastz.dm6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/dm6/bed/lastzDroKik2.2014-08-29/DEF \
         -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
     # real    7m5.278s
     cat fb.droKik2.chainDm6Link.txt
     # 105765944 bases of 163450645 (64.708%) in intersection
 
 ###########################################################################
 # LASTZ D. miranda DroMir2 (DONE 2014-08-29 - Hiram)
     screen -S droMir2-dm6
     mkdir /hive/data/genomes/dm6/bed/lastzDroMir2.2014-08-29
     cd /hive/data/genomes/dm6/bed/lastzDroMir2.2014-08-29
 
     cat << '_EOF_' > DEF
 # D. melanogaster vs. D. miranda
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=4000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET - D. melanogaster
 SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
 SEQ1_CHUNK=5000000
 SEQ1_LAP=10000
 SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes
 
 # QUERY - D. miranda
 SEQ2_DIR=/hive/data/genomes/droMir2/droMir2.2bit
 SEQ2_CHUNK=5000000
 SEQ2_LAP=10000
 SEQ2_LEN=/hive/data/genomes/droMir2/chrom.sizes
 
 BASE=/hive/data/genomes/dm6/bed/lastzDroMir2.2014-08-29
 TMPDIR=/dev/shm
 '_EOF_'
     # << happy emacs
 
     time doBlastzChainNet.pl `pwd`/DEF \
         -syntenicNet -fileServer=hgwdev \
         -chainMinScore=3000  -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
     # real    46m36.289s
     cat fb.dm6.chainDroMir2Link.txt
     # 84557634 bases of 142573024 (59.308%) in intersection
 
     #   running the swap
     mkdir /hive/data/genomes/droMir2/bed/blastz.dm6.swap
     cd /hive/data/genomes/droMir2/bed/blastz.dm6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/dm6/bed/lastzDroMir2.2014-08-29/DEF \
         -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
     # real    real    4m55.434s
 
     cat fb.droMir2.chainDm6Link.txt
     # 84386358 bases of 132586288 (63.646%) in intersection
 
 ###########################################################################
 # LASTZ D. suzukii DroSuz1 (DONE 2014-08-29 - Hiram)
     screen -S droSuz1-dm6
     mkdir /hive/data/genomes/dm6/bed/lastzDroSuz1.2014-08-29
     cd /hive/data/genomes/dm6/bed/lastzDroSuz1.2014-08-29
 
     cat << '_EOF_' > DEF
 # D. melanogaster vs. D. suzukii
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=4000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET - D. melanogaster
 SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
 SEQ1_CHUNK=5000000
 SEQ1_LAP=10000
 SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes
 
 # QUERY - D. suzukii
 SEQ2_DIR=/hive/data/genomes/droSuz1/droSuz1.2bit
 SEQ2_CHUNK=5000000
 SEQ2_LAP=10000
 SEQ2_LEN=/hive/data/genomes/droSuz1/chrom.sizes
 
 BASE=/hive/data/genomes/dm6/bed/lastzDroSuz1.2014-08-29
 TMPDIR=/dev/shm
 '_EOF_'
     # << happy emacs
 
     time doBlastzChainNet.pl `pwd`/DEF \
         -syntenicNet -fileServer=hgwdev \
         -chainMinScore=3000  -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
     # real    69m12.803s
     cat fb.dm6.chainDroSuz1Link.txt
     # 106346119 bases of 142573024 (74.591%) in intersection
 
     #   running the swap
     mkdir /hive/data/genomes/droSuz1/bed/blastz.dm6.swap
     cd /hive/data/genomes/droSuz1/bed/blastz.dm6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/dm6/bed/lastzDroSuz1.2014-08-29/DEF \
         -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
     # real    9m7.373s
     cat fb.droSuz1.chainDm6Link.txt
     # 129874262 bases of 202303443 (64.198%) in intersection
 
 ###########################################################################
 # LASTZ D. rhopaloa DroRho2 (DONE 2014-08-29 - Hiram)
     screen -S droRho2-dm6
     mkdir /hive/data/genomes/dm6/bed/lastzDroRho2.2014-08-29
     cd /hive/data/genomes/dm6/bed/lastzDroRho2.2014-08-29
 
     cat << '_EOF_' > DEF
 # D. melanogaster vs. D. rhopaloa
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=4000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET - D. melanogaster
 SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
 SEQ1_CHUNK=5000000
 SEQ1_LAP=10000
 SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes
 
 # QUERY - D. rhopaloa
 SEQ2_DIR=/hive/data/genomes/droRho2/droRho2.2bit
 SEQ2_CHUNK=5000000
 SEQ2_LAP=10000
 SEQ2_LEN=/hive/data/genomes/droRho2/chrom.sizes
 
 BASE=/hive/data/genomes/dm6/bed/lastzDroRho2.2014-08-29
 TMPDIR=/dev/shm
 '_EOF_'
     # << happy emacs
 
     time doBlastzChainNet.pl `pwd`/DEF \
         -syntenicNet -fileServer=hgwdev \
         -chainMinScore=3000  -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
     # real    79m31.589s
     cat fb.dm6.chainDroRho2Link.txt
     # 106844545 bases of 142573024 (74.940%) in intersection
 
     #   running the swap
     mkdir /hive/data/genomes/droRho2/bed/blastz.dm6.swap
     cd /hive/data/genomes/droRho2/bed/blastz.dm6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/dm6/bed/lastzDroRho2.2014-08-29/DEF \
         -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
     # real  10m15.734s
     cat fb.droRho2.chainDm6Link.txt
     # 126832194 bases of 193889799 (65.415%) in intersection
 
 ###########################################################################
 # LASTZ D. takahashii DroTak2 (DONE 2014-08-29 - Hiram)
     screen -S droTak2-dm6
     mkdir /hive/data/genomes/dm6/bed/lastzDroTak2.2014-08-29
     cd /hive/data/genomes/dm6/bed/lastzDroTak2.2014-08-29
 
     cat << '_EOF_' > DEF
 # D. melanogaster vs. D. takahashii
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=4000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET - D. melanogaster
 SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
 SEQ1_CHUNK=5000000
 SEQ1_LAP=10000
 SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes
 
 # QUERY - D. takahashii
 SEQ2_DIR=/hive/data/genomes/droTak2/droTak2.2bit
 SEQ2_CHUNK=5000000
 SEQ2_LAP=10000
 SEQ2_LEN=/hive/data/genomes/droTak2/chrom.sizes
 
 BASE=/hive/data/genomes/dm6/bed/lastzDroTak2.2014-08-29
 TMPDIR=/dev/shm
 '_EOF_'
     # << happy emacs
 
     time doBlastzChainNet.pl `pwd`/DEF \
         -syntenicNet -fileServer=hgwdev \
         -chainMinScore=3000  -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
     # real     66m20.654s
     cat fb.dm6.chainDroTak2Link.txt
     # 107653248 bases of 142573024 (75.507%) in intersection
 
     #   running the swap
     mkdir /hive/data/genomes/droTak2/bed/blastz.dm6.swap
     cd /hive/data/genomes/droTak2/bed/blastz.dm6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/dm6/bed/lastzDroTak2.2014-08-29/DEF \
         -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
     # real    7m15.787s
     cat fb.droTak2.chainDm6Link.txt
     # 114989522 bases of 181031891 (63.519%) in intersection
 
 ###########################################################################
 # LASTZ M. domestica MusDom2 (DONE 2014-08-29 - Hiram)
     screen -S musDom2-dm6
     mkdir /hive/data/genomes/dm6/bed/lastzMusDom2.2014-08-29
     cd /hive/data/genomes/dm6/bed/lastzMusDom2.2014-08-29
 
     cat << '_EOF_' > DEF
 # D. melanogaster vs. M. domestica
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=4000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET - D. melanogaster
 SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
 SEQ1_CHUNK=5000000
 SEQ1_LAP=10000
 SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes
 
 # QUERY - M. domestica
 SEQ2_DIR=/hive/data/genomes/musDom2/musDom2.2bit
 SEQ2_CHUNK=5000000
 SEQ2_LAP=10000
 SEQ2_LEN=/hive/data/genomes/musDom2/chrom.sizes
 
 BASE=/hive/data/genomes/dm6/bed/lastzMusDom2.2014-08-29
 TMPDIR=/dev/shm
 '_EOF_'
     # << happy emacs
 
     time doBlastzChainNet.pl `pwd`/DEF \
         -syntenicNet -fileServer=hgwdev \
         -chainMinScore=3000  -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
     # Elapsed time: 81m30s
     cat fb.dm6.chainMusDom2Link.txt
     # 27485718 bases of 142573024 (19.278%) in intersection
 
     #   running the swap
     mkdir /hive/data/genomes/musDom2/bed/blastz.dm6.swap
     cd /hive/data/genomes/musDom2/bed/blastz.dm6.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/dm6/bed/lastzMusDom2.2014-08-29/DEF \
         -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
     # real    3m35.469s
     cat fb.musDom2.chainDm6Link.txt
     # 33622643 bases of 691751600 (4.861%) in intersection
 
 #########################################################################
 ## 27-Way Multiz (DONE - 2014-09-05 - Hiram)
     ssh hgwdev
     mkdir /hive/data/genomes/dm6/bed/multiz27way
     cd /hive/data/genomes/dm6/bed/multiz27way
 
     # worked up a phylo tree with the help of
     # http://phylot.biobyte.de/
     # http://itol.embl.de/
     # and editor: http://treegraph.bioinfweb.info/
 
     cp -p ~/kent/src/hg/utils/phyloTrees/dm6.27way.nh ./
 
 
     #	what that looks like:
     ~/kent/src/hg/utils/phyloTrees/asciiTree.pl dm6.27way.nh
 (((((((((((((((((dm6:0.1,
                 (droSim1:0.1,
                 droSec1:0.1):0.1):0.1,
                (droYak3:0.1,
                droEre2:0.1):0.1):0.1,
               (droBia2:0.1,
               droSuz1:0.1):0.1):0.1,
              (droAna3:0.1,
              droBip2:0.1):0.1):0.1,
             droEug2:0.1):0.1,
            droEle2:0.1):0.1,
           droKik2:0.1):0.1,
          droTak2:0.1):0.1,
         droRho2:0.1):0.1,
        droFic2:0.1):0.1,
       ((droPse3:0.1,
        droPer1:0.1):0.1,
       droMir2:0.1):0.1):0.1,
      droWil2:0.1):0.1,
     (((droVir3:0.1,
       droMoj3:0.1):0.1,
      droAlb1:0.1):0.1,
     droGri2:0.1):0.1):0.1,
    musDom2:0.1):0.1,
   anoGam1:0.1):0.1,
  apiMel4:0.1):0.1,
 triCas2:0.1);
 
     # extract species list from that .nh file
     sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
         dm6.27way.nh | xargs echo | sed 's/ //g; s/,/ /g' \
         | sed 's/[()]//g; s/,/ /g' | tr '[ ]' '[\n]' > species.list.txt
 
     # construct db to common name translation list:
     cat species.list.txt | while read DB
 do
 hgsql -N -e "select name,organism from dbDb where name=\"${DB}\";" hgcentraltest
 done | sed -e "s/\t/->/; s/ /_/g;" | sed -e 's/$/;/' | sed -e 's/\./_/g' \
         > db.to.name.txt
     # construct db to scientific name translation list:
     cat species.list.txt | while read DB
 do
 hgsql -N -e "select name,scientificName from dbDb where name=\"${DB}\";" hgcentraltest
 done | sed -e "s/\t/->/; s/ /_/g;" | sed -e 's/$/;/' | sed -e 's/\./_/g' \
      | sed -e 's/pseudoobscura_pseudoobscura/pseudoobscura/;' \
         > db.to.sciName.txt
 
     # construct a common name .nh file:
     /cluster/bin/phast/tree_doctor --rename \
     "`cat db.to.name.txt`" dm6.27way.nh | sed -e 's/00*)/)/g; s/00*,/,/g' \
        | $HOME/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \
          > dm6.27way.commonNames.nh
 
     # construct a scientific name .nh file:
     /cluster/bin/phast/tree_doctor --rename \
     "`cat db.to.sciName.txt`" dm6.27way.nh | sed -e 's/00*)/)/g; s/00*,/,/g' \
        | $HOME/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \
          > dm6.27way.scientificNames.nh
     # construct a scientific name .nh file:
 
     cat dm6.27way.scientificNames.nh
 (((((((((((((((((Drosophila_melanogaster:0.1,
                 (Drosophila_simulans:0.1,
                 Drosophila_sechellia:0.1):0.1):0.1,
                (Drosophila_yakuba:0.1,
                Drosophila_erecta:0.1):0.1):0.1,
               (Drosophila_biarmipes:0.1,
               Drosophila_suzukii:0.1):0.1):0.1,
              (Drosophila_ananassae:0.1,
              Drosophila_bipectinata:0.1):0.1):0.1,
             Drosophila_eugracilis:0.1):0.1,
            Drosophila_elegans:0.1):0.1,
           Drosophila_kikkawai:0.1):0.1,
          Drosophila_takahashii:0.1):0.1,
         Drosophila_rhopaloa:0.1):0.1,
        Drosophila_ficusphila:0.1):0.1,
       ((Drosophila_pseudoobscura:0.1,
        Drosophila_persimilis:0.1):0.1,
       Drosophila_miranda:0.1):0.1):0.1,
      Drosophila_willistoni:0.1):0.1,
     (((Drosophila_virilis:0.1,
       Drosophila_mojavensis:0.1):0.1,
      Drosophila_albomicans:0.1):0.1,
     Drosophila_grimshawi:0.1):0.1):0.1,
    Musca_domestica:0.1):0.1,
   Anopheles_gambiae:0.1):0.1,
  Apis_mellifera:0.1):0.1,
 Tribolium_castaneum:0.1);
 
     #	Use this specification in the phyloGif tool:
     #	http://genome.ucsc.edu/cgi-bin/phyloGif
     #	to obtain a png image for src/hg/htdocs/images/phylo/dm6_27way.png
 
     /cluster/bin/phast/all_dists dm6.27way.nh | grep dm6 \
         | sed -e "s/dm6.//" | sort -k2n > 27way.distances.txt
     #	Use this output to create the table below
     head 27way.distances.txt
 droSec1 0.300000
 droSim1 0.300000
 droEre2 0.400000
 droYak3 0.400000
 droBia2 0.500000
 droSuz1 0.500000
 droAna3 0.600000
 droBip2 0.600000
 droEug2 0.600000
 droEle2 0.700000
 
     cat << '_EOF_' > sizeStats.pl
 #!/usr/bin/env perl
 
 use strict;
 use warnings;
 
 open (FH, "<27way.distances.txt") or
         die "can not read 27way.distances.txt";
 
 my $count = 0;
 while (my $line = <FH>) {
     chomp $line;
     my ($D, $dist) = split('\s+', $line);
     my $chain = "chain" . ucfirst($D);
     my $B="/hive/data/genomes/dm6/bed/lastz.$D/fb.dm6." .
         $chain . "Link.txt";
     my $chainLinkMeasure =
         `awk '{print \$5}' ${B} 2> /dev/null | sed -e "s/(//; s/)//"`;
     chomp $chainLinkMeasure;
     $chainLinkMeasure = 0.0 if (length($chainLinkMeasure) < 1);
     $chainLinkMeasure =~ s/\%//;
     my $swapFile="/hive/data/genomes/${D}/bed/lastz.dm6/fb.${D}.chainDm6Link.txt";
     my $swapMeasure = "N/A";
     if ( -s $swapFile ) {
 	$swapMeasure =
 	    `awk '{print \$5}' ${swapFile} 2> /dev/null | sed -e "s/(//; s/)//"`;
 	chomp $swapMeasure;
 	$swapMeasure = 0.0 if (length($swapMeasure) < 1);
 	$swapMeasure =~ s/\%//;
     }
     my $orgName=
     `hgsql -N -e "select organism from dbDb where name='$D';" hgcentraltest`;
     chomp $orgName;
     if (length($orgName) < 1) {
         $orgName="N/A";
     }
     ++$count;
     printf "# %02d  %.4f (%% %06.3f) (%% %06.3f) - %s %s\n", $count, $dist,
         $chainLinkMeasure, $swapMeasure, $orgName, $D;
 }
 close (FH);
 '_EOF_'
     # << happy emacs
     chmod +x ./sizeStats.pl
     ./sizeStats.pl
 #
 
 #	If you can fill in all the numbers in this table, you are ready for
 #	the multiple alignment procedure
 
 #       featureBits chainLink measures
 #               chainLink
 #  N distance  on dm6  on other     other species
 # 01  0.3000 (% 82.561) (% 81.572) - D. sechellia droSec1
 # 02  0.3000 (% 80.042) (% 89.329) - D. simulans droSim1
 # 03  0.4000 (% 80.023) (% 84.386) - D. erecta droEre2
 # 04  0.4000 (% 80.562) (% 80.965) - D. yakuba droYak3
 # 05  0.5000 (% 75.042) (% 67.383) - D. biarmipes droBia2
 # 06  0.5000 (% 74.591) (% 64.198) - D. suzukii droSuz1
 # 07  0.6000 (% 67.282) (% 58.760) - D. ananassae droAna3
 # 08  0.6000 (% 65.865) (% 60.253) - D. bipectinata droBip2
 # 09  0.6000 (% 75.265) (% 73.292) - D. eugracilis droEug2
 # 10  0.7000 (% 75.445) (% 67.566) - D. elegans droEle2
 # 11  0.8000 (% 70.590) (% 64.708) - D. kikkawai droKik2
 # 12  0.9000 (% 75.507) (% 63.519) - D. takahashii droTak2
 # 13  1.0000 (% 74.940) (% 65.415) - D. rhopaloa droRho2
 # 14  1.1000 (% 74.740) (% 74.403) - D. ficusphila droFic2
 # 15  1.3000 (% 59.308) (% 63.646) - D. miranda droMir2
 # 16  1.3000 (% 51.432) (% 40.432) - D. willistoni droWil2
 # 17  1.4000 (% 59.049) (% 52.949) - D. persimilis droPer1
 # 18  1.4000 (% 59.836) (% 59.839) - D. pseudoobscura droPse3
 # 19  1.5000 (% 50.979) (% 42.814) - D. grimshawi droGri2
 # 20  1.5000 (% 19.278) (% 04.861) - M. domestica musDom2
 # 21  1.6000 (% 14.452) (% 07.962) - A. gambiae anoGam1
 # 22  1.6000 (% 44.544) (% 38.217) - D. albomicans droAlb1
 # 23  1.7000 (% 08.183) (% 05.360) - A. mellifera apiMel4
 # 24  1.7000 (% 50.132) (% 41.385) - D. mojavensis droMoj3
 # 25  1.7000 (% 51.937) (% 43.828) - D. virilis droVir3
 # 26  1.8000 (% 14.092) (% 14.808) - T. castaneum triCas2
 
 # None of this concern for distances matters in building the first step, the
 # maf files.
 
     # create species list and stripped down tree for autoMZ
     sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
 	dm6.27way.nh | xargs echo | sed 's/ //g; s/,/ /g' > tree.nh
 
     sed 's/[()]//g; s/,/ /g' tree.nh > species.list
 # dm6 droSim1 droSec1 droYak3 droEre2 droBia2 droSuz1 droAna3 droBip2 droEug2
 # droEle2 droKik2 droTak2 droRho2 droFic2 droPse3 droPer1 droMir2 droWil2
 # droVir3 droMoj3 droAlb1 droGri2 musDom2 anoGam1 apiMel4 triCas2
 
     #	bash shell syntax here ...
     cd /hive/data/genomes/dm6/bed/multiz27way
     export H=/hive/data/genomes/dm6/bed
     mkdir mafLinks
     # want syntenic net for all the Drosophilas
     for G in droAlb1 droAna3 droBia2 droBip2 droEle2 droEre2 droEug2 droFic2 droGri2 droKik2 droMir2 droMoj3 droPer1 droPse3 droRho2 droSec1 droSim1 droSuz1 droTak2 droVir3 droWil2 droYak3
     do
       mkdir mafLinks/$G
       echo ln -s ${H}/lastz.$G/axtChain/dm6.${G}.synNet.maf.gz ./mafLinks/$G
       ln -s ${H}/lastz.$G/axtChain/dm6.${G}.synNet.maf.gz ./mafLinks/$G
     done
 
     # and basic maf net for the others:
     for G in anoGam1 apiMel4 musDom2 triCas2
 do
       mkdir mafLinks/$G
       echo ln -s ${H}/lastz.$G/mafNet/dm6.${G}.net.maf.gz ./mafLinks/$G
       ln -s ${H}/lastz.$G/mafNet/dm6.${G}.net.maf.gz ./mafLinks/$G
 done
 
     # verify the symLinks are good:
     ls -ogrtL mafLinks/*/*
 # -rw-rw-r-- 1 57989657 Aug 29 11:52 mafLinks/droAna3/dm6.droAna3.synNet.maf.gz
 # -rw-rw-r-- 1 66653928 Aug 29 13:31 mafLinks/droEre2/dm6.droEre2.synNet.maf.gz
 # -rw-rw-r-- 1 55414479 Aug 29 13:48 mafLinks/droSim1/dm6.droSim1.synNet.maf.gz
 # -rw-rw-r-- 1 64261526 Aug 29 13:55 mafLinks/droSec1/dm6.droSec1.synNet.maf.gz
 # -rw-rw-r-- 1 43326252 Aug 29 13:58 mafLinks/droGri2/dm6.droGri2.synNet.maf.gz
 # -rw-rw-r-- 1 14470342 Aug 29 14:03 mafLinks/triCas2/dm6.triCas2.net.maf.gz
 # -rw-rw-r-- 1 53914019 Aug 29 14:06 mafLinks/droPer1/dm6.droPer1.synNet.maf.gz
 # -rw-rw-r-- 1  8871501 Aug 29 14:09 mafLinks/apiMel4/dm6.apiMel4.net.maf.gz
 # -rw-rw-r-- 1 44299779 Aug 29 14:12 mafLinks/droVir3/dm6.droVir3.synNet.maf.gz
 # -rw-rw-r-- 1 56305568 Aug 29 14:19 mafLinks/droPse3/dm6.droPse3.synNet.maf.gz
 # -rw-rw-r-- 1 58119972 Aug 29 14:31 mafLinks/droMir2/dm6.droMir2.synNet.maf.gz
 # -rw-rw-r-- 1 68523213 Aug 29 14:31 mafLinks/droEle2/dm6.droEle2.synNet.maf.gz
 # -rw-rw-r-- 1 68100446 Aug 29 14:35 mafLinks/droYak3/dm6.droYak3.synNet.maf.gz
 # -rw-rw-r-- 1 64321608 Aug 29 14:43 mafLinks/droBia2/dm6.droBia2.synNet.maf.gz
 # -rw-rw-r-- 1 44761222 Aug 29 14:43 mafLinks/droWil2/dm6.droWil2.synNet.maf.gz
 # -rw-rw-r-- 1 68869400 Aug 29 14:44 mafLinks/droFic2/dm6.droFic2.synNet.maf.gz
 # -rw-rw-r-- 1 42864649 Aug 29 14:45 mafLinks/droMoj3/dm6.droMoj3.synNet.maf.gz
 # -rw-rw-r-- 1 14470901 Aug 29 14:47 mafLinks/anoGam1/dm6.anoGam1.net.maf.gz
 # -rw-rw-r-- 1 61398493 Aug 29 14:51 mafLinks/droBip2/dm6.droBip2.synNet.maf.gz
 # -rw-rw-r-- 1 66734812 Aug 29 14:52 mafLinks/droKik2/dm6.droKik2.synNet.maf.gz
 # -rw-rw-r-- 1 67398540 Aug 29 14:54 mafLinks/droTak2/dm6.droTak2.synNet.maf.gz
 # -rw-rw-r-- 1 60781324 Aug 29 14:54 mafLinks/droSuz1/dm6.droSuz1.synNet.maf.gz
 # -rw-rw-r-- 1 28941891 Aug 29 15:01 mafLinks/droAlb1/dm6.droAlb1.synNet.maf.gz
 # -rw-rw-r-- 1 69660184 Aug 29 15:02 mafLinks/droEug2/dm6.droEug2.synNet.maf.gz
 # -rw-rw-r-- 1 61713626 Aug 29 15:06 mafLinks/droRho2/dm6.droRho2.synNet.maf.gz
 # -rw-rw-r-- 1 19994083 Aug 29 15:08 mafLinks/musDom2/dm6.musDom2.net.maf.gz
 
     # split the maf files into a set of hashed named files
     # this hash named split keeps the same chr/contig names in the same
     # named hash file.
     mkdir /hive/data/genomes/dm6/bed/multiz27way/mafSplit
     cd /hive/data/genomes/dm6/bed/multiz27way/mafSplit
     for D in `sed -e "s/dm6 //" ../species.list`
 do
     echo "${D}"
     mkdir $D
     cd $D
     echo "mafSplit -byTarget -useHashedName=8 /dev/null . ../../mafLinks/${D}/*.maf.gz"
     mafSplit -byTarget -useHashedName=8 /dev/null . \
 	../../mafLinks/${D}/*.maf.gz
     cd ..
 done
 
     # construct a list of all possible maf file names.
     # they do not all exist in each of the species directories
     find . -type f | wc -l
     # 678
     find . -type f | grep ".maf$" | xargs -L 1 basename | sort -u > maf.list
     wc -l maf.list
     # 161 maf.list
 
     mkdir /hive/data/genomes/dm6/bed/multiz27way/splitRun
     cd /hive/data/genomes/dm6/bed/multiz27way/splitRun
     mkdir maf run
     cd run
     mkdir penn
     cp -p /cluster/bin/penn/multiz.2009-01-21_patched/multiz penn
     cp -p /cluster/bin/penn/multiz.2009-01-21_patched/maf_project penn
     cp -p /cluster/bin/penn/multiz.2009-01-21_patched/autoMZ penn
 
     #	set the db and pairs directories here
     cat > autoMultiz.csh << '_EOF_'
 #!/bin/csh -ef
 set db = dm6
 set c = $1
 set result = $2
 set run = `/bin/pwd`
 set tmp = /dev/shm/$db/multiz.$c
 set pairs = /hive/data/genomes/dm6/bed/multiz27way/mafSplit
 /bin/rm -fr $tmp
 /bin/mkdir -p $tmp
 /bin/cp -p ../../tree.nh ../../species.list $tmp
 pushd $tmp > /dev/null
 foreach s (`/bin/sed -e "s/$db //" species.list`)
     set in = $pairs/$s/$c
     set out = $db.$s.sing.maf
     if (-e $in.gz) then
         /bin/zcat $in.gz > $out
         if (! -s $out) then
             echo "##maf version=1 scoring=autoMZ" > $out
         endif
     else if (-e $in) then
         /bin/ln -s $in $out
     else
         echo "##maf version=1 scoring=autoMZ" > $out
     endif
 end
 set path = ($run/penn $path); rehash
 $run/penn/autoMZ + T=$tmp E=$db "`cat tree.nh`" $db.*.sing.maf $c \
         > /dev/null
 popd > /dev/null
 /bin/rm -f $result
 /bin/cp -p $tmp/$c $result
 /bin/rm -fr $tmp
 '_EOF_'
 # << happy emacs
     chmod +x autoMultiz.csh
 
     cat  << '_EOF_' > template
 #LOOP
 ./autoMultiz.csh $(file1) {check out line+ /hive/data/genomes/dm6/bed/multiz27way/splitRun/maf/$(root1).maf}
 #ENDLOOP
 '_EOF_'
 # << happy emacs
 
     ln -s ../../mafSplit/maf.list maf.list
     ssh ku
     cd /hive/data/genomes/dm6/bed/multiz27way/splitRun/run
     gensub2 maf.list single template stdout > jobList
     para -ram=8g create jobList
 # Completed: 161 of 161 jobs
 # CPU time in finished jobs:     172027s    2867.12m    47.79h    1.99d  0.005 y
 # IO & Wait Time:                  1065s      17.75m     0.30h    0.01d  0.000 y
 # Average job time:                1075s      17.92m     0.30h    0.01d
 # Longest finished job:           42777s     712.95m    11.88h    0.50d
 # Submission to last job:         42793s     713.22m    11.89h    0.50d
 
     # combine into one file  (the 1>&2 redirect sends the echo to stderr)
     cd /hive/data/genomes/dm6/bed/multiz27way
     head -1 splitRun/maf/053.maf > multiz27way.maf
     for F in splitRun/maf/*.maf
 do
     echo "${F}" 1>&2
     egrep -v "^#" ${F}
 done >> multiz27way.maf
     tail -1 splitRun/maf/053.maf >> multiz27way.maf
 # -rw-rw-r--  1 5808772759 Sep  5 15:14 multiz27way.maf
 
     # Load into database
     ssh hgwdev
     cd /hive/data/genomes/dm6/bed/multiz27way
     mkdir /gbdb/dm6/multiz27way
     ln -s `pwd`/multiz27way.maf /gbdb/dm6/multiz27way
     cd /dev/shm
     time hgLoadMaf dm6 multiz27way
     # Loaded 2110160 mafs in 1 files from /gbdb/dm6/multiz27way
     # real    5m42.300s
 
     time hgLoadMafSummary -verbose=2 -minSize=30000 \
 	-mergeGap=1500 -maxSize=200000 dm6 multiz27waySummary \
 	/gbdb/dm6/multiz27way/multiz27way.maf
     # Created 157893 summary blocks from 34026658 components and 2110160 mafs from /gbdb/dm6/multiz27way/multiz27way.maf
     # real    2m44.856s
 
 
     wc -l multiz27way*.tab
     #  2110160 multiz27way.tab
     #   157893 multiz27waySummary.tab
     #  2268053 total
 
 -rw-rw-r-- 1 102915136 Sep  5 15:20 multiz27way.tab
 -rw-rw-r-- 1   7215723 Sep  5 15:35 multiz27waySummary.tab
 
     rm multiz27way*.tab
 
 #######################################################################
 # GAP ANNOTATE MULTIZ27WAY MAF AND LOAD TABLES (DONE - 2014-11-13 - Hiram)
     # mafAddIRows has to be run on single chromosome maf files, it does not
     #   function correctly when more than one reference sequence
     #   are in a single file.
     mkdir -p /hive/data/genomes/dm6/bed/multiz27way/anno/mafSplit
     cd /hive/data/genomes/dm6/bed/multiz27way/anno/mafSplit
     time mafSplit -byTarget -useFullSequenceName \
         /dev/null . ../../multiz27way.maf
 
     ls | wc
     # 277     277    6007
 
     cd /hive/data/genomes/dm6/bed/multiz27way/anno
     # skipped making the N.bed files - all dbs should have one already
     # check for N.bed files everywhere:
     for DB in `cat ../species.list.txt`
 do
     if [ ! -s /hive/data/genomes/${DB}/${DB}.N.bed ]; then
         echo "MISS: ${DB}"
         cd /hive/data/genomes/${DB}
         twoBitInfo -nBed ${DB}.2bit ${DB}.N.bed
     else
         echo "  OK: ${DB}"
     fi
     cd /hive/data/genomes/dm6/bed/multiz27way/anno
 done
 
     for DB in `cat ../species.list.txt`
     do
       echo "${DB} "
       ln -s  /hive/data/genomes/${DB}/${DB}.N.bed ${DB}.bed
       echo ${DB}.bed  >> nBeds
       ln -s  /hive/data/genomes/${DB}/chrom.sizes ${DB}.len
       echo ${DB}.len  >> sizes
     done
 
     # make sure they all are successful symLinks:
     ls -ogrtL
 
     cd /hive/data/genomes/dm6/bed/multiz27way/anno
     mkdir result
     cat << '_EOF_' > template
 #LOOP
 mafAddIRows -nBeds=nBeds $(path1) /hive/data/genomes/dm6/dm6.2bit {check out exists+ result/$(file1)}
 #ENDLOOP
 '_EOF_'
     # << happy emacs
 
     ls mafSplit/*.maf > maf.list
     ssh ku
     cd /hive/data/genomes/dm6/bed/multiz27way/anno
     gensub2 maf.list single template jobList
 
     para create jobList
     para try ... check ... etc
     para push
 # Completed: 276 of 277 jobs
 # Other count: 1 jobs
 # CPU time in finished jobs:        285s       4.75m     0.08h    0.00d  0.000 y
 # IO & Wait Time:                  4730s      78.84m     1.31h    0.05d  0.000 y
 # Average job time:                  18s       0.30m     0.01h    0.00d
 # Longest finished job:             103s       1.72m     0.03h    0.00d
 # Submission to last job:           195s       3.25m     0.05h    0.00d
     # one job was completed manually due to ku problems
 
     # combine into one file
     head -q -n 1 result/chr2L.maf > dm6.27way.maf
     for F in result/*.maf
     do
 	grep -h -v "^#" ${F}
     done >> dm6.27way.maf
     #   these maf files do not have the end marker, this does nothing:
     #   tail -q -n 1 result/chr2L.maf > dm6.27way.maf
     #   tail -q -n 1 result/GL172637.maf >> dm6.27way.maf
     # How about an official end marker:
     echo "##eof maf" >> dm6.27way.maf
 
     # Load into database
     rm /gbdb/dm6/multiz27way/multiz27way.maf   # remove old symlink
     ln -s `pwd`/dm6.27way.maf /gbdb/dm6/multiz27way/multiz27way.maf
     cd /dev/shm
     time nice -n +19 hgLoadMaf dm6 multiz27way
 
     #  real    1m43.417s
 
     time hgLoadMafSummary -verbose=2 -minSize=30000 \
 	-mergeGap=1500 -maxSize=200000 dm6 multiz27waySummary \
 	/gbdb/dm6/multiz27way/multiz27way.maf
     # Created 157893 summary blocks from 34026658 components and 2115903 mafs
     #  from /gbdb/dm6/multiz27way/multiz27way.maf
     # real    2m41.434s
 
     wc -l multiz27way*.tab
     # 2115903 multiz27way.tab
     #  157893 multiz27waySummary.tab
     # 2273796 total
 
     rm multiz27way*.tab
 
 #########################################################################
 # multiz27way MAF FRAMES (DONE - 2014-11-13 - Hiram)
     ssh hgwdev
     mkdir /hive/data/genomes/dm6/bed/multiz27way/frames
     cd /hive/data/genomes/dm6/bed/multiz27way/frames
     mkdir genes
 
     cat << '_EOF_' > showGenes.sh
 #!/bin/sh
 cat ../species.list.txt | while read db
 do
     echo -n "${db}: "
     hgsql $db -N -e "show tables like '%Gene%'" | while read table
     do
         if [ $table = "ensGene" -o $table = "refGene" -o \
            $table = "mgcGenes" -o $table = "knownGene" -o \
            $table = "xenoRefGene" -o $table = "flyBaseGene" ]; then
            count=`hgsql $db -N -e "select count(*) from $table"`
            echo -n "${table}: ${count}, "
         else
            echo -n "${table}: n/a, "
         fi
     done
     orgName=`hgsql hgcentraltest -N -e "select scientificName from dbDb where name='$db'"`
     orgId=`hgsql dm6 -N -e "select id from organism where name='$orgName'"`
     if [ "x${orgId}y" = "xy" ]; then
         echo "Mrnas: 0"
     else
         count=`hgsql dm6 -N -e "select count(*) from gbCdnaInfo where organism=$orgId"`
         echo "Mrnas: ${count}"
     fi
 done
 '_EOF_'
     # << happy emacs
     chmod +x showGenes.sh
     ./showGenes.sh > survey.txt
 
     # Getting numbers of items in gene tracks for each of the assemblies
     #   to pick which is best gene set to use:
     cat survey.txt
 dm6: flyBaseGene: 30264, flyBaseGenePep: n/a, refGene: 36039, xenoRefGene: 49470, Mrnas: 907097
 droSim1: dmRefGene: n/a, xenoRefGene: 251522, Mrnas: 341
 droSec1: xenoRefGene: 222725, Mrnas: 17
 droYak3: Mrnas: 815
 droEre2: ensGene: 15902, ensemblToGeneName: n/a, Mrnas: 42
 droBia2: Mrnas: 0
 droSuz1: Mrnas: 1
 droAna3: ensGene: 16061, ensemblToGeneName: n/a, Mrnas: 30
 droBip2: Mrnas: 3
 droEug2: Mrnas: 5
 droEle2: Mrnas: 1
 droKik2: Mrnas: 2
 droTak2: Mrnas: 1
 droRho2: Mrnas: 4
 droFic2: Mrnas: 1
 droPse3: Mrnas: 18
 droPer1: xenoRefGene: 220792, Mrnas: 34
 droMir2: Mrnas: 2
 droWil2: Mrnas: 20
 droVir3: Mrnas: 84
 droMoj3: Mrnas: 25
 droAlb1: Mrnas: 1
 droGri2: Mrnas: 10
 musDom2: Mrnas: 6447
 anoGam1: ensGene: 15802, xenoRefGene: 106775, Mrnas: 64135
 apiMel4: Mrnas: 121235
 triCas2: Mrnas: 1376
 
     # therefore:
     # refGene - dm6
     # ensGene - droEre2 droAna3 anoGam1
 
     #------------------------------------------------------------------------
     # get the genes for all genomes
     # mRNAs with CDS.  single select to get cds+psl, then split that up and
     # create genePred
 
     # using ensGene for: cavPor3 panTro4 canFam3 ailMel1 monDom5 galGal4 melGal1
     for qDB in droEre2 droAna3
     do
       echo hgsql -N -e \"'select * from 'ensGene\;\" ${qDB}
 	  hgsql -N -e "select * from ensGene" ${qDB} | cut -f 2-11 \
 	  | genePredSingleCover stdin stdout | gzip -2c > genes/$qDB.gp.gz
     done
 
     # seems to be an older ensGene here, no 'bin' column:
     for qDB in anoGam1
     do
       echo hgsql -N -e \"'select * from 'ensGene\;\" ${qDB}
 	  hgsql -N -e "select * from ensGene" ${qDB} \
 	  | genePredSingleCover stdin stdout | gzip -2c > genes/$qDB.gp.gz
     done
 
     # Using refGene for: dm6 - want the full extended genePred:
     for qDB in dm6
 do
   hgsql -N -e "select * from refGene" ${qDB} | cut -f2- \
      | genePredSingleCover stdin stdout | gzip -2c \
         > genes/$qDB.gp.gz
   echo "${qDB} done"
 done
 
     # verify counts for genes are reasonable:
     for T in genes/*.gz
 do
   echo -n "# $T: "
   zcat $T | cut -f1 | sort | uniq -c | wc -l
 done
 
 # genes/anoGam1.gp.gz: 14267
 # genes/dm6.gp.gz: 13823
 # genes/droAna3.gp.gz: 15022
 # genes/droEre2.gp.gz: 14992
 
     ssh hgwdev
     cd /hive/data/genomes/dm6/bed/multiz27way/frames
     time (cat ../multiz27way.maf \
 	| genePredToMafFrames dm6 stdin stdout \
           dm6 genes/dm6.gp.gz droAna3 genes/droAna3.gp.gz \
           droEre2 genes/droEre2.gp.gz anoGam1 genes/anoGam1.gp.gz \
 		| gzip > multiz27way.mafFrames.gz)
     #	real    2m22.546s
 
     # verify there are frames on everything:
     zcat multiz27way.mafFrames.gz | awk '{print $4}' | sort | uniq -c
 
     # 164994 anoGam1
     #  55050 dm6
     # 118998 droAna3
     #  77171 droEre2
 
     ssh hgwdev
     cd /hive/data/genomes/dm6/bed/multiz27way/frames
     time hgLoadMafFrames dm6 multiz27wayFrames multiz27way.mafFrames.gz
     # real    0m4.457s
 
 ##############################################################################
 # construct downloads files and pushQ entry (DONE - 2014-10-14 - Hiram)
 #   before starting downloads, the joinerCheck should be clean
 #   after dm6 is added to all.joiner:
     joinerCheck -tableCoverage -database=dm6 all.joiner
     joinerCheck -keys -database=dm6 all.joiner
 
     cd /hive/data/genomes/dm6
     makeDownloads.pl -dbHost=hgwdev -workhorse=hgwdev dm6 \
 	> downloads.log 2>&1
     # real    28m50.409s
 
     # examine the goldenPath/*/README.txt files to verify the text
 
     # ready for pushQ entry (DONE - 2014-10-14 - Hiram)
     mkdir /hive/data/genomes/dm6/pushQ
     cd /hive/data/genomes/dm6/pushQ
     makePushQSql.pl dm6 > dm6.sql 2> stderr.out
     # real    1m52.294s
     # some errors are legitimate and OK:
      head stderr.out
 # WARNING: hgwdev does not have /gbdb/dm6/wib/gc5Base.wib
 # WARNING: hgwdev does not have /gbdb/dm6/wib/quality.wib
 # WARNING: hgwdev does not have /gbdb/dm6/bbi/qualityBw/quality.bw
 # WARNING: dm6 does not have seq
 
     scp -p dm6.sql qateam@hgwbeta:/tmp
     ssh qateam@hgwbeta './bin/x86_64/hgsql qapushq < /tmp/dm6.sql'
 
 #########################################################################
 # Phylogenetic tree from 27-way (DONE - 2014-11-13 - Hiram)
 
     mkdir /hive/data/genomes/dm6/bed/multiz27way/4d
     cd /hive/data/genomes/dm6/bed/multiz27way/4d
 
     # the split annotated maf's are in:
     ../anno/result/*.maf
 
     # using refGene for dm6, only transcribed genes and nothing
     #	from the randoms and other misc.
     hgsql dm6 -Ne \
     "select * from refGene WHERE cdsEnd > cdsStart;" | cut -f 2- \
 	| egrep -v "_random|chrUn" > refGene.gp
     # verify primary chroms only:
     cut -f2 refGene.gp | sort | uniq -c | sort -rn | less
     #  7213 chr3R
     #  6581 chr2L
     #  6167 chr2R
     #  5914 chr3L
     #  5577 chrX
     #   300 chr4
     #    31 chrY
 
     genePredSingleCover refGene.gp stdout | sort > refGeneNP.gp
     wc -l *.gp
     #  31783 refGene.gp
     #  13955 refGeneNP.gp
 
     ssh ku
     mkdir /hive/data/genomes/dm6/bed/multiz27way/4d/run
     cd /hive/data/genomes/dm6/bed/multiz27way/4d/run
     mkdir ../mfa
 
     # newer versions of msa_view have a slightly different operation
     # the sed of the gp file inserts the reference species in the chr name
 
     cat << '_EOF_' > 4d.csh
 #!/bin/csh -fe
 set PHASTBIN = /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin
 set r = "/hive/data/genomes/dm6/bed/multiz27way"
 set c = $1
 set infile = $r/anno/result/$2
 set outfile = $3
 cd /scratch/tmp
 # 'clean' maf
 perl -wpe 's/^s ([^.]+)\.\S+/s $1/' $infile > $c.maf
 awk -v C=$c '$2 == C {print}' $r/4d/refGeneNR.gp | sed -e "s/\t$c\t/\tdm6.$c\t/" > $c.gp
 set NL=`wc -l $c.gp| gawk '{print $1}'`
 if ("$NL" != "0") then
     $PHASTBIN/msa_view --4d --features $c.gp -i MAF $c.maf -o SS > $c.ss
     $PHASTBIN/msa_view -i SS --tuple-size 1 $c.ss > $r/4d/run/$outfile
 else
     echo "" > $r/4d/run/$outfile
 endif
 rm -f $c.gp $c.maf $c.ss
 '_EOF_'
     # << happy emacs
     chmod +x 4d.csh
 
     ls -1S /hive/data/genomes/dm6/bed/multiz27way/anno/result/*.maf \
 	| egrep -v "_random|chrUn" | sed -e "s#.*multiz27way/anno/result/##" \
 	> maf.list
 
     cat << '_EOF_' > template
 #LOOP
 4d.csh $(root1) $(path1) {check out line+ ../mfa/$(root1).mfa}
 #ENDLOOP
 '_EOF_'
     # << happy emacs
 
     # the tac puts the quick jobs at the front
     gensub2 maf.list single template stdout | tac > jobList
     para create jobList
     para try ... check
     para -maxJob=5 push
     para time
 # Completed: 8 of 8 jobs
 # CPU time in finished jobs:        502s       8.37m     0.14h    0.01d  0.000 y
 # IO & Wait Time:                    39s       0.64m     0.01h    0.00d  0.000 y
 # Average job time:                  68s       1.13m     0.02h    0.00d
 # Longest finished job:             118s       1.97m     0.03h    0.00d
 # Submission to last job:           125s       2.08m     0.03h    0.00d
 
     # combine mfa files
     ssh hgwdev
     cd /hive/data/genomes/dm6/bed/multiz27way/4d
     # one file result is useless:
 # -rw-rw-r-- 1       1 Nov 13 14:13 chrM.mfa
 # -rw-rw-r-- 1   48002 Nov 13 14:13 chrY.mfa
 # -rw-rw-r-- 1  207788 Nov 13 14:13 chr4.mfa
 # -rw-rw-r-- 1 3658928 Nov 13 14:14 chr2R.mfa
 # -rw-rw-r-- 1 3532514 Nov 13 14:14 chr2L.mfa
 # -rw-rw-r-- 1 3198092 Nov 13 14:14 chrX.mfa
 # -rw-rw-r-- 1 3670700 Nov 13 14:14 chr3L.mfa
 # -rw-rw-r-- 1 4577630 Nov 13 14:14 chr3R.mfa
 
     rm -f mfa/chrM.mfa
 
     #want comma-less species.list.txt
     /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/msa_view \
 	--aggregate "`cat ../species.list.txt`" mfa/*.mfa | sed s/"> "/">"/ \
 	    > 4d.all.mfa
     # check they are all in there:
     grep "^>" 4d.all.mfa
 # >dm6
 # >droSim1
 # >droSec1
 # >droYak3
 # >droEre2
 ....
 # >droAlb1
 # >droGri2
 # >musDom2
 # >anoGam1
 # >apiMel4
 # >triCas2
 
     # create species list and stripped down tree for autoMZ
     sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
         ../dm6.27way.nh > tmp.nh
     echo `cat tmp.nh` > tree-commas.nh
     echo `cat tree-commas.nh` | sed 's/ //g; s/,/ /g' > tree.nh
     # tree-commas.nh:
 # (((((((((((((((((dm6, (droSim1, droSec1)), (droYak3, droEre2)),
 # (droBia2, droSuz1)), (droAna3, droBip2)), droEug2), droEle2), droKik2),
 # droTak2), droRho2), droFic2), ((droPse3, droPer1), droMir2)), droWil2),
 # (((droVir3, droMoj3), droAlb1), droGri2)), musDom2), anoGam1), apiMel4),
 # triCas2)
 
     # use phyloFit to create tree model (output is phyloFit.mod)
     time /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/phyloFit \
 	    --EM --precision MED --msa-format FASTA --subst-mod REV \
 		--tree tree-commas.nh 4d.all.mfa
     #	real    22m8.563s
 
     mv phyloFit.mod all.mod
 
     grep TREE all.mod
 # TREE: (((((((((((((((((dm6:0.0542029,(droSim1:0.0213103,
 # droSec1:0.0227973):0.0242621):0.0585014,(droYak3:0.0889658,
 # droEre2:0.0771526):0.0254381):0.181996,(droBia2:0.126739,
 # droSuz1:0.0934222):0.0964756):0.0125386,(droAna3:0.132676,
 # droBip2:0.134892):0.531515):0.00264138,droEug2:0.318569):0.0369765,
 # droEle2:0.207426):0.0025605,droKik2:0.485329):0.00243957,
 # droTak2:0.226746):0.0143511,droRho2:0.195624):0.0495906,
 # droFic2:0.257704):0.290027,((droPse3:0.0138526,droPer1:0.014979):0.00969914,
 # droMir2:0.0172853):0.398344):0.150107,droWil2:0.550935):0.0418275,
 # (((droVir3:0.242786,droMoj3:0.321598):0.090819,droAlb1:0.436249):0.0143777,
 # droGri2:0.318782):0.162991):0.222902,musDom2:0.772795):0.268197,
 # anoGam1:1.14709):0.387299,apiMel4:0.865834):0.588712,triCas2:0.588712);
 
 ##############################################################################
 # phastCons 27-way (DONE - 2014-11-27 - Hiram)
 
     # split 27way mafs into 10M chunks and generate sufficient statistics
     # files for # phastCons
     ssh ku
     mkdir -p /hive/data/genomes/dm6/bed/multiz27way/cons/SS
     cd /hive/data/genomes/dm6/bed/multiz27way/cons/SS
     mkdir result done
 
     # XXX beware missed jobs in this procedure.  The first time through this,
     # the chr3R was missed.  It was caught up manually 2014-12-04
 
     cat << '_EOF_' > mkSS.csh
 #!/bin/csh -ef
 set c = $1
 set MAF = /hive/data/genomes/dm6/bed/multiz27way/anno/result/$c.maf
 set WINDOWS = /hive/data/genomes/dm6/bed/multiz27way/cons/SS/result/$c
 set WC = `cat $MAF | wc -l`
 set NL = `grep "^#" $MAF | wc -l`
 if ( -s $2 ) then
     exit 0
 endif
 if ( -s $2.running ) then
     exit 0
 endif
 
 date >> $2.running
 
 rm -fr $WINDOWS
 mkdir $WINDOWS
 pushd $WINDOWS > /dev/null
 if ( $WC != $NL ) then
 /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/msa_split \
     $MAF -i MAF -o SS -r $WINDOWS/$c -w 10000000,0 -I 1000 -B 5000
 endif
 popd > /dev/null
 date >> $2
 rm -f $2.running
 '_EOF_'
     # << happy emacs
 
     chmod +x mkSS.csh
 
     cat << '_EOF_' > template
 #LOOP
 mkSS.csh $(root1) {check out line+ done/$(root1)}
 #ENDLOOP
 '_EOF_'
     # << happy emacs
 
     #	do the easy ones first to see some immediate results
     ls -1S -r ../../anno/result | sed -e "s/.maf//" > maf.list
 
     gensub2 maf.list single template jobList
     para create jobList
     para try ... check ... etc
 
 # Completed: 276 of 277 jobs
 # CPU time in finished jobs:        896s      14.93m     0.25h    0.01d  0.000 y
 # IO & Wait Time:                 16749s     279.16m     4.65h    0.19d  0.001 y
 # Average job time:                  64s       1.07m     0.02h    0.00d
 # Longest finished job:             283s       4.72m     0.08h    0.00d
 # Submission to last job:          1017s      16.95m     0.28h    0.01d
     # one job was finished manually due to ku problems
 
     #	 Not all files have a result due to low alignment
     wc -l jobList
     #  277 jobList
     find ./result -type f | wc
     #  158     158    8397
 
     # Run phastCons
     #	This job is I/O intensive in its output files, beware where this
     #	takes place or do not run too many at once.
     ssh ku
     mkdir -p /hive/data/genomes/dm6/bed/multiz27way/cons/run.cons
     cd /hive/data/genomes/dm6/bed/multiz27way/cons/run.cons
 
     #	there are going to be only one phastCons run using
     #	this same script.  It triggers off of the current working directory
     #	$cwd:t which is the "grp" in this script.  It is:
     #	all
 
     cat << '_EOF_' > doPhast.csh
 #!/bin/csh -fe
 set PHASTBIN = /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin
 set c = $1
 set cX = $1:r
 set f = $2
 set len = $3
 set cov = $4
 set rho = $5
 set grp = $cwd:t
 set cons = /hive/data/genomes/dm6/bed/multiz27way/cons
 set tmp = $cons/tmp/$f
 mkdir -p $tmp
 set ssSrc = $cons
 set useGrp = "$grp.mod"
 ln -s $ssSrc/SS/result/$c/$f.ss $tmp
 ln -s $cons/$grp/$grp.mod $tmp
 pushd $tmp > /dev/null
 $PHASTBIN/phastCons $f.ss $useGrp \
     --rho $rho --expected-length $len --target-coverage $cov --quiet \
     --seqname $c --idpref $c --most-conserved $f.bed --score > $f.pp
 popd > /dev/null
 mkdir -p pp/$c bed/$c
 sleep 4
 touch pp/$c bed/$c
 rm -f pp/$c/$f.pp
 rm -f bed/$c/$f.bed
 mv $tmp/$f.pp pp/$c
 mv $tmp/$f.bed bed/$c
 rm -fr $tmp
 '_EOF_'
     # << happy emacs
     chmod +x doPhast.csh
 
     #	this template will serve for all runs
     #	root1 == chrom name, file1 == ss file name without .ss suffix
 
     cat << '_EOF_' > template
 #LOOP
 ../run.cons/doPhast.csh $(root1) $(file1) 45 0.3 0.3 {check out line+ pp/$(root1)/$(file1).pp}
 #ENDLOOP
 '_EOF_'
     # << happy emacs
 
     find ../SS -type f | grep ".ss$" | sed -e 's/.ss$//' > ss.list
     wc -l ss.list
     # 158 ss.list
 
     # Create parasol batch and run it
     # run for all species
     mkdir /hive/data/genomes/dm6/bed/multiz27way/cons/all
     cd /hive/data/genomes/dm6/bed/multiz27way/cons/all
     #	Using the .mod tree
     cp -p ../../4d/all.mod ./all.mod
 
     gensub2 ../run.cons/ss.list single ../run.cons/template jobList
     para create jobList
     para try ... check ...
     para push
 # Completed: 158 of 158 jobs
 # CPU time in finished jobs:       1356s      22.59m     0.38h    0.02d  0.000 y
 # IO & Wait Time:                  1391s      23.19m     0.39h    0.02d  0.000 y
 # Average job time:                  17s       0.29m     0.00h    0.00d
 # Longest finished job:             228s       3.80m     0.06h    0.00d
 # Submission to last job:           900s      15.00m     0.25h    0.01d
 
     cd /hive/data/genomes/dm6/bed/multiz27way/cons/all
 
     find ./bed -type f | grep ".bed$" | xargs cat | sort -k1,1 -k2,2n \
 	| awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", $1, $2, $3, $5, $5;}' \
 	    > tmpMostConserved.bed
 
     /cluster/bin/scripts/lodToBedScore tmpMostConserved.bed > mostConserved.bed
 
     # load into database
     time hgLoadBed dm6 phastConsElements27way mostConserved.bed
     # Read 1427898 elements of size 5 from mostConserved.bed
     # real    0m13.345s
 
     # Read 1078721 elements of size 5 from mostConserved.bed
     # real    0m9.166s
 
     # on human we often try for 5% overall cov, and 70% CDS cov
     # most bets are off here for that goal, these alignments are too few
     #	and too far between
     featureBits dm6 -enrichment refGene:cds phastConsElements27way
     # refGene:cds 16.046%, phastConsElements27way 51.675%, both 13.124%,
     #   cover 81.79%, enrich 1.58x
 
     # Create merged posterier probability file and wiggle track data files
     cd /hive/data/genomes/dm6/bed/multiz27way/cons/all
     mkdir downloads
 
     time (find ./pp -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
 	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
 	| gzip -c > downloads/phastCons27way.wigFix.gz)
     #  real    1m12.361s
 
     # check integrity of data with wigToBigWig
     time (zcat downloads/phastCons27way.wigFix.gz \
 	| wigToBigWig -verbose=2 stdin /hive/data/genomes/dm6/chrom.sizes \
 	    phastCons27way.bw) > bigWig.log 2>&1 &
     tail bigWig.log
 
     # pid=34798: VmPeak:     1054096 kB
     # real    1m8.421s
 
     bigWigInfo phastCons27way.bw
 # version: 4
 # isCompressed: yes
 # isSwapped: 0
 # primaryDataSize: 172,458,601
 # primaryIndexSize: 3,804,252
 # zoomLevels: 10
 # chromCount: 151
 # basesCovered: 117,294,910
 # mean: 0.627817
 # min: 0.000000
 # max: 1.000000
 # std: 0.446656
 
     #	encode those files into wiggle data
     time (zcat downloads/phastCons27way.wigFix.gz \
 	| wigEncode stdin phastCons27way.wig phastCons27way.wib)
     # Converted stdin, upper limit 1.00, lower limit 0.00
     # real    0m25.328s
 
     du -hsc *.wi?
     #    112M    phastCons27way.wib
     #    11M     phastCons27way.wig
     #    123M    total
 
     # Load gbdb and database with wiggle.
     ln -s `pwd`/phastCons27way.wib /gbdb/dm6/multiz27way/phastCons27way.wib
     time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/dm6/multiz27way \
 	dm6 phastCons27way phastCons27way.wig
     #   real    0m0.854s
 
     # use to set trackDb.ra entries for wiggle min and max
     # and verify table is loaded correctly
 
     wigTableStats.sh dm6 phastCons27way
     # db.table      min max mean count sumData stdDev viewLimits
     # dm6.phastCons27way 0 1 0.618592 89437277 5.53252e+07 0.448759
     #  viewLimits=0:1
 
     #  Create histogram to get an overview of all the data
     time nice -n +19 hgWiggle -doHistogram -db=dm6 \
 	-hBinSize=0.001 -hBinCount=1000 -hMinVal=0.0 -verbose=2 \
 	    phastCons27way > histogram.data 2>&1
     #   real    0m5.630s
     #	create plot of histogram:
 
     cat << '_EOF_' | gnuplot > histo.png
 set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
 set size 1.4, 0.8
 set key left box
 set grid noxtics
 set grid ytics
 set title " D. melanogaster dm6 Histogram phastCons27way track"
 set xlabel " phastCons27way score"
 set ylabel " Relative Frequency"
 set y2label " Cumulative Relative Frequency (CRF)"
 set y2range [0:1]
 set y2tics
 set yrange [0:0.02]
 
 plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
         "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
 '_EOF_'
     #	<< happy emacs
 
     display histo.png &
 
 #############################################################################
 # phyloP for 27-way (DONE - 2014-11-13 - Hiram)
     # run phyloP with score=LRT
     ssh ku
     mkdir /cluster/data/dm6/bed/multiz27way/consPhyloP
     cd /cluster/data/dm6/bed/multiz27way/consPhyloP
 
     mkdir run.phyloP
     cd run.phyloP
     # Adjust model file base composition background and rate matrix to be
     # representative of the chromosomes in play
     grep BACKGROUND ../../cons/all/all.mod | awk '{printf "%0.3f\n", $3 + $4}'
     # 0.662
     /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/modFreqs \
 	../../cons/all/all.mod 0.662 > all.mod
     # verify, the BACKGROUND should now be paired up:
     grep BACK all.mod
     # BACKGROUND: 0.169000 0.331000 0.331000 0.169000
 
     #	following the pattern from rn5 with grp: "all" (no other grp)
     cat << '_EOF_' > doPhyloP.csh
 #!/bin/csh -fe
 set PHASTBIN = /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin
 set f = $1
 set file1 = $f:t
 set out = $2
 set cName = $f:t:r
 set n = $f:r:e
 set grp = $cwd:t
 set cons = /hive/data/genomes/dm6/bed/multiz27way/consPhyloP
 set tmp = $cons/tmp/$grp/$f
 rm -fr $tmp
 mkdir -p $tmp
 set ssSrc = "/hive/data/genomes/dm6/bed/multiz27way/cons/SS/result/$f"
 set useGrp = "$grp.mod"
 ln -s $cons/run.phyloP/$grp.mod $tmp
 pushd $tmp > /dev/null
 $PHASTBIN/phyloP --method LRT --mode CONACC --wig-scores --chrom $cName \
     -i SS $useGrp $ssSrc.ss > $file1.wigFix
 popd > /dev/null
 mkdir -p $out:h
 sleep 4
 mv $tmp/$file1.wigFix $out
 rm -fr $tmp
 rmdir --ignore-fail-on-non-empty $cons/tmp/$grp/$cName
 rmdir --ignore-fail-on-non-empty $cons/tmp/$grp
 rmdir --ignore-fail-on-non-empty $cons/tmp
 '_EOF_'
     # << happy emacs
     chmod +x doPhyloP.csh
 
     # Create list of chunks
     find ../../cons/SS/result -type f | grep ".ss$" \
 	| sed -e "s/.ss$//; s#^../../cons/SS/result/##" > ss.list
     # make sure the list looks good
     wc -l ss.list
     #   162 ss.list
 
     # Create template file
     #	file1 == $chr/$chunk/file name without .ss suffix
     cat << '_EOF_' > template
 #LOOP
 ../run.phyloP/doPhyloP.csh $(path1) {check out line+ wigFix/$(dir1)/$(file1).wigFix}
 #ENDLOOP
 '_EOF_'
     # << happy emacs
 
     ######################   Running all species  #######################
     # setup run for all species
     mkdir /hive/data/genomes/dm6/bed/multiz27way/consPhyloP/all
     cd /hive/data/genomes/dm6/bed/multiz27way/consPhyloP/all
     rm -fr wigFix
     mkdir wigFix
 
     gensub2 ../run.phyloP/ss.list single ../run.phyloP/template jobList
     # the -ram=8g will allow only one job per node to slow this down since
     #	it would run too fast otherwise.  Either run on one of the small
     #	klusters or use the -ram=8g on the para create
     para -ram=8g create jobList
     para try ... check ... etc ...
     para -maxJob=64 push
     para time > run.time
 # Completed: 158 of 158 jobs
 # CPU time in finished jobs:      45635s     760.58m    12.68h    0.53d  0.001 y
 # IO & Wait Time:                  2235s      37.26m     0.62h    0.03d  0.000 y
 # Average job time:                 303s       5.05m     0.08h    0.00d
 # Longest finished job:            6072s     101.20m     1.69h    0.07d
 # Submission to last job:          6125s     102.08m     1.70h    0.07d
 
     # make downloads
     mkdir downloads
 
     time (find ./wigFix -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
 	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
 	| gzip -c > downloads/phyloP27way.wigFix.gz)
     #   real    2m6.787s
 
     # check integrity of data with wigToBigWig
     time (zcat downloads/phyloP27way.wigFix.gz \
 	| wigToBigWig -verbose=2 stdin /hive/data/genomes/dm6/chrom.sizes \
 	phyloP27way.bw) > bigWig.log 2>&1
     egrep "real|VmPeak" bigWig.log
     # pid=523: VmPeak:       1372692 kB
     # real    1m13.231s
 
     bigWigInfo phyloP27way.bw
 # version: 4
 # isCompressed: yes
 # isSwapped: 0
 # primaryDataSize: 328,237,985
 # primaryIndexSize: 3,804,252
 # zoomLevels: 10
 # chromCount: 151
 # basesCovered: 117,294,910
 # mean: 1.206203
 # min: -8.216000
 # max: 7.073000
 # std: 1.669809
 
     #	encode those files into wiggle data
     time (zcat downloads/phyloP27way.wigFix.gz \
 	| wigEncode stdin phyloP27way.wig phyloP27way.wib)
     #   Converted stdin, upper limit 7.07, lower limit -8.22
     #   real    0m35.814s
 
     du -hsc *.wi?
     #   112M    phyloP27way.wib
     #   12M     phyloP27way.wig
     #   124M    total
 
     # Load gbdb and database with wiggle.
     ln -s `pwd`/phyloP27way.wib /gbdb/dm6/multiz27way/phyloP27way.wib
     hgLoadWiggle -pathPrefix=/gbdb/dm6/multiz27way dm6 \
 	phyloP27way phyloP27way.wig
 
     # use to set trackDb.ra entries for wiggle min and max
     # and verify table is loaded correctly
 
     wigTableStats.sh dm6 phyloP27way
     # db.table      min max mean count sumData stdDev viewLimits
     # dm6.phyloP27way -8.216 7.073 1.2062 117294910 1.41482e+08 1.66981
     # viewLimits=-7.14284:7.073
 
     calc 8.216\+7.7073
     #  8.216+7.7073 = 15.923300
 
     #  Create histogram to get an overview of all the data
     time nice -n +19 hgWiggle -doHistogram -db=dm6 \
 	-hBinSize=0.0161 -hBinCount=1000 -hMinVal=-8.3 -verbose=2 \
 	    phyloP27way > histogram.data 2>&1
     #	real    0m7.284s
 
     # find out the range for the 2:5 graph
     grep -v chrom histogram.data | grep "^[0-9]" | ave -col=5 stdin
     # Q1 0.000012
     # median 0.000255
     # Q3 0.001728
     # average 0.001134
     # min 0.000000
     # max 0.009261
     # count 882
     # total 0.999997
     # standard deviation 0.001655
 
     #	create plot of histogram:
 
     cat << '_EOF_' | gnuplot > histo.png
 set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
 set size 1.4, 0.8
 set key left box
 set grid noxtics
 set grid ytics
 set title " D. melanogaster dm6 Histogram phyloP27way track"
 set xlabel " phyloP27way score"
 set ylabel " Relative Frequency"
 set y2label " Cumulative Relative Frequency (CRF)"
 set y2range [0:1]
 set y2tics
 set yrange [0:0.007]
 set xrange [-3:6]
 
 plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
         "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
 '_EOF_'
     #	<< happy emacs
 
     display histo.png &
 
 #########################################################################
 # Construct download files for 27-way (DONE - 2014-11-20 - Hiram)
     mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/dm6/multiz27way
     mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/dm6/multiz27way/maf
     mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/dm6/multiz27way/alignments
     mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/dm6/phastCons27way
     mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/dm6/phastCons27way/dm6.27way.phastCons
     mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/dm6/phyloP27way
     mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/dm6/phyloP27way/dm6.27way.phyloP27way
 
     mkdir /hive/data/genomes/dm6/bed/multiz27way/downloads
     cd /hive/data/genomes/dm6/bed/multiz27way/downloads
     mkdir multiz27way phastCons27way phyloP27way
     cd multiz27way
     mkdir maf alignments
     cd maf
     time rsync -a -P ../../../anno/result/ ./
     #   real    xxx
     time gzip *.maf
     #   real    real    8m46.494s
     time md5sum *.maf.gz > md5sum.txt
     #   real    0m6.206s
 
     ln -s `pwd`/*.maf.gz `pwd`/md5sum.txt \
         /usr/local/apache/htdocs-hgdownload/goldenPath/dm6/multiz27way/maf
     cd ..
     du -hsc maf ../../anno/result/
     # 1.1G    maf
     # 7.4G    ../../anno/result/
     # 8.5G    total
 
     grep TREE ../../4d/all.mod | sed -e 's/TREE: //' \
        | ~/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \
           > dm6.27way.nh
 
    ~/kent/src/hg/utils/phyloTrees/commonNames.sh dm6.27way.nh \
        | ~/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \
           > dm6.27way.commonNames.nh
    ~/kent/src/hg/utils/phyloTrees/scientificNames.sh dm6.27way.nh \
        | ~/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \
           > dm6.27way.scientificNames.nh
     md5sum *.gz *.nh > md5sum.txt
 
     ln -s `pwd`/*.nh `pwd`/*.txt `pwd`/*.maf.gz \
         /usr/local/apache/htdocs-hgdownload/goldenPath/dm6/multiz27way
 
     #####################################################################
     cd /hive/data/genomes/dm6/bed/multiz27way/downloads/phastCons27way
     ln -s ../../cons/all/downloads/phastCons27way.wigFix.gz .
 
     ln -s ../../cons/all/all.mod dm6.27way.phastCons.mod
     ln -s ../../cons/all/phastCons27way.bw dm6.27way.phastCons.bw
     time md5sum *.mod *.bw *.gz > md5sum.txt
     #  real    0m4.631s
 
     ln -s `pwd`/*.gz `pwd`/*.mod `pwd`/*.bw `pwd`/*.txt \
   /usr/local/apache/htdocs-hgdownload/goldenPath/dm6/phastCons27way/
 
     # obtain the README.txt from hg38/phastCons7way and update for this
     #   situation, from:
     # /hive/data/genomes/hg38/bed/multiz7way/downloads/phastCons7way/README.txt
     ln -s `pwd`/*.mod `pwd`/*.bw `pwd`/*.txt \
       /usr/local/apache/htdocs-hgdownload/goldenPath/dm6/phastCons27way
 
     #####################################################################
     cd /hive/data/genomes/dm6/bed/multiz27way/downloads/phyloP27way
     mkdir dm6.27way.phyloP27way
     cd dm6.27way.phyloP27way
     ln -s ../../../consPhyloP/all/downloads/*.gz .
     time md5sum *.gz > md5sum.txt &
     # real    3m37.827s
 
     ln -s `pwd`/*.gz `pwd`/md5sum.txt \
       /usr/local/apache/htdocs-hgdownload/goldenPath/dm6/phyloP27way/dm6.27way.phyloP27way
 
     cd ..
     ln -s ../../consPhyloP/run.phyloP/all.mod dm6.27way.phyloP27way.mod
 
     ln -s ../../consPhyloP/all/phyloP27way.bw dm6.27way.phyloP27way.bw
 
     time md5sum *.mod *.bw > md5sum.txt &
     #  real    4m44.724s
 
     # obtain the README.txt from hg38/phyloP7way and update for this
     #   situation, from:
     # /hive/data/genomes/hg38/bed/multiz7way/downloads/phyloP7way/README.txt
     ln -s `pwd`/*.mod `pwd`/*.bw `pwd`/*.txt \
       /usr/local/apache/htdocs-hgdownload/goldenPath/dm6/phyloP27way
 
     ############################################################################
     # creating upstream mafs (DONE - 2014-11-20 - Hiram)
     ssh hgwdev
 
     mkdir /hive/data/genomes/dm6/bed/multiz27way/downloads/multiz27way
     cd /hive/data/genomes/dm6/bed/multiz27way/downloads/multiz27way
     # run this bash script
     cat << '_EOF_' > mkUpstream.sh
 #!/bin/sh
 DB=dm6
 GENE=refGene
 NWAY=multiz27way
 export DB GENE
 
 for S in 1000 2000 5000
 do
     echo "making upstream${S}.${GENE}.maf"
     echo "featureBits ${DB} ${GENE}:upstream:${S} -fa=/dev/null -bed=stdout"
     featureBits ${DB} ${GENE}:upstreamAll:${S} -fa=/dev/null -bed=stdout \
         | perl -wpe 's/_up[^\t]+/\t0/' | sort -k1,1 -k2,2n \
         | mafFrags ${DB} ${NWAY} stdin stdout \
                 -orgs=/hive/data/genomes/${DB}/bed/${NWAY}/species.list.txt \
         | gzip -c > upstream${S}.${GENE}.maf.gz
     echo "done upstream${S}.${GENE}.maf.gz"
 done
 '_EOF_'
     # << happy emacs
     chmod +x mkUpstream.sh
     time ./mkUpstream.sh
     #   real    69m25.852s
 
     # FIXUP/VERIFY the genbank.conf file to indicate this gene choice for the
     #   upstream automatic generation
     ~/kent/src/hg/makeDb/genbank/etc/genbank.conf
 # dm6.upstreamGeneTbl = refGene
 # dm6.upstreamMaf = multiz27way /hive/data/genomes/dm6/bed/multiz27way/species.list.txt
 
     git commit -m "dm6 GeneTbl upstreamMaf set to 27-way refs #4049" genbank.conf
     git push
     make etc-update
 
 #############################################################################
 # hgPal downloads (DONE - 2014-11-20 - Hiram)
 #   FASTA from 27-way for refGene
 
     ssh hgwdev
     screen -S dm6HgPal
     mkdir /hive/data/genomes/dm6/bed/multiz27way/pal
     cd /hive/data/genomes/dm6/bed/multiz27way/pal
     cat ../species.list | tr '[ ]' '[\n]' > order.list
 
     export mz=multiz27way
     export gp=refGene
     export db=dm6
     export I=0
     mkdir exonAA exonNuc
     for C in `sort -nk2 ../../../chrom.sizes | cut -f1`
     do
         I=`echo $I | awk '{print $1+1}'`
 	echo "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.list stdout | gzip -c > exonNuc/$C.exonNuc.fa.gz &"
 	echo "mafGene -chrom=$C -exons $db $mz $gp order.list stdout | gzip -c > exonAA/$C.exonAA.fa.gz &"
         if [ $I -gt 6 ]; then
             echo "date"
             echo "wait"
             I=0
         fi
     done > $gp.jobs
     echo "date" >> $gp.jobs
     echo "wait" >> $gp.jobs
 
     time ./$gp.jobs > $gp.jobs.log 2>&1
     #   real    20m2.909s
 
     time zcat exonAA/*.gz | gzip -c > $gp.$mz.exonAA.fa.gz
     #   real    0m32.720s
 
     time zcat exonNuc/*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz
     #   real    5m53.612s
 
     # we're only distributing exons at this time
     export pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
     mkdir -p $pd
     md5sum *.fa.gz > md5sum.txt
     ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
     ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
     ln -s `pwd`/md5sum.txt $pd/
 
     rm -rf exonAA exonNuc
 
 #############################################################################
 # make refGenePfam table (Done - 2016-09-19 - Brian Raney)
 # Pfam in Refseq Genes
 
 export dir=/cluster/data/dm6/bed/pfam
 mkdir -p $dir
 cd $dir
 rm -rf  splitProt
 mkdir  splitProt
 refSeqGet -protSeqs=refSeq.faa dm6
 #20090154 bases (15768554 N's 4321600 real 4321600 upper 0 lower) in 30430 sequences in 1 files
 refSeqGet -rnaSeqs=refSeq.fa dm6
 hgsql dm6 -Ne "select * from refSeqAli" | cut -f 2- > refGene.psl
 
 faSplit sequence refSeq.faa 10000 splitProt/
 mkdir -p result
 ls -1 splitProt > prot.list
 # /hive/data/outside/pfam/hmmpfam -E 0.1 /hive/data/outside/pfam/current/Pfam_fs \
 cat << '_EOF_' > doPfam
 #!/bin/csh -ef
 /hive/data/outside/pfam/Pfam29.0/PfamScan/hmmer-3.1b2-linux-intel-x86_64/binaries/hmmsearch   --domtblout /scratch/tmp/pfam.$2.pf --noali -o /dev/null -E 0.1 /hive/data/outside/pfam/Pfam29.0/Pfam-A.hmm     splitProt/$1
 mv /scratch/tmp/pfam.$2.pf $3
 _EOF_
     # << happy emacs
 chmod +x doPfam
 cat << '_EOF_' > template
 #LOOP
 doPfam $(path1) $(root1) {check out line+ result/$(root1).pf}
 #ENDLOOP
 _EOF_
 gensub2 prot.list single template jobList
 
 ssh ku "cd $dir; para make jobList"
 ssh ku "cd $dir; para time > run.time"
 cat run.time
 
 # Completed: 9690 of 9690 jobs
 # CPU time in finished jobs:    2667749s   44462.49m   741.04h   30.88d  0.085
 # y
 # IO & Wait Time:                148194s    2469.89m    41.16h    1.72d  0.005
 # y
 # Average job time:                 291s       4.84m     0.08h    0.00d
 # Longest finished job:             511s       8.52m     0.14h    0.01d
 # Submission to last job:          8704s     145.07m     2.42h    0.10d
 #
 # Make up pfamDesc.tab by converting pfam to a ra file first
 cat << '_EOF_' > makePfamRa.awk
 /^NAME/ {print}
 /^ACC/ {print}
 /^DESC/ {print}
 /^TC/ {print $1,$3; printf("\n");}
 _EOF_
 awk -f makePfamRa.awk  /hive/data/outside/pfam/Pfam29.0/Pfam-A.hmm  > pfamDesc.ra
 raToTab -cols=ACC,NAME,DESC,TC pfamDesc.ra stdout |  awk -F '\t' '{printf("%s\t%s\t%s\t%g\n", $1, $2, $3, $4);}' | sort > pfamDesc.tab
 
 # Convert output to tab-separated file.
 cd $dir
 catDir result | sed '/^#/d' > allResults.tab
 awk 'BEGIN {OFS="\t"} { print $5,$1,$18-1,$19,$4,$14}' allResults.tab | sort > allUcscPfam.tab
 join  -t $'\t' -j 1  allUcscPfam.tab pfamDesc.tab | tawk '{if ($6 > $9) print $2, $3, $4, $5, $6, $1}' > ucscPfam.tab
 
 cd $dir
 
 cat refSeq.faa | sed 's/\.[0-9]*//' > refSeq.noVers.faa
 cat refSeq.fa | sed 's/\.[0-9]*//' > refSeq.noVers.fa
 sort ucscPfam.tab | sed 's/\.[0-9]*//' > sortPfam.tab
 faCount refSeq.noVers.faa | awk '{print $1, $2}' > protein.sizes
 
 hgsql dm6 -Ne "select protAcc, mrnaAcc from refLink, refGene where mrnaAcc=refGene.name" | awk '{if (NF == 2) print}' |sort > sortMap.txt
 while read prot mrna
 do
 echo ./doBlat $prot $mrna
 done > blat.jobs < sortMap.txt
 
 ssh ku  "cd $dir; para make blat.jobs"
 
 cat results/* > proteinToMrna.psl
 tawk '{print $10, $16, $17}'  proteinToMrna.psl | sort > sortCdsOut.tab
 awk '{print $10, $11}' refGene.psl > gene.sizes
 join sortCdsOut.tab sortPfam.tab |  join /dev/stdin sortMap.txt | awk '{print $9, $2 + 3 * $4, $2  + 3 * $5, $6}' | bedToPsl gene.sizes stdin domainToGene.psl
 
 pslMap domainToGene.psl refGene.psl stdout | pslToBed stdin stdout | bedOrBlocks -useName stdin domainToGenome.bed
 
 hgLoadBed dm6 refGenePfam domainToGenome.bed
 
 ##############################################################################
 # Crispr track. See ../crisprTrack/README.txt (2016-09-15 max)
 # Command: doCrispr.sh dm6 ensGene
 
 ##############################################################################
 # LASTZ A. gambiae AnoGam3 (DONE 2017-12-20 - Hiram)
     screen -S anoGam3-dm6
     mkdir /hive/data/genomes/dm6/bed/lastzAnoGam3.2017-12-20
     cd /hive/data/genomes/dm6/bed/lastzAnoGam3.2017-12-20
 
     printf '# D. melanogaster vs. A. gambiae
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=4000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET - D. melanogaster
 SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
 SEQ1_CHUNK=5000000
 SEQ1_LAP=10000
 SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes
 
 # QUERY - A. gambiae
 SEQ2_DIR=/hive/data/genomes/anoGam3/anoGam3.2bit
 SEQ2_CHUNK=5000000
 SEQ2_LAP=10000
 SEQ2_LEN=/hive/data/genomes/anoGam3/chrom.sizes
 
 BASE=/hive/data/genomes/dm6/bed/lastzAnoGam3.2017-12-20
 TMPDIR=/dev/shm
 ' > DEF
 
     time (doBlastzChainNet.pl `pwd`/DEF \
 	-syntenicNet -fileServer=hgwdev \
 	-chainMinScore=3000  -chainLinearGap=loose \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku) > do.log 2>&1
     # real    8m50.927s
 
     cat fb.dm6.chainAnoGam3Link.txt
     # 19641460 bases of 142573024 (13.776%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev \
 	-buildDir=`pwd` dm6 anoGam3) > rBest.log 2>&1 &
     # real    44m34.312s
 
     cat fb.dm6.chainRBestAnoGam3Link.txt
     # 15964959 bases of 142573024 (11.198%) in intersection
 
     #   running the swap
     mkdir /hive/data/genomes/anoGam3/bed/blastz.dm6.swap
     cd /hive/data/genomes/anoGam3/bed/blastz.dm6.swap
     time (doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/dm6/bed/lastzAnoGam3.2017-12-20/DEF \
         -swap -syntenicNet -chainMinScore=3000 -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku) > swap.log 2>&1
     # real    2m0.901s
 
     cat fb.anoGam3.chainDm6Link.txt
     # 20168390 bases of 264424304 (7.627%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev \
 	-buildDir=`pwd` anoGam3 dm6) > rBest.log 2>&1 &
     # real    46m7.441s
 
     cat fb.anoGam3.chainRBestDm6Link.txt
     # 16040573 bases of 264424304 (6.066%) in intersection
 
 ###########################################################################
 # LASTZ D. simulans DroSim2 (DONE 2018-09-12 - Hiram)
     screen -S droSim2-dm6
     mkdir /hive/data/genomes/dm6/bed/lastzDroSim2.2018-09-12
     cd /hive/data/genomes/dm6/bed/lastzDroSim2.2018-09-12
 
     cat << '_EOF_' > DEF
     printf '# D. melanogaster vs. D. simulans
 BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
 BLASTZ_H=2000
 BLASTZ_Y=3400
 BLASTZ_L=4000
 BLASTZ_K=2200
 BLASTZ_Q=/cluster/data/blastz/HoxD55.q
 
 # TARGET - D. melanogaster
 SEQ1_DIR=/hive/data/genomes/dm6/dm6.2bit
 SEQ1_CHUNK=5000000
 SEQ1_LAP=10000
 SEQ1_LEN=/hive/data/genomes/dm6/chrom.sizes
 
 # QUERY - D. simulans
 SEQ2_DIR=/hive/data/genomes/droSim2/droSim2.2bit
 SEQ2_CHUNK=5000000
 SEQ2_LAP=10000
 SEQ2_LEN=/hive/data/genomes/droSim2/chrom.sizes
 
 BASE=/hive/data/genomes/dm6/bed/lastzDroSim2.2018-09-12
 TMPDIR=/dev/shm
 ' > DEF
 
     time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
         -chainMinScore=3000  -chainLinearGap=loose \
         -syntenicNet -fileServer=hgwdev \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku) > do.log 2>&1
 
     # real    39m35.348s
     cat fb.dm6.chainDroSim2Link.txt
     # 117723253 bases of 142573024 (82.570%) in intersection
 
     cat fb.dm6.chainSynDroSim2Link.txt
     # 108599452 bases of 142573024 (76.171%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` dm6 droSim2) > rbest.log 2>&1 &
     # real    6m47.792s
 
     cat fb.dm6.chainRBest.DroSim2.txt
     # 110510644 bases of 142573024 (77.512%) in intersection
 
     #   running the swap
     mkdir /hive/data/genomes/droSim2/bed/blastz.dm6.swap
     cd /hive/data/genomes/droSim2/bed/blastz.dm6.swap
     time (doBlastzChainNet.pl -verbose=2 \
       /hive/data/genomes/dm6/bed/lastzDroSim2.2018-09-12/DEF \
         -swap -chainMinScore=3000 -chainLinearGap=loose \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > swap.log 2>&1 &
     # real    4m32.910s
 
     cat fb.droSim2.chainDm6Link.txt
     # 115776543 bases of 124963774 (92.648%) in intersection
     cat fb.droSim2.chainSynDm6Link.txt
     # 108004181 bases of 124963774 (86.428%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` droSim2 dm6) > rbest.log 2>&1 &
     # real    7m22.120s
 
     cat fb.droSim2.chainRBest.Dm6.txt
     # 110784245 bases of 124963774 (88.653%) in intersection
 
 ###########################################################################
+
+# ReMap refs #28960 (2022-04-13 Gerardo)
+cd /hive/data/genomes/dm6/bed
+mkdir reMap
+cd reMap
+wget https://remap.univ-amu.fr/storage/public/hubReMap2022/dm6/bigBed/remap2022_all_macs2_dm6_v1_0.bb
+mv remap2022_all_macs2_dm6_v1_0.bb reMap2022.bb
+wget https://remap.univ-amu.fr/storage/public/hubReMap2022/dm6/bigBed/dm6.bw
+mv dm6.bw reMapDensity2022.bw
+cd /gbdb/dm6
+mkdir reMap
+cd reMap
+ln -s /hive/data/genomes/dm6/bed/reMap/reMap2022.bb
+ln -s /hive/data/genomes/dm6/bed/reMap/reMapDensity2022.bw
+cd ~/kent/src/hg/makeDb/trackDb/drosophila/dm6/
+wget https://remap.univ-amu.fr/storage/public/hubReMap2022/remapHub2022.html
+cp remapHub2022.html  reMap.html
+cd ~/kent/src/hg/makeDb/trackDb
+curl https://remap.univ-amu.fr/storage/public/hubReMap2022/dm6/trackDb.txt > drosophila/dm6/reMap.ra
+vi drosophila/dm6/reMap.ra
+vi drosophila/dm6/trackDb.ra 
+vi drosophila/dm6/reMap.html