f7577fc37cbcba25eac796f12e81ac0235f77c0c
jeltje.van.baren
  Tue Jan 21 11:03:16 2025 -0800
fixing wrongly named copy of alphaMissense.txt

diff --git src/hg/makeDb/doc/hg38/hg38.txt src/hg/makeDb/doc/hg38/hg38.txt
index 20b6e2a715a..1f3dc1234db 100644
--- src/hg/makeDb/doc/hg38/hg38.txt
+++ src/hg/makeDb/doc/hg38/hg38.txt
@@ -1,14 +1,7390 @@
+# for emacs: -*- mode: sh; -*-
+
+# This file describes how we made the browser database on
+# NCBI build 38 (December 2013 freeze) aka:
+#	GRCh38 - Genome Reference Consortium Human Reference 38
+#	Assembly Accession: GCA_000001405.2
+
+#############################################################################
+## Download sequence - DONE - 2013-12-24
+    mkdir /hive/data/genomes/hg38
+    mkdir /hive/data/genomes/hg38/genbank
+    cd /hive/data/genomes/hg38/genbank
+    time rsync -a -P rsync://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Homo_sapiens/GRCh38/ ./
+# sent 19643 bytes  received 4914689807 bytes  4490369.53 bytes/sec
+# total size is 4914019581  speedup is 1.00
+
+# real    18m14.497s
+
+#############################################################################
+## convert to UCSC names - DONE - 2013-12-24
+#  with this release, NCBI has adopted a naming convention that is similar
+#  to UCSC.  The delivered sequence with these names can be found in:
+#  /hive/data/genomes/hg38/genbank/seqs_for_alignment_pipelines/
+#
+#  The following scripts reproduce this naming scheme from the separate
+#  files in the release
+#
+    mkdir /hive/data/genomes/hg38/ucsc
+    cat << '_EOF_' > ucscCompositeAgp.pl
+#!/bin/env perl
+
+use strict;
+use warnings;
+
+my %accToChr;
+
+open (FH, "<../genbank/Primary_Assembly/assembled_chromosomes/chr2acc") or
+        die "can not read Primary_Assembly/assembled_chromosomes/chr2acc";
+while (my $line = <FH>) {
+    next if ($line =~ m/^#/);
+    chomp $line;
+    my ($chrN, $acc) = split('\s+', $line);
+    $accToChr{$acc} = $chrN;
+}
+close (FH);
+
+foreach my $acc (keys %accToChr) {
+    my $chrN =  $accToChr{$acc};
+    print "$acc $accToChr{$acc}\n";
+    open (FH, "zcat ../genbank/Primary_Assembly/assembled_chromosomes/AGP/chr${chrN}.comp.agp.gz|") or die "can not read chr${chrN}.comp.agp.gz";
+    open (UC, ">chr${chrN}.agp") or die "can not write to chr${chrN}.agp";
+    while (my $line = <FH>) {
+        if ($line =~ m/^#/) {
+            print UC $line;
+        } else {
+            $line =~ s/^$acc/chr${chrN}/;
+            print UC $line;
+        }
+    }
+    close (FH);
+    close (UC);
+    open (FH, "zcat ../genbank/Primary_Assembly/assembled_chromosomes/FASTA/chr${chrN}.fa.gz|") or die "can not read chr${chrN}.fa.gz";
+    open (UC, ">chr${chrN}.fa") or die "can not write to chr${chrN}.fa";
+    while (my $line = <FH>) {
+        if ($line =~ m/^>/) {
+            printf UC ">chr${chrN}\n";
+        } else {
+            print UC $line;
+        }
+    }
+    close (FH);
+    close (UC);
+}
+'_EOF_'
+    # << happy emacs
+    chmod +x ucscCompositeAgp.pl
+
+    cat << '_EOF_' > unlocalized.pl
+#!/bin/env perl
+
+use strict;
+use warnings;
+
+my %accToChr;
+my %chrNames;
+
+open (FH, "<../genbank/Primary_Assembly/unlocalized_scaffolds/unlocalized.chr2scaf") or
+        die "can not read Primary_Assembly/unlocalized_scaffolds/unlocalized.chr2scaf";
+while (my $line = <FH>) {
+    next if ($line =~ m/^#/);
+    chomp $line;
+    my ($chrN, $acc) = split('\s+', $line);
+    $acc =~ s/\./v/;
+    $accToChr{$acc} = $chrN;
+    $chrNames{$chrN} += 1;
+}
+close (FH);
+
+foreach my $chrN (keys %chrNames) {
+    my $agpFile =  "../genbank/Primary_Assembly/unlocalized_scaffolds/AGP/chr$chrN.unlocalized.scaf.agp.gz";
+    my $fastaFile =  "../genbank/Primary_Assembly/unlocalized_scaffolds/FASTA/chr$chrN.unlocalized.scaf.fa.gz";
+    open (FH, "zcat $agpFile|") or die "can not read $agpFile";
+    open (UC, ">chr${chrN}_random.agp") or die "can not write to chr${chrN}_random.agp";
+    while (my $line = <FH>) {
+        if ($line =~ m/^#/) {
+            print UC $line;
+        } else {
+            chomp $line;
+            my (@a) = split('\t', $line);
+            my $acc = $a[0];
+            $acc =~ s/\./v/;
+            die "ERROR: chrN $chrN not correct for $acc"
+                if ($accToChr{$acc} ne $chrN);
+            my $ucscName = "chr${chrN}_${acc}_random";
+            printf UC "%s", $ucscName;
+            for (my $i = 1; $i < scalar(@a); ++$i) {
+                printf UC "\t%s", $a[$i];
+            }
+            printf UC "\n";
+        }
+    }
+    close (FH);
+    close (UC);
+    printf "chr%s\n", $chrN;
+    open (FH, "zcat $fastaFile|") or die "can not read $fastaFile";
+    open (UC, ">chr${chrN}_random.fa") or die "can not write to chr${chrN}_random.fa";
+    while (my $line = <FH>) {
+        if ($line =~ m/^>/) {
+            chomp $line;
+            my $acc = $line;
+            $acc =~ s/.*gb\|//;
+            $acc =~ s/. Homo.*//;
+            $acc =~ s/\./v/;
+            die "ERROR: chrN $chrN not correct for $acc"
+                if ($accToChr{$acc} ne $chrN);
+            my $ucscName = "chr${chrN}_${acc}_random";
+            printf UC ">$ucscName\n";
+        } else {
+            print UC $line;
+        }
+    }
+    close (FH);
+    close (UC);
+}
+'_EOF_'
+    # << happy emacs
+    chmod +x unlocalized.pl
+
+    cat << '_EOF_' > unplaced.pl
+#!/bin/env perl
+
+use strict;
+use warnings;
+
+my $agpFile =  "../genbank/Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz";
+my $fastaFile =  "../genbank/Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz";
+open (FH, "zcat $agpFile|") or die "can not read $agpFile";
+open (UC, ">chrUn.agp") or die "can not write to chrUn.agp";
+while (my $line = <FH>) {
+    if ($line =~ m/^#/) {
+        print UC $line;
+    } else {
+        $line =~ s/\./v/;
+        printf UC "chrUn_%s", $line;
+    }
+}
+close (FH);
+close (UC);
+
+open (FH, "zcat $fastaFile|") or die "can not read $fastaFile";
+open (UC, ">chrUn.fa") or die "can not write to chrUn.fa";
+while (my $line = <FH>) {
+    if ($line =~ m/^>/) {
+        chomp $line;
+        $line =~ s/.*gb\|//;
+        $line =~ s/. Homo.*//;
+        $line =~ s/\./v/;
+        printf UC ">chrUn_$line\n";
+    } else {
+        print UC $line;
+    }
+}
+close (FH);
+close (UC);
+'_EOF_'
+    # << happy emacs
+    chmod +x unplaced.pl
+
+    cat << '_EOF_' > altSequence.pl
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+use File::Basename;
+
+open (AG, ">chrAlt.agp") or die "can not write to chrAlt.agp";
+open (FA, ">chrAlt.fa") or die "can not write to chrAlt.fa";
+open (FH, "find ../genbank/ALT* -type f | grep alt_scaffold_placement.txt|") or die "can not find alt_scaffold_placement.txt files";
+while (my $file = <FH>) {
+  chomp $file;
+  my $dirName = dirname($file);
+  my $agpFile = "$dirName/AGP/alt.scaf.agp.gz";
+  my $fastaFile = "$dirName/FASTA/alt.scaf.fa.gz";
+  # key is genbank acc name, value is UCSC chr name
+  my %nameDelta;
+#  printf STDERR "# %s\n", $file;
+  open (AL, "<$file") or die "can not read $file";
+  while (my $line = <AL>) {
+     next if ($line =~ m/^#/);
+     chomp $line;
+     my ($alt_asm_name, $prim_asm_name, $alt_scaf_name, $alt_scaf_acc,
+          $parent_type, $parent_name, $parent_acc, $region_name, $ori,
+           $alt_scaf_start, $alt_scaf_stop, $parent_start, $parent_stop,
+            $alt_start_tail, $alt_stop_tail) = split('\t', $line);
+     my $ucscAcc = $alt_scaf_acc;
+     $ucscAcc =~ s/\./v/;
+     my $ucscName = sprintf("chr%s_%s_alt", $parent_name, $ucscAcc);
+     printf "%s %s\n", $alt_scaf_acc, $ucscName;
+     if (exists ($nameDelta{$alt_scaf_acc})) {
+         die "duplicate name incorrect ? $alt_scaf_acc $nameDelta{$alt_scaf_acc} ne $ucscName" if ($nameDelta{$alt_scaf_acc} ne $ucscName);
+     } else {
+         $nameDelta{$alt_scaf_acc} = $ucscName;
+     }
+  }
+  close (AL);
+  open (AL, "zcat $agpFile|") or die "can not read $agpFile";
+  while (my $line = <AL>) {
+     if ($line =~ m/^#/) {
+       print AG "$line";
+     } else {
+       my ($acc, $rest) = split('\t', $line, 2);
+       die "can not find ucsc name for $acc" if (!exists($nameDelta{$acc}));
+       printf AG "%s\t%s", $nameDelta{$acc}, $rest;
+     }
+  }
+  close (AL);
+  open (AL, "zcat $fastaFile|") or die "can not read $fastaFile";
+  while (my $line = <AL>) {
+     chomp $line;
+     if ($line =~ m/^>/) {
+       $line =~ s/.*gb.//;
+       $line =~ s/. Homo.*//;
+       die "can not find ucsc name for $line" if (!exists($nameDelta{$line}));
+       printf FA ">%s\n", $nameDelta{$line};
+     } else {
+       printf FA "%s\n", $line;
+     }
+  }
+  close (AL);
+}
+close (FH);
+close (AG);
+close (FA);
+'_EOF_'
+    # << happy emacs
+    chmod +x altSequence.pl
+
+    ./ucscCompositeAgp.pl
+    ./unlocalized.pl
+    ./unplaced.pl
+    ./altSequence.pl
+
+    # temporarily verify the fasta and AGP are complete and compatible
+    faToTwoBit chr*.fa hg38.test.2bit
+    cat chr*.agp > hg38.agp
+    checkAgpAndFa hg38.agp hg38.test.2bit 2>&1 | tail -1
+# All AGP and FASTA entries agree - both files are valid
+
+    rm -f hg38.agp hg38.test.2bit
+
+    # comparing faCounts of this 2bit file and the sequences delivered
+    # in genbank/seqs_for_alignment_pipelines/
+    # result in the exact same sequence
+
+#############################################################################
+## initial db build - DONE - 2013-12-24 - Hiram
+
+    cd /hive/data/genomes/hg38
+    cat << '_EOF_' > hg38.config.ra
+# Config parameters for makeGenomeDb.pl:
+db hg38
+scientificName Homo sapiens
+commonName Human
+assemblyDate Dec. 2013
+assemblyLabel GRCh38 Genome Reference Consortium Human Reference 38 (GCA_000001405.2)
+assemblyShortLabel GRCh38
+orderKey 13
+mitoAcc none
+fastaFiles /hive/data/genomes/hg38/ucsc/chr*.fa
+agpFiles /hive/data/genomes/hg38/ucsc/chr*.agp
+# qualFiles /dev/null
+dbDbSpeciesDir human
+photoCreditURL http://www.cbse.ucsc.edu/
+photoCreditName Graphic courtesy of CBSE
+ncbiGenomeId 51
+ncbiAssemblyId 883148
+ncbiAssemblyName GRCh38
+ncbiBioProject 31257
+genBankAccessionID GCA_000001405.2
+taxId   9606
+'_EOF_'
+    # << happy emacs
+
+    # step wise to first verify AGP and Fasta files
+    time makeGenomeDb.pl -stop=agp hg38.config.ra > agp.log 2>&1
+
+    # looking good, continue:
+    time makeGenomeDb.pl -continue=db hg38.config.ra > db.log 2>&1
+
+    # add the files produced by the trackDb build to the source tree
+
+    # this path is fixed in the makeGenomeDb.pl for next time
+    # honor new convention for bbi location files:
+    cd /gbdb/hg38/bbi
+    mkdir gc5BaseBw
+    mv gc5Base.bw gc5BaseBw
+    cd gc5BaseBw
+    # before
+    hgsql -e 'select * from gc5BaseBw;' hg38
+# +---------------------------+
+# | fileName                  |
+# +---------------------------+
+# | /gbdb/hg38/bbi/gc5Base.bw |
+# +---------------------------+
+    # and fixed
+    hgBbiDbLink hg38 gc5BaseBw `pwd`/gc5Base.bw
+    hgsql -e 'select * from gc5BaseBw;' hg38
+# +-------------------------------------+
+# | fileName                            |
+# +-------------------------------------+
+# | /gbdb/hg38/bbi/gc5BaseBw/gc5Base.bw |
+# +-------------------------------------+
+
+#############################################################################
+## RepeatMasker with CrossMatch - DONE - 2013-12-24,27 - Hiram
+    mkdir /hive/data/genomes/hg38/bed/repeatMaskerCM
+    cd /hive/data/genomes/hg38/bed/repeatMaskerCM
+    # running this step wise so it can be loaded into its own table
+    time doRepeatMasker.pl -stop=mask -bigClusterHub=ku \
+       -workhorse=hgwdev -dbHost=hgwdev -buildDir=`pwd` hg38 > mask.log 2>&1
+    # real    3443m13.026s
+# RepeatMasker version June 20 2013 open-4.0.3
+# Search Engine: cross-match version 1.090518
+# RepeatMasker Database: 20130422
+
+    # take the install script from this -debug run and alter it to load
+    # the table into rmskCM
+    time doRepeatMasker.pl -continue=install -stop=install -debug \
+       -bigClusterHub=ku -workhorse=hgwdev -dbHost=hgwdev -buildDir=`pwd` hg38
+    cat fb.hg38.rmskCM.txt
+    # 1586326530 bases of 3209286105 (49.429%) in intersection
+
+    # profile of repeat elements:
+#  1852545 rmskClass/SINE.tab
+#  1570523 rmskClass/LINE.tab
+#   748597 rmskClass/LTR.tab
+#   703682 rmskClass/Simple_repeat.tab
+#   499108 rmskClass/DNA.tab
+#   102856 rmskClass/Low_complexity.tab
+#     7962 rmskClass/Satellite.tab
+#     5750 rmskClass/Retroposon.tab
+#     5667 rmskClass/LTR?.tab
+#     5622 rmskClass/Unknown.tab
+#     4516 rmskClass/snRNA.tab
+#     3294 rmskClass/DNA?.tab
+#     2026 rmskClass/tRNA.tab
+#     1840 rmskClass/rRNA.tab
+#     1784 rmskClass/RC.tab
+#     1672 rmskClass/srpRNA.tab
+#     1420 rmskClass/scRNA.tab
+#      704 rmskClass/RNA.tab
+#      411 rmskClass/RC?.tab
+#       38 rmskClass/SINE?.tab
+
+    # using this RM result with trfMask for the final masked sequence
+    cd /hive/data/genomes/hg38
+    twoBitMask hg38.rmskCM.2bit -add bed/simpleRepeat/trfMask.bed hg38.2bit
+    twoBitToFa hg38.2bit stdout | faSize stdin > faSize.hg38.2bit.txt
+# 3209286105 bases (159970322 N's 3049315783 real 1460684798 upper 1588630985 lower) in 455 sequences in 1 files
+# %49.50 masked total, %52.10 masked real
+
+    featureBits -countGaps hg38 rmskCM '!rmskHmmer' -bed=crossMatchUnique.bed
+    # 24868153 bases of 3209286105 (0.775%) in intersection
+    hgLoadBed hg38 crossMatchUnique crossMatchUnique.bed
+    # Read 2352219 elements of size 4 from crossMatchUnique.bed
+
+#############################################################################
+## repeating RepeatMasker Blastn run (DONE - 2014-01-07 - Hiram)
+    mkdir /hive/data/genomes/hg38/bed/rmskBlastn
+    cd /hive/data/genomes/hg38/bed/rmskBlastn
+
+    time $HOME/kent/src/hg/utils/automation/doRepeatMasker.pl \
+      -useRMBlastn -bigClusterHub=ku -workhorse=hgwdev -dbHost=hgwdev \
+        -stop=mask -buildDir=`pwd` hg38 > mask.log
+    # real    203m33.670s
+
+# 3209286105 bases (159970322 N's 3049315783 real 1491207906 upper 1558107877 lower) in 455 sequences in 1 files
+# %48.55 masked total, %51.10 masked real
+
+    # install step with debug so the script can be altered to load into
+    # a specific rmskBlastn table:
+
+    $HOME/kent/src/hg/utils/automation/doRepeatMasker.pl \
+      -useRMBlastn -bigClusterHub=ku -workhorse=hgwdev -dbHost=hgwdev \
+        -continue=install -debug -buildDir=`pwd` hg38
+
+#############################################################################
+## repeating RepeatMasker cross-match run (DONE - 2014-01-07 - Hiram)
+    mkdir /hive/data/genomes/hg38/bed/rmskCM
+    cd /hive/data/genomes/hg38/bed/rmskCM
+
+    # missed recording stderr ....  forgot the 2>&1
+    time $HOME/kent/src/hg/utils/automation/doRepeatMasker.pl \
+      -bigClusterHub=ku -workhorse=hgwdev -dbHost=hgwdev \
+        -stop=mask -buildDir=`pwd` hg38 > mask.log
+    # real    1897m33.517s
+    # running from Tue Jan  7 16:10:33 PST 2014 thru 08 Jan 23:48
+#  *** All done!  (through the 'mask' step) - Elapsed time: 1897m34s
+#  *** Steps were performed in /hive/data/genomes/hg38/bed/rmskCM
+    # running install manually to allow edit of the script to load
+    # a specific rmskCm table
+    time $HOME/kent/src/hg/utils/automation/doRepeatMasker.pl \
+      -bigClusterHub=ku -workhorse=hgwdev -dbHost=hgwdev \
+        -continue=install -stop=install -buildDir=`pwd` hg38 -debug
+
+#############################################################################
+## RepeatMasker with RM Blastn - DONE - 2013-12-24,25 - Hiram
+    mkdir /hive/data/genomes/hg38/bed/repeatMaskerBlastn
+    cd /hive/data/genomes/hg38/bed/repeatMaskerBlastn
+    # running this step wise so it can be loaded into its own table
+    time doRepeatMasker.pl -stop=mask -useRMBlastn -bigClusterHub=ku \
+       -workhorse=hgwdev -dbHost=hgwdev -buildDir=`pwd` hg38 > mask.log 2>&1
+    # real    354m55.842s
+
+    # take the install script from this -debug run and alter it to load
+    # the table into rmskBlastn
+    doRepeatMasker.pl -useRMBlastn -bigClusterHub=ku  -continue=install \
+     -stop=install -debug -workhorse=hgwdev -dbHost=hgwdev -buildDir=`pwd` hg38
+    # 1560264046 bases of 3209286105 (48.617%) in intersection
+    # profile of repeat elements:
+#   1824560 rmskClass/SINE.tab
+#   1552814 rmskClass/LINE.tab
+#    738435 rmskClass/LTR.tab
+#    715998 rmskClass/Simple_repeat.tab
+#    486591 rmskClass/DNA.tab
+#    105026 rmskClass/Low_complexity.tab
+#      7712 rmskClass/Satellite.tab
+#      5638 rmskClass/Retroposon.tab
+#      5276 rmskClass/Unknown.tab
+#      5100 rmskClass/LTR?.tab
+#      4548 rmskClass/snRNA.tab
+#      3033 rmskClass/DNA?.tab
+#      1987 rmskClass/tRNA.tab
+#      1809 rmskClass/rRNA.tab
+#      1710 rmskClass/RC.tab
+#      1633 rmskClass/srpRNA.tab
+#      1428 rmskClass/scRNA.tab
+#       614 rmskClass/RNA.tab
+#       376 rmskClass/RC?.tab
+#        38 rmskClass/SINE?.tab
+#         3 rmskClass/Unspecified.tab
+#   5464329 total
+
+#############################################################################
+## repeating RepeatMasker run with HMMER - DONE - 2014-01-08 - Hiram
+    mkdir /hive/data/genomes/hg38/bed/rmskHmmer
+    cd /hive/data/genomes/hg38/bed/rmskHmmer
+
+    # trying cpu=4 and ram=32g
+    time $HOME/kent/src/hg/utils/automation/doRepeatMasker.pl \
+      -stop=mask -useHMMER -bigClusterHub=ku \
+       -workhorse=hgwdev -dbHost=hgwdev -buildDir=`pwd` hg38 > mask.log 2>&1
+    # 6 jobs required more than 32 Gb of memory to complete, ran them on
+    # hgwdev to complete, then continuing:
+    time $HOME/kent/src/hg/utils/automation/doRepeatMasker.pl \
+      -continue=cat -stop=mask -useHMMER -bigClusterHub=ku \
+       -workhorse=hgwdev -dbHost=hgwdev -buildDir=`pwd` hg38 > cat.log 2>&1
+    #  real    24m5.274s
+# 3209286105 bases (159970322 N's 3049315783 real 1314916231 upper 1734399552 lower) in 455 sequences in 1 files
+# %54.04 masked total, %56.88 masked real
+
+    # running install manually to allow edit of the script to load
+    # a specific rmskHmmer table
+    time $HOME/kent/src/hg/utils/automation/doRepeatMasker.pl \
+      -continue=install -debug -useHMMER -bigClusterHub=ku \
+       -workhorse=hgwdev -dbHost=hgwdev -buildDir=`pwd` hg38
+
+    time ./doLoad_rmskHmmer.bash > load.log 2>&1
+    # real    4m47.432s
+
+    featureBits -countGaps hg38 rmskHmmer > fb.hg38.rmskHmmer.txt 2>&1
+    # 1734398971 bases of 3209286105 (54.043%) in intersection
+
+    grep rmskClass hg38.class.profile.txt \
+        | sed -e 's#rmskClass/##; s/.tab//;' | sort -rn
+    # profile of repeat elements:
+#  1884179 SINE
+#  1702529 LINE
+#   805427 LTR
+#   636906 Simple_repeat
+#   565171 DNA
+#    95480 Low_complexity
+#    11861 Retroposon
+#    10852 Satellite
+#     9181 LTR?
+#     6783 scRNA
+#     4582 DNA?
+#     3914 Unknown
+#     2059 RC
+#     1517 srpRNA
+#     1484 RNA
+#      970 SINE?
+#      806 RC?
+#      464 rRNA
+#  5744165 total
+
+    featureBits -countGaps hg38 rmskHmmer '!rmskCM' -bed=hmmerUnique.bed
+    # 172940594 bases of 3209286105 (5.389%) in intersection
+    hgLoadBed hg38 hmmerUnique hmmerUnique.bed
+    # Read 3099505 elements of size 4 from hmmerUnique.bed
+
+#############################################################################
+## RepeatMasker with HMMER - DONE - 2013-12-24,26 - Hiram
+    mkdir /hive/data/genomes/hg38/bed/repeatMaskerHMMER
+    cd /hive/data/genomes/hg38/bed/repeatMaskerHMMER
+
+    time doRepeatMasker.pl -stop=mask -useHMMER -bigClusterHub=ku \
+       -workhorse=hgwdev -dbHost=hgwdev -buildDir=`pwd` hg38 > mask.log 2>&1
+    # take the install script from this -debug run and alter it to load
+    # the table into rmskHmmer
+    doRepeatMasker.pl -continue=install -stop=install -useHMMER \
+      -bigClusterHub=ku -workhorse=hgwdev -dbHost=hgwdev \
+         -buildDir=`pwd` hg38 > mask.log 2>&1
+    # 1702017722 bases of 3209286105 (53.034%) in intersection
+    # profile of repeat elements:
+#   1879864 rmskClass/SINE.tab
+#   1678216 rmskClass/LINE.tab
+#    794231 rmskClass/LTR.tab
+#    651561 rmskClass/Simple_repeat.tab
+#    551965 rmskClass/DNA.tab
+#     97186 rmskClass/Low_complexity.tab
+#     10756 rmskClass/Retroposon.tab
+#     10448 rmskClass/Satellite.tab
+#      8393 rmskClass/LTR?.tab
+#      5849 rmskClass/scRNA.tab
+#      4282 rmskClass/Unknown.tab
+#      4276 rmskClass/DNA?.tab
+#      2000 rmskClass/RC.tab
+#      1573 rmskClass/srpRNA.tab
+#      1291 rmskClass/RNA.tab
+#       906 rmskClass/snRNA.tab
+#       747 rmskClass/SINE?.tab
+#       723 rmskClass/RC?.tab
+#       722 rmskClass/rRNA.tab
+#       468 rmskClass/tRNA.tab
+#   5705457 total
+
+#############################################################################
+# rmsk from genbank release (DONE - 2014-12-25 - Hiram)
+    mkdir /hive/data/genomes/hg38/bed/repeatMaskerGenbank
+    cd /hive/data/genomes/hg38/bed/repeatMaskerGenbank
+
+    head -3 ../repeatMaskerBlastn/hg38.fa.out > genbank.rm.out
+find ../../genbank -type f | grep rm.out | grep -v "/placed_scaffolds/" | while read F
+do
+  headRest 3 $F
done | sort -k5,5 -k6,6n >> genbank.rm.out
+    grep -v "^#" ../../genbank/GCA_000001405.15_GRCh38_top-level.acc2name \
+       | awk '{printf "s/%s/%s/g;\n", $1, $3}' > accessionToUcsc.sed.txt
+
+    sed -e "`cat accessionToUcsc.sed.txt`" genbank.rm.out > ucscNames.rm.out
+
+    head -3 ucscNames.rm.out > hg38.sorted.fa.out
+    tail -n +4 ucscNames.rm.out  | sort -k5,5 -k6,6n >> hg38.sorted.fa.out
+
+    hgLoadOut -table=rmskGenbank -nosplit hg38 hg38.sorted.fa.out
+    hgLoadOut -verbose=2 -tabFile=hg38.rmskGenbank.tab -table=rmskGenbank \
+       -nosplit hg38 hg38.sorted.fa.out 2> bad.records.txt
+    # fixed up one of the masking scripts from the other runs to construct
+    # the bbi files
+
+    # 1581568556 bases of 3209286105 (49.281%) in intersection
+    # profile of repeat elements:
+#   1849444 rmskClass/SINE.tab
+#   1586141 rmskClass/LINE.tab
+#    759248 rmskClass/LTR.tab
+#    502186 rmskClass/DNA.tab
+#    433789 rmskClass/Simple_repeat.tab
+#    396378 rmskClass/Low_complexity.tab
+#     10198 rmskClass/Satellite.tab
+#      5884 rmskClass/LTR?.tab
+#      4595 rmskClass/snRNA.tab
+#      4163 rmskClass/Retroposon.tab
+#      2802 rmskClass/Unknown.tab
+#      2157 rmskClass/DNA?.tab
+#      2154 rmskClass/tRNA.tab
+#      1915 rmskClass/rRNA.tab
+#      1860 rmskClass/RC.tab
+#      1784 rmskClass/srpRNA.tab
+#      1397 rmskClass/scRNA.tab
+#       822 rmskClass/RNA.tab
+#       488 rmskClass/SINE?.tab
+#       445 rmskClass/RC?.tab
+#   5567850 total
+
+#############################################################################
+## running TRF simple repeats - DONE - 2013-12-24,29 - Hiram
+    # this procedure ran into much trouble on this release.  The new
+    # repeat sequences in the centromeres caused trf to run indefinitely.
+    # I tried different sizes of chunks, working down to 20 Mbase chunks.
+    # Even still, some jobs would not complete.  Those broke down even
+    # more, eventually to the smallest bit of 30 Kbase that needed to
+    # run all the way down to 3,000 based chunks with 1,000 base overlaps.
+
+    # this did not work:
+    screen # use screen to manage this day-long job
+    mkdir /hive/data/genomes/hg38/bed/simpleRepeat
+    cd /hive/data/genomes/hg38/bed/simpleRepeat
+    time doSimpleRepeat.pl -bigClusterHub=ku -workhorse=hgwdev \
+	-smallClusterHub=ku -buildDir=`pwd` hg38 > do.log 2>&1
+    cd /hive/data/genomes/hg38/bed
+    # move it aside:
+    mv simpleRepeat simpleRepeat.2013-12-24
+
+    # Instead, something like this:
+    mkdir /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/splitGap
+    cd /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/splitGap
+    mkdir -p noGap
+
+    twoBitToFa ../../../hg38.unmasked.2bit stdout \
+       | faSplit -lift=noGap.lift gap stdin 5000000 noGap/hg38_
+    # make sure nothing has gone missing:
+    faCount noGap/*.fa > faCount.txt
+    tail -1 faCount.txt
+# total 3068387174 898285419 623727342 626335137 900967885  19071391 30979734
+    # compared to the full sequence, same numbers for ACGT:
+    twoBitToFa ../../../hg38.unmasked.2bit stdout | faCount stdin
+# total 3209286105 898285419 623727342 626335137 900967885 159970322 30979743
+    faToTwoBit noGap/*.fa hg38.nogap.2bit
+    twoBitInfo hg38.nogap.2bit stdout | sort -k2,2nr > hg38.nogap.sizes
+
+
+    mkdir /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M
+    cd /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M
+    rm -rf /hive/data/genomes/hg38/TrfPart20M
+    /cluster/bin/scripts/simplePartition.pl \
+/hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/splitGap/hg38.nogap.2bit \
+   20000000 /hive/data/genomes/hg38/TrfPart20M
+   rm -f /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/TrfPart20M
+   ln -s /hive/data/genomes/hg38/TrfPart20M \
+      /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/TrfPart20M
+   ssh ku
+   cd /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M
+   gensub2 /hive/data/genomes/hg38/TrfPart20M/partitions.lst single gsub jobList
+   para create jobList
+   para push
+   # 20 jobs would not complete:
+# Completed: 143 of 163 jobs
+# Jobs currently running: 20
+# CPU time in finished jobs:      76994s    1283.24m    21.39h    0.89d  0.002 y
+# IO & Wait Time:                  1095s      18.24m     0.30h    0.01d  0.000 y
+# Time in running jobs:         1807279s   30121.32m   502.02h   20.92d  0.057 y
+# Average job time:                 546s       9.10m     0.15h    0.01d
+# Longest running job:            90422s    1507.03m    25.12h    1.05d
+# Longest finished job:           43348s     722.47m    12.04h    0.50d
+# Submission to last job:         43363s     722.72m    12.05h    0.50d
+   # determine which are the last jobs as individual bits:
+   para status | grep -v done | awk '{print $(NF-1),$NF}' | grep TrfRun \
+     > not.done.list
+   awk '{print $NF}' not.done.list | sed -e 's/.bed//' | while read F
+do
+   cat $F
+done > seq.specs.not.done
+
+   mkdir /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/lastJobs
+   cd /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/lastJobs
+   mkdir fasta
+   for seqSpec in `cat ../seq.specs.not.done`
+do
+  fName=`echo $seqSpec | sed -e 's/.*://'`
+  echo $fName
+  twoBitToFa $seqSpec fasta/$fName.fa
+done
+  ls -1S `pwd`/fasta > part.list
+  cat << '_EOF_' > template
+#LOOP
+./runTrf {check in line+ $(path1)}  {check out line bed/$(root1).bed}
+#ENDLOOP
+'_EOF_'
+  # << happy emacs
+
+  cat << '_EOF_' > runTrf
+#!/bin/bash
+set -beEu -o pipefail
+export path1=$1
+export inputFN=`basename $1`
+export outpath=$2
+export outputFN=`basename $2`
+mkdir -p /dev/shm/$outputFN
+cp -p $path1 /dev/shm/$outputFN
+cd /dev/shm/$outputFN
+/cluster/bin/x86_64/trfBig -trf=/cluster/bin/x86_64/trf \
+      $inputFN /dev/null -bedAt=$outputFN -tempDir=/dev/shm
+cd /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/lastJobs
+rm -f $outpath
+cp -p /dev/shm/$outputFN/$outputFN $outpath
+rm -fr /dev/shm/$outputFN/*
+rmdir --ignore-fail-on-non-empty /dev/shm/$outputFN
+'_EOF_'
+  # << happy emacs
+  chmod +x runTrf
+
+  gensub2 part.list single template jobList
+  para create jobList
+  para push
+  # not all of these jobs will finish either:
+# Completed: 85 of 106 jobs
+# Jobs currently running: 21
+# CPU time in finished jobs:      58076s     967.93m    16.13h    0.67d  0.002 y
+# IO & Wait Time:                   828s      13.81m     0.23h    0.01d  0.000 y
+# Time in running jobs:         1988997s   33149.95m   552.50h   23.02d  0.063 y
+# Average job time:                 693s      11.55m     0.19h    0.01d
+# Longest running job:            94730s    1578.83m    26.31h    1.10d
+# Longest finished job:           34216s     570.27m     9.50h    0.40d
+# Submission to last job:         34342s     572.37m     9.54h    0.40d
+
+  # can use what we have here:
+  liftUp result.bed ../../splitGap/noGap.lift error bed/*.bed
+  # find jobs not done
+  para status | grep -v done | awk '{print $(NF-1),$NF}' | grep TrfRun \
+     > not.done.list
+  # splitting up those last jobs:
+  mkdir /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/splitBits
+  cd /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/splitBits
+  mkdir noGap
+  awk '{print $2}' ../lastJobs/not.done.list | while read F
+do
+  cp -p $F ./noGap/
+done
+
+  # split into 1,000,000 chunks with 10,000 overlap:
+  mkdir -p 1M_10K
+
+for F in noGap/*.fa
+do
+  B=`basename $F | sed -e 's/.fa//'`
+  echo "faSplit -lift=$B.lift -extra=10000 size $F 1000000 1M_10K/${B}_"
+  faSplit -lift=$B.lift -extra=10000 size $F 1000000 1M_10K/${B}_
+done
+
+  ls -1S `pwd`/1M_10K/*.fa > part.list
+  cat << '_EOF_' > runTrf
+#!/bin/bash
+set -beEu -o pipefail
+export path1=$1
+export inputFN=`basename $1`
+export outpath=$2
+export outputFN=`basename $2`
+mkdir -p /dev/shm/$outputFN
+cp -p $path1 /dev/shm/$outputFN
+cd /dev/shm/$outputFN
+/cluster/bin/x86_64/trfBig -trf=/cluster/bin/x86_64/trf \
+      $inputFN /dev/null -bedAt=$outputFN -tempDir=/dev/shm
+cd /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/splitBits
+rm -f $outpath
+cp -p /dev/shm/$outputFN/$outputFN $outpath
+rm -fr /dev/shm/$outputFN/*
+rmdir --ignore-fail-on-non-empty /dev/shm/$outputFN
+'_EOF_'
+  # << happy emacs
+
+  cat << '_EOF_' > template
+#LOOP
+./runTrf {check in line+ $(path1)}  {check out line bed/$(root1).bed}
+#ENDLOOP
+'_EOF_'
+  # << happy emacs
+
+  gensub2 part.list single template jobList
+  para create jobList
+  para push
+  # not all of these jobs will complete either:
+# Completed: 53 of 96 jobs
+# CPU time in finished jobs:     212403s    3540.05m    59.00h    2.46d  0.007 y
+# IO & Wait Time:                  1851s      30.85m     0.51h    0.02d  0.000 y
+# Average job time:                4043s      67.38m     1.12h    0.05d
+# Longest finished job:           68726s    1145.43m    19.09h    0.80d
+# Submission to last job:         68890s    1148.17m    19.14h    0.80d
+  # use what results we have here:
+  cat *.lift  | liftUp parts.bed stdin error bed/*.bed
+  liftUp -type=.bed stdout ../../splitGap/noGap.lift error parts.bed \
+    | sort -u | sort -k1,1 -k2,2n > hg38.result.bed
+
+  para status | grep -v -w done | awk '{print $(NF-1)}' > will.not.finish.txt
+
+  # split those last bits:
+  mkdir /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/splitSplitBits
+  cd /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/splitSplitBits
+  mkdir splitBits
+  cat ../splitBits/will.not.finish.txt | while read F
+do
+  cp -p $F splitBits
+done
+
+  #  100K chunks with 10K overlap
+  mkdir -p 100K_10K
+
+for F in splitBits/*.fa
+do
+  B=`basename $F | sed -e 's/.fa//'`
+  echo "faSplit -lift=$B.lift -extra=10000 size $F 100000 100K_10K/${B}_"
+  faSplit -lift=$B.lift -extra=10000 size $F 100000 100K_10K/${B}_
+done
+
+  cat << '_EOF_' > runTrf
+#!/bin/bash
+set -beEu -o pipefail
+export path1=$1
+export inputFN=`basename $1`
+export outpath=$2
+export outputFN=`basename $2`
+mkdir -p /dev/shm/$outputFN
+cp -p $path1 /dev/shm/$outputFN
+cd /dev/shm/$outputFN
+/cluster/bin/x86_64/trfBig -trf=/cluster/bin/x86_64/trf \
+      $inputFN /dev/null -bedAt=$outputFN -tempDir=/dev/shm
+cd /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/splitSplitBits
+rm -f $outpath
+cp -p /dev/shm/$outputFN/$outputFN $outpath
+rm -fr /dev/shm/$outputFN/*
+rmdir --ignore-fail-on-non-empty /dev/shm/$outputFN
+'_EOF_'
+  # << happy emacs
+  chmod +x runTrf
+
+  cat << '_EOF_' > template
+#LOOP
+./runTrf {check in line+ $(path1)}  {check out line bed/$(root1).bed}
+#ENDLOOP
+'_EOF_'
+  # << happy emacs
+
+  ls -1S `pwd`/100K_10K/*.fa > part.list
+  gensub2 part.list single template jobList
+  para create jobList
+  para push
+  # one last bit does not complete:
+# Completed: 420 of 421 jobs
+# CPU time in finished jobs:      19862s     331.04m     5.52h    0.23d  0.001 y
+# IO & Wait Time:                  2360s      39.33m     0.66h    0.03d  0.000 y
+# Average job time:                  53s       0.88m     0.01h    0.00d
+# Longest finished job:             368s       6.13m     0.10h    0.00d
+# Submission to last job:           448s       7.47m     0.12h    0.01d
+
+  # can use the results obtained here:
+  cat *.lift  | liftUp splitParts.bed stdin error bed/*.bed
+  cat ../splitBits/*.lift | liftUp parts.bed  stdin error splitParts.bed
+  liftUp -type=.bed stdout ../../splitGap/noGap.lift error parts.bed | sort -u \
+    | sort -k1,1 -k2,2n > hg38.result.bed
+
+  para status | grep -v -w done | awk '{print $(NF-1)}'
+  # last chunk: 100K_10K/hg38_89_2_00.fa
+
+  mkdir /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/last100K
+  cd /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/last100K
+  cp -p ../splitSplitBits/100K_10K/hg38_89_2_00.fa .
+
+  # 20K chunks with 10K overlap:
+  mkdir -p 20K_10K
+
+for F in hg38_89_2_00.fa
+do
+  B=`basename $F | sed -e 's/.fa//'`
+  echo "faSplit -lift=$B.lift -extra=10000 size $F 20000 20K_10K/$B_"
+  faSplit -lift=$B.lift -extra=10000 size $F 20000 20K_10K/${B}_
+done
+
+  ls -1S `pwd`/20K_10K/*.fa > part.list
+  cat << '_EOF_' > runTrf
+#!/bin/bash
+# runTrf: run Tandem Repeats Finder (via trfBig) on one fasta chunk in
+# node-local /dev/shm scratch space, then copy the resulting bed file
+# back to the requested output path under the run directory.
+#   $1 - full path to the input fasta chunk
+#   $2 - output bed file path (relative to the run directory below)
+# strict mode: exit on any error or unset variable, fail whole pipelines
+set -beEu -o pipefail
+export path1=$1
+export inputFN=`basename $1`
+export outpath=$2
+export outputFN=`basename $2`
+# stage the input into a per-job scratch directory on the compute node
+mkdir -p /dev/shm/$outputFN
+cp -p $path1 /dev/shm/$outputFN
+cd /dev/shm/$outputFN
+/cluster/bin/x86_64/trfBig -trf=/cluster/bin/x86_64/trf \
+      $inputFN /dev/null -bedAt=$outputFN -tempDir=/dev/shm
+# return to the run directory and install the result
+cd /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/last100K
+rm -f $outpath
+cp -p /dev/shm/$outputFN/$outputFN $outpath
+# clean up the scratch area (leave it if something is unexpectedly left)
+rm -fr /dev/shm/$outputFN/*
+rmdir --ignore-fail-on-non-empty /dev/shm/$outputFN
+'_EOF_'
+  # << happy emacs
+  chmod +x runTrf
+  cat << '_EOF_' > template
+#LOOP
+./runTrf {check in line+ $(path1)}  {check out line bed/$(root1).bed}
+#ENDLOOP
+'_EOF_'
+  # << happy emacs
+
+  gensub2 part.list single template jobList
+  para create jobList
+  para push
+  # one of these jobs will not finish:
+# Completed: 4 of 5 jobs
+# CPU time in finished jobs:         10s       0.17m     0.00h    0.00d  0.000 y
+# IO & Wait Time:                    16s       0.26m     0.00h    0.00d  0.000 y
+# Average job time:                   7s       0.11m     0.00h    0.00d
+# Longest finished job:               8s       0.13m     0.00h    0.00d
+# Submission to last job:            16s       0.27m     0.00h    0.00d
+
+  # can use the results we have here:
+  cat *.lift  | liftUp 20Kparts.bed stdin error bed/*.bed
+  cat ../splitSplitBits/*.lift | liftUp 100Kpart.bed stdin error 20Kparts.bed
+  cat ../splitBits/*.lift | liftUp parts.bed  stdin error 100Kpart.bed
+  liftUp -type=.bed stdout ../../splitGap/noGap.lift error parts.bed | sort -u \
+    | sort -k1,1 -k2,2n > hg38.result.bed
+
+  # finally, what turns out to be the last batch:
+  mkdir /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/last30K
+  cd /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/last30K
+  cp -p ../last100K/20K_10K/hg38_89_2_00_3.fa .
+
+  # 2K chunks with 1K overlap
+  mkdir -p 2K_1K
+
+for F in hg38_89_2_00_3.fa
+do
+  B=`basename $F | sed -e 's/.fa//'`
+  echo "faSplit -lift=$B.lift -extra=1000 size $F 2000 2K_1K/$B_"
+  faSplit -lift=$B.lift -extra=1000 size $F 2000 2K_1K/${B}_
+done
+
+  ls -1S `pwd`/2K_1K/*.fa > part.list
+  cat << '_EOF_' > runTrf
+#!/bin/bash
+# runTrf: run Tandem Repeats Finder (via trfBig) on one fasta chunk in
+# node-local /dev/shm scratch space, then copy the resulting bed file
+# back to the requested output path under the run directory.
+#   $1 - full path to the input fasta chunk
+#   $2 - output bed file path (relative to the run directory below)
+# strict mode: exit on any error or unset variable, fail whole pipelines
+set -beEu -o pipefail
+export path1=$1
+export inputFN=`basename $1`
+export outpath=$2
+export outputFN=`basename $2`
+# stage the input into a per-job scratch directory on the compute node
+mkdir -p /dev/shm/$outputFN
+cp -p $path1 /dev/shm/$outputFN
+cd /dev/shm/$outputFN
+/cluster/bin/x86_64/trfBig -trf=/cluster/bin/x86_64/trf \
+      $inputFN /dev/null -bedAt=$outputFN -tempDir=/dev/shm
+# return to the run directory and install the result
+cd /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/last30K
+rm -f $outpath
+cp -p /dev/shm/$outputFN/$outputFN $outpath
+# clean up the scratch area (leave it if something is unexpectedly left)
+rm -fr /dev/shm/$outputFN/*
+rmdir --ignore-fail-on-non-empty /dev/shm/$outputFN
+'_EOF_'
+  # << happy emacs
+  chmod +x runTrf
+  cat << '_EOF_' > template
+#LOOP
+./runTrf {check in line+ $(path1)}  {check out line bed/$(root1).bed}
+#ENDLOOP
+'_EOF_'
+  # << happy emacs
+
+  gensub2 part.list single template jobList
+  para create jobList
+  para push
+# Completed: 15 of 15 jobs
+# CPU time in finished jobs:          1s       0.02m     0.00h    0.00d  0.000 y
+# IO & Wait Time:                    26s       0.43m     0.01h    0.00d  0.000 y
+# Average job time:                   2s       0.03m     0.00h    0.00d
+# Longest finished job:               4s       0.07m     0.00h    0.00d
+# Submission to last job:            14s       0.23m     0.00h    0.00d
+
+  cat *.lift  | liftUp 2Kparts.bed stdin error bed/*.bed
+  cat ../last100K/*.lift | liftUp 20Kpart.bed stdin error 2Kparts.bed
+  cat ../splitSplitBits/*.lift | liftUp 100Kpart.bed stdin error 20Kpart.bed
+  cat ../splitBits/*.lift | liftUp parts.bed  stdin error 100Kpart.bed
+  liftUp -type=.bed stdout ../../splitGap/noGap.lift error parts.bed | sort -u \
+    | sort -k1,1 -k2,2n > hg38.result.bed
+
+  ## To put it all together:
+  cd /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M
+  cat /hive/data/genomes/hg38/TrfPart20M/???/*.bed lastJobs/bed/*.bed \
+     splitBits/parts.bed splitSplitBits/parts.bed last100K/parts.bed \
+     last30K/parts.bed > beforeLift.simpleRepeat.bed
+  liftUp -type=.bed stdout ../splitGap/noGap.lift error \
+     beforeLift.simpleRepeat.bed | sort -u \
+       | sort -k1,1 -k2,2n > simpleRepeat.bed
+
+  awk '{if ($5 <= 12) print;}' simpleRepeat.bed > trfMask.bed
+
+  hgLoadBed hg38 simpleRepeat simpleRepeat.bed \
+        -sqlTable=$HOME/kent/src/hg/lib/simpleRepeat.sql
+  featureBits hg38 simpleRepeat > fb.simpleRepeat 2>&1
+  cat fb.simpleRepeat
+# 146785521 bases of 3049335806 (4.814%) in intersection
+
+  cd /hive/data/genomes/hg38/bed
+  ln -s simpleRepeat.2013-12-27/run20M simpleRepeat
+
+############################################################################
+
+ # WINDOWMASKER - DONE - 2013-12-24 - Hiram
+    mkdir /hive/data/genomes/hg38/bed/windowMasker
+    cd /hive/data/genomes/hg38/bed/windowMasker
+    time nice -n +19 doWindowMasker.pl -buildDir=`pwd` -workhorse=hgwdev \
+	-dbHost=hgwdev hg38 > do.log 2>&1 &
+
+############################################################################
+# Verify all gaps are marked - DONE - 2013-12-24 - Hiram
+    mkdir /hive/data/genomes/hg38/bed/gap
+    cd /hive/data/genomes/hg38/bed/gap
+    time nice -n +19 findMotif -motif=gattaca -verbose=4 \
+	-strand=+ ../../hg38.unmasked.2bit > findMotif.txt 2>&1
+    #	real    0m28.634s
+    grep "^#GAP " findMotif.txt | sed -e "s/^#GAP //" > allGaps.bed
+    featureBits hg38 -not gap -bed=notGap.bed
+    #	3049335806 bases of 3049335806 (100.000%) in intersection
+    time featureBits hg38 allGaps.bed notGap.bed -bed=new.gaps.bed
+    #   20023 bases of 3049335806 (0.001%) in intersection
+    # real    0m20.427s
+    # this indicates that 20,023 bases are not marked as N's
+    # with this element size profile:
+    awk '{print $3-$2}' new.gaps.bed | ave stdin
+# Q1 1.000000
+# median 1.000000
+# Q3 100.000000
+# average 44.894619
+# min 1.000000
+# max 1000.000000
+# count 446
+# total 20023.000000
+# standard deviation 81.743447
+
+    # the four largest ones:
+# 1000 chr2         32916625        32917625        chr2.7
+# 1000 chr2         32867130        32868130        chr2.6
+#  348 chr20        36314371        36314719        chr20.36
+#  200 chr12       123443533       123443733        chr12.10
+
+#########################################################################
+## CYTOBAND - fixing the ideogram track (DONE - 2014-06-11 - Hiram)
+    ## the file we used before was broken
+    mkdir -p /hive/data/outside/ncbi/ideogram/2014-06
+    cd /hive/data/outside/ncbi/ideogram/2014-06
+    # fetch all the ideogram files:
+    rsync -a -P rsync://ftp.ncbi.nlm.nih.gov/pub/gdp/ ./
+    mkdir /hive/data/genomes/hg38/bed/cytoBandUpdate
+    cd /hive/data/genomes/hg38/bed/cytoBandUpdate
+
+    # Create bed file
+    $HOME/kent/src/utils/ncbi/createNcbiCytoBand.pl \
+/hive/data/outside/ncbi/ideogram/2014-06/ideogram_9606_GCF_000001305.14_850_V1
+
+    # add in the other genome data:
+    hgsql -N -e 'select * from cytoBand;' hg38 \
+        | egrep "chrU|chrM|_alt|_random" >> cytoBand.bed
+
+    $HOME/kent/src/utils/ncbi/cytoBandVerify.pl
+    #   everything checks out OK on 455 chroms
+
+    # Load the bed file
+    hgLoadBed -tab -noBin -sqlTable=$HOME/kent/src/hg/lib/cytoBand.sql \
+	hg38 cytoBand cytoBand.bed
+    cut -f1 cytoBand.bed | sort -u | awk '{print length($1)}' | sort -rn | head
+    #  23
+    sed -e 's/12/23/' $HOME/kent/src/hg/lib/cytoBand.sql > cytoBand.sql
+    sort -k1,1 -k2,2n cytoBand.bed \
+	| hgLoadSqlTab hg38 cytoBand cytoBand.sql stdin
+
+    # Make cytoBandIdeo track for ideogram gif on hgTracks page.
+    # cytoBandIdeo is just a replicate of the cytoBand track.
+    hgsql -e "drop table cytoBandIdeo;" hg38
+    hgsql hg38 -e "create table cytoBandIdeo (index(chrom(23),chromStart)) as select * from cytoBand;"
+
+#########################################################################
+##  CYTOBAND - ideogram track (DONE - 2014-03-04 - Hiram)
+    ssh hgwdev
+    mkdir -p /hive/data/outside/ncbi/ideogram/2014-03
+    cd /hive/data/outside/ncbi/ideogram/2014-03
+
+    # fetch all the ideogram files:
+    rsync -a -P rsync://ftp.ncbi.nlm.nih.gov/pub/gdp/ ./
+
+    mkdir /hive/data/genomes/hg38/bed/cytoBand
+    cd /hive/data/genomes/hg38/bed/cytoBand
+
+    # Create bed file
+    $HOME/kent/src/utils/ncbi/createNcbiCytoBand.pl \
+/hive/data/outside/ncbi/ideogram/2014-03/ideogram_9606_GCF_000001305.14_850_V1
+
+    # add in the other genome data:
+    hgsql -N -e 'select * from cytoBand;' hg38 > bobTable.bed
+
+    egrep "chrU|chrM|_alt|_random" bobTable.bed >> cytoBand.bed
+
+    ## can now verify before load:
+    $HOME/kent/src/utils/ncbi/cytoBandVerify.pl
+    #   everything checks out OK on 455 chroms
+
+    # Load the bed file
+    hgLoadBed -tab -noBin -sqlTable=$HOME/kent/src/hg/lib/cytoBand.sql \
+	hg38 cytoBand cytoBand.bed
+    cut -f1 cytoBand.bed | sort -u | awk '{print length($1)}' | sort -rn | head
+    #  23
+    sed -e 's/12/23/' $HOME/kent/src/hg/lib/cytoBand.sql > cytoBand.sql
+    sort -k1,1 -k2,2n cytoBand.bed \
+	| hgLoadSqlTab hg38 cytoBand cytoBand.sql stdin
+
+    # Make cytoBandIdeo track for ideogram gif on hgTracks page.
+    # cytoBandIdeo is just a replicate of the cytoBand track.
+    hgsql -e "drop table cytoBandIdeo;" hg38
+    hgsql hg38 -e "create table cytoBandIdeo (index(chrom(23),chromStart)) as select * from cytoBand;"
+
+##########################################################################
+# cytoBandIdeo - (DONE - 2013-12-26 - Hiram)
+    mkdir /hive/data/genomes/hg38/bed/cytoBand
+    cd /hive/data/genomes/hg38/bed/cytoBand
+    makeCytoBandIdeo.csh hg38
+
+#making temporary liftover of items from hg19
+liftOver /hive/data/genomes/hg19/bed/ncbiCytoBand/cytobands.bed \
+      /hive/data/gbdb/hg19/liftOver/hg19ToHg38.over.chain.gz \
+      cytobands.bed unMapped
+
+liftOver -minBlocks=0.5 /hive/data/genomes/hg19/bed/ncbiCytoBand/cytobands.bed \
+      /hive/data/gbdb/hg19/liftOver/hg19ToHg38.over.chain.gz \
+      cytobands.0.5.bed unMapped0.5
+
+###############################                    ######################
+# cytoBandIdeo - (reDONE - 2014-02-25 - kuhn)
+
+# adding centromeres to generic cytoBandIdeo table as it exists.
+# (lifted track is already gone)
+
+# get the cen values for hg38
+hgsql -Ne "SELECT DISTINCT chrom FROM centromeres" hg38 | sort > hg38.chroms
+rm -f hg38.cens
+foreach chrom (`cat hg38.chroms`)
+  set cenStart=""
+  set cenEnd=""
+  set cenStart=`hgsql -Ne 'SELECT MIN(chromStart) FROM centromeres WHERE chrom = "'$chrom'"' hg38`
+  set cenEnd=`hgsql -Ne 'SELECT MAX(chromEnd) FROM centromeres WHERE chrom = "'$chrom'"' hg38`
+  echo "$chrom $cenStart $cenEnd" >> hg38.cens
+end
+
+# Modified makeCytoBandIdeo.csh to use this file instead of looking
+#   for centromeres in a gap table.
+# Replaced existing cytoBandIdeo table, which was really only a copy
+#   of chromInfo.
+
+##########################################################################
+# hg19 <-> hg38 difference tracks (DONE - 2013-12-28 - Hiram)
+    mkdir /hive/data/genomes/hg19/bed/liftOverHg38
+    cd /hive/data/genomes/hg19/bed/liftOverHg38
+
+    #	not needed, but interesting, collect all the fragment
+    #	definitions from the gold tables:
+    hgsql -N -e "select frag,fragStart,fragEnd,strand from gold;" hg19 \
+        | sort > hg19.gold.frags.tab
+
+    hgsql -N -e "select frag,fragStart,fragEnd,strand from gold;" hg38 \
+        | sort > hg38.gold.frags.tab
+
+    # construct common and difference listings
+    comm -12 hg19.gold.frags.tab hg38.gold.frags.tab \
+	> identical.hg19.hg38.frags.tab
+    comm -23 hg19.gold.frags.tab hg38.gold.frags.tab \
+	> unique.hg19Only.frags.tab
+    comm -13 hg19.gold.frags.tab hg38.gold.frags.tab \
+	> unique.hg38Only.frags.tab
+
+    # better yet, get full information about each fragment
+    hgsql -N -e "select chrom,chromStart,chromEnd,ix,type,frag,fragStart,fragEnd,strand from gold;" hg19 \
+        | sort -k6 > hg19.gold.tab
+
+    hgsql -N -e "select chrom,chromStart,chromEnd,ix,type,frag,fragStart,fragEnd,strand from gold;" hg38 \
+        | sort -k6 > hg38.gold.tab
+
+    # construct a single key for each fragment for joining.
+    # the key is frag,fragStart,fragEnd,strand
+    awk '{printf "%s,%d,%d,%s\t%s\t%s\t%s\t%d\t%d\t%d\t%s\n",
+	$6,$7,$8,$9,$6,$9,$1,$2,$3,$4,$5}' hg19.gold.tab | sort \
+	> hg19.fragKey.tab
+    awk '{printf "%s,%d,%d,%s\t%s\t%s\t%s\t%d\t%d\t%d\t%s\n",
+	$6,$7,$8,$9,$6,$9,$1,$2,$3,$4,$5}' hg38.gold.tab | sort \
+	> hg38.fragKey.tab
+
+    # now, by joining those keys, we can get exact identicals, and
+    # the only-in listings as bed files to load as tracks:
+    join hg19.fragKey.tab hg38.fragKey.tab \
+	| awk '{printf "%s\t%d\t%d\t%s\t1000\t%s\t%d\t%d\t0,0,128\n", $4,$5,$6,$2,$3,$5,$6}' \
+        | sort -k1,1 -k2,2n > hg19.hg38.identical.bed
+
+    join hg19.fragKey.tab hg38.fragKey.tab \
+	| awk '{printf "%s\t%d\t%d\t%s\t1000\t%s\t%d\t%d\t0,0,128\n", $11,$12,$13,$9,$10,$12,$13}' \
+        | sort -k1,1 -k2,2n > hg38.hg19.identical.bed
+
+    join -v 1 hg19.fragKey.tab hg38.fragKey.tab \
+	| awk '{printf "%s\t%d\t%d\t%s\t0\t%s\n", $4,$5,$6,$2,$3}' \
+        | sort -k1,1 -k2,2n > hg19.only.bed
+
+    join -v 2 hg19.fragKey.tab hg38.fragKey.tab \
+	| awk '{printf "%s\t%d\t%d\t%s\t0\t%s\n", $4,$5,$6,$2,$3}' \
+        | sort -k1,1 -k2,2n > hg38.only.bed
+
+    hgLoadBed hg19 hg38ContigDiff hg19.only.bed
+    hgLoadBed hg38 hg19ContigDiff hg38.only.bed
+
+    wc -l hg??.only.bed
+    #  6097 hg19.only.bed
+    #  23632 hg38.only.bed
+
+    # this leaves the outstanding question of "why" they might be in
+    #	the only-in listings.  Some contigs may be different versions,
+    #   sometimes different sections of the same contig are used,
+    #	and contigs are dropped from hg19 to hg38, or new contigs added
+    #	to hg38 to fill in gaps from hg19
+    # Let's see if we can measure some of this:
+    awk '{print $4}' hg19.only.bed | sort -u > hg19.only.ids.list
+    awk '{print $4}' hg38.only.bed | sort -u > hg38.only.ids.list
+
+    # Looks like 5405 identical contigs with different parts used:
+    comm -12 hg19.only.ids.list hg38.only.ids.list > differentPortions.list
+    wc -l differentPortions.list
+    # 5405
+
+    # and perhaps 63 = 5468-5405 of different versions of same contig:
+    sed -e "s/\.[0-9]*$//" hg19.only.ids.list | sort -u \
+	> hg19.noVersions.ids.list
+    sed -e "s/\.[0-9]*$//" hg38.only.ids.list | sort -u \
+	> hg38.noVersions.ids.list
+    comm -12 hg19.noVersions.ids.list hg38.noVersions.ids.list | wc -l
+    #	5468
+    sed -e "s/\.[0-9]*$//" differentPortions.list | sort -u \
+	> differentPortions.noVersions.list
+    comm -12 hg19.noVersions.ids.list hg38.noVersions.ids.list | sort -u \
+	> noVersions.common.list
+    # indeed, 63 contigs of different versions:
+    comm -23 noVersions.common.list differentPortions.noVersions.list \
+	| sort -u > differentVersions.list
+    wc -l differentVersions.list
+    #	63
+
+    # dividing up these items:
+    cat << '_EOF_' > identifyPortions.pl
+#!/usr/bin/env perl
+
+# identifyPortions.pl - classify the hg19-only and hg38-only golden path
+# fragments (hg19.only.bed / hg38.only.bed) into three disjoint bed files
+# per assembly:
+#   <db>.differentPortions.bed - same contig version, different region used
+#   <db>.differentVersions.bed - a different version of the same contig
+#   hg19.dropped.bed / hg38.newTo19.bed - everything else (contigs dropped
+#       from hg19, or new contigs added in hg38)
+# Inputs, all in the current directory:
+#   differentPortions.list - versioned accessions common to both "only" sets
+#   differentVersions.list - unversioned accessions whose version changed
+#   hg19.only.bed, hg38.only.bed - bed6 listings from the join above
+
+use strict;
+use warnings;
+
+# read a one-item-per-line list file, return a reference to a hash set
+sub loadList {
+    my ($listFile) = @_;
+    my %set;
+    open (my $fh, '<', $listFile) or die "can not read $listFile";
+    while (my $line = <$fh>) {
+	chomp $line;
+	$set{$line} = 1;
+    }
+    close ($fh);
+    return \%set;
+}
+
+my $differentVersions = loadList("differentVersions.list");
+my $differentPortions = loadList("differentPortions.list");
+
+# split one <db>.only.bed into the three classification files:
+#   $onlyBed     - input bed6 file of fragments unique to one assembly
+#   $portionsBed - output for accessions in differentPortions.list
+#   $versionsBed - output for accessions in differentVersions.list
+#   $leftOverBed - output for everything else (dropped/new contigs)
+sub classifyOnlyBed {
+    my ($onlyBed, $portionsBed, $versionsBed, $leftOverBed) = @_;
+    my %doneAcc;	# accession -> 1 when classified, 0 when left over
+    open (my $dp, '>', $portionsBed) or die "can not write to $portionsBed";
+    open (my $dv, '>', $versionsBed) or die "can not write to $versionsBed";
+    open (my $fh, '<', $onlyBed) or die "can not read $onlyBed";
+    while (my $line = <$fh>) {
+	chomp $line;
+	my ($chr, $start, $end, $acc, $score, $strand) = split('\s+', $line);
+	# assume done while $acc is still complete
+	$doneAcc{$acc} = 1;
+	if (exists($differentPortions->{$acc})) {
+	    printf $dp "%s\n", $line;
+	} else {
+	    my $trimAcc = $acc;
+	    $trimAcc =~ s/\.[0-9]+$//;	# strip the .version suffix
+	    if (exists($differentVersions->{$trimAcc})) {
+		printf $dv "%s\n", $line;
+	    } else {
+		# this one does not match either classification
+		$doneAcc{$acc} = 0;
+	    }
+	}
+    }
+    close ($fh);
+    close ($dv);
+    close ($dp);
+    # second pass: unclassified accessions go to the left-over file
+    open (my $dr, '>', $leftOverBed) or die "can not write to $leftOverBed";
+    open ($fh, '<', $onlyBed) or die "can not read $onlyBed";
+    while (my $line = <$fh>) {
+	chomp $line;
+	my ($chr, $start, $end, $acc, $score, $strand) = split('\s+', $line);
+	if (0 == $doneAcc{$acc}) {
+	    printf $dr "%s\n", $line;
+	}
+    }
+    close ($fh);
+    close ($dr);
+    return;
+}
+
+classifyOnlyBed("hg19.only.bed", "hg19.differentPortions.bed",
+	"hg19.differentVersions.bed", "hg19.dropped.bed");
+classifyOnlyBed("hg38.only.bed", "hg38.differentPortions.bed",
+	"hg38.differentVersions.bed", "hg38.newTo19.bed");
+'_EOF_'
+    # << happy emacs
+    chmod +x identifyPortions.pl
+    ./identifyPortions.pl
+    # make sure nothing was lost
+    sort hg19.differentVersions.bed hg19.differentPortions.bed \
+	hg19.dropped.bed  | sum
+    #	43711   233
+    sort hg19.only.bed | sum
+    #	43711   233
+    sort hg38.differentVersions.bed hg38.differentPortions.bed \
+	hg38.newTo19.bed | sum
+    #	00502   911
+    sort hg38.only.bed | sum
+    #	00502   911
+
+    sort -k1,1 -k2,2n hg38.differentVersions.bed hg38.differentPortions.bed \
+	hg38.newTo19.bed > hg38.itemRgb.bed
+    sort -k1,1 -k2,2n hg19.differentVersions.bed hg19.differentPortions.bed \
+	hg19.dropped.bed > hg19.itemRgb.bed
+
+    hgLoadBed hg19 hg38ContigDiff hg19.itemRgb.bed
+    # if you wanted to load the identicals in this track too:
+    sort -k1,1 -k2,2n hg38.hg19.identical.bed hg38.itemRgb.bed \
+       | hgLoadBed hg38 hg38ContigDiff stdin
+    # but we don't, we deliver only the differences
+    hgLoadBed hg38 hg38ContigDiff hg38.itemRgb.bed
+
+#########################################################################
+# construct ooc file to be used in blat operations
+#                      DONE - 2013-12-30 - Hiram
+# can be done on unmasked sequence the same result as masked:
+    cd /hive/data/genomes/hg38
+    time blat hg38.unmasked.2bit /dev/null /dev/null \
+       -tileSize=11 -makeOoc=jkStuff/hg38.11.ooc -repMatch=1024
+
+    # been confirmed, the 100-base non-bridged gaps are really non-bridged
+    gapToLift -minGap=100 -bedFile=jkStuff/nonBridgedGaps.bed hg38 \
+	jkStuff/hg38.nonBridged.lft
+
+##############################################################################
+# cpgIslands - (DONE - 2014-01-07 - Hiram)
+    # run on the Hmmer + trfMask sequence
+    mkdir /hive/data/genomes/hg38/bed/cpgIslands
+    cd /hive/data/genomes/hg38/bed/cpgIslands
+    time $HOME/kent/src/hg/utils/automation/doCpgIslands.pl \
+      -dbHost=hgwdev -bigClusterHub=ku -buildDir=`pwd` \
+        -workhorse=hgwdev -smallClusterHub=ku hg38 > do.log 2>&1
+    # real    3m31.684s
+    # wc -l cpgIsland.bed -> 30456 cpgIsland.bed
+    cat fb.hg38.cpgIslandExt.txt
+    #  23654068 bases of 3049335806 (0.776%) in intersection
+
+    # Previously in hg19:
+    featureBits -countGaps hg19 cpgIslandExt
+    # 21842742 bases of 3137161264 (0.696%) in intersection
+
+    # when run on Hmmer and Trf masked sequence:
+    # wc -l cpgIsland.bed -> 30416 cpgIsland.bed
+    #   23635946 bases of 3049335806 (0.775%) in intersection
+
+    # when run on unmasked sequence:
+    # wc -l cpgIsland.bed -> 55149 cpgIsland.bed
+    # 33637531 bases of 3049335806 (1.103%) in intersection
+##############################################################################
+# rerun cpgIslands on contig sequence (DONE - 2014-01-07 - Hiram)
+    # this is a test of the contig sequence file,
+    # should get a very similar answer to the above
+    mkdir /hive/data/genomes/hg38/bed/cpgIslandsContigs
+    cd /hive/data/genomes/hg38/bed/cpgIslandsContigs
+
+    # run stepwise so the lift can be done on the result before loading
+    time $HOME/kent/src/hg/utils/automation/doCpgIslands.pl \
+      -dbHost=hgwdev -bigClusterHub=ku -buildDir=`pwd` \
+       -stop=makeBed -maskedSeq=/hive/data/genomes/hg38/hg38.contigs.2bit \
+        -workhorse=hgwdev -smallClusterHub=ku hg38 > makeBed.log 2>&1
+    # real    9m31.502s
+    # fails on the bedToBigBed creation since this isn't the actual
+    # hg38 sequence.
+    mv cpgIsland.bed cpgIsland.beforeLift.bed
+    liftUp -type=.bed stdout ../../jkStuff/hg38.contigs.lift carry \
+      cpgIsland.beforeLift.bed | sort -k1,1 -k2,2n > cpgIsland.bed
+    bedToBigBed -tab -type=bed4+6 -as=$HOME/kent/src/hg/lib/cpgIslandExt.as \
+       cpgIsland.bed ../../chrom.sizes hg38.cpgIslandExt.bb
+    zcat ../cpgIslands/cpgIsland.bed.gz | sort -k1,1 -k2,2n > t.bed
+    # Surprisingly, a few more are detected, perhaps due to the different
+    # masking since this contig run is on the final corrected cross-match rmsk
+    # plus TRF, the above was on the corrupted HMMER+TRF mask:
+    wc -l cpgIsland.bed t.bed
+#   30477 cpgIsland.bed
+#   30456 t.bed
+    # 2,835 different items between the two:
+    sort t.bed cpgIsland.bed | uniq -c | awk '$1 < 2' | wc -l
+    # 2835
+    # 29,049 identical items
+    sort t.bed cpgIsland.bed | uniq -c | awk '$1 == 2' | wc -l
+    # 29049
+    cut -f1-3 cpgIsland.bed | sort > contigs.bed
+    cut -f1-3 t.bed | sort > fullSequence.bed
+    # 29,339 identical locations:
+    comm -12 contigs.bed fullSequence.bed | wc -l
+    # 29339
+
+    time $HOME/kent/src/hg/utils/automation/doCpgIslands.pl \
+      -dbHost=hgwdev -bigClusterHub=ku -buildDir=`pwd` \
+       -continue=load -maskedSeq=/hive/data/genomes/hg38/hg38.contigs.2bit \
+        -workhorse=hgwdev -smallClusterHub=ku hg38 > load.log 2>&1
+    # real    0m12.056s
+
+    cat fb.hg38.cpgIslandExt.txt
+    # 23610399 bases of 3049335806 (0.774%) in intersection
+
+##############################################################################
+# rerun cpgIslands on contig UNMASKED sequence (DONE - 2014-01-07 - Hiram)
+    mkdir /hive/data/genomes/hg38/bed/cpgIslandsContigsUnmasked
+    cd /hive/data/genomes/hg38/bed/cpgIslandsContigsUnmasked
+
+    twoBitToFa -noMask ../../hg38.contigs.2bit stdout \
+      | faToTwoBit stdin hg38.contigsUnmasked.2bit
+
+    # verify sequence is OK:
+    twoBitToFa hg38.contigsUnmasked.2bit stdout | faSize stdin
+# 3061688741 bases (12372958 N's 3049315783 real 3049315783 upper 0 lower)
+#    in 733 sequences in 1 files
+# %0.00 masked total, %0.00 masked real
+    twoBitToFa hg38.contigsUnmasked.2bit stdout | faCount stdin | tail -1
+# total 3061688741 898285419 623727342 626335137 900967885  12372958 30979743
+    # ACGT CpG same as original hg38.2bit except for the missing N's:
+# total 3209286105 898285419 623727342 626335137 900967885 159970322 30979743
+
+    # run stepwise so the lift can be done on the result before loading
+    time $HOME/kent/src/hg/utils/automation/doCpgIslands.pl \
+      -dbHost=hgwdev -bigClusterHub=ku -buildDir=`pwd` \
+       -stop=makeBed -maskedSeq=`pwd`/hg38.contigsUnmasked.2bit \
+        -workhorse=hgwdev -smallClusterHub=ku hg38 > makeBed.log 2>&1
+    # real    11m0.690s
+    # as above, failed on the bedToBigBed step since this isn't the full hg38
+    # sequence
+    mv cpgIsland.bed cpgIsland.beforeLift.bed
+    liftUp -type=.bed stdout ../../jkStuff/hg38.contigs.lift carry \
+      cpgIsland.beforeLift.bed | sort -k1,1 -k2,2n > cpgIsland.bed
+    bedToBigBed -tab -type=bed4+6 -as=$HOME/kent/src/hg/lib/cpgIslandExt.as \
+       cpgIsland.bed ../../chrom.sizes hg38.cpgIslandExt.bb
+    # a lot more here than for the masked sequence:
+    wc -l cpgIsland.bed ../cpgIslandsContigs/cpgIsland.bed
+    # 55149 cpgIsland.bed
+    # 30477 ../cpgIslandsContigs/cpgIsland.bed
+    featureBits -countGaps hg38 cpgIsland.bed
+    # 33637531 bases of 3209286105 (1.048%) in intersection
+    featureBits -countGaps hg38 ../cpgIslandsContigs/cpgIsland.bed
+    # 23610399 bases of 3209286105 (0.736%) in intersection
+
+    # debug load step so it can be loaded into a separate table:
+    $HOME/kent/src/hg/utils/automation/doCpgIslands.pl \
+      -dbHost=hgwdev -bigClusterHub=ku -buildDir=`pwd` \
+       -debug -continue=load -maskedSeq=`pwd`/hg38.contigsUnmasked.2bit \
+        -workhorse=hgwdev -smallClusterHub=ku hg38
+
+    time ./doLoadCpg.csh > load.log 2>&1
+    # real    0m2.179s
+    # 33637531 bases of 3049335806 (1.103%) in intersection
+
+#########################################################################
+# construct liftOver to hg19 (DONE - 2013-12-31 - Hiram)
+    # it turns out it doesn't matter if the query or target 2bit files
+    # are masked.  This procedure can be done on completely unmasked sequences
+    # for both, same result masked or not masked
+    screen -S hg38	# manage this longish running job in a screen
+    mkdir /hive/data/genomes/hg38/bed/blat.hg19.2013-12-31
+    cd /hive/data/genomes/hg38/bed/blat.hg19.2013-12-31
+    # this was run in manual steps as experiments were done about the masking
+    # check it with -debug first to see if it is going to work:
+    doSameSpeciesLiftOver.pl -stop=net -buildDir=`pwd` -bigClusterHub=ku \
+      -dbHost=hgwdev -workhorse=hgwdev -debug \
+        -ooc=/hive/data/genomes/hg38/jkStuff/hg38.11.ooc hg38 hg19
+    # the debug step doesn't actually construct enough files to run the
+    # steps manually.  The chaining has an extra procedure that is performed
+    # while not in 'debug' mode
+    # the run.blat was operated manually, then chaining:
+    time doSameSpeciesLiftOver.pl -continue=chain -stop=net -buildDir=`pwd` \
+      -bigClusterHub=ku \
+        -dbHost=hgwdev -workhorse=hgwdev \
+           -ooc=/hive/data/genomes/hg38/jkStuff/hg38.11.ooc \
+             hg38 hg19 > chain.log 2>&1
+    # real    22m31.635s
+    # loading is only a few seconds:
+    doSameSpeciesLiftOver.pl -continue=load -buildDir=`pwd` \
+     -bigClusterHub=ku \
+       -dbHost=hgwdev -workhorse=hgwdev \
+          -ooc=/hive/data/genomes/hg38/jkStuff/hg38.11.ooc \
+             hg38 hg19 > load.log 2>&1
+
+    # verify this file exists:
+    #	/gbdb/hg38/liftOver/hg38ToHg19.over.chain.gz
+    # and try out the conversion on genome-test from hg38 to hg19
+    # same file should exist for downloads:
+    #  /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/liftOver/hg38ToHg19.over.chain.gz
+
+############################################################################
+# marking the PAR regions: (DONE - 2014-01-09 - Hiram)
+    # after much experimentation with the AGP files and the given NCBI
+    # files in hg38/genbank/Primary_Assembly/pseudoautosomal_region
+    # the PAR region definitions can be seen in the par_align.gff file:
+# CM000685.2  10001  2781479  ->  CM000686.2 10001 2781479
+# CM000685.2  155701383  156030895 -> CM000686.2 56887903 57217415
+    # equivalent to:
+# chrX  10001  2781479  ->  chrY 10001 2781479
+# chrX  155701383  156030895 -> chrY 56887903 57217415
+
+    # subtract one for the chromStart position:
+    cat << '_EOF_' > hg38Par.bed4
+chrX 10000      2781479   PAR1
+chrX 155701382  156030895 PAR2
+chrY 10000      2781479   PAR1
+chrY 56887902   57217415  PAR2
+'_EOF_'
+    # << happy emacs
+
+    hgLoadBed hg38 par hg38Par.bed4
+    checkTableCoords  hg38
+
+    # hg19 had:
++-------+------------+-----------+------+
+| chrom | chromStart | chromEnd  | name |
++-------+------------+-----------+------+
+| chrX  |      60000 |   2699520 | PAR1 |
+| chrX  |  154931043 | 155260560 | PAR2 |
+| chrY  |      10000 |   2649520 | PAR1 |
+| chrY  |   59034049 |  59363566 | PAR2 |
++-------+------------+-----------+------+
+
+    # The AGP files come close to defining the location, but not
+    # precisely.  The first region uses different bits of AC006209.25:
+zcat ../../genbank/Primary_Assembly/assembled_chromosomes/AGP/chrX.comp.agp.gz\
+  | grep AC006209.25
+CM000685.2      2665048 2677319 56      F       AC006209.25     127483  139754 -
+CM000685.2      2677869 2804801 58      F       AC006209.25     1       126933 -
+zcat ../../genbank/Primary_Assembly/assembled_chromosomes/AGP/chrY.comp.agp.gz\
+  | grep AC006209.25
+CM000686.2      2665048 2677319 56      F       AC006209.25     127483  139754 -
+CM000686.2      2677869 2781479 58      F       AC006209.25     23323   126933 -
+
+    # and the second region uses different bits of AJ271735.1:
+zcat ../../genbank/Primary_Assembly/assembled_chromosomes/AGP/chrX.comp.agp.gz\
+  | grep AJ271735.1 | head -1
+CM000685.2 155676925 155719966 3096  O AJ271735.1     44687    87728   +
+zcat ../../genbank/Primary_Assembly/assembled_chromosomes/AGP/chrY.comp.agp.gz\
+  | grep AJ271735.1 | head -1
+CM000686.2  56887903  56906486  356  O AJ271735.1     69145    87728   +
+
+    # combining all the contig definitions from each will find all the
+    # exact identical contig bits:
+zcat ../../genbank/Primary_Assembly/assembled_chromosomes/AGP/chrY.comp.agp.gz\
+  | grep -v "^#" | awk '$5 != "N"' \
+    | awk '{printf "%s_%d_%d\t%s\t%d\t%d\n", $6,$7,$8,$1,$2,$3}' \
+    | sort > chrY.comp.agp.txt
+zcat ../../genbank/Primary_Assembly/assembled_chromosomes/AGP/chrX.comp.agp.gz\
+  | grep -v "^#" | awk '$5 != "N"' \
+    | awk '{printf "%s_%d_%d\t%s\t%d\t%d\n", $6,$7,$8,$1,$2,$3}' \
+    | sort > chrX.comp.agp.txt
+   join -t'^I' chrY.comp.agp.txt chrX.comp.agp.txt | head
+
+CM000685.2  10001   44821   CM000686.2      10001   44821
+...
+CM000685.2  2677320 2677868 CM000686.2      2677320 2677868
+
+CM000685.2 155719967  155720351       CM000686.2      56906487        56906871
+...
+CM000685.2 155964490  156030895       CM000686.2      57151010        57217415
+
+############################################################################
+## altLocations track (DONE - 2014-01-02 - Hiram)
+    # indicate corresponding locations between haplotypes and reference
+    mkdir /hive/data/genomes/hg38/bed/altLocations
+    cd /hive/data/genomes/hg38/bed/altLocations
+
+    find ../../genbank/ALT_* -type f | grep alt_scaffold_placement.txt \
+  | while read F
+do
+  grep -v "^#" ${F} | sed -e 's/\./v/;' | awk -F'\t' '{printf "chr%s\t%d\t%d\tchr%s_%s_alt\n", $6,$12-1,$13,$6, $4}'
+done | sort -k1,1 -k2,2n > chrToAlt.bed
+
+    # note silent hidden <tab> character in the join -t argument
+    # explicit as written here
+
+find ../../genbank/ALT_* -type f | grep alt_scaffold_placement.txt \
+  | while read F
+do
+  grep -v "^#" ${F} | sed -e 's/\./v/;' | awk -F'\t' '{printf "chr%s_%s_alt\tchr%s:%d-%d\n", $6,$4,$6,$12,$13}'
+done | sort > altToChr.tab
+sort ../../chrom.sizes | join -t'^I' - altToChr.tab \
+   | awk '{printf "%s\t0\t%d\t%s\n", $1,$2,$3}' > altToChr.bed
+
+
+   hgLoadBed hg38 altLocations chrToAlt.bed altToChr.bed
+   featureBits -countGaps hg38 altLocations
+   # 170113652 bases of 3209286105 (5.301%) in intersection
+
+############################################################################
+## genscan (DONE - 2014-01-07 - Hiram)
+   mkdir /hive/data/genomes/hg38/bed/genscan
+   cd /hive/data/genomes/hg38/bed/genscan
+
+   # using the contig sequence
+   # running stepwise to allow the lifting of the final result
+   time $HOME/kent/src/hg/utils/automation/doGenscan.pl hg38 -buildDir=`pwd` \
+     -maskedSeq=/hive/data/genomes/hg38/hg38.contigs.2bit \
+       -stop=makeBed -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
+        > do.log 2>&1
+   # three jobs did not finish due to almost all N's in the sequence,
+   # just a couple of bases in each piece.  Their empty result is good enough.
+   time $HOME/kent/src/hg/utils/automation/doGenscan.pl hg38 -buildDir=`pwd` \
+     -maskedSeq=/hive/data/genomes/hg38/hg38.contigs.2bit \
+       -continue=makeBed -stop=makeBed -bigClusterHub=ku -dbHost=hgwdev \
+         -workhorse=hgwdev > makeBed.log 2>&1
+   # real    0m48.161s
+
+   cd lifted
+   mkdir -p gtf subopt nameFixed/gtf nameFixed/pep newNames pep
+   for F in ../gtf/000/*.gtf
+do
+   B=`basename $F`
+   liftUp gtf/${B} ../../../jkStuff/hg38.contigs.lift carry $F
+   echo $B
+done
+   for F in ../subopt/000/*.bed
+do
+   B=`basename $F`
+   liftUp subopt/${B} ../../../jkStuff/hg38.contigs.lift carry $F
+   echo $B
+done
+
+   ls gtf/chr*_[0-9][0-9].gtf \
+     | sed -e 's/_[0-9][0-9]//; s#gtf/##; s/.gtf//;' | sort -u | while read C
+do
+   cat ../pep/000/${C}_[0-9][0-9].pep > pep/${C}.pep
+   cat gtf/${C}_[0-9][0-9].gtf | ./gtfFixId.pl ${C} > nameFixed/gtf/${C}.gtf
+   ./pepNameFix.pl ${C} > nameFixed/pep/${C}.pep
+done
+
+   cat nameFixed/gtf/*.gtf > ../hg38.genscan.gtf
+   ls gtf | egrep -v '^chr[0-9XY][0-9]*_[0-9][0-9].gtf' | while read C
+do
+   cat gtf/${C}
+done >> ../hg38.genscan.gtf
+
+   cat nameFixed/pep/*.pep > ../hg38.genscan.pep
+   ls gtf | egrep -v '^chr[0-9XY][0-9]*_[0-9][0-9].gtf' \
+     | sed -e 's/.gtf/.pep/' | while read C
+do
+   cat ../pep/000/${C}
+done >> ../hg38.genscan.pep
+
+   cd /hive/data/genomes/hg38/bed/genscan
+   cat lifted/subopt/*.bed | sort -k1,1 -k2,2n > hg38.genscanSubopt.bed
+
+   gtfToGenePred hg38.genscan.gtf hg38.genscan.gp
+   genePredCheck -db=hg38 hg38.genscan.gp
+   # checked: 44149 failed: 0
+   genePredToBed hg38.genscan.gp hg38.genscan.bed
+   bedToBigBed hg38.genscan.bed ../../chrom.sizes hg38.genscan.bb
+   bedToBigBed hg38.genscanSubopt.bed ../../chrom.sizes hg38.genscanSubopt.bb
+   ldHgGene -gtf hg38 genscan hg38.genscan.gtf
+# Read 44149 transcripts in 339212 lines in 1 files
+#  44149 groups 345 seqs 1 sources 1 feature types
+
+    cat fb.hg38.genscan.txt
+    # 58278346 bases of 3049335806 (1.911%) in intersection
+    cat fb.hg38.genscanSubopt.txt
+    # 55020514 bases of 3049335806 (1.804%) in intersection
+
+    # oddly, we are getting half of what hg19 had ?
+    featureBits hg19 genscan
+    # 106433874 bases of 2897316137 (3.674%) in intersection
+
+    # This is because hg19 was run on soft-masked sequence and not
+    # on hard masked sequence
+
+############################################################################
+## genscan on unmasked sequence experiment (DONE - 2013-12-03 - Hiram)
+   ## instead, working on unmasked sequence:
+   mkdir /hive/data/genomes/hg38/bed/genscan/unmaskedRun
+   cd /hive/data/genomes/hg38/bed/genscan/unmaskedRun
+
+   mkdir liftSpecs
+   split -a 3 -d -l 1 ../../../jkStuff/hg38.nonBridged.lift liftSpecs/hg38_
+
+   mkdir fasta
+for F in liftSpecs/hg38_*
+do
+   L=`cut -f2 $F`
+   echo $L
+   /cluster/home/hiram/kent/src/hg/utils/lft2BitToFa.pl \
+       ../../../hg38.unmasked.2bit $F > fasta/${L}.fa
+done
+
+
+   cat << '_EOF_' > template
+#LOOP
+./runGsBig.bash $(path1) {check out exists gtf/$(root1).gtf} {check out exists pep/$(root1).pep} {check out exists subopt/$(root1).bed}
+#ENDLOOP
+'_EOF_'
+  # << happy emacs
+   cat << '_EOF_' > runGsBig.bash
+#!/bin/bash
+
+set -beEu -o pipefail
+
+export seqFile=$1
+export resultGtf=$2
+export resultPep=$3
+export resultSubopt=$4
+/cluster/bin/x86_64/gsBig $seqFile $resultGtf -trans=$resultPep -subopt=$resultSubopt -exe=/scratch/data/genscan/genscan -par=/scratch/data/genscan/HumanIso.smat -tmp=/dev/shm -window=2400000
+'_EOF_'
+  # << happy emacs
+
+  ls -1S `pwd`/fasta/*.fa > part.list
+  gensub2 part.list single template jobList
+  para create jobList
+  para push
+  # several jobs crashed:
+# Completed: 726 of 733 jobs
+# Crashed: 7 jobs
+# CPU time in finished jobs:      62501s    1041.68m    17.36h    0.72d  0.002 y
+# IO & Wait Time:                  2563s      42.72m     0.71h    0.03d  0.000 y
+# Average job time:                  90s       1.49m     0.02h    0.00d
+# Longest finished job:            3288s      54.80m     0.91h    0.04d
+# Submission to last job:          3294s      54.90m     0.92h    0.04d
+
+  para status | grep -v -w done | awk '{print $(NF-3)}' > crashed.job.list
+
+  mkdir /hive/data/genomes/hg38/bed/genscan/unmaskedRun/crashedJobs
+  cd /hive/data/genomes/hg38/bed/genscan/unmaskedRun/crashedJobs
+  mkdir splitBits
+
+  for F in chr2.06 chr1.03 chr3.05 chr12.07 chr10.05 chr17.08 chr11.04
+do
+   faSplit -lift=${F}.lift gap ../fasta/${F}.fa 2000000 splitBits/${F}_
+done
+
+  ls -1S `pwd`/splitBits/*.fa > part.list
+  cat << '_EOF_' > runGsBig.bash
+#!/bin/bash
+
+set -beEu -o pipefail
+
+export seqFile=$1
+export resultGtf=$2
+export resultPep=$3
+export resultSubopt=$4
+/cluster/bin/x86_64/gsBig $seqFile $resultGtf -trans=$resultPep -subopt=$resultSubopt -exe=/scratch/data/genscan/genscan -par=/scratch/data/genscan/HumanIso.smat -tmp=/dev/shm -window=2400000
+'_EOF_'
+  # << happy emacs
+  chmod +x runGsBig.bash
+
+  cat << '_EOF_' > template
+#LOOP
+./runGsBig.bash $(path1) {check out exists gtf/$(root1).gtf} {check out exists pep/$(root1).pep} {check out exists subopt/$(root1).bed}
+#ENDLOOP
+'_EOF_'
+  # << happy emacs
+
+  gensub2 part.list single template jobList
+  para create jobList
+  para push
+# Completed: 331 of 334 jobs
+# Crashed: 3 jobs
+# CPU time in finished jobs:      18097s     301.62m     5.03h    0.21d  0.001 y
+# IO & Wait Time:                  1085s      18.08m     0.30h    0.01d  0.000 y
+# Average job time:                  58s       0.97m     0.02h    0.00d
+# Longest finished job:              79s       1.32m     0.02h    0.00d
+# Submission to last job:           249s       4.15m     0.07h    0.00d
+  # the last three completed with -window=1600000
+
+  # lifting results:
+  cat << '_EOF_' > fixIds.pl
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+# Renumber genscan gene ids in a lifted GTF so they are unique per
+# contig piece: reads chrN.M.lifted GTF lines on stdin and writes
+# chrN.M.gtf with every id rewritten to chrN.M.<n>, where <n> is
+# incremented each time a new gene_id is encountered.
+# BIG ASSUMPTION: input lines for one gene are contiguous, since ids
+# are renumbered by comparing against the previous line's gene_id.
+
+my $argc = scalar(@ARGV);
+
+if ($argc != 1) {
+  printf STDERR "usage: cat chrN.M.lifted | ./fixIds.pl chrN.M\n";
+  exit 255;
+}
+
+# $F is the piece name (e.g. chr1.03); $C is the bare chrom (e.g. chr1)
+my $F=shift;
+my $C = $F;
+$C =~ s/\.[0-9][0-9]//;
+
+my $id = 0;
+my $prevId = "";
+open (GT, ">${F}.gtf") or die "can not write to ${F}.gtf";
+while (my $line=<>) {
+   chomp $line;
+   # extract the numeric part of gene_id "chrN<digits>..." for comparison
+   my $geneId = $line;
+   $geneId =~ s/^${C}.*gene_id "${C}//;
+   $geneId =~ s/";.*//;
+   $id += 1 if ( $prevId ne $geneId);
+   # rewrite every chrN<num>.<num> token on the line to chrN.M.<id>
+   # NOTE(review): the "." between the digit runs is an unescaped regex
+   # dot, so it matches any character — harmless here, but worth knowing
+   $line =~ s/${C}[0-9]+.[0-9]+/${F}.$id/g;
+   printf GT "%s\n", $line;
+   $prevId = $geneId;
+}
+close (GT);
+'_EOF_'
+  # << happy emacs
+  chmod +x fixIds.pl
+  for F in chr1.03 chr10.05 chr11.04 chr12.07 chr17.08 chr2.06 chr3.05
+do
+  echo "${F}" 1>&2
+  cut -f2 ${F}.lift | while read P
+  do
+     liftUp -type=.gtf stdout ${F}.lift error gtf/${P}.gtf
+  done > ${F}.lifted.gtf
+  cat ${F}.lifted.gtf | ./fixIds.pl ${F}
+done
+  # copied these results to ../gtf/ to get into the final result
+# -rw-rw-r-- 1 3349959 Jan  2 15:33 chr1.03.gtf
+# -rw-rw-r-- 1 2439182 Jan  2 15:33 chr10.05.gtf
+# -rw-rw-r-- 1 1068097 Jan  2 15:33 chr11.04.gtf
+# -rw-rw-r-- 1 2392548 Jan  2 15:33 chr12.07.gtf
+# -rw-rw-r-- 1 1831336 Jan  2 15:33 chr17.08.gtf
+# -rw-rw-r-- 1 3539694 Jan  2 15:33 chr2.06.gtf
+# -rw-rw-r-- 1 2309903 Jan  2 15:33 chr3.05.gtf
+
+  for F in chr1.03 chr10.05 chr11.04 chr12.07 chr17.08 chr2.06 chr3.05
+do
+  echo "${F}" 1>&2
+  cut -f2 ${F}.lift | while read P
+  do
+     liftUp -type=.bed stdout ${F}.lift error subopt/${P}.bed
+  done > ${F}.lifted.subopt.bed
+done
+  # copied these results to ../subopt/ to get into the final result
+# -rw-rw-r-- 1 3349959 Jan  2 15:33 chr1.03.gtf
+# -rw-rw-r-- 1 2439182 Jan  2 15:33 chr10.05.gtf
+# -rw-rw-r-- 1 1068097 Jan  2 15:33 chr11.04.gtf
+# -rw-rw-r-- 1 2392548 Jan  2 15:33 chr12.07.gtf
+# -rw-rw-r-- 1 1831336 Jan  2 15:33 chr17.08.gtf
+# -rw-rw-r-- 1 3539694 Jan  2 15:33 chr2.06.gtf
+# -rw-rw-r-- 1 2309903 Jan  2 15:33 chr3.05.gtf
+
+
+  cat << '_EOF_' > pepNameFix.pl
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+# Rename fasta headers in a genscan peptide file to sequential ids
+# chrN.M.1, chrN.M.2, ... so they match the renumbered GTF gene ids
+# produced by fixIds.pl.
+#
+# BIG ASSUMPTION ! ! ! - the peptides are in the same order as
+# they are in the GTF file ! ! !
+
+my $argc = scalar(@ARGV);
+
+if ($argc != 1) {
+  printf STDERR "usage: cat chrN.M.needNameFix.pep | ./pepNameFix.pl chrN.M > chrN.M.pep\n";
+  exit 255;
+}
+
+my $C=shift;
+
+# ids are 1-based to match the GTF renumbering
+my $id = 1;
+
+while (my $line = <>) {
+  if ($line =~ m/^>/) {
+    # replace whatever header was there with the next sequential name
+    printf ">%s.%d\n", $C, $id++;
+  } else {
+    print $line;
+  }
+}
+'_EOF_'
+  # << happy emacs
+  chmod +x pepNameFix.pl
+
+for F in chr1.03 chr10.05 chr11.04 chr12.07 chr17.08 chr2.06 chr3.05
+do
+  echo "${F}" 1>&2
+  cut -f2 ${F}.lift | while read P
+  do
+     cat pep/${P}.pep
+  done > ${F}.needNameFix.pep
+  cat ${F}.needNameFix.pep | ./pepNameFix.pl ${F} > ${F}.pep
+done
+  # copied these results to ../pep/ to get into the final result:
+# -rw-rw-r-- 1 1592655 Jan  2 15:55 chr1.03.pep
+# -rw-rw-r-- 1 1169168 Jan  2 15:55 chr10.05.pep
+# -rw-rw-r-- 1  519106 Jan  2 15:55 chr11.04.pep
+# -rw-rw-r-- 1 1152111 Jan  2 15:55 chr12.07.pep
+# -rw-rw-r-- 1  775052 Jan  2 15:55 chr17.08.pep
+# -rw-rw-r-- 1 1799546 Jan  2 15:55 chr2.06.pep
+# -rw-rw-r-- 1 1248762 Jan  2 15:55 chr3.05.pep
+
+  # and then, adding in all the results together
+
+  cd /hive/data/genomes/hg38/bed/genscan/unmaskedRun
<br>
+  cat << '_EOF_' > gtfIdFix.pl
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+# Renumber gene ids in a per-chromosome lifted GTF to chrN.<n> and
+# record the old-name -> new-name mapping in nameFixed/newNames/chrN.tab
+# (that mapping is later used by pepNameFix.pl to rename peptides).
+# Assumes lines belonging to one gene are contiguous in the input.
+
+my $argc = scalar(@ARGV);
+
+if ($argc != 1) {
+  printf STDERR "usage: cat lifted/gtf/chrN.gtf | ./gtfIdFix.pl chrN\n";
+  exit 255;
+}
+
+my $C=shift;
+
+my $id = 0;
+my $prevId = "";
+open (NM, ">nameFixed/newNames/${C}.tab") or die "can not write to nameFixed/newNames/${C}.tab";
+open (GT, ">nameFixed/gtf/${C}.gtf") or die "can not write to nameFixed/gtf/${C}.gtf";
+while (my $line=<>) {
+   chomp $line;
+   # pull out the full gene_id "..." value for change detection
+   my $geneId = $line;
+   $geneId =~ s/^${C}.*gene_id "//;
+   $geneId =~ s/";.*//;
+   if ( $prevId ne $geneId) {
+     $id += 1;
+     # emit mapping: original id <tab> chrN.<id>
+     printf NM "%s\t%s.%d\n", $geneId, $C, $id;
+   }
+   # rewrite chrN.<num>.<num> tokens to chrN.<id>
+   # NOTE(review): the dots in this pattern are unescaped regex dots;
+   # they match the literal dots present in the ids, so this works
+   $line =~ s/${C}.[0-9]+.[0-9]+/${C}.$id/g;
+   printf GT "%s\n", $line;
+   $prevId = $geneId;
+}
+close (GT);
+close (NM);
+'_EOF_'
+  # << happy emacs
+  chmod +x gtfIdFix.pl
+
+  rm -fr lifted
+  rm -fr nameFix
+  mkdir -p lifted
+  mkdir -p lifted/gtf
+  mkdir -p lifted/pep
+  mkdir -p lifted/subopt
+  mkdir -p nameFix
+  mkdir -p nameFix/gtf
+  mkdir -p nameFix/newNames
+
+  for F in liftSpecs/hg38_*
+do
+   L=`cut -f2 $F`
+   C=`cut -f4 $F`
+   liftUp -type=.gtf stdout ${F} error gtf/${L}.gtf >> lifted/gtf/${C}.gtf
+   cat pep/${L}.pep >> lifted/pep/${C}.pep
+   liftUp -type=.bed stdout ${F} error subopt/${L}.bed >> lifted/subopt/${C}.bed
+done
+
+  for F in lifted/gtf/*.gtf
+do
+  C=`basename $F | sed -e 's/.gtf//'`
+  cat $F | ./gtfIdFix.pl $C
+done
+
+mkdir -p nameFixed/pep
+
+  cat << '_EOF_' > pepNameFix.pl
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+# Second version of pepNameFix.pl: instead of relying on file order,
+# rename peptide fasta headers using the explicit old->new name map
+# written by gtfIdFix.pl (nameFixed/newNames/chrN.tab).  Reads
+# lifted/pep/chrN.pep and writes the renamed fasta to stdout.
+
+my $argc = scalar(@ARGV);
+if ($argc != 1) {
+  printf STDERR "usage: ./pepNameFix.pl chrN > chrN.pep\n";
+  exit 255
+}
+
+my $C = shift;
+# old peptide name -> new chrN.<id> name
+my %newName;
+
+open (FH, "<lifted/pep/$C.pep") or die "can not read <lifted/pep/$C.pep";
+open (NM, "<nameFixed/newNames/$C.tab") or die "can not read nameFixed/newNames/$C.tab";
+while (my $line = <NM>) {
+  chomp $line;
+  my ($needFix, $fixedName) = split('\t', $line);
+  $newName{$needFix} = $fixedName;
+}
+close (NM);
+
+while (my $line = <FH>) {
+  if ($line =~m /^>/) {
+    chomp $line;
+    $line =~ s/^>//;
+    # hard failure on any header not present in the map keeps the
+    # peptide set strictly in sync with the renumbered GTF
+    die "can not find name to fix $line" if (!exists($newName{$line}));
+    printf ">%s\n", $newName{$line};
+  } else {
+    print $line;
+  }
+}
+close (FH);
+'_EOF_'
+  # << happy emacs
+  chmod +x pepNameFix.pl
+
+  for F in lifted/pep/*.pep
+do
+  C=`basename $F | sed -e 's/.pep//'`
+  echo $C
+  ./pepNameFix.pl $C > nameFixed/pep/$C.pep
+done
+
+#############################################################################
+# Mark the new centromere regions (DONE - 2014-01-09 - Hiram)
+    mkdir /hive/data/genomes/hg38/bed/centromere
+    cd /hive/data/genomes/hg38/bed/centromere
+    grep GJ ../../hg38.agp > hg38.centContigs.agp
+
+    awk '{printf "%s\t%d\t%d\t%s\n", $1, $2-1, $3, $6}' hg38.centContigs.agp \
+      > hg38.centContigs.bed4
+
+    hgLoadBed hg38 centromeres hg38.centContigs.bed4
+    checkTableCoords hg38 centromeres
+
+#############################################################################
+## alternate sequence/haplotype alignments (DONE - 2014-01-23 - Hiram)
+    mkdir /hive/data/genomes/hg38/bed/lastzAltSequences
+    cd /hive/data/genomes/hg38/bed/lastzAltSequences
+
+rm -fr hg38.haplotypes.lift temp.lift targetFa queryFa
+mkdir targetFa
+mkdir queryFa
+touch temp.lift
+
+cat ../altLocations/chrToAlt.bed | while read L
+do
+  chrName=`echo $L | awk '{print $1}'`
+  chromSize=`egrep "^$chrName   " ../../chrom.sizes | cut -f2`
+  chrStart=`echo $L | awk '{if (($2-10000)>=0) {printf "%d", $2-10000} else {printf "0"}}'`
+  chrEnd=`echo $L | awk -v chromSize=$chromSize '{if (($3+10000)<=chromSize) {printf "%d", $3+10000} else {printf "%d", chromSize}}'`
+  chrSize=`echo $chrEnd $chrStart | awk '{print $1-$3}'`
+  queryName=`echo $L | awk '{print $4}'`
+  partName="${chrName}_${chrStart}_${chrEnd}"
+  echo $chrName $chrStart $chrEnd $queryName $partName $chromSize
+  echo -e "$chrStart\t${partName}\t$chrSize\t$chrName\t$chromSize" >> temp.lift
+  twoBitToFa ../../hg38.unmasked.2bit:$chrName:$chrStart-$chrEnd stdout | sed -e "s/^>.*/>$partName/;" > targetFa/$queryName.fa
+  twoBitToFa ../../hg38.unmasked.2bit:$queryName queryFa/$queryName.fa
+done
+
+sort -u temp.lift | sort -k4,4 -k1,1n > hg38.haplotypes.lift
+
+    # these were run serially on hgwdev, they could be a cluster run:
+    ssh ku
+    mkdir /hive/data/genomes/hg38/bed/lastzAltSequences/run.blastz
+    cd /hive/data/genomes/hg38/bed/lastzAltSequences/run.blastz
+    mkdir ../lav ../psl
+
+    # construct the jobList
+    ls ../targetFa | sed -e 's/.fa//;' | while read partName
+do
+   echo "./runJob.sh ${partName}"
+done > jobList
+
+    # NB: the jobList generated above invokes "./runJob.sh", so the
+    # script must be written to that name (it was previously written
+    # to "runJob") and made executable.
+    cat << '_EOF_' > runJob.sh
+#!/bin/sh
+
+# lastz-align one alt/haplotype sequence (query) against its padded
+# reference region (target), then lift the resulting psl back to full
+# chromosome coordinates via the lift file built above.
+export partName=$1
+export target="../targetFa/$partName.fa"
+export query="../queryFa/$partName.fa"
+export lav="../lav/$partName.lav"
+export psl="../psl/$partName.psl"
+
+# (fixed: O=600 was accidentally specified twice with the same value)
+/cluster/bin/penn/lastz-distrib-1.03.46/bin/lastz \
+  $target $query \
+  Y=15000 T=2 M=254 O=600 H=2000 E=150 K=10000 L=10000 \
+  Q=/scratch/data/blastz/human_chimp.v2.q > $lav
+lavToPsl $lav stdout | liftUp $psl ../hg38.haplotypes.lift error stdin
+'_EOF_'
+    # << happy emacs
+    chmod +x runJob.sh jobList
+
+    # these were run serially on hgwdev, they could be a cluster run:
+    time ./jobList > do.log
+    # real    61m35.898s
+
+    # chaining lastz results:
+    mkdir -p /hive/data/genomes/hg38/bed/lastzAltSequences/axtChain/run/chain
+    cd /hive/data/genomes/hg38/bed/lastzAltSequences/axtChain/run
+
+    ls ../../psl/*.psl | while read P
+do
+  B=`basename $P | sed -e 's/.psl//'`
+  echo $B $P
+  ls -og $P ../../targetFa/${B}.fa ../../queryFa/${B}.fa
+  /cluster/home/hiram/kent/src/hg/mouseStuff/axtChain/axtChain \
+    -psl -scoreScheme=/scratch/data/blastz/human_chimp.v2.q \
+    -minScore=1000 -linearGap=medium $P \
+    ../../../../hg38.unmasked.2bit \
+    ../../../../hg38.unmasked.2bit stdout \
+  | chainAntiRepeat ../../../../hg38.unmasked.2bit \
+    ../../../../hg38.unmasked.2bit stdin chain/${B}.chain
+done
+
+   # real    7m54.677s
+
+   cd /hive/data/genomes/hg38/bed/lastzAltSequences/axtChain
+   find ./run/chain -name "*.chain" | chainMergeSort -inputList=stdin \
+       | nice gzip -c > hg38.haplotypes.all.chain.gz
+   chainPreNet  hg38.haplotypes.all.chain.gz ../../../chrom.sizes \
+     /hive/data/genomes/hg38/chrom.sizes stdout \
+       | chainNet  stdin -minSpace=1 ../../../chrom.sizes \
+          ../../../chrom.sizes stdout /dev/null \
+             | netSyntenic stdin noClass.net
+
+    # Make liftOver chains from chroms to alternates:
+    netChainSubset -verbose=0 noClass.net hg38.haplotypes.all.chain.gz stdout \
+      | chainStitchId stdin stdout | gzip -c > hg38.haplotypes.over.chain.gz
+    # swap the alignments to get the alternates to chrom mappings:
+    chainSwap hg38.haplotypes.over.chain.gz stdout \
+       | gzip -c > hg38.reference.over.chain.gz
+    # and put them all together so mappings go both directions
+    chainMergeSort hg38.haplotypes.over.chain.gz hg38.reference.over.chain.gz \
+        | gzip -c > hg38.haploReference.over.chain.gz
+
+    hgLoadChain -tIndex hg38 chainAltSequence hg38.haploReference.over.chain.gz
+    netClass -verbose=0 -noAr noClass.net hg38 hg38 hg38.hg38AltSequence.net
+    netFilter -minGap=10 hg38.hg38AltSequence.net \
+      | hgLoadNet -verbose=0 hg38 netAltSequence stdin
+
+    chainToPsl hg38.haploReference.over.chain.gz ../../../chrom.sizes \
+      ../../../chrom.sizes \
+        /hive/data/genomes/hg38/hg38.unmasked.2bit  \
+          /hive/data/genomes/hg38/hg38.unmasked.2bit  \
+             hg38.beforeRecalc.haploReference.over.psl
+
+    pslCheck -targetSizes=../../../chrom.sizes \
+        -querySizes=../../../chrom.sizes \
+    hg38.beforeRecalc.haploReference.over.psl 2>&1 | tail -1
+    # checked: 3092 failed: 57 errors: 57
+
+    pslRecalcMatch hg38.beforeRecalc.haploReference.over.psl \
+    ../../../hg38.unmasked.2bit ../../../hg38.unmasked.2bit  \
+        hg38.haploReference.over.psl
+
+    pslCheck -targetSizes=../../../chrom.sizes \
+      -querySizes=../../../chrom.sizes \
+         hg38.haploReference.over.psl 2>&1 | tail -1
+    # checked: 3092 failed: 0 errors: 0
+
+    hgLoadPsl hg38 -table=altSequenceLiftOver hg38.haploReference.over.psl
+
+#############################################################################
+## construct non-bridged contig sequence (DONE - 2014-01-10 - Hiram)
+    mkdir /hive/data/genomes/hg38/bed/nonBridgedContigs
+    cd /hive/data/genomes/hg38/bed/nonBridgedContigs
+
+    # only need the actual split chroms in this lift, and the
+    # _nn name is a bit more convenient than the .nn:
+    gapToLift -minGap=100 hg38 stdout | sed -e 's/\./_/;' \
+        | awk '$1 != 0' > hg38.contigs.lift
+    # the warnings gapToLift issues are about gaps defined in the table
+    # that are abutting each other.  Telomere gaps are next to contig gaps
+    # those lifts in the format of a bed file:
+    awk '{printf "%s\t%d\t%d\t%s\n", $4, $1, $1+$3, $2}' hg38.contigs.lift \
+        > hg38.contigs.bed
+    # the negation of that is the gaps between the contigs
+    #  fixup the .N to _nn with the awk:
+    featureBits -not -countGaps hg38 hg38.contigs.bed -bed=stdout \
+| awk '{split($4,a,"."); printf "%s\t%d\t%d\t%s_%02d\n", $1,$2,$3,a[1],a[2]}' \
+             > hg38.gaps.bed
+    # 268613637 bases of 3209286105 (8.370%) in intersection
+
+    # together, those two should be 100% of the genome exactly:
+    featureBits -countGaps -or hg38 hg38.contigs.bed hg38.gaps.bed
+    #  3209286105 bases of 3209286105 (100.000%) in intersection
+
+    # the list of all those other bits not in the split chroms:
+    egrep "_alt|chrUn|chrM|_random" hg38.gaps.bed | cut -f1 \
+       | sort > other.bits.list
+
+    # extract those chrom pieces and the other bits from the masked sequence:
+    (twoBitToFa -bed=hg38.contigs.bed ../../hg38.2bit stdout; \
+      twoBitToFa -seqList=other.bits.list ../../hg38.2bit stdout) \
+        | faToTwoBit stdin hg38.contigs.2bit
+    twoBitInfo hg38.contigs.2bit stdout | sort -k2nr > hg38.contigs.chrom.sizes
+    # verify nothing has been lost:
+    twoBitToFa ../../hg38.2bit stdout | faCount stdin | tail -1
+# total 3209286105 898285419 623727342 626335137 900967885 159970322 30979743
+    twoBitToFa hg38.contigs.2bit stdout | faCount stdin | tail -1
+# total 3061688741 898285419 623727342 626335137 900967885  12372958 30979743
+    # the ACGT and CPG counts remain the same, only N's have been lost
+
+    # make a copy of this at the top:
+    cp -p hg38.contigs.2bit ../..
+    cp -p hg38.contigs.lift ../../jkStuff
+
+    # load as a track to be able to see where they are:
+    egrep "chrUn|chrM|_alt|_random" hg38.contigs.chrom.sizes \
+	| awk '{printf "%s\t0\t%d\t%s\n", $1, $2, $1}' \
+	> fullCoverage.hg38Contigs.bed
+    cat hg38.contigs.bed >>  fullCoverage.hg38Contigs.bed
+    featureBits -or -countGaps hg38 fullCoverage.hg38Contigs.bed gap
+    # 3209286105 bases of 3209286105 (100.000%) in intersection
+
+    hgLoadBed hg38 contigAlignmentSegments fullCoverage.hg38Contigs.bed
+
+#############################################################################
+## analysis of repeat elements from each RM run
+## (DONE - 2014-01-10 - Hiram)
+    mkdir /hive/data/genomes/hg38/bed/repeatElementCount
+    cd /hive/data/genomes/hg38/bed/repeatElementCount
+    for F in ../rmsk*/hg38.class.profile.txt \
+          ../repeatMaskerGenbank/hg38.class.profile.txt
+do
+   D=`dirname $F`
+   B=`basename $D | sed -e 's/repeatMaskerGenbank/NCBI/; s/rmsk//;'`
+   echo "==== $B ===="
+   grep rmskClass $F | sed -e 's#rmskClass/##; s/.tab//;' \
+     | awk '{printf "%s\t%d\n", $2, $1}' | sort > ${B}.tab
+done
+
+   # Hmmer does not have snRNA and tRNA ?
+   echo -e "snRNA\t0" >> Hmmer.tab
+   echo -e "tRNA\t0" >> Hmmer.tab
+   sort Hmmer.tab > t.tab
+   mv t.tab Hmmer.tab
+
+   echo "#  Repeat Masker item counts" > table.result.txt
+   echo "#  class         NCBI cross-match rmblastn HMMER" >> table.result.txt
+   join NCBI.tab CM.tab  | join - Blastn.tab  | join - Hmmer.tab \
+     | awk '{printf "%-15s\t%7d\t%7d\t%7d\t%7d\n", $1,$2,$3,$4,$5}' \
+       | sort -k2,2nr >> table.result.txt
+
+   cat table.result.txt
+#  Repeat Masker item counts
+#  class         NCBI cross-match rmblastn HMMER
+SINE            1849444 1852545 1822406 1884179
+LINE            1586141 1570523 1551012 1702529
+LTR              759248  748597  737799  805427
+DNA              502186  499108  485558  565171
+Simple_repeat    433789  703682  716968  636906
+Low_complexity   396378  102856  105181   95480
+Satellite         10198    7962    7703   10852
+LTR?               5884    5667    5068    9181
+snRNA              4595    4516    4548       0
+Retroposon         4163    5750    5630   11861
+Unknown            2802    5622    5263    3914
+DNA?               2157    3294    3018    4582
+tRNA               2154    2026    1983       0
+rRNA               1915    1840    1810     464
+RC                 1860    1784    1706    2059
+srpRNA             1784    1672    1633    1517
+scRNA              1397    1420    1426    6783
+RNA                 822     704     611    1484
+SINE?               488      38      38     970
+RC?                 445     411     374     806
+
+total           5567850 5520017 5459735 5744165
+
+#############################################################################
+## blat server turned on (DONE - 2014-01-13 - Hiram)
+#	After getting a blat server assigned by the Blat Server Gods,
+    ssh hgwdev
+
+    hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
+	VALUES ("hg38", "blat4c", "17780", "1", "0"); \
+	INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
+	VALUES ("hg38", "blat4c", "17781", "0", "1");' \
+	    hgcentraltest
+    #	test it with some sequence
+
+############################################################################
+## reset default position to ABO gene (DONE - 2014-01-13 - Hiram)
+    ssh hgwdev
+    hgsql -e 'update dbDb set defaultPos="chr9:133252000-133280861"
+	where name="hg38";' hgcentraltest
+
+#########################################################################
+## update grp table with new set of standard rows (DONE - 2014-01-29 - Hiram)
+    hgsql -e 'alter table grp rename grpOriginal;' hg38
+    hgsql -e 'drop table grp;' hg38
+    hgsql -e "create table grp (PRIMARY KEY(NAME)) select * from hg19.grp" hg38
+    hgsql -e 'delete from grp where name="denisova";' hg38
+    hgsql -e 'delete from grp where name="pub";' hg38
+    hgsql -e 'delete from grp where name="neandertal";' hg38
+    hgsql -e 'update grp set defaultIsClosed=0 where name="map";' hg38
+
+    hgsql -e 'drop table grpOriginal;' hg38
+
+############################################################################
+# PREPARE LINEAGE SPECIFIC REPEAT FILES FOR LASTZ (DONE - 2014-01-21 - Hiram)
+    ssh ku
+    mkdir /hive/data/genomes/hg38/bed/linSpecRep
+    cd /hive/data/genomes/hg38/bed/linSpecRep
+    #	create individual .out files from the master record in ../repeatMasker
+    mkdir splitOut
+    cat << '_EOF_' > split.csh
+#!/bin/csh -fe
+set C = $1
+head -3 ../repeatMasker/hg38.sorted.fa.out > splitOut/${C}.out
+grep "${C} " ../repeatMasker/hg38.sorted.fa.out >> splitOut/${C}.out
+'_EOF_'
+    # << happy emacs
+    chmod +x split.csh
+
+    cat << '_EOF_' > template
+#LOOP
+split.csh $(root1) {check out line+ splitOut/$(root1).out}
+#ENDLOOP
+'_EOF_'
+    # << happy emacs
+
+    # small ones first:
+    cut -f1 ../../chrom.sizes | tac > chrom.list
+    gensub2 chrom.list single template jobList
+    para create jobList
+    para try ... check ... push ... etc...
+# Completed: 93 of 93 jobs
+# CPU time in finished jobs:        127s       2.12m     0.04h    0.00d  0.000 y
+# IO & Wait Time:                 17154s     285.90m     4.76h    0.20d  0.001 y
+# Average job time:                 186s       3.10m     0.05h    0.00d
+# Longest finished job:             224s       3.73m     0.06h    0.00d
+# Submission to last job:           280s       4.67m     0.08h    0.00d
+
+    #	now, we can date and process each of those .out files
+    #	constructing the humanSpecific set of repeats
+    #   this means repeats found in human, and not in others
+    #   using mouse here for 'others' is good enough, a variety
+    #   of other species could be used (rat dog cow) where they all
+    #   produce the same result
+    mkdir dateRepeats
+    cd dateRepeats
+    cat << '_EOF_' > mkLSR
+#!/bin/bash
+set -beEu -o pipefail
+rm -f $1.out_mus-musculus
+ln -s ../splitOut/$1.out .
+/scratch/data/RepeatMasker/DateRepeats $1.out -query human -comp mouse
+rm $1.out
+mkdir -p ../humanSpecific
+/cluster/bin/scripts/extractRepeats 1 $1.out_mus-musculus \
+	> ../humanSpecific/$1.out.spec
+'_EOF_'
+    #	<< happy emacs
+    chmod +x mkLSR
+
+    cat << '_EOF_' > template
+#LOOP
+./mkLSR $(path1) {check out line+ ../humanSpecific/$(path1).out.spec}
+#ENDLOOP
+'_EOF_'
+    #	<< happy emacs
+
+    gensub2 ../chrom.list single template jobList
+    para try ... check ... push ... etc...
+    para time
+# Completed: 455 of 455 jobs
+# CPU time in finished jobs:      13985s     233.08m     3.88h    0.16d  0.000 y
+# IO & Wait Time:                  1470s      24.50m     0.41h    0.02d  0.000 y
+# Average job time:                  34s       0.57m     0.01h    0.00d
+# Longest finished job:             111s       1.85m     0.03h    0.00d
+# Submission to last job:          1427s      23.78m     0.40h    0.02d
+
+
+    # We also need the nibs for blastz runs with lineage specific repeats
+    mkdir /hive/data/genomes/hg38/bed/nibs
+    cd /hive/data/genomes/hg38/bed/nibs
+    cut -f1 ../../chrom.sizes | while read C
+do
+    twoBitToFa -seq=${C} ../../hg38.2bit stdout \
+	| faToNib -softMask stdin ${C}.nib
+    echo "${C} done"
+done
+
+    # verify nothing lost
+    cat ../../chrom.sizes \
+     | awk '{printf "nibFrag -masked %s.nib 0 %d + stdout\n", $1, $2}' \
+        | sh | faSize stdin
+# 3209286105 bases (159970322 N's 3049315783 real 1460684798 upper
+#  1588630985 lower) in 455 sequences in 1 files
+# Total size: mean 7053376.1 sd 31548372.6
+#  min 970 (chrUn_KI270394v1.nib:0-970)
+#  max 248956422 (chr1.nib:0-248956422) median 161218
+# %49.50 masked total, %52.10 masked real
+
+    mkdir /hive/data/staging/data/hg38/nib
+    rsync -a --progress ./ /hive/data/staging/data/hg38/nib
+
+#############################################################################
+## GRC Contigs/ctgPos2 track (DONE - 2014-12-25 - Hiram)
+    # provide mapping of UCSC chrom names to GRC names
+    mkdir /hive/data/genomes/hg38/bed/ctgPos2
+    cd /hive/data/genomes/hg38/bed/ctgPos2
+    grep -v "^#" ../../genbank/GCA_000001405.15_GRCh38_top-level.acc2name \
+	| awk '{printf "s/^%s/%s/g;\n", $1, $3}' > accessionToUcsc.sed.txt
+
+    find ../../genbank -type f | grep "/assembled_chromosomes/AGP/" | sed -e 's/.comp//' | while read F
+do
+   if [ -s $F ]; then
+      zcat $F | grep -v "^#"
+   fi
+done | sed -e "`cat accessionToUcsc.sed.txt`" > ucsc.grch38.agp
+
+    awk '$5 != "N"' ucsc.grch38.agp \
+| awk '{printf "%s\t%d\t%s\t%d\t%d\t%s\n", $6, $3-$2+1, $1, $2-1, $3, $5}' \
+	| sort -u | sort -k3,3 -k4,4n > ctgPos2.tab
+
+
+    export ctgSize=`awk '{print length($1)}' ctgPos2.tab | sort -n | tail -1`
+    export chrSize=`awk '{print length($3)}' ctgPos2.tab | sort -n | tail -1`
+
+    sed -e "s/20/$ctgSize/; s/16/$chrSize/;" \
+	/cluster/home/hiram/kent/src/hg/lib/ctgPos2.sql > hg38.ctgPos2.sql
+
+    hgLoadSqlTab hg38 ctgPos2 hg38.ctgPos2.sql ctgPos2.tab
+
+############################################################################
+# constructing download files (WORKING - 2014-01-15 - Hiram)
+    # add hg38 to all.joiner and verify it is clean:
+    joinerCheck -database=hg38 -keys all.joiner
+# Checking keys on database hg38
+#  hg38.ucscToINSDC.chrom - hits 455 of 455 (100.000%) ok
+    # and all table coordinates are OK:
+    checkTableCoords hg38
+
+    cd /hive/data/genomes/hg38
+    time $HOME/kent/src/hg/utils/automation/makeDownloads.pl \
+      -workhorse=hgwdev hg38
+    # makeDownloads.pl has made a preliminary set of files
+
+    # need to fixup these names and add chromFa.tar.gz files
+    cd /hive/data/genomes/hg38/goldenPath/bigZips
+
+    mkdir chroms
+    mkdir maskedChroms
+
+    faSplit byname hg38.fa.gz chroms/
+    faSplit byname hg38.fa.masked.gz maskedChroms/
+
+    tar cvzf ./hg38.chromFa.tar.gz ./chroms/
+    tar cvzf ./hg38.chromFaMasked.tar.gz ./maskedChroms/
+
+    cd /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/bigZips
+    ln -s /hive/data/genomes/hg38/goldenPath/bigZips/hg38.chromFa.tar.gz hg38.chromFa.tar.gz
+    ln -s /hive/data/genomes/hg38/goldenPath/bigZips/hg38.chromFaMasked.tar.gz hg38.chromFaMasked.tar.gz
+
+    #also added entries for above to md5sum.txt and README.txt
+
+############################################################################
+# LASTZ MOUSE Mm10 (DONE - 2014-01-23,31 - Hiram)
+    # can no longer use the lineage specific repeats with the new lastz
+    # use a screen to manage this longish job:
+    screen -S hg38Mm10
+
+    mkdir /hive/data/genomes/hg38/bed/lastzMm10.2014-01-23
+    cd /hive/data/genomes/hg38/bed/lastzMm10.2014-01-23
+
+    # best to always specify an exact path to lastz so we know which one is used
+    # lastz default parameters are human-mouse parameters
+
+    cat << '_EOF_' > DEF
+# human vs mouse
+BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
+
+# TARGET: Human Hg38
+SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit
+SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes
+SEQ1_CTGDIR=/hive/data/genomes/hg38/hg38.contigs.2bit
+SEQ1_CTGLEN=/hive/data/genomes/hg38/hg38.contigs.chrom.sizes
+SEQ1_LIFT=/hive/data/genomes/hg38/jkStuff/hg38.contigs.lift
+SEQ1_CHUNK=40000000
+SEQ1_LAP=10000
+
+# QUERY: Mouse Mm10
+SEQ2_DIR=/scratch/data/mm10/mm10.2bit
+SEQ2_LEN=/scratch/data/mm10/chrom.sizes
+SEQ2_CHUNK=20000000
+SEQ2_LAP=0
+
+BASE=/hive/data/genomes/hg38/bed/lastzMm10.2014-01-23
+TMPDIR=/dev/shm
+'_EOF_'
+    # << happy emacs
+
+    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl \
+	-verbose=2 \
+        -stop=net `pwd`/DEF \
+        -syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
+	-fileServer=hgwdev \
+        -chainMinScore=3000 -chainLinearGap=medium > net.log 2>&1
+    #	real    1494m26.135s ---- busy cluster
+    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl \
+	-verbose=2 \
+        -continue=load `pwd`/DEF \
+        -syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
+	-fileServer=hgwdev \
+        -chainMinScore=3000 -chainLinearGap=medium > load.log 2>&1
+    #	Elapsed time: 43m11s
+    cat fb.hg38.chainMm10Link.txt
+    # 964465044 bases of 3049335806 (31.629%) in intersection
+
+    #	and the swap
+    mkdir /hive/data/genomes/mm10/bed/blastz.hg38.swap
+    cd /hive/data/genomes/mm10/bed/blastz.hg38.swap
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+	/hive/data/genomes/hg38/bed/lastzMm10.2014-01-23/DEF \
+	-swap -syntenicNet \
+	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
+	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1
+    #   real    83m28.397s
+
+    cat fb.mm10.chainHg38Link.txt
+    #	937030766 bases of 2652783500 (35.323%) in intersection
+
+#########################################################################
+# LASTZ Dog CanFam3 (DONE - 2014-01-26 - Hiram)
+    mkdir /hive/data/genomes/hg38/bed/lastzCanFam3.2014-01-26
+    cd /hive/data/genomes/hg38/bed/lastzCanFam3.2014-01-26
+
+    cat << '_EOF_' > DEF
+# human vs dog
+BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
+
+# TARGET: Human Hg38
+SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit
+SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes
+SEQ1_CTGDIR=/hive/data/genomes/hg38/hg38.contigs.2bit
+SEQ1_CTGLEN=/hive/data/genomes/hg38/hg38.contigs.chrom.sizes
+SEQ1_LIFT=/hive/data/genomes/hg38/jkStuff/hg38.contigs.lift
+SEQ1_CHUNK=20000000
+SEQ1_LAP=10000
+
+# QUERY: Dog CanFam3
+SEQ2_DIR=/hive/data/genomes/canFam3/canFam3.2bit
+SEQ2_LEN=/hive/data/genomes/canFam3/chrom.sizes
+SEQ2_CHUNK=20000000
+SEQ2_LAP=0
+
+BASE=/hive/data/genomes/hg38/bed/lastzCanFam3.2014-01-26
+TMPDIR=/dev/shm
+'_EOF_'
+    # << happy emacs
+
+    #	establish a screen to control this job
+    screen -S hg38CanFam3
+    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl -verbose=2 \
+	`pwd`/DEF \
+	-syntenicNet \
+	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
+	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1
+    # Elapsed time: 1396m22s - busy cluster
+    cat fb.hg38.chainCanFam3Link.txt
+    #  1523987456 bases of 3049335806 (49.978%) in intersection
+
+    #	running the swap
+    mkdir /hive/data/genomes/canFam3/bed/blastz.hg38.swap
+    cd /hive/data/genomes/canFam3/bed/blastz.hg38.swap
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+	/hive/data/genomes/hg38/bed/lastzCanFam3.2014-01-26/DEF \
+	-syntenicNet -swap \
+	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
+	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1
+    #	real    107m57.787s
+
+    cat fb.canFam3.chainHg38Link.txt
+    #	1437624815 bases of 2392715236 (60.083%) in intersection
+
+#########################################################################
+# LASTZ Macaca Mulatta RheMac3 (DONE - 2014-01-27,02-10 - Hiram)
+    mkdir /hive/data/genomes/hg38/bed/lastzRheMac3.2014-01-27
+    cd /hive/data/genomes/hg38/bed/lastzRheMac3.2014-01-27
+
+    # best to always specify an exact path to lastz so we know which one is used
+    # lastz default parameters are human-mouse parameters
+
+    cat << '_EOF_' > DEF
+# human vs macaca mulatta
+BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
+# maximum M allowed with lastz is only 254
+BLASTZ_M=254
+BLASTZ_Q=/scratch/data/blastz/human_chimp.v2.q
+BLASTZ_O=600
+BLASTZ_E=150
+# other parameters from panTro2 vs hg18 lastz on advice from Webb
+BLASTZ_K=4500
+BLASTZ_Y=15000
+BLASTZ_T=2
+
+# TARGET: Human Hg38
+SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit
+SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes
+SEQ1_CTGDIR=/hive/data/genomes/hg38/hg38.contigs.2bit
+SEQ1_CTGLEN=/hive/data/genomes/hg38/hg38.contigs.chrom.sizes
+SEQ1_LIFT=/hive/data/genomes/hg38/jkStuff/hg38.contigs.lift
+SEQ1_CHUNK=20000000
+SEQ1_LAP=10000
+
+# QUERY: Macaca Mulatta RheMac3
+SEQ2_DIR=/scratch/data/rheMac3/rheMac3.2bit
+SEQ2_LEN=/scratch/data/rheMac3/chrom.sizes
+SEQ2_CHUNK=20000000
+SEQ2_LAP=0
+SEQ2_IN_CONTIGS=0
+
+BASE=/hive/data/genomes/hg38/bed/lastzRheMac3.2014-01-27
+TMPDIR=/dev/shm
+'_EOF_'
+    # << happy emacs
+    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl \
+        `pwd`/DEF \
+        -syntenicNet -fileServer=hgwdev \
+	-chainMinScore=5000 -chainLinearGap=medium \
+        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
+    #   Elapsed time: 1426m43s - busy cluster
+    cat fb.hg38.chainRheMac3Link.txt
+    #   2431208700 bases of 3049335806 (79.729%) in intersection
+
+    #   running the swap
+    mkdir /hive/data/genomes/rheMac3/bed/blastz.hg38.swap
+    cd /hive/data/genomes/rheMac3/bed/blastz.hg38.swap
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+        /hive/data/genomes/hg38/bed/lastzRheMac3.2014-01-27/DEF \
+        -swap -syntenicNet -chainMinScore=5000 -chainLinearGap=medium \
+        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
+    #    82m32.329s
+    cat fb.rheMac3.chainHg38Link.txt
+    #   2288533769 bases of 2639145830 (86.715%) in intersection
+
+#########################################################################
+## construct analysis set (DONE - 2014-01-27 - Hiram)
+    mkdir /hive/data/genomes/hg38/bed/analysisSet
+    cd /hive/data/genomes/hg38/bed/analysisSet
+    mkdir -p splitFa
+
+    faToTwoBit \
+../../genbank/seqs_for_alignment_pipelines/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.gz \
+	hg38.unmasked.analysisSet.2bit
+
+    faCount splitFa/c*.fa > splitFa.faCount.txt
+
+    egrep -v "chr[0-9_BGHIJKLXv]*_alt" ../rmskCM/hg38.sorted.fa.out \
+	> hg38.analysisSet.out
+
+    twoBitMask hg38.unmasked.analysisSet.2bit hg38.analysisSet.out \
+	hg38.rmsk.analysisSet.2bit
+
+    egrep -v "chr[0-9_BGHIJKLXv]*_alt" ../simpleRepeat/trfMask.bed \
+	> trfMask.analysisSet.bed
+
+    twoBitMask hg38.rmsk.analysisSet.2bit -add trfMask.analysisSet.bed \
+	hg38.analysisSet.2bit
+
+    twoBitToFa hg38.unmasked.analysisSet.2bit stdout | faSize stdin
+# 3099922541 bases (165046090 N's 2934876451 real 2934876451 upper 0 lower)
+#	in 195 sequences in 1 files
+# Total size: mean 15897038.7 sd 46804464.6 min 970 (chrUn_KI270394v1)
+#	max 248956422 (chr1) median 32032
+# %0.00 masked total, %0.00 masked real
+
+    twoBitToFa hg38.analysisSet.2bit stdout | faSize stdin
+# 3099922541 bases (165046090 N's 2934876451 real 1409378896 upper 1525497555
+#	lower) in 195 sequences in 1 files
+# Total size: mean 15897038.7 sd 46804464.6 min 970 (chrUn_KI270394v1)
+#	max 248956422 (chr1) median 32032
+# %49.21 masked total, %51.98 masked real
+
+    mkdir hg38.analysisSet.chroms
+    twoBitToFa hg38.analysisSet.2bit stdout \
+	| faSplit byname stdin hg38.analysisSet.chroms/
+
+    tar cvzf ./hg38.analysisSet.chroms.tar.gz ./hg38.analysisSet.chroms
+
+    ln -s `pwd`/hg38.analysisSet.2bit \
+        /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/bigZips
+    ln -s `pwd`/hg38.analysisSet.chroms.tar.gz \
+        /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/bigZips
+    # add these md5 sums to md5sum.txt
+    md5sum hg38.analysisSet.2bit hg38.analysisSet.chroms.tar.gz >> \
+        /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/bigZips/md5sum.txt
+
+    cp ../../genbank/README_ANALYSIS_SETS README.analysisSet.txt
+    # add note at the top of README:
+    ######################################################################
+    UCSC copy of the file from:
+
+    ftp://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Homo_sapiens/GRCh38/seqs_for_alignment_pipelines/README_ANALYSIS_SETS
+
+    ln -s `pwd`/README.analysisSet.txt \
+        /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/bigZips
+
+#########################################################################
+# the FULL analysis set (DONE - 2014-03-18 - Hiram)
+    mkdir /hive/data/genomes/hg38/bed/fullAnalysisSet
+    cd /hive/data/genomes/hg38/bed/fullAnalysisSet
+
+    mkdir hg38.fullAnalysisSet.chroms
+    twoBitToFa ../analysisSet/hg38.analysisSet.2bit stdout \
+       | faSplit byname stdin hg38.fullAnalysisSet.chroms/
+
+    grep _alt ../../chrom.sizes | cut -f 1 > alt.list
+
+    twoBitToFa -seqList=alt.list ../../hg38.2bit stdout \
+       | faSplit byname stdin hg38.fullAnalysisSet.chroms/
+
+    faCount hg38.fullAnalysisSet.chroms/chr*.fa > faCount.fullAnalysisSet.txt
+
+    faToTwoBit hg38.fullAnalysisSet.chroms/chr*.fa hg38.fullAnalysisSet.2bit
+    twoBitInfo hg38.fullAnalysisSet.2bit stdout | sort -k2nr > chrom.sizes
+
+    tar cvzf ./hg38.fullAnalysisSet.chroms.tar.gz ./hg38.fullAnalysisSet.chroms
+
+#########################################################################
+# LASTZ Self/hg38 (DONE - 2014-01-25,02-10 - Hiram)
+    # can no longer use the lineage specific repeats with the new lastz
+    # use a screen to manage this longish job:
+    screen -S hg38Self
+
+    mkdir /hive/data/genomes/hg38/bed/lastzSelf.2014-01-25
+    cd /hive/data/genomes/hg38/bed/lastzSelf.2014-01-25
+    # construct the non-bridged contigs sequence to use:
+    (twoBitToFa ../nonBridgedContigs/hg38.chroms.contigs.2bit stdout;
+      twoBitToFa ../../hg38.2bit:chrM stdout) | faToTwoBit stdin hg38.self.2bit
+    twoBitInfo hg38.self.2bit stdout | sort -k2nr > hg38.self.chrom.sizes
+
+    # best to always specify an exact path to lastz so we know which one is used
+    # lastz default parameters are human-mouse parameters
+
+    cat << '_EOF_' > DEF
+# human vs human with mouse defaults
+BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
+
+# TARGET: Human Hg38
+SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit
+SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes
+SEQ1_CTGDIR=/hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/hg38.self.2bit
+SEQ1_CTGLEN=/hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/hg38.self.chrom.sizes
+SEQ1_LIFT=/hive/data/genomes/hg38/jkStuff/hg38.contigs.lift
+SEQ1_CHUNK=20000000
+SEQ1_LAP=10000
+
+# QUERY: Human Hg38
+SEQ2_DIR=/hive/data/genomes/hg38/hg38.2bit
+SEQ2_LEN=/hive/data/genomes/hg38/chrom.sizes
+SEQ2_CTGDIR=/hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/hg38.self.2bit
+SEQ2_CTGLEN=/hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/hg38.self.chrom.sizes
+SEQ2_LIFT=/hive/data/genomes/hg38/jkStuff/hg38.contigs.lift
+SEQ2_CHUNK=20000000
+SEQ2_LAP=0
+
+BASE=/hive/data/genomes/hg38/bed/lastzSelf.2014-01-25
+TMPDIR=/dev/shm
+'_EOF_'
+    # << happy emacs
+
+    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl \
+	-verbose=2 \
+        -stop=net `pwd`/DEF \
+        -syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
+	-fileServer=hgwdev \
+        -chainMinScore=3000 -chainLinearGap=medium > net.log 2>&1
+    #  real    1518m15.817s -- problems
+    # there was a problem in the 'part014' batch.  running that manually:
+    mkdir /hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/run.blastz/lastJob
+    cd /hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/run.blastz/lastJob
+    # make 100 jobs out of the 10 parts:
+    mkdir -p psl
+    cp ../tParts/part014.lst ./xpart014.lst
+    split -l 1 xpart014.lst -d -a 3 part
+    for F in part0*
+do
+   mv $F $F.lst
+done
+
+for T in part0*.lst
+do
+  for Q in part0*.lst
+  do
+    mkdir -p psl/${T}
+    echo /cluster/home/hiram/kent/src/hg/utils/automation/blastz-run-ucsc -outFormat psl -dropSelf ${T} ${Q} ../../DEF \{check out exists psl/${T}/${T}.${Q}.psl\}
+  done
+done > jobList
+    para -ram=32g create jobList
+    para push
+    # one last failing job:
+# Completed: 99 of 100 jobs
+# CPU time in finished jobs:       2836s      47.27m     0.79h    0.03d  0.000 y
+# IO & Wait Time:                   279s       4.65m     0.08h    0.00d  0.000 y
+# Average job time:                  31s       0.52m     0.01h    0.00d
+# Longest finished job:             586s       9.77m     0.16h    0.01d
+# Submission to last job:           620s      10.33m     0.17h    0.01d
+
+    mkdir /hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/run.blastz/lastJob/split010
+    cd /hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/run.blastz/lastJob/split010
+    mkdir psl
+
+    twoBitToFa /hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/hg38.self.2bit:chr16_03:0-1689648 part010.fa
+
+    faSplit -lift=split010.lift size part010.fa 169000 split010_
+TOP="/hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/run.blastz/lastJob/split010"
+
+for T in split*.fa
+do
+  mkdir -p psl/${T}
+  echo "${TOP}/${T}" > ${T}.lst
+  faToTwoBit  ${T} ${T}.2bit
+  for Q in split*.fa
+  do
+     echo "/cluster/home/hiram/kent/src/hg/utils/automation/blastz-run-ucsc -outFormat psl -dropSelf ${T}.lst ${Q}.lst DEF {check out exists psl/${T}/${T}.${Q}.psl}"
+  done
+done > jobList
+     para -ram=32g create jobList
+
+# Completed: 100 of 100 jobs
+# CPU time in finished jobs:     176579s    2942.99m    49.05h    2.04d  0.006 y
+# IO & Wait Time:                  1239s      20.64m     0.34h    0.01d  0.000 y
+# Average job time:                1778s      29.64m     0.49h    0.02d
+# Longest finished job:           29343s     489.05m     8.15h    0.34d
+# Submission to last job:         29348s     489.13m     8.15h    0.34d
+
+    catDir psl/* | grep -v "^#" > raw.psl
+
+    liftUp -type=.psl stdout split010.lift error raw.psl \
+        | liftUp -pslQ -type=.psl chr16_03.psl split010.lift error stdin
+
+    # this combination allowed psl headers to sneak in the middle,
+    # had to be cleaned:
+    catDir psl/* | grep -v "^#" > part014.psl
+    cat split010/chr16_03.psl >> part014.psl
+    cp -p part014.psl ../../psl/part014.lst/part014.lst_part014.lst.psl
+
+    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl \
+	-verbose=2 \
+        -continue=cat -stop=net `pwd`/DEF \
+        -syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
+	-fileServer=hgwdev \
+        -chainMinScore=3000 -chainLinearGap=medium > cat.log 2>&1
+    # real    43m11.340s
+    # failed in chaining, running manually on hgwdev
+    time ./bigJobs.sh > bigJobs.log 2>&1
+    #  real    468m59.648s
+
+    time ./part014.sh > part014.log 2>&1
+
+    # real    1319m57.911s
+    # -rw-rw-r-- 1 3581498246 Feb  8 14:37 part014.lst.chain
+    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl \
+	-verbose=2 \
+        -continue=chainMerge -stop=net `pwd`/DEF \
+        -syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
+	-fileServer=hgwdev \
+        -chainMinScore=3000 -chainLinearGap=medium > chainMerge.log 2>&1
+
+    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl \
+	-verbose=2 \
+        -continue=load -stop=load `pwd`/DEF \
+        -syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
+	-fileServer=hgwdev \
+        -chainMinScore=3000 -chainLinearGap=medium > load.log 2>&1
+
+    hgLoadChain -normScore -tIndex hg38 chainSelf hg38.hg38.all.chain.gz
+    #  Loading 104815249 chains into hg38.chainSelf
+
+    cat fb.hg38.chainSelfLink.txt
+    #   392419010 bases of 3049335806 (12.869%) in intersection
+    cd /hive/data/genomes/hg38/bed
+    ln -s lastzSelf.2014-01-25 lastz.self
+    ln -s lastzSelf.2014-01-25 lastz.hg38
+
+#########################################################################
+## 4-Way Multiz for UCSC Genes construction (DONE - 2014-02-11 - Hiram)
+    ssh hgwdev
+    mkdir /hive/data/genomes/hg38/bed/multiz4way
+    cd /hive/data/genomes/hg38/bed/multiz4way
+
+    #	extract our 4 organisms from the 44-way on hg18:
+    ln -s /hive/data/genomes/hg18/bed/multiz44way/44way.4d.nh ./44way.nh
+
+    /cluster/bin/phast/tree_doctor \
+	--prune-all-but hg19,mm10,canFam3,rheMac3 $HOME/kent/src/hg/utils/phyloTrees/120way.nh \
+	| sed -e "s/hg19/hg38/" > 4way.nh
+
+    #	this looks like:
+    cat 4way.nh
+(((hg38:0.033974,rheMac3:0.037601):0.109934,mm10:0.356483):0.020593,canFam3:0.165928);
+
+
+    #	Use this specification in the phyloGif tool:
+    #	http://genome.ucsc.edu/cgi-bin/phyloGif
+    #	to obtain a gif image for htdocs/images/phylo/hg38_4way.gif
+
+    /cluster/bin/phast/all_dists 4way.nh > 4way.distances.txt
+    #	Use this output to create the table below
+    grep -i hg38 4way.distances.txt | sort -k3,3n
+#
+#	If you can fill in all the numbers in this table, you are ready for
+#	the multiple alignment procedure
+#
+#                         featureBits chainLink measures
+#                                        chainHg38Link   chain    linearGap
+#    distance                      on hg38    on other   minScore
+#  1  0.071575 - rhesus rheMac3 (% 79.729) (% 86.715)       5000     medium
+#  2  0.330429 - dog canFam3    (% 49.978) (% 60.083)       3000     medium
+#  3  0.500391 - mouse mm10     (% 31.629) (% 35.323)       3000     medium
+
+    #	using the syntenic nets
+    cd /cluster/data/hg38/bed/multiz4way
+    mkdir mafLinks
+    cd mafLinks
+    mkdir rheMac3 canFam3 mm10
+
+    for D in mm10 canFam3 rheMac3
+do
+    ln -s ../../../lastz.${D}/axtChain/hg38.${D}.synNet.maf.gz ./${D}/
+done
+
+    mkdir /hive/data/genomes/hg38/bed/multiz4way/mafSplit
+    cd /hive/data/genomes/hg38/bed/multiz4way/mafSplit
+    for D in mm10 canFam3 rheMac3
+do
+    echo "working: ${D}"
+    zcat ../mafLinks/${D}/hg38.${D}.synNet.maf.gz > ${D}.maf
+    mkdir -p ${D}
+    mafSplit -byTarget -useFullSequenceName /dev/null ${D}/${D}_  ${D}.maf
+    rm -f ${D}.maf
+done
+
+    #	determine what is the newest version of multiz and use that
+    cd /hive/data/genomes/hg38/bed/multiz4way
+    mkdir penn
+    cp -p /cluster/bin/penn/multiz.2009-01-21_patched/multiz penn
+    cp -p /cluster/bin/penn/multiz.2009-01-21_patched/maf_project penn
+    cp -p /cluster/bin/penn/multiz.2009-01-21_patched/autoMZ penn
+
+    # the autoMultiz cluster run
+    ssh ku
+    cd /hive/data/genomes/hg38/bed/multiz4way
+
+    # create species list and stripped down tree for autoMZ
+    sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
+	4way.nh > tmp.nh
+    echo `cat tmp.nh` | sed 's/ //g; s/,/ /g' > tree.nh
+    sed 's/[()]//g; s/,/ /g' tree.nh > species.lst
+
+    mkdir run maf
+    cd run
+
+    #	NOTE: you need to set the db and multiz dirname properly in this script
+    cat > autoMultiz << '_EOF_'
+#!/bin/csh -ef
+set db = hg38
+set c = $1
+set maf = $2
+set binDir = /hive/data/genomes/hg38/bed/multiz4way/penn
+set tmp = /dev/shm/$db/multiz.$c
+set pairs = /hive/data/genomes/hg38/bed/multiz4way/mafSplit
+rm -fr $tmp
+mkdir -p $tmp
+cp ../{tree.nh,species.lst} $tmp
+pushd $tmp
+foreach s (`cat species.lst`)
+    set in = $pairs/$s/${s}_$c.maf
+    set out = $db.$s.sing.maf
+    if ($s == $db) then
+	continue
+    endif
+    if (-e $in.gz) then
+	zcat $in.gz > $out
+    else if (-e $in) then
+	cp $in $out
+    else
+	echo "##maf version=1 scoring=autoMZ" > $out
+    endif
+end
+set path = ($binDir $path); rehash
+$binDir/autoMZ + T=$tmp E=$db "`cat tree.nh`" $db.*.sing.maf $c.maf
+popd
+cp $tmp/$c.maf $maf
+rm -fr $tmp
+'_EOF_'
+    # << happy emacs
+    chmod +x autoMultiz
+
+cat  << '_EOF_' > template
+#LOOP
+./autoMultiz $(root1) {check out line+ /hive/data/genomes/hg38/bed/multiz4way/maf/$(root1).maf}
+#ENDLOOP
+'_EOF_'
+    # << happy emacs
+
+    cut -f1 /cluster/data/hg38/chrom.sizes > chrom.lst
+    gensub2 chrom.lst single template jobList
+    para create jobList
+    # 455 jobs
+    para try ... check ... push ... etc ...
+# Completed: 455 of 455 jobs
+# CPU time in finished jobs:      50111s     835.18m    13.92h    0.58d  0.002 y
+# IO & Wait Time:                  5574s      92.91m     1.55h    0.06d  0.000 y
+# Average job time:                 122s       2.04m     0.03h    0.00d
+# Longest finished job:            4717s      78.62m     1.31h    0.05d
+# Submission to last job:          4722s      78.70m     1.31h    0.05d
+
+    #	combine results into a single file for loading and gbdb reference
+    cd /hive/data/genomes/hg38/bed/multiz4way
+    grep "^#" maf/chr19_GL949749v2_alt.maf | grep -v "eof maf" > multiz4way.maf
+    grep -h -v "^#" maf/*.maf >> multiz4way.maf
+    grep "^#" maf/chr19_GL949749v2_alt.maf | grep "eof maf" >> multiz4way.maf
+    #	real    3m27.561s
+
+    #	makes a 8.5 Gb file:
+    #   -rw-rw-r-- 1 9044143788 Feb 11 12:51 multiz4way.maf
+
+    # Load into database
+    ssh hgwdev
+    cd /hive/data/genomes/hg38/bed/multiz4way
+    mkdir /gbdb/hg38/multiz4way
+    ln -s /hive/data/genomes/hg38/bed/multiz4way/multiz4way.maf \
+	/gbdb/hg38/multiz4way
+    #	the hgLoadMaf generates huge tmp files, locate them in /dev/shm
+    cd /dev/shm
+    time nice -n +19 hgLoadMaf hg38 multiz4way
+    #   Loaded 6141667 mafs in 1 files from /gbdb/hg38/multiz4way
+    #   real    2m2.812s
+
+    cd /hive/data/genomes/hg38/bed/multiz4way
+    time (cat /gbdb/hg38/multiz4way/*.maf \
+        | hgLoadMafSummary -verbose=2 -minSize=10000 \
+	-mergeGap=500 -maxSize=50000 hg38 multiz4waySummary stdin)
+    # Created 1266559 summary blocks from 11780291 components and 6141667 mafs
+    # real    3m0.791s
+# -rw-rw-r-- 1  311246327 Feb 11 12:54 multiz4way.tab
+# -rw-rw-r-- 1   58730176 Feb 11 12:58 multiz4waySummary.tab
+    wc -l multiz4way*
+    # 6141667 multiz4way.tab
+    # 1266559 multiz4waySummary.tab
+    # 7408226 total
+
+#########################################################################
+## RE-load alternate sequence for PSL display (DONE - 2016-01-15 - Hiram)
+## The procedure below
+##    "load alternate sequence for PSL display (DONE - #2014-02-24 - Hiram)
+## produced an illegal psl Table altSeqLiftOverPsl:
+    pslCheck -db=hg38 altSeqLiftOverPsl
+    checked: 266 failed: 264 errors: 1046
+
+## Since then, the gff3ToPsl command has been updated to be a bit more
+##  robust, so, the following sequence produces the new alignment file:
+    mkdir -p /hive/data/genomes/hg38/bed/altAlignments/redo2016
+    cd /hive/data/genomes/hg38/bed/altAlignments/redo2016
+
+mkdir -p ucscPsl
+
+awk -F'/' '{printf "s/^%s\t/%s\t/g;\n", $3,$2}' ../accessionToUcsc.sed.txt \
+    > ucscToNcbi.sed.txt
+
+sed -f ucscToNcbi.sed.txt ../../../chrom.sizes > ncbi.chrom.sizes
+
+paste ncbi.chrom.sizes ../../../chrom.sizes \
+  | awk -F'\t' '{printf "0\t%s\t%d\t%s\t%d\n", $1,$2,$3,$4}' \
+    > ncbiToUcsc.lift
+
+find ../../../genbank -type f | grep alt_scaffolds | grep "\.gff$" \
+  | while read gff
+do
+  name=`basename $gff | sed -e 's/_.*//;'`
+  fasta=`dirname $gff | sed -e 's#alignments#FASTA/alt.scaf.fa.gz#;'`
+  size=`faCount $fasta | grep -w total | cut -f2`
+  printf "%s\t%d\n" "$name" "$size" > target.sizes
+  gff3ToPsl ncbi.chrom.sizes target.sizes $gff $name.psl
+  pslCheck ${name}.psl
+  liftUp -type=.psl stdout ncbiToUcsc.lift error ${name}.psl \
+    | liftUp -type=.psl -pslQ ucscPsl/${name}.psl ncbiToUcsc.lift error stdin
+  pslCheck ucscPsl/${name}.psl
+done
+
+  pslSort dirs altSeqLiftOverPsl.psl ./tmp ucscPsl
+  pslCheck -db=hg38 altSeqLiftOverPsl.psl
+
+  hgLoadPsl hg38 altSeqLiftOverPsl.psl
+  pslCheck -db=hg38 altSeqLiftOverPsl
+  #  checked: 266 failed: 0 errors: 0
+
+#########################################################################
+## load alternate sequence for PSL display (DONE - 2014-02-24 - Hiram)
+    mkdir /hive/data/genomes/hg38/bed/altAlignments/sequence
+    cd /hive/data/genomes/hg38/bed/altAlignments/sequence
+
+    rm -fr hg38.haplotypes.lift temp.lift targetFa queryFa
+    mkdir targetFa
+    mkdir queryFa
+    touch temp.lift
+
+    cat ../../altLocations/chrToAlt.bed | while read L
+do
+  chrName=`echo $L | awk '{print $1}'`
+  chromSize=`egrep "^$chrName   " ../../../chrom.sizes | cut -f2`
+  chrStart=`echo $L | awk '{printf "%d", $2}'`
+  chrEnd=`echo $L | awk  '{printf "%d", $3}'`
+  chrSize=`echo $chrEnd $chrStart | awk '{print $1-$2}'`
+  queryName=`echo $L | awk '{print $4}'`
+  partName="${chrName}_${chrStart}_${chrEnd}"
+  echo $chrName $chrStart $chrEnd $queryName $partName $chromSize
+  echo -e "$chrStart\t${partName}\t$chrSize\t$chrName\t$chromSize" >> temp.lift
+  twoBitToFa ../../../hg38.2bit:$chrName:$chrStart-$chrEnd stdout | sed -e "s/^>.*/>$partName/;" > targetFa/$queryName.fa
+  twoBitToFa ../../../hg38.2bit:$queryName queryFa/$queryName.fa
+done
+
+sort -u temp.lift | sort -k4,4 -k1,1n > hg38.haplotypes.lift
+
+    mkdir /gbdb/hg38/ncbiAltMappings
+    cd /hive/data/genomes/hg38/bed/altAlignments/sequence/queryFa
+    ln -s `pwd`/*.fa /gbdb/hg38/ncbiAltMappings
+    cd /hive/data/genomes/hg38/bed/altAlignments/sequence
+    hgLoadSeq -drop -seqTbl=seqNcbiAltSequence -extFileTbl=extNcbiAltSequence \
+        hg38 /gbdb/hg38/ncbiAltMappings/*.fa
+
+    pslSwap ../altAlignments.psl stdout \
+      | pslRecalcMatch stdin ../../../hg38.2bit ../../../hg38.2bit \
+        hg38.referenceTarget.psl
+
+    # the table name altSeqLiftOverPsl is recognized in hgc to allow display
+    # of the details of the alignments
+    hgLoadPsl hg38 -table=altSeqLiftOverPsl hg38.referenceTarget.psl
+
+#########################################################################
+## alternate sequence alignments EXPERIMENT (DONE - 2014-01-17 - Hiram)
+    # the lastzAltSequences.2014-01-23 alignment was used for this instead
+    # of this procedure
+    mkdir /hive/data/genomes/hg38/bed/altAlignments
+    cd /hive/data/genomes/hg38/bed/altAlignments
+
+    grep -v "^#" ../../genbank/GCA_000001405.15_GRCh38_top-level.acc2name \
+	| awk '{printf "s/%s/%s/g;\n", $1, $3}' > accessionToUcsc.sed.txt
+
+    find ../../genbank -type f | grep alt_scaffolds | grep "\.gff$" \
+	| while read F
+do
+   cat $F | sed -f accessionToUcsc.sed.txt \
+	| gff3ToPsl ../../chrom.sizes stdin stdout
+done > altAlignments.psl
+#	(leftover fragments from an earlier version of the command above,
+#	not part of the pipeline):
+#	| xargs cat | sed -f accessionToUcsc.sed.txt \
+#	| gff3ToPsl ../../chrom.sizes stdin altAlignments.psl
+
+    time pslRecalcMatch altAlignments.psl ../../hg38.2bit ../../hg38.2bit \
+        altRecalcMatch.psl
+    # real    0m51.122s
+
+    # just to see what they look like in different formats:
+    pslToChain altRecalcMatch.psl altAlignments.chain
+    chainToAxt altAlignments.chain ../../hg38.2bit ../../hg38.2bit \
+	altAlignments.axt
+    axtToMaf -score altAlignments.axt ../../chrom.sizes ../../chrom.sizes \
+        altAlignments.maf
+
+    mkdir mafSplits
+    mafSplit /dev/null mafSplits/ altAlignments.maf
+    # doesn't work:
+# Can't find chrom in MAF component src: chr6_GL000250v2_alt
+
+    mkdir splits psl
+    find ../../genbank -type f | grep alt_scaffolds | grep "\.gff$" \
+        | while read F
+do
+   chrAlt=`basename $F | sed -e 's/_.*//' | sed -f accessionToUcsc.sed.txt`
+   echo $chrAlt
+   cat $F | sed -f accessionToUcsc.sed.txt \
+        | gff3ToPsl ../../chrom.sizes stdin splits/${chrAlt}.psl
+   pslRecalcMatch splits/${chrAlt}.psl ../../hg38.2bit ../../hg38.2bit \
+	psl/${chrAlt}.psl
+done
+
+   mkdir swap
+   mkdir swap/psl swap/chain swap/axt swap/maf swap/anno
+   for F in psl/*.psl
+do
+  B=`basename $F | sed -e 's/.psl//'`
+  echo $B
+  pslSwap $F stdout | pslRecalcMatch stdin ../../hg38.2bit ../../hg38.2bit \
+      swap/psl/${B}.psl
+  pslToChain swap/psl/${B}.psl swap/chain/${B}.chain
+  chainToAxt swap/chain/${B}.chain ../../hg38.2bit ../../hg38.2bit \
+	swap/axt/${B}.axt
+  axtToMaf -score swap/axt/${B}.axt ../../chrom.sizes ../../chrom.sizes stdout \
+      | sed -e 's/^s chr\([0-9XYM][0-9]* \)/s ref38.chr\1/; s/^s chr\([0-9XYM][0-9]*_\)/s alt38.chr\1/;' > swap/maf/${B}.maf
+  mafAddIRows -nBeds=nBeds swap/maf/${B}.maf ../../hg38.2bit swap/anno/${B}.maf
+done
+# axtToMaf -score swap/axt/${B}.axt ../../chrom.sizes ../../chrom.sizes stdout \
+#      | sed -e 's/^s chr/s hg38.chr/' > swap/maf/${B}.maf
+
+   twoBitInfo -nBed ../../hg38.2bit ../../hg38.N.bed
+   ln -s  ../../hg38.N.bed hg38.bed
+   ln -s ../../hg38.N.bed ref38.bed
+   ln -s ../../hg38.N.bed alt38.bed
+   echo hg38.bed > nBeds
+   echo ref38.bed >> nBeds
+   echo alt38.bed >> nBeds
+   ln -s  ../../chrom.sizes hg38.len
+   ln -s  ../../chrom.sizes ref38.len
+   ln -s  ../../chrom.sizes alt38.len
+   echo hg38.len > sizes
+   echo ref38.len >> sizes
+   echo alt38.len >> sizes
+
+   mkdir chain axt maf anno
+   for F in psl/*.psl
+do
+   B=`basename $F | sed -e 's/.psl//'`
+   echo $B
+   pslToChain $F chain/${B}.chain
+   chainToAxt chain/${B}.chain ../../hg38.2bit ../../hg38.2bit axt/${B}.axt
+  axtToMaf -score axt/${B}.axt ../../chrom.sizes ../../chrom.sizes stdout \
+      | sed -e 's/^s chr\([0-9XYM][0-9]* \)/s ref38.chr\1/; s/^s chr\([0-9XYM][0-9]*_\)/s alt38.chr\1/;' > maf/${B}.maf
+   mafAddIRows -nBeds=nBeds maf/${B}.maf ../../hg38.2bit anno/${B}.maf
+done
+
+#   axtToMaf -score axt/${B}.axt ../../chrom.sizes ../../chrom.sizes stdout \
+#      | sed -e 's/^s chr/s hg38.chr/' > maf/${B}.maf
+
+############################################################################
+# Liftover Gencode V19 from hg19  (DONE braney 2014-02-14)
+
+mkdir /cluster/data/hg38/bed/liftOverGencodeV19
+cd /cluster/data/hg38/bed/liftOverGencodeV19
+
+echo "show tables like 'wgEncodeGencode%19'" | hgsql hg19 | tail -n +2 > all.gencode.tables
+echo " select tableName from trackDb where tableName like 'wgEncodeGencode_%V19';" | hgsql hg19 --skip-column-names > genePred.gencode.tables
+
+# load the non-genepred table as is.   This isn't quite the right thing to do
+# with exon support, but it's good enough for our purposes at the moment
+join -v 1 *.gencode.tables | while read t; do echo "create table $t select * from hg19.$t" | hgsql hg38; echo $t; done
+
+for i in `cat genePredExt.gencode.tables`;
+do
+    echo "select name,score,name2 from $i" | hgsql hg19 | sort > $i.name2Score.txt;
+    genePredToFakePsl hg19 $i $i.psl $i.cds;
+    pslMap -chainMapFile -swapMap $i.psl /gbdb/hg19/liftOver/hg19ToHg38.over.chain.gz stdout | pslCDnaFilter -uniqueMapped stdin stdout |  sort -k 14,14 -k 16,16n | pslToBed -cds=$i.cds stdin stdout | bedToGenePred stdin stdout | sort |  join /dev/stdin $i.name2Score.txt| tr ' ' '\t' | hgLoadGenePred -genePredExt hg38 $i stdin;
+    echo $i;
+done
+
+for i in `cat genePred.gencode.tables`;
+do
+    genePredToFakePsl hg19 $i $i.psl $i.cds;
+    pslMap -chainMapFile -swapMap $i.psl /gbdb/hg19/liftOver/hg19ToHg38.over.chain.gz stdout | pslCDnaFilter -uniqueMapped stdin stdout |  sort -k 14,14 -k 16,16n | pslToBed -cds=$i.cds stdin stdout | bedToGenePred stdin stdout |  tr ' ' '\t' | hgLoadGenePred hg38 $i stdin;
+    echo $i;
+done
+
+#####################################################################
+## tRNAs track ( 2014-02-18 braney DONE)
+## this is a preliminary version for UCSC build.  NOT FOR RELEASE!
+ssh hgwdev
+cd /hive/data/genomes/hg38/bed
+mkdir tRNAs
+cd tRNAs
+
+cp  /hive/users/pchan/tRNAs/Eukaryota/hg38/hg38-tRNAs.bed .
+
+hgLoadBed -tab hg38 tRNAs hg38-tRNAs.bed -sqlTable=$HOME/kent/src/hg/lib/tRNAs.sql
+
+## tRNAs track (2015-10-04, Chris FINISHING BUILD FOR RELEASE)
+    cd /hive/data/genomes/hg38/bed/tRNAs
+    cat /hive/users/pchan/gtrnadb2/Eukaryota/hg38/hg38-tRNAs.bed | sed 's^</BLOCKQUOTE>^^g' > hg38-tRNAs2.bed
+    hgsql hg38 -e 'drop table if exists tRNAs'
+    hgLoadBed -tab hg38 tRNAs hg38-tRNAs2.bed -sqlTable=$HOME/kent/src/hg/lib/tRNAs.sql
+    mkdir gif
+    cp -p /hive/users/pchan/gtrnadb2/Eukaryota/hg38/images/* gif
+    cd /hive/data/gbdb/hg38
+    ln -s /hive/data/genomes/hg38/bed/tRNAs/gif RNA-img
+    cd /usr/local/apache/htdocs-ceisenhart/RNA-img
+    ln -s /gbdb/hg38/RNA-img hg38
+
+############################################################################
+# EXONIPHY , lifted from hg19 (DONE - braney 2014-02-19)
+#	needed for ucscGenes building
+    # exoniphyHg19.gp is prepared as follows
+    mkdir /cluster/data/hg38/bed/exoniphy
+    cd /cluster/data/hg38/bed/exoniphy
+    hgsql hg19 -e "select * from exoniphy" -N | cut  -f 2-16 > exoniphyHg19.gp
+    time nice -n +19 liftOver -genePred exoniphyHg19.gp \
+	/cluster/data/hg19/bed/liftOver/hg19ToHg38.over.chain.gz \
+	    exoniphyHg38.gp unmapped
+    # real    0m2.015s
+    # user    0m1.894s
+    # sys     0m0.076s
+
+    wc -l *
+    #   186601 exoniphyHg19.gp
+    #   186533 exoniphyHg38.gp
+    #      136 unmapped
+    #   373270 total
+
+    cd /cluster/data/hg38/bed/exoniphy
+    nice -n +19 hgLoadGenePred -genePredExt hg38 exoniphy exoniphyHg38.gp
+    nice -n +19 featureBits hg38 exoniphy
+    # 28807039 bases of 3049335806 (0.945%) in intersection
+    nice -n +19 featureBits hg19 exoniphy
+    # 28661160 bases of 2897316137 (0.989%) in intersection
+
+#########################################################################
+# LASTZ Rat Rn5 (DONE - 2014-02-27 - Hiram)
+    #	establish a screen to control this job
+    screen -S hg38Rn5
+    mkdir /hive/data/genomes/hg38/bed/lastzRn5.2014-02-27
+    cd /hive/data/genomes/hg38/bed/lastzRn5.2014-02-27
+
+    # XXX don't forget to specify the BLASTZ binary:
+    cat << '_EOF_' > DEF
+# human vs rat
+BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
+
+# TARGET: Human Hg38
+SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit
+SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes
+SEQ1_CTGDIR=/hive/data/genomes/hg38/hg38.contigs.2bit
+SEQ1_CTGLEN=/hive/data/genomes/hg38/hg38.contigs.chrom.sizes
+SEQ1_LIFT=/hive/data/genomes/hg38/jkStuff/hg38.contigs.lift
+SEQ1_CHUNK=20000000
+SEQ1_LAP=10000
+
+# QUERY: Rat Rn5
+SEQ2_DIR=/hive/data/genomes/rn5/rn5.2bit
+SEQ2_LEN=/hive/data/genomes/rn5/chrom.sizes
+SEQ2_CHUNK=10000000
+SEQ2_LIMIT=100
+SEQ2_LAP=0
+
+BASE=/hive/data/genomes/hg38/bed/lastzRn5.2014-02-27
+TMPDIR=/scratch/tmp
+'_EOF_'
+    # << happy emacs
+
+    time doBlastzChainNet.pl -verbose=2 \
+	`pwd`/DEF \
+	-syntenicNet \
+	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
+	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1
+
+    #   real    658m53.984s
+    cat fb.hg38.chainRn5Link.txt
+    # 938823407 bases of 3049335806 (30.788%) in intersection
+
+    #	running the swap
+    mkdir /hive/data/genomes/rn5/bed/blastz.hg38.swap
+    cd /hive/data/genomes/rn5/bed/blastz.hg38.swap
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+	/hive/data/genomes/hg38/bed/lastzRn5.2014-02-27/DEF \
+	-swap \
+	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
+	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1
+    #   real    66m53.095s
+    cat fb.rn5.chainHg38Link.txt
+    #   934256475 bases of 2572853723 (36.312%) in intersection
+
+    # syntenic net for 14-way use 2014-04-02 - Hiram
+    cd /hive/data/genomes/rn5/bed/blastz.hg38.swap
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+	/hive/data/genomes/hg38/bed/lastzRn5.2014-02-27/DEF \
+	-continue=syntenicNet -syntenicNet -swap \
+	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
+	-chainMinScore=3000 -chainLinearGap=medium > synNet.log 2>&1
+    #  real    16m54.489s
+
+##############################################################################
+# LASTZ Rat Rn4 (DONE - 2014-02-27 - Hiram)
+    #	establish a screen to control this job
+    screen -S hg38Rn4
+    mkdir /hive/data/genomes/hg38/bed/lastzRn4.2014-02-27
+    cd /hive/data/genomes/hg38/bed/lastzRn4.2014-02-27
+
+    # XXX don't forget to specify the BLASTZ binary:
+    cat << '_EOF_' > DEF
+# human vs rat
+BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
+
+# TARGET: Human Hg38
+SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit
+SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes
+SEQ1_CTGDIR=/hive/data/genomes/hg38/hg38.contigs.2bit
+SEQ1_CTGLEN=/hive/data/genomes/hg38/hg38.contigs.chrom.sizes
+SEQ1_LIFT=/hive/data/genomes/hg38/jkStuff/hg38.contigs.lift
+SEQ1_CHUNK=20000000
+SEQ1_LAP=10000
+
+# QUERY: Rat Rn4
+SEQ2_DIR=/hive/data/genomes/rn4/rn4.2bit
+SEQ2_LEN=/hive/data/genomes/rn4/chrom.sizes
+SEQ2_CHUNK=10000000
+SEQ2_LIMIT=100
+SEQ2_LAP=0
+
+BASE=/hive/data/genomes/hg38/bed/lastzRn4.2014-02-27
+TMPDIR=/scratch/tmp
+'_EOF_'
+    # << happy emacs
+
+    doBlastzChainNet.pl -verbose=2 \
+	`pwd`/DEF \
+	-syntenicNet \
+	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
+	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1
+    #   real    658m53.984s
+
+    cat fb.hg38.chainRn4Link.txt
+    #   913992768 bases of 3049335806 (29.974%) in intersection
+
+    #	running the swap
+    mkdir /hive/data/genomes/rn4/bed/blastz.hg38.swap
+    cd /hive/data/genomes/rn4/bed/blastz.hg38.swap
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+	/hive/data/genomes/hg38/bed/lastzRn4.2014-02-27/DEF \
+	-swap \
+	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
+	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
+    #   real    73m5.666s
+
+    cat fb.rn4.chainHg38Link.txt
+    #	889613774 bases of 2571531505 (34.595%) in intersection
+
+##############################################################################
+# GENEID GENE PREDICTIONS (DONE - 2014-03-07 - Hiram)
+    ssh hgwdev
+    mkdir /hive/data/genomes/hg38/bed/geneid
+    cd /hive/data/genomes/hg38/bed/geneid
+    mkdir download
+    cd download
+    for C in `cut -f1 ../../../chrom.sizes`
+    do
+	echo $C
+ wget --timestamping \
+http://genome.crg.es/genepredictions/H.sapiens/hg38/geneid_v1.4/${C}.gtf3
+    wget --timestamping \
+http://genome.crg.es/genepredictions/H.sapiens/hg38/geneid_v1.4/${C}.prot
+    done
+
+    cd ..
+    cat download/*.gtf | ldHgGene -gtf -genePredExt hg38 geneid stdin  # NOTE(review): downloads above fetch *.gtf3 files; either this glob or the wget names carry a typo in this log
+    #	Read 33428 transcripts in 277332 lines in 1 files
+    #	33428 groups 92 seqs 1 sources 3 feature types
+    #	33428 gene predictions
+
+############################################################################
+# GENEREVIEWS TRACK (DONE 2014-05-17 - Chin)
+# This track depends on some tasks completed for hg19, specifically:
+#
+# $HOME/kent/src/hg/lib/geneReviewsGrshortNBKid.sql
+# $HOME/kent/src/hg/lib/geneReviewsGrshortTitleNBKid.sql
+# $HOME/kent/src/hg/lib/geneReviewsDetail.sql
+# $HOME/kent/src/hg/makeDb/trackDb/human/geneReviews.html
+#
+# Unlike hg19, this hg38 track is generated by the automatic geneReviews
+# scripts in
+# /hive/data/outside/otto/geneReviews, specifically buildGeneReviews.sh.
+# Current data are fetched weekly from NCBI
+# ftp://ftp.ncbi.nlm.nih.gov/pub/GeneReviews/
+# to /hive/data/outside/otto/geneReviews/${DATE}.
+
+###########################################################################
+# Chimp Lastz run (DONE - 2014-05-27 - Hiram)
+    screen -S hg38PanTro4      # use a screen to manage this longish running job
+    mkdir /hive/data/genomes/hg38/bed/lastzPanTro4.2014-05-27
+    cd /hive/data/genomes/hg38/bed/lastzPanTro4.2014-05-27
+
+    # always set the BLASTZ program so we know what version was used
+    cat << '_EOF_' > DEF
+# human vs chimp
+BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
+BLASTZ_O=600
+BLASTZ_E=150
+# maximum M allowed with lastz is only 254
+BLASTZ_M=254
+
+BLASTZ_T=2
+BLASTZ_Y=15000
+BLASTZ_K=4500
+BLASTZ_Q=/scratch/data/blastz/human_chimp.v2.q
+#    A    C    G    T
+#    90 -330 -236 -356
+#  -330  100 -318 -236
+#  -236 -318  100 -330
+#  -356 -236 -330   90
+
+# TARGET: Human Hg38
+SEQ1_DIR=/scratch/data/hg38/hg38.2bit
+SEQ1_LEN=/scratch/data/hg38/chrom.sizes
+SEQ1_CHUNK=10000000
+SEQ1_LAP=10000
+SEQ1_IN_CONTIGS=0
+
+# QUERY: Chimp PanTro4
+SEQ2_DIR=/hive/data/genomes/panTro4/panTro4.2bit
+SEQ2_LEN=/hive/data/genomes/panTro4/chrom.sizes
+SEQ2_CHUNK=10000000
+SEQ2_LAP=0
+SEQ2_LIMIT=200
+SEQ2_IN_CONTIGS=0
+
+BASE=/hive/data/genomes/hg38/bed/lastzPanTro4.2014-05-27
+TMPDIR=/dev/shm
+'_EOF_'
+    # << emacs
+
+    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
+        -chainMinScore=5000 -chainLinearGap=medium \
+        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
+        -syntenicNet) > do.log 2>&1
+    # real    154m12.215s
+    cat fb.hg38.chainPanTro4Link.txt
+    # 2839294579 bases of 3049335806 (93.112%) in intersection
+
+    # filter with doRecipBest.pl
+    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` \
+        hg38 panTro4) > rbest.log 2>&1
+    # real    57m55.320s
+
+    # running the swap
+    mkdir /hive/data/genomes/panTro4/bed/blastz.hg38.swap
+    cd /hive/data/genomes/panTro4/bed/blastz.hg38.swap
+    time (doBlastzChainNet.pl -verbose=2 \
+        -swap /hive/data/genomes/hg38/bed/lastzPanTro4.2014-05-27/DEF \
+        -chainMinScore=5000 -chainLinearGap=medium \
+        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
+        -syntenicNet) > swap.log 2>&1
+    cat fb.panTro4.chainHg38Link.txt
+    # 2776497530 bases of 2902338967 (95.664%) in intersection
+    # real    98m23.729s
+
+    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` \
+        panTro4 hg38) > rbest.log 2>&1
+    # real    64m33.812s
+
+#############################################################################
+# Opossum Lastz run (DONE - 2014-05-27 - Hiram)
+    screen -S hg38MonDom5      # use a screen to manage this longish running job
+    mkdir /hive/data/genomes/hg38/bed/lastzMonDom5.2014-05-27
+    cd /hive/data/genomes/hg38/bed/lastzMonDom5.2014-05-27
+
+    # always set the BLASTZ program so we know what version was used
+    cat << '_EOF_' > DEF
+# human vs chimp
+BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
+BLASTZ_M=50
+
+BLASTZ_Y=3400
+BLASTZ_L=6000
+BLASTZ_K=2200
+BLASTZ_Q=/scratch/data/blastz/HoxD55.q
+#     A    C    G    T
+#    91  -90  -25 -100
+#   -90  100 -100  -25
+#   -25 -100  100  -90
+#  -100  -25  -90  91
+
+# TARGET: Human Hg38
+SEQ1_DIR=/scratch/data/hg38/hg38.2bit
+SEQ1_LEN=/scratch/data/hg38/chrom.sizes
+SEQ1_CHUNK=10000000
+SEQ1_LAP=10000
+SEQ1_LIMIT=5
+
+# QUERY: Opossum MonDom5
+SEQ2_DIR=/scratch/data/monDom5/monDom5.2bit
+SEQ2_LEN=/hive/data/genomes/monDom5/chrom.sizes
+SEQ2_CHUNK=10000000
+SEQ2_LAP=0
+
+BASE=/hive/data/genomes/hg38/bed/lastzMonDom5.2014-05-27
+TMPDIR=/dev/shm
+'_EOF_'
+    # << emacs
+
+    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
+        -chainMinScore=5000 -chainLinearGap=loose \
+        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
+        -syntenicNet) > do.log 2>&1
+    # real    670m13.280s
+    # one failed chain run for hg19, finished manually on hgwdev, then:
+    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
+        -continue=chainMerge -chainMinScore=5000 -chainLinearGap=loose \
+        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
+        -syntenicNet) > chainMerge.log 2>&1
+    # real    164m28.822s
+
+    cat fb.hg38.chainMonDom5Link.txt
+    # 438195373 bases of 3049335806 (14.370%) in intersection
+
+    # filter with doRecipBest.pl
+    time (/cluster/bin/scripts/doRecipBest.pl -buildDir=`pwd` \
+        -dbHost=hgwdev -workhorse=hgwdev hg38 monDom5) > rbest.log 2>&1
+    # real    130m22.825s
+
+    # running the swap
+    mkdir /hive/data/genomes/monDom5/bed/blastz.hg38.swap
+    cd /hive/data/genomes/monDom5/bed/blastz.hg38.swap
+    time (doBlastzChainNet.pl -verbose=2 \
+        /hive/data/genomes/hg38/bed/lastzMonDom5.2014-05-27/DEF \
+        -swap -chainMinScore=5000 -chainLinearGap=loose \
+        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
+        -syntenicNet) > swap.log 2>&1
+    # real    102m41.443s
+
+    cat fb.monDom5.chainHg38Link.txt
+    # 420069915 bases of 3501660299 (11.996%) in intersection
+    time (/cluster/bin/scripts/doRecipBest.pl -buildDir=`pwd` \
+        -dbHost=hgwdev -workhorse=hgwdev monDom5 hg38) > rbest.log 2>&1
+    #  real    90m56.189s
+
+# _EOF_   (NOTE(review): stray heredoc terminator -- no matching "cat << '_EOF_'" opener above; commented out)
+#############################################################################
+# LOCUS REFERENCE GENOMIC (LRG) REGIONS AND TRANSCRIPTS (DONE 10/25/19 angie)
+# Redmine #13359, #24285 -- otto-mate To Do #17877
+# previously done 7/7/14, 9/9/16, 5/30/18
+# THIS IS NOW AN OTTO JOB !!
+    set today = `date +%Y_%m_%d`
+    mkdir -p /hive/data/genomes/hg38/bed/lrg/$today
+    cd /hive/data/genomes/hg38/bed/lrg/$today
+    wget ftp://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_public_xml_files.zip
+    unzip LRG_public_xml_files.zip
+
+    # Run script to convert LRG*.xml files to BED+ for regions and genePredExt+fa for transcripts:
+    # parseLrgXml.pl updated 2020-09-16 to add four new fields to the gp output
+    # the four extra fields are identifiers for:
+    # NCBI transcript, Ensembl transcript, NCBI protein, Ensembl protein
+
+    ~/kent/src/hg/utils/automation/parseLrgXml.pl GRCh38
+    genePredCheck lrgTranscriptsUnmapped.gp
+#Error: lrgTranscriptsUnmapped.gp:765: LRG_7t1 no exonFrame on CDS exon 46
+#checked: 1029 failed: 1
+    # If there are complaints e.g. about exonFrame, look for inconsistencies in the
+    # affected transcript's coding_region/coordinates vs. exon/intron info in xml.
+    # Contact Variation team leader Fiona Cunningham @EBI to resolve in the background
+    # (missing exonFrame info doesn't affect our track representation because we end up using
+    # psl).  We agreed to disagree about exon 46 of LRG_7t1 because that last coding exon
+    # portion is only the stop codon.
+
+    # No longer necessary to filter out alt and fix patches since they have been added to hg38.
+
+    # and we need the transcript plus gene name later:
+    cut -f1,12 lrgTranscriptsUnmapped.gp | sort > transcript.gene.name.txt
+
+    # five extra columns have been added to the genePred (2020-10-05 - Hiram)
+    # extract them so they can be added to the psl:
+    awk -F$'\t' '{printf "%s\t%s\t%s\t%s\t%s\t%s %s %s %s\n", $1,$16,$17,$18,$19, $16,$18,$17,$19}' lrgTranscriptsUnmapped.gp | sort \
+       | join -t$'\t' - transcript.gene.name.txt \
+         | awk -F$'\t' '{printf "%s\t%s\t%s\t%s\t%s\t%s\t%s %s\n", $1,$2,$3,$4,$5,$7,$6,$7}' > lrgTransExtraFields.tsv
+
+    # the five extra fields are identifiers for:
+    # NCBI transcript, Ensembl transcript, NCBI protein, Ensembl protein,
+    #	Gene name
+
+    # Load LRG regions:
+    #bedToBigBed lrg.bed /hive/data/genomes/hg38/chrom.sizes lrg.bb \
+    #-tab -type=bed12+ -as=$HOME/kent/src/hg/lib/lrg.as -extraIndex=name
+    # after ML #29689, added ncbiAcc field, Max, July 1, 2022
+    # changed to:
+    bedToBigBed lrg.bed /hive/data/genomes/hg38/chrom.sizes lrg.bb \
+    -tab -type=bed12+ -as=$HOME/kent/src/hg/lib/lrg.as -extraIndex=name,ncbiAcc
+    ln -sf `pwd`/lrg.bb /gbdb/hg38/bbi/lrg.bb
+    hgBbiDbLink hg38 lrg /gbdb/hg38/bbi/lrg.bb
+
+    # Map LRG fixed_annotation transcripts from LRG coords to hg38 coords (HT MarkD):
+    lrgToPsl lrg.bed /hive/data/genomes/hg38/chrom.sizes lrg.psl
+    pslCheck lrg.psl
+#checked: 919 failed: 0 errors: 0
+    awk '{print $10 "\t" $11;}' lrg.psl > lrg.sizes
+    genePredToFakePsl -chromSize=lrg.sizes placeholder \
+      lrgTranscriptsUnmapped.gp lrgTranscriptsFakePsl.psl lrgTranscripts.cds
+    pslMap lrgTranscriptsFakePsl.psl lrg.psl lrgTranscriptsHg38.psl
+    mrnaToGene -genePredExt -cdsFile=lrgTranscripts.cds -keepInvalid \
+      lrgTranscriptsHg38.psl lrgTranscriptsHg38NoName2.gp
+#Warning: no CDS for LRG_163t1
+#Warning: no CDS for LRG_347t1
+    # It's OK if mrnaToGene complains about "no CDS" for a non-coding tx (RefSeq accession NR_*).
+    grep -l NR_ LRG_163.xml LRG_347.xml
+#LRG_163.xml
+#LRG_347.xml
+
+    cat lrgCdna.tab | sed -e 's/^/>/;' | tr '\t' '\n' > lrgCdna.fa
+    # construct bigPsl with five extra fields
+    pslToBigPsl -fa=lrgCdna.fa -cds=lrgTranscripts.cds \
+	lrgTranscriptsHg38.psl bigPsl.txt
+
+    # add the five extra identifiers to the bigPsl file:
+    join -t$'\t' -1 4 \
+       -o 1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9,1.10,1.11,1.12,1.13,1.14,1.15\
+,1.16,1.17,1.18,1.19,1.20,1.21,1.22,1.23,1.24,1.25,2.2,2.3,2.4,2.5,2.6,2.7 \
+       <(sort -k4 bigPsl.txt) lrgTransExtraFields.tsv \
+         | sort -k1,1 -k2,2n > lrgExtraTranscriptsHg38.bigPsl.bed
+
+    bedToBigBed -as=bigPsl+6.as -type=bed12+19 -tab \
+       lrgExtraTranscriptsHg38.bigPsl.bed ../../../chrom.sizes lrgBigPsl.bb
+    bigBedInfo lrgBigPsl.bb
+    rm -f /gbdb/hg38/bbi/lrgBigPsl.bb
+    ln -sf `pwd`/lrgBigPsl.bb /gbdb/hg38/bbi
+    hgBbiDbLink hg38 lrgBigPsl /gbdb/hg38/bbi/lrgBigPsl.bb
+
+
+    # Load PSL, CDS and sequences.
+    hgLoadPsl hg38 -table=lrgTranscriptAli lrgTranscriptsHg38.psl
+    hgLoadSqlTab hg38 lrgCds ~/kent/src/hg/lib/cdsSpec.sql lrgTranscripts.cds
+    hgPepPred hg38 tab lrgCdna lrgCdna.tab
+    hgPepPred hg38 tab lrgPep lrgPep.tab
+
+
+#############################################################################
+## 7-Way Multiz (DONE - 2014-06-02 - Hiram)
+    ssh hgwdev
+    mkdir /hive/data/genomes/hg38/bed/multiz7way
+    cd /hive/data/genomes/hg38/bed/multiz7way
+
+    # from the 63-way in the source tree, select out the 7 used here:
+    /cluster/bin/phast/tree_doctor \
+        --prune-all-but hg19,panTro4,rheMac3,mm10,rn5,canFam3,monDom5 \
+        /cluster/home/hiram/kent/src/hg/utils/phyloTrees/130way.nh \
+          | sed -e 's/hg19/hg38/' > hg38.7way.nh
+
+    #	what that looks like:
+    ~/kent/src/hg/utils/phyloTrees/asciiTree.pl hg38.7way.nh
+# (((((hg38:0.006550,
+#     panTro4:0.006840):0.027424,
+#    rheMac3:0.037601):0.109934,
+#   (mm10:0.084509,
+#   rn5:0.091589):0.271974):0.020593,
+#  canFam3:0.165928):0.258392,
+# monDom5:0.340786);
+
+    # extract species list from that .nh file
+    sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
+        hg38.7way.nh | xargs echo | sed 's/ //g; s/,/ /g' \
+        | sed 's/[()]//g; s/,/ /g' | tr '[ ]' '[\n]' > species.list.txt
+
+    # construct db to name translation list:
+    cat species.list.txt | while read DB
+do
+hgsql -N -e "select name,organism from dbDb where name=\"${DB}\";" hgcentraltest
+done | sed -e "s/\t/->/; s/ /_/g;" | sed -e 's/$/;/' | sed -e 's/\./_/g' \
+        > db.to.name.txt
+
+    # construct a common name .nh file:
+    /cluster/bin/phast/tree_doctor --rename \
+    "`cat db.to.name.txt`" hg38.7way.nh | sed -e 's/00*)/)/g; s/00*,/,/g' \
+       | $HOME/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \
+         > hg38.7way.commonNames.nh
+
+    $HOME/kent/src/hg/utils/phyloTrees/asciiTree.pl hg38.7way.nh > t.nh
+    $HOME/kent/src/hg/utils/phyloTrees/scientificNames.sh t.nh \
+       | $HOME/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \
+          > hg38.7way.scientificNames.nh
+    rm -f t.nh
+    cat hg38.7way.scientificNames.nh
+# (((((Homo_sapiens:0.00655,
+#     Pan_troglodytes:0.00684):0.027424,
+#    Macaca_mulatta:0.037601):0.109934,
+#   (Mus_musculus:0.084509,
+#   Rattus_norvegicus:0.091589):0.271974):0.020593,
+#  Canis_lupus_familiaris:0.165928):0.258392,
+# Monodelphis_domestica:0.340786);
+
+    ~/kent/src/hg/utils/phyloTrees/asciiTree.pl hg38.7way.commonNames.nh
+# (((((Human:0.00655,
+#     Chimp:0.00684):0.027424,
+#    Rhesus:0.037601):0.109934,
+#   (Mouse:0.084509,
+#   Rat:0.091589):0.271974):0.020593,
+#  Dog:0.165928):0.258392,
+# Opossum:0.340786);
+
+    #	Use this specification in the phyloGif tool:
+    #	http://genome.ucsc.edu/cgi-bin/phyloGif
+    #	to obtain a png image for src/hg/htdocs/images/phylo/hg38_7way.png
+
+    /cluster/bin/phast/all_dists hg38.7way.nh | grep hg38 \
+        | sed -e "s/hg38.//" | sort -k2n > 7way.distances.txt
+    #	Use this output to create the table below (NOTE(review): the example "head" output that follows is stale, copied from a bird-assembly doc -- taeGut1/galGal4 etc. are not in this 7-way; the real table appears further below)
+    head 7way.distances.txt
+# taeGut1 0.075718
+# melUnd1 0.220312
+# galGal4 0.507021
+# melGal1 0.509140
+# hg19    1.175433
+# mm10    1.383071
+
+    cat << '_EOF_' > sizeStats.pl
+#!/usr/bin/env perl
+# Print one summary row per species: rank, phylo distance, featureBits chainLink coverage on hg38 and on the other db, common name, db.
+use strict;
+use warnings;
+
+open (FH, "<7way.distances.txt") or
+        die "can not read 7way.distances.txt";
+
+my $count = 0;  # output row counter
+while (my $line = <FH>) {
+    chomp $line;
+    my ($D, $dist) = split('\s+', $line);  # db name, phylogenetic distance
+    my $chain = "chain" . ucfirst($D);  # e.g. "chainPanTro4"
+    my $B="/hive/data/genomes/hg38/bed/lastz.$D/fb.hg38." .
+        $chain . "Link.txt";
+    my $chainLinkMeasure =
+        `awk '{print \$5}' ${B} 2> /dev/null | sed -e "s/(//; s/)//"`;
+    chomp $chainLinkMeasure;
+    $chainLinkMeasure = 0.0 if (length($chainLinkMeasure) < 1);  # fb file absent or empty
+    $chainLinkMeasure =~ s/\%//;  # strip percent sign
+    my $swapFile="/hive/data/genomes/${D}/bed/lastz.hg38/fb.${D}.chainHg38Link.txt";
+    my $swapMeasure = "N/A";  # stays N/A when the swap run has no fb result
+    if ( -s $swapFile ) {
+	$swapMeasure =
+	    `awk '{print \$5}' ${swapFile} 2> /dev/null | sed -e "s/(//; s/)//"`;
+	chomp $swapMeasure;
+	$swapMeasure = 0.0 if (length($swapMeasure) < 1);
+	$swapMeasure =~ s/\%//;
+    }
+    my $orgName=
+    `hgsql -N -e "select organism from dbDb where name='$D';" hgcentraltest`;
+    chomp $orgName;
+    if (length($orgName) < 1) {
+        $orgName="N/A";
+    }
+    ++$count;
+    printf "# %02d  %.4f (%% %06.3f) (%% %06.3f) - %s %s\n", $count, $dist,
+        $chainLinkMeasure, $swapMeasure, $orgName, $D;
+}
+close (FH);
+'_EOF_'
+    # << happy emacs
+    chmod +x ./sizeStats.pl
+    ./sizeStats.pl
+#
+
+#	If you can fill in all the numbers in this table, you are ready for
+#	the multiple alignment procedure
+
+#       featureBits chainLink measures
+#               chainLink
+#  N distance  on hg38  on other     other species
+# 01  0.0134 (% 93.112) (% 95.664) - Chimp panTro4
+# 02  0.0716 (% 79.729) (% 86.715) - Rhesus rheMac3
+# 03  0.3304 (% 49.978) (% 60.083) - Dog canFam3
+# 04  0.5004 (% 31.629) (% 35.323) - Mouse mm10
+# 05  0.5075 (% 30.788) (% 36.312) - Rat rn5
+# 06  0.7637 (% 14.370) (% 11.996) - Opossum monDom5
+
+# None of this concern for distances matters in building the first step, the
+# maf files.
+
+    # create species list and stripped down tree for autoMZ
+    sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
+	hg38.7way.nh | xargs echo | sed 's/ //g; s/,/ /g' > tree.nh
+
+    sed 's/[()]//g; s/,/ /g' tree.nh > species.list
+    #   hg38 panTro4 rheMac3 mm10 rn5 canFam3 monDom5
+
+    #	bash shell syntax here ...
+    cd /hive/data/genomes/hg38/bed/multiz7way
+    export H=/hive/data/genomes/hg38/bed
+    mkdir mafLinks
+    # want syntenic net for: panTro4 rheMac3 mm10 rn5 canFam3
+    # and unfiltered maf net for: monDom5
+    for G in panTro4 rheMac3 mm10 rn5 canFam3
+    do
+      mkdir mafLinks/$G
+      echo ln -s ${H}/lastz.$G/axtChain/hg38.${G}.synNet.maf.gz ./mafLinks/$G
+      ln -s ${H}/lastz.$G/axtChain/hg38.${G}.synNet.maf.gz ./mafLinks/$G
+    done
+
+    mkdir mafLinks/monDom5
+    echo ln -s ${H}/lastz.monDom5/mafNet/*.maf.gz ./mafLinks/monDom5
+    ln -s ${H}/lastz.monDom5/mafNet/*.maf.gz ./mafLinks/monDom5
+    # verify the symLinks are good:
+    ls -ogrtL mafLinks/*/*
+#-rw-rw-r-- 1  709500062 Jan 25 12:15 mafLinks/mm10/hg38.mm10.synNet.maf.gz
+#-rw-rw-r-- 1 1089643630 Jan 27 19:15 mafLinks/canFam3/hg38.canFam3.synNet.maf.gz
+#-rw-rw-r-- 1 1277455681 Jan 28 21:52 mafLinks/rheMac3/hg38.rheMac3.synNet.maf.gz
+#-rw-rw-r-- 1  687500679 Mar  1 12:27 mafLinks/rn5/hg38.rn5.synNet.maf.gz
+#-rw-rw-r-- 1 1463969868 May 27 11:41 mafLinks/panTro4/hg38.panTro4.synNet.maf.gz
+#-rw-rw-r-- 1  323347908 May 29 12:38 mafLinks/monDom5/hg38.monDom5.net.maf.gz
+
+    # split the maf files into a set of hashed named files
+    # this hash named split keeps the same chr/contig names in the same
+    # named hash file.
+    mkdir /hive/data/genomes/hg38/bed/multiz7way/mafSplit
+    cd /hive/data/genomes/hg38/bed/multiz7way/mafSplit
+    for D in `sed -e "s/hg38 //" ../species.list`
+do
+    echo "${D}"
+    mkdir $D
+    cd $D
+    echo "mafSplit -byTarget -useHashedName=8 /dev/null . ../../mafLinks/${D}/*.maf.gz"
+    mafSplit -byTarget -useHashedName=8 /dev/null . \
+	../../mafLinks/${D}/*.maf.gz
+    cd ..
+done
+
+    # construct a list of all possible maf file names.
+    # they do not all exist in each of the species directories
+    find . -type f | wc -l
+    # 641
+    find . -type f | grep ".maf$" | xargs -L 1 basename | sort -u > maf.list
+    wc -l maf.list
+    # 118 maf.list
+
+    mkdir /hive/data/genomes/hg38/bed/multiz7way/splitRun
+    cd /hive/data/genomes/hg38/bed/multiz7way/splitRun
+    mkdir maf run
+    cd run
+    mkdir penn
+    cp -p /cluster/bin/penn/multiz.2009-01-21_patched/multiz penn
+    cp -p /cluster/bin/penn/multiz.2009-01-21_patched/maf_project penn
+    cp -p /cluster/bin/penn/multiz.2009-01-21_patched/autoMZ penn
+
+    #	set the db and pairs directories here
+    cat > autoMultiz.csh << '_EOF_'
+#!/bin/csh -ef
+set db = hg38  # reference database for this multiz run
+set c = $1  # name of the split maf chunk to align (from maf.list)
+set result = $2  # where to deliver the finished multiz maf
+set run = `/bin/pwd`
+set tmp = /dev/shm/$db/multiz.$c  # per-job scratch area on local RAM disk
+set pairs = /hive/data/genomes/hg38/bed/multiz7way/mafSplit  # pairwise maf splits
+/bin/rm -fr $tmp
+/bin/mkdir -p $tmp
+/bin/cp -p ../../tree.nh ../../species.list $tmp
+pushd $tmp > /dev/null
+foreach s (`/bin/sed -e "s/$db //" species.list`)
+    set in = $pairs/$s/$c
+    set out = $db.$s.sing.maf  # pairwise ("sing") maf name expected by autoMZ
+    if (-e $in.gz) then
+        /bin/zcat $in.gz > $out
+        if (! -s $out) then
+            echo "##maf version=1 scoring=autoMZ" > $out  # empty placeholder maf
+        endif
+    else if (-e $in) then
+        /bin/ln -s $in $out
+    else
+        echo "##maf version=1 scoring=autoMZ" > $out  # no data for this chunk
+    endif
+end
+set path = ($run/penn $path); rehash  # prefer the pinned penn multiz binaries
+$run/penn/autoMZ + T=$tmp E=$db "`cat tree.nh`" $db.*.sing.maf $c \
+        > /dev/null
+popd > /dev/null
+/bin/rm -f $result
+/bin/cp -p $tmp/$c $result  # deliver result, then clean up scratch
+/bin/rm -fr $tmp
+'_EOF_'
+# << happy emacs
+    chmod +x autoMultiz.csh
+
+    cat  << '_EOF_' > template
+#LOOP
+./autoMultiz.csh $(file1) {check out line+ /hive/data/genomes/hg38/bed/multiz7way/splitRun/maf/$(root1).maf}
+#ENDLOOP
+'_EOF_'
+# << happy emacs
+
+    ln -s ../../mafSplit/maf.list maf.list
+    ssh ku
+    cd /hive/data/genomes/hg38/bed/multiz7way/splitRun/run
+    gensub2 maf.list single template stdout > jobList
+    para -ram=8g create jobList
+# Completed: 118 of 118 jobs
+# CPU time in finished jobs:     118241s    1970.69m    32.84h    1.37d  0.004 y
+# IO & Wait Time:                   682s      11.36m     0.19h    0.01d  0.000 y
+# Average job time:                1008s      16.80m     0.28h    0.01d
+# Longest finished job:           10068s     167.80m     2.80h    0.12d
+# Submission to last job:         10076s     167.93m     2.80h    0.12d
+
+    # combine into one file  (the 1>&2 redirect sends the echo to stderr)
+    cd /hive/data/genomes/hg38/bed/multiz7way
+    head -1 splitRun/maf/017.maf > multiz7way.maf
+    for F in splitRun/maf/*.maf
+do
+    echo "${F}" 1>&2
+    egrep -v "^#" ${F}
+done >> multiz7way.maf
+    tail -1 splitRun/maf/017.maf >> multiz7way.maf
+# -rw-rw-r-- 1 15635828403 Jun  3 11:49 multiz7way.maf
+
+    # Load into database
+    ssh hgwdev
+    cd /hive/data/genomes/hg38/bed/multiz7way
+    mkdir /gbdb/hg38/multiz7way
+    ln -s `pwd`/multiz7way.maf /gbdb/hg38/multiz7way
+    cd /dev/shm
+    time nice -n +17 hgLoadMaf hg38 multiz7way
+    # Loaded 10270624 mafs in 1 files from /gbdb/hg38/multiz7way
+    # real    3m51.265s
+
+    time nice -n +17 hgLoadMafSummary -verbose=2 -minSize=30000 \
+	-mergeGap=1500 -maxSize=200000 hg38 multiz7waySummary \
+	/gbdb/hg38/multiz7way/multiz7way.maf
+    # Created 1260918 summary blocks from 35384988 components
+    # and 10270624 mafs from /gbdb/hg38/multiz7way/multiz7way.maf
+    # real    5m39.388s
+
+
+    wc -l multiz7way*.tab
+    # 10270624 multiz7way.tab
+    # 1260918 multiz7waySummary.tab
+    # 11531542 total
+
+    rm multiz7way*.tab
+
+##############################################################################
+# GAP ANNOTATE MULTIZ7WAY MAF AND LOAD TABLES (DONE - 2014-06-03 - Hiram)
+    # mafAddIRows has to be run on single chromosome maf files, it does not
+    #	function correctly when more than one reference sequence
+    #	are in a single file.  Need to split of the maf file into individual
+    #   maf files
+    mkdir -p /hive/data/genomes/hg38/bed/multiz7way/anno/mafSplit
+    cd /hive/data/genomes/hg38/bed/multiz7way/anno/mafSplit
+
+    time mafSplit -outDirDepth=1 -byTarget -useFullSequenceName \
+        /dev/null . ../../multiz7way.maf
+    #   real    4m8.617s
+
+    find . -type f | wc -l
+    #   353
+
+    # check for N.bed files everywhere:
+    cd /hive/data/genomes/hg38/bed/multiz7way/anno
+    for DB in `cat ../species.list`
+do
+    if [ ! -s /hive/data/genomes/${DB}/${DB}.N.bed ]; then
+        echo "MISS: ${DB}"
+#        cd /hive/data/genomes/${DB}
+#        twoBitInfo -nBed ${DB}.2bit ${DB}.N.bed
+    else
+        echo "  OK: ${DB}"
+    fi
+done
+
+    cd /hive/data/genomes/hg38/bed/multiz7way/anno
+    for DB in `cat ../species.list`
+do
+    echo "${DB} "
+    ln -s  /hive/data/genomes/${DB}/${DB}.N.bed ${DB}.bed
+    echo ${DB}.bed  >> nBeds
+    ln -s  /hive/data/genomes/${DB}/chrom.sizes ${DB}.len
+    echo ${DB}.len  >> sizes
+done
+    # make sure they all are successful symLinks:
+    ls -ogrtL
+
+    screen -S hg38      # use a screen to control this longish job
+    ssh ku
+    cd /hive/data/genomes/hg38/bed/multiz7way/anno
+    mkdir result
+    for D in `ls mafSplit`
+do
+    echo mkdir result/${D}
+    mkdir result/${D}
+done
+    cat << '_EOF_' > template
+#LOOP
+mafAddIRows -nBeds=nBeds mafSplit/$(path1) /hive/data/genomes/hg38/hg38.2bit {check out exists+ result/$(path1)}
+#ENDLOOP
+'_EOF_'
+    # << happy emacs
+
+    find ./mafSplit -type f | sed -e 's#^./mafSplit/##' > maf.list
+    gensub2 maf.list single template jobList
+    # limit jobs on a node with the ram=32g requirement because they go fast
+    para -ram=32g create jobList
+    para try ... check ... push ...
+# Completed: 353 of 353 jobs
+# CPU time in finished jobs:        530s       8.83m     0.15h    0.01d  0.000 y
+# IO & Wait Time:                  1057s      17.62m     0.29h    0.01d  0.000 y
+# Average job time:                   4s       0.07m     0.00h    0.00d
+# Longest finished job:              63s       1.05m     0.02h    0.00d
+# Submission to last job:           220s       3.67m     0.06h    0.00d
+
+    # verify all result files have some content, look for 0 size files:
+    find ./result -type f -size 0
+    # should see none
+    # or in this manner:
+    find ./result -type f | xargs ls -og | sort -k3nr | tail
+
+    # combine into one file  (the 1>&2 redirect sends the echo to stderr)
+    head -q -n 1 result/0/chr8.maf > hg38.7way.maf
+    find ./result -type f | while read F
+do
+    echo "${F}" 1>&2
+    grep -h -v "^#" ${F}
+done >> hg38.7way.maf
+
+    #	these maf files do not have the end marker, this does nothing:
+    #	tail -q -n 1 result/0/chr8.maf >> hg38.7way.maf
+    # How about an official end marker:
+    echo "##eof maf" >> hg38.7way.maf
+    ls -og
+# -rw-rw-r--  1 17795297196 Jun  3 14:01 hg38.7way.maf
+
+    du -hsc hg38.7way.maf
+    # 17G     hg38.7way.maf
+
+    # construct symlinks to get the individual maf files into gbdb:
+    rm /gbdb/hg38/multiz7way/multiz7way.maf   # remove previous results
+    ln -s `pwd`/hg38.7way.maf /gbdb/hg38/multiz7way/multiz7way.maf
+
+    # Load into database
+    cd /dev/shm
+    time nice -n +19 hgLoadMaf -pathPrefix=/gbdb/hg38/multiz7way \
+        hg38 multiz7way
+    # Loaded 10359242 mafs in 1 files from /gbdb/hg38/multiz7way
+    # real    4m21.862s
+
+    time hgLoadMafSummary -verbose=2 -minSize=30000 \
+	-mergeGap=1500 -maxSize=200000 hg38 multiz7waySummary \
+        /gbdb/hg38/multiz7way/multiz7way.maf
+#  Created 1260918 summary blocks from 35384988 components
+#  and 10359242 mafs from /gbdb/hg38/multiz7way/multiz7way.maf
+#  real    6m6.583s
+
+# -rw-rw-r-- 1 530538267 Jun  3 14:05 multiz7way.tab
+# -rw-rw-r-- 1  60616616 Jun  3 14:15 multiz7waySummary.tab
+
+    rm multiz7way*.tab
+
+######################################################################
+# MULTIZ7WAY MAF FRAMES (DONE - 2014-06-03 - Hiram)
+    ssh hgwdev
+    mkdir /hive/data/genomes/hg38/bed/multiz7way/frames
+    cd /hive/data/genomes/hg38/bed/multiz7way/frames
+#   survey all the genomes to find out what kinds of gene tracks they have
+    cat << '_EOF_' > showGenes.csh
+#!/bin/csh -fe
+# Survey each assembly named in ../species.list for available gene tracks
+# (ensGene, refGene, mgcGenes, knownGene, xenoRefGene), printing row counts,
+# plus a count of mRNAs recorded for the organism in hg19's gbCdnaInfo.
+foreach db (`cat ../species.list`)
+    echo -n "${db}: "
+    set tables = `hgsql $db -N -e "show tables like '%Gene%'"`
+    foreach table ($tables)
+        if ($table == "ensGene" || $table == "refGene" || \
+           $table == "mgcGenes" || $table == "knownGene" || \
+           $table == "xenoRefGene" ) then
+           set count = `hgsql $db -N -e "select count(*) from $table"`
+            echo -n "${table}: ${count}, "
+        endif
+    end
+    # map db -> scientific name -> organism id; an empty id means no
+    # mRNA records exist for this organism, so report 0
+    set orgName = `hgsql hgcentraltest -N -e \
+            "select scientificName from dbDb where name='$db'"`
+    set orgId = `hgsql hg19 -N -e \
+            "select id from organism where name='$orgName'"`
+    if ($orgId == "") then
+        echo "Mrnas: 0"
+    else
+        set count = `hgsql hg19 -N -e "select count(*) from gbCdnaInfo where organism=$orgId"`
+        echo "Mrnas: ${count}"
+    endif
+end
+'_EOF_'
+    # << happy emacs
+    chmod +x ./showGenes.csh
+    time ./showGenes.csh
+# hg38: knownGene: 104178, mgcGenes: 34081, refGene: 54852, xenoRefGene: 172740, Mrnas: 10723716
+# panTro4: ensGene: 29160, refGene: 2622, xenoRefGene: 280516, Mrnas: 11163
+# rheMac3: refGene: 6369, xenoRefGene: 275096, Mrnas: 443642
+# mm10: ensGene: 94647, knownGene: 61642, mgcGenes: 26768, refGene: 33765, xenoRefGene: 161178, Mrnas: 5224613
+# rn5: ensGene: 29188, mgcGenes: 6924, refGene: 18567, xenoRefGene: 175416, Mrnas: 1247500
+# canFam3: ensGene: 29884, refGene: 1582, xenoRefGene: 253196, Mrnas: 387195
+# monDom5: ensGene: 24882, refGene: 492, xenoRefGene: 248251,  Mrnas: 2461
+
+    # from that summary, use these gene sets:
+    # refGene - rheMac3
+    # ensGene - panTro4 rn5 canFam3 monDom5
+    # knownGene - hg38 mm10
+
+    mkdir genes
+    #   1. knownGene: hg38 mm10
+    for DB in hg38 mm10
+do
+    hgsql -N -e "select name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds from knownGene" ${DB} \
+      | genePredSingleCover stdin stdout | gzip -2c \
+        > genes/${DB}.gp.gz
+done
+    #   2. ensGene:
+    for DB in panTro4 rn5 canFam3 monDom5
+do
+hgsql -N -e "select name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds from ensGene" ${DB} \
+      | genePredSingleCover stdin stdout | gzip -2c \
+        > /scratch/tmp/${DB}.tmp.gz
+    mv /scratch/tmp/${DB}.tmp.gz genes/$DB.gp.gz
+    echo "${DB} done"
+done
+    #   3. refGene
+    for DB in rheMac3
+do
+hgsql -N -e "select * from refGene" ${DB} | cut -f2- \
+      | genePredSingleCover stdin stdout | gzip -2c \
+        > /scratch/tmp/${DB}.tmp.gz
+    mv /scratch/tmp/${DB}.tmp.gz genes/$DB.gp.gz
+    echo "${DB} done"
+done
+
+    # verify counts for genes are reasonable:
+    for T in genes/*.gz
+do
+    echo -n "# $T: "
+    zcat $T | cut -f1 | sort | uniq -c | wc -l
+done
+# genes/canFam3.gp.gz: 19507
+# genes/hg38.gp.gz: 21887
+# genes/mm10.gp.gz: 21013
+# genes/monDom5.gp.gz: 21033
+# genes/panTro4.gp.gz: 18657
+# genes/rheMac3.gp.gz: 5614
+# genes/rn5.gp.gz: 22863
+
+    time (cat ../anno/hg38.7way.maf \
+	| nice -n +19 genePredToMafFrames hg38 stdin stdout \
+	    `sed -e "s#\([a-zA-Z0-9]*\)#\1 genes/\1.gp.gz#g" ../species.list` \
+		| gzip > multiz7wayFrames.bed.gz)
+    #   real    3m44.591s
+
+    # verify there are frames on everything, should be 7 species:
+    zcat multiz7wayFrames.bed.gz | awk '{print $4}' | sort | uniq -c
+# 265160 canFam3
+# 208941 hg38
+# 253323 mm10
+# 574521 monDom5
+# 200156 panTro4
+#  49802 rheMac3
+# 244731 rn5
+
+    #   load the resulting file
+    ssh hgwdev
+    cd /hive/data/genomes/hg38/bed/multiz7way/frames
+    time hgLoadMafFrames hg38 multiz7wayFrames multiz7wayFrames.bed.gz
+    #   real    0m19.959s
+
+    time featureBits -countGaps hg38 multiz7wayFrames
+    #   52686177 bases of 3209286105 (1.642%) in intersection
+    #   real    0m12.593s
+
+    #   enable the trackDb entries:
+# frames multiz7wayFrames
+# irows on
+    #   appears to work OK
+
+#########################################################################
+# Phylogenetic tree from 7-way (DONE - 2014-06-04 - Hiram)
+    mkdir /hive/data/genomes/hg38/bed/multiz7way/4d
+    cd /hive/data/genomes/hg38/bed/multiz7way/4d
+
+    # the annotated maf is:
+    ../anno/hg38.7way.maf
+
+    # using knownGene for hg38
+    hgsql -N -e "select name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds from knownGene" hg38 > hg38.knownGene.gp
+
+    genePredSingleCover hg38.knownGene.gp stdout | sort > hg38.knownGeneNR.gp
+    wc -l hg38.knownGeneNR.gp
+    #	21887 hg38.knownGeneNR.gp
+
+    mkdir annoSplit
+    cd annoSplit
+    time mafSplit -verbose=2 -outDirDepth=1 -byTarget -useFullSequenceName \
+        /dev/null . ../../anno/hg38.7way.maf
+    # real    5m14.770s
+
+    find . -type f | wc -l
+    #   353
+    ssh ku
+    mkdir /hive/data/genomes/hg38/bed/multiz7way/4d/run
+    cd /hive/data/genomes/hg38/bed/multiz7way/4d/run
+    mkdir ../mfa
+
+    # newer versions of msa_view have a slightly different operation
+    # the sed of the gp file inserts the reference species in the chr name
+    cat << '_EOF_' > 4d.csh
+#!/bin/csh -fe
+# Extract four-fold degenerate (4d) sites from one split maf chunk.
+# Args: $1 = maf file name (chrom derived via the :r modifier),
+#       $2 = path of the split maf relative to 4d/annoSplit,
+#       $3 = output mfa path relative to 4d/mfa
+set PHASTBIN = /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin
+set r = "/hive/data/genomes/hg38/bed/multiz7way"
+set c = $1:r
+set infile = $r/4d/annoSplit/$2
+set outDir = $r/4d/mfa/$3:h
+set outfile = $r/4d/mfa/$3
+/bin/mkdir -p $outDir
+cd /scratch/tmp
+# select this chrom's genes; the sed inserts the "hg38." prefix on the
+# chrom name so the gp records match the maf source names
+/bin/awk -v C=$c '$2 == C {print}' $r/4d/hg38.knownGeneNR.gp | sed -e "s/\t$c\t/\thg38.$c\t/" > $c.gp
+set NL=`wc -l $c.gp| gawk '{print $1}'`
+echo $NL
+if ("$NL" != "0") then
+    $PHASTBIN/msa_view --4d --features $c.gp -i MAF $infile -o SS > $c.ss
+    $PHASTBIN/msa_view -i SS --tuple-size 1 $c.ss > $outfile
+else
+    # no genes on this sequence: write a placeholder so the cluster
+    # job's "check out" requirement is satisfied
+    echo "" > $outfile
+endif
+/bin/rm -f $c.gp $c.ss
+'_EOF_'
+    # << happy emacs
+    chmod +x 4d.csh
+
+    find ../annoSplit -type f | sed -e "s#../annoSplit/##" > maf.list
+
+    cat << '_EOF_' > template
+#LOOP
+4d.csh $(file1) $(path1) {check out line+ ../mfa/$(dir1)/$(root1).mfa}
+#ENDLOOP
+'_EOF_'
+    # << happy emacs
+
+    gensub2 maf.list single template jobList
+    para create jobList
+    para try ... check
+    para time
+# Completed: 353 of 353 jobs
+# CPU time in finished jobs:        836s      13.93m     0.23h    0.01d  0.000 y
+# IO & Wait Time:                  1172s      19.54m     0.33h    0.01d  0.000 y
+# Average job time:                   6s       0.09m     0.00h    0.00d
+# Longest finished job:              72s       1.20m     0.02h    0.00d
+# Submission to last job:            89s       1.48m     0.02h    0.00d
+
+    # Not all results have contents, that is OK
+
+    # combine mfa files
+    ssh hgwdev
+    cd /hive/data/genomes/hg38/bed/multiz7way/4d
+    # remove the broken empty files, size 0 and size 1:
+    find ./mfa -type f -size 0 | xargs rm -f
+    # most interesting, this did not identify files of size 1:
+#    find ./mfa -type f -size 1
+    find ./mfa -type f | xargs ls -og | awk '$3 == 1' | awk '{print $NF}' \
+        > empty.list
+    cat empty.list | xargs rm -f
+    # want comma-less species.list
+    /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/msa_view \
+	--aggregate "`cat ../species.list`" mfa/*/*.mfa | sed s/"> "/">"/ \
+	    > 4d.all.mfa
+    # check they are all in there:
+    grep "^>" 4d.all.mfa
+    #    >hg38
+    #    >panTro4
+    #    >rheMac3
+    #    >mm10
+    #    >rn5
+    #    >canFam3
+    #    >monDom5
+
+    sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
+	../hg38.7way.nh | xargs echo | sed -e 's/ //g' > tree_commas.nh
+    # tree_commas.nh looks like:
+    #   (((((hg38,panTro4),rheMac3),(mm10,rn5)),canFam3),monDom5)
+    # use phyloFit to create tree model (output is phyloFit.mod)
+    time nice -n +19 \
+	/cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/phyloFit \
+	    --EM --precision MED --msa-format FASTA --subst-mod REV \
+		--tree tree_commas.nh 4d.all.mfa
+    #   real    0m6.583s
+
+
+    mv phyloFit.mod all.mod
+
+    grep TREE all.mod
+# TREE: (((((hg38:0.00673596,panTro4:0.00686169):0.0248146,rheMac3:0.0357598):0.0970072,(mm10:0.081661,rn5:0.0874126):0.246527):0.0264964,canFam3:0.156769):0.303241,monDom5:0.303241);
+
+    # compare these calculated lengths to the tree extracted from 130way:
+    grep TREE all.mod | sed -e 's/TREE: //' \
+      | /cluster/bin/phast/all_dists /dev/stdin | grep hg38 | sort -k3n \
+        | sed -e "s/hg38.//; s/^/    #  /"
+    #  panTro4  0.013598
+    #  rheMac3  0.067310
+    #  canFam3  0.311823
+    #  mm10     0.456746
+    #  rn5      0.462497
+    #  monDom5  0.761536
+
+    # yes, somewhat similar
+    /cluster/bin/phast/all_dists ../hg38.7way.nh | grep hg38 \
+        | sort -k3n | sed -e "s/hg38.//; s/^/    #  /"
+    #  panTro4   0.013390
+    #  rheMac3   0.071575
+    #  canFam3   0.330429
+    #  mm10      0.500391
+    #  rn5       0.507471
+    #  monDom5   0.763679
+
+#########################################################################
+# phastCons 7-way (DONE - 2014-06-04 - Hiram)
+    # split 7way mafs into 10M chunks and generate sufficient statistics
+    # files for # phastCons
+    ssh ku
+    mkdir -p /hive/data/genomes/hg38/bed/multiz7way/cons/SS
+    cd /hive/data/genomes/hg38/bed/multiz7way/cons/SS
+    mkdir result done
+
+    cat << '_EOF_' > mkSS.csh
+#!/bin/csh -ef
+# Split one annotated maf into ~10 Mbase sufficient-statistics (SS)
+# chunks for phastCons.
+# Args: $1 = result subdirectory, $2 = chrom name, $3 = done-marker file
+set d = $1
+set c = $2
+set doneDir = done/$d
+set MAF = /hive/data/genomes/hg38/bed/multiz7way/anno/result/$d/$c.maf
+set WINDOWS = /hive/data/genomes/hg38/bed/multiz7way/cons/SS/result/$d/$c
+set WC = `cat $MAF | wc -l`
+set NL = `grep "^#" $MAF | wc -l`
+# skip chunks already finished or currently in progress, so the
+# batch can be safely re-pushed after failures
+if ( -s $3 ) then
+    exit 0
+endif
+if ( -s $3.running ) then
+    exit 0
+endif
+
+/bin/mkdir -p $doneDir
+/bin/date >> $3.running
+
+/bin/rm -fr $WINDOWS
+/bin/mkdir -p $WINDOWS
+pushd $WINDOWS > /dev/null
+# run msa_split only when the maf holds more than "#" header lines
+if ( $WC != $NL ) then
+/cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/msa_split \
+    $MAF -i MAF -o SS -r $WINDOWS/$c -w 10000000,0 -I 1000 -B 5000
+endif
+popd > /dev/null
+/bin/date >> $3
+/bin/rm -f $3.running
+'_EOF_'
+    # << happy emacs
+    chmod +x mkSS.csh
+
+    cat << '_EOF_' > template
+#LOOP
+mkSS.csh $(dir1) $(root1) {check out line+ done/$(dir1)/$(root1)}
+#ENDLOOP
+'_EOF_'
+    # << happy emacs
+
+    #	do the easy ones first to see some immediate results
+    find ../../anno/result -type f | sed -e "s#../../anno/result/##" > maf.list
+
+    gensub2 maf.list single template jobList
+    para -ram=32g create jobList
+    para try ... check ... etc
+# Completed: 353 of 353 jobs
+# CPU time in finished jobs:       1216s      20.27m     0.34h    0.01d  0.000 y
+# IO & Wait Time:                  1385s      23.08m     0.38h    0.02d  0.000 y
+# Average job time:                   7s       0.12m     0.00h    0.00d
+# Longest finished job:             111s       1.85m     0.03h    0.00d
+# Submission to last job:           189s       3.15m     0.05h    0.00d
+
+    find ./result -type f | wc -l
+    #	 641
+
+    # Run phastCons
+    #	This job is I/O intensive in its output files, beware where this
+    #	takes place or do not run too many at once.
+    ssh ku
+    mkdir -p /hive/data/genomes/hg38/bed/multiz7way/cons/run.cons
+    cd /hive/data/genomes/hg38/bed/multiz7way/cons/run.cons
+
+    #	This is setup for multiple runs based on subsets, but only running
+    #   the 'all' subset here.
+    #   It triggers off of the current working directory
+    #	$cwd:t which is the "grp" in this script.  Running:
+    #	all and vertebrates
+
+    cat << '_EOF_' > doPhast.csh
+#!/bin/csh -fe
+# Run phastCons on one SS chunk.
+# Args: $1 = chrom, $2 = result subdir, $3 = ss file name,
+#       $4 = --expected-length, $5 = --target-coverage, $6 = --rho
+# The "grp" (e.g. "all") is taken from the current directory name ($cwd:t).
+set PHASTBIN = /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin
+set c = $1
+set d = $2
+set f = $3
+set len = $4
+set cov = $5
+set rho = $6
+set grp = $cwd:t
+set cons = /hive/data/genomes/hg38/bed/multiz7way/cons
+set tmp = $cons/tmp/${d}_${c}
+mkdir -p $tmp
+set ssSrc = $cons/SS/result
+set useGrp = "$grp.mod"
+# stage the model, the optional non-informative species list, and the
+# input SS file into the per-job tmp directory
+if (-s $cons/$grp/$grp.non-inf) then
+  ln -s $cons/$grp/$grp.mod $tmp
+  ln -s $cons/$grp/$grp.non-inf $tmp
+  ln -s $ssSrc/$d/$f $tmp
+else
+  ln -s $ssSrc/$d/$f $tmp
+  ln -s $cons/$grp/$grp.mod $tmp
+endif
+pushd $tmp > /dev/null
+if (-s $grp.non-inf) then
+  $PHASTBIN/phastCons $f $useGrp \
+    --rho $rho --expected-length $len --target-coverage $cov --quiet \
+    --not-informative `cat $grp.non-inf` \
+    --seqname $c --idpref $c --most-conserved $c.bed --score > $c.pp
+else
+  $PHASTBIN/phastCons $f $useGrp \
+    --rho $rho --expected-length $len --target-coverage $cov --quiet \
+    --seqname $c --idpref $c --most-conserved $c.bed --score > $c.pp
+endif
+popd > /dev/null
+# move results into place; the sleep/touch sequence presumably gives
+# NFS time to notice the new directories before the mv -- TODO confirm
+mkdir -p pp/$d bed/$d
+sleep 4
+touch pp/$d bed/$d
+rm -f pp/$d/$c.pp
+rm -f bed/$d/$c.bed
+mv $tmp/$c.pp pp/$d
+mv $tmp/$c.bed bed/$d
+rm -fr $tmp
+rmdir --ignore-fail-on-non-empty $cons/tmp/$d:h
+'_EOF_'
+    # << happy emacs
+    chmod +x doPhast.csh
+
+    #	this template will serve for all runs
+    #	root1 == chrom name, file1 == ss file name without .ss suffix
+    cat << '_EOF_' > template
+#LOOP
+../run.cons/doPhast.csh $(root1) $(dir1) $(file1) 45 0.3 0.3 {check out line+ pp/$(dir1)/$(root1).pp}
+#ENDLOOP
+'_EOF_'
+    # << happy emacs
+
+    find ../SS/result -type f | sed -e "s#../SS/result/##" > ss.list
+    wc -l ss.list
+    #	641 ss.list
+
+    # Create parasol batch and run it
+    # run for all species
+    cd /hive/data/genomes/hg38/bed/multiz7way/cons
+    mkdir -p all
+    cd all
+    #	Using the .mod tree
+    cp -p ../../4d/all.mod ./all.mod
+
+    gensub2 ../run.cons/ss.list single ../run.cons/template jobList
+    para -ram=32g create jobList
+    para try ... check ...
+    para push
+# Completed: 641 of 641 jobs
+# CPU time in finished jobs:       6557s     109.29m     1.82h    0.08d  0.000 y
+# IO & Wait Time:                  4497s      74.94m     1.25h    0.05d  0.000 y
+# Average job time:                  17s       0.29m     0.00h    0.00d
+# Longest finished job:              33s       0.55m     0.01h    0.00d
+# Submission to last job:           120s       2.00m     0.03h    0.00d
+
+    # create Most Conserved track
+    cd /hive/data/genomes/hg38/bed/multiz7way/cons/all
+    cut -f1 ../../../../chrom.sizes | while read C
+do
+    ls -d bed/?/${C} 2> /dev/null | while read D
+    do
+        echo ${D}/${C}*.bed 1>&2
+        cat ${D}/${C}*.bed
+    done | sort -k1,1 -k2,2n \
+    | awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", "'${C}'", $2, $3, $5, $5;}'
+done > tmpMostConserved.bed
+
+    /cluster/bin/scripts/lodToBedScore tmpMostConserved.bed > mostConserved.bed
+    #    -rw-rw-r--  1 42636652 Jun  4 10:45 tmpMostConserved.bed
+    #    -rw-rw-r--  1 43721828 Jun  4 10:45 mostConserved.bed
+
+    # load into database
+    ssh hgwdev
+    cd /hive/data/genomes/hg38/bed/multiz7way/cons/all
+    time nice -n +19 hgLoadBed hg38 phastConsElements7way mostConserved.bed
+    #  Read 1234990 elements of size 5 from mostConserved.bed
+    #  real    0m11.390s
+
+    # on human we often try for 5% overall cov, and 70% CDS cov
+    # most bets are off here for that goal, these alignments are too few
+    #	and too far between
+    #	--rho 0.3 --expected-length 45 --target-coverage 0.3
+    featureBits hg38 -enrichment knownGene:cds phastConsElements7way
+    # knownGene:cds 1.266%, phastConsElements7way 4.551%,
+    #    both 0.888%, cover 70.16%, enrich 15.42x
+
+    # Create merged posterior probability file and wiggle track data files
+    cd /hive/data/genomes/hg38/bed/multiz7way/cons/all
+    mkdir downloads
+
+    # the third sed fixes the chrom names, removing the partition extensions
+    time (find ./pp -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
+	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
+	| sed -e 's/\.[0-9][0-9]*-[0-9][0-9]* start/ start/' \
+        | gzip -c > downloads/phastCons7way.wigFix.gz)
+    #   real    37m47.242s
+
+    # check integrity of data with wigToBigWig
+    time (zcat downloads/phastCons7way.wigFix.gz \
+	| wigToBigWig -verbose=2 stdin /hive/data/genomes/hg38/chrom.sizes \
+	    phastCons7way.bw) > bigWig.log 2>&1 &
+    tail bigWig.log
+    # pid=34733: VmPeak:    33106324 kB
+    #   real    40m53.287s
+
+    bigWigInfo phastCons7way.bw
+# version: 4
+# isCompressed: yes
+# isSwapped: 0
+# primaryDataSize: 5,675,802,079
+# primaryIndexSize: 92,579,900
+# zoomLevels: 10
+# chromCount: 353
+# basesCovered: 2,898,191,577
+# mean: 0.168088
+# min: 0.000000
+# max: 1.000000
+# std: 0.233827
+
+    #	encode those files into wiggle data
+    time (zcat downloads/phastCons7way.wigFix.gz \
+	| wigEncode stdin phastCons7way.wig phastCons7way.wib)
+    #   Converted stdin, upper limit 1.00, lower limit 0.00
+    #   real    15m28.525s
+
+    du -hsc *.wi?
+    #  2.7G    phastCons7way.wib
+    #  282M    phastCons7way.wig
+    #  3.0G    total
+
+    # Load gbdb and database with wiggle.
+    ln -s `pwd`/phastCons7way.wib /gbdb/hg38/multiz7way/phastCons7way.wib
+    time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/hg38/multiz7way \
+	hg38 phastCons7way phastCons7way.wig
+    #   real    0m33.502s
+
+    # use to set trackDb.ra entries for wiggle min and max
+    # and verify table is loaded correctly
+
+    wigTableStats.sh hg38 phastCons7way
+# db.table          min max mean       count sumData      stdDev  viewLimits
+hg38.phastCons7way 0 1 0.168088 2898191577 4.87152e+08 0.233827 viewLimits=0:1
+
+    #  Create histogram to get an overview of all the data
+    time nice -n +19 hgWiggle -doHistogram -db=hg38 \
+	-hBinSize=0.001 -hBinCount=1000 -hMinVal=0.0 -verbose=2 \
+	    phastCons7way > histogram.data 2>&1
+    #	real    2m40.179s
+
+    #	create plot of histogram:
+
+    cat << '_EOF_' | gnuplot > histo.png
+set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
+set size 1.4, 0.8
+set key left box
+set grid noxtics
+set grid ytics
+set title " Human hg38 Histogram phastCons7way track"
+set xlabel " phastCons7way score"
+set ylabel " Relative Frequency"
+set y2label " Cumulative Relative Frequency (CRF)"
+set y2range [0:1]
+set y2tics
+set yrange [0:0.02]
+
+plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
+        "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
+'_EOF_'
+    #	<< happy emacs
+
+    display histo.png &
+
+#########################################################################
+# phyloP for 7-way (DONE - 2014-06-04 - Hiram)
+    # run phyloP with score=LRT
+    ssh ku
+    mkdir /cluster/data/hg38/bed/multiz7way/consPhyloP
+    cd /cluster/data/hg38/bed/multiz7way/consPhyloP
+
+    mkdir run.phyloP
+    cd run.phyloP
+    # Adjust model file base composition background and rate matrix to be
+    # representative of the chromosomes in play
+    grep BACKGROUND ../../cons/all/all.mod | awk '{printf "%0.3f\n", $3 + $4}'
+    #	0.556
+    /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/modFreqs \
+	../../cons/all/all.mod 0.556 > all.mod
+    # verify, the BACKGROUND should now be paired up:
+    grep BACK all.mod
+    #   BACKGROUND: 0.222000 0.278000 0.278000 0.222000
+
+    cat << '_EOF_' > doPhyloP.csh
+#!/bin/csh -fe
+# Run phyloP (likelihood-ratio test, conservation/acceleration mode)
+# on one SS chunk, producing a wigFix file.
+# Args: $1 = ss chunk path (without .ss) relative to cons/SS/result,
+#       $2 = output wigFix path
+# The "grp" (e.g. "all") is the current directory name ($cwd:t).
+set PHASTBIN = /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin
+set f = $1
+set d = $f:h
+set file1 = $f:t
+set out = $2
+set cName = $f:t:r
+set grp = $cwd:t
+set cons = /hive/data/genomes/hg38/bed/multiz7way/consPhyloP
+set tmp = $cons/tmp/$grp/$f
+/bin/rm -fr $tmp
+/bin/mkdir -p $tmp
+set ssSrc = "/hive/data/genomes/hg38/bed/multiz7way/cons/SS/result/$f"
+set useGrp = "$grp.mod"
+/bin/ln -s $cons/run.phyloP/$grp.mod $tmp
+pushd $tmp > /dev/null
+$PHASTBIN/phyloP --method LRT --mode CONACC --wig-scores --chrom $cName \
+    -i SS $useGrp $ssSrc.ss > $file1.wigFix
+popd > /dev/null
+/bin/mkdir -p $out:h
+# sleep/touch presumably lets NFS see the new output dir before the mv
+sleep 4
+/bin/touch $out:h
+/bin/mv $tmp/$file1.wigFix $out
+/bin/rm -fr $tmp
+# clean up empty tmp directory levels; harmless if still occupied
+/bin/rmdir --ignore-fail-on-non-empty $cons/tmp/$grp/$d
+/bin/rmdir --ignore-fail-on-non-empty $cons/tmp/$grp/$d:h
+/bin/rmdir --ignore-fail-on-non-empty $cons/tmp/$grp
+/bin/rmdir --ignore-fail-on-non-empty $cons/tmp
+'_EOF_'
+    # << happy emacs
+
+    # Create list of chunks
+    find ../../cons/SS/result -type f | grep ".ss$" \
+	| sed -e "s/.ss$//; s#^../../cons/SS/result/##" > ss.list
+    # make sure the list looks good
+    wc -l ss.list
+    #	641 ss.list
+
+    # Create template file
+    #	file1 == $chr/$chunk/file name without .ss suffix
+    cat << '_EOF_' > template
+#LOOP
+../run.phyloP/doPhyloP.csh $(path1) {check out line+ wigFix/$(dir1)/$(file1).wigFix}
+#ENDLOOP
+'_EOF_'
+    # << happy emacs
+
+    ######################   Running all species  #######################
+    # setup run for all species
+    mkdir /hive/data/genomes/hg38/bed/multiz7way/consPhyloP/all
+    cd /hive/data/genomes/hg38/bed/multiz7way/consPhyloP/all
+    rm -fr wigFix
+    mkdir wigFix
+
+    gensub2 ../run.phyloP/ss.list single ../run.phyloP/template jobList
+    # the -ram=8g will allow only one job per node to slow this down since
+    #	it would run too fast otherwise.  Either run on one of the small
+    #	klusters or use the -ram=8g on the para create
+    para -ram=32g create jobList
+    para try ... check ... push ... etc ...
+    para time > run.time
+# Completed: 641 of 641 jobs
+# CPU time in finished jobs:       4755s      79.24m     1.32h    0.06d  0.000 y
+# IO & Wait Time:                  4343s      72.39m     1.21h    0.05d  0.000 y
+# Average job time:                  14s       0.24m     0.00h    0.00d
+# Longest finished job:              27s       0.45m     0.01h    0.00d
+# Submission to last job:          1152s      19.20m     0.32h    0.01d
+
+    # make downloads
+    mkdir downloads
+
+    time (find ./wigFix -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
+	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
+	| gzip -c > downloads/phyloP7way.wigFix.gz) &
+    #   real    29m51.665s
+
+    # check integrity of data with wigToBigWig
+    time (zcat downloads/phyloP7way.wigFix.gz \
+	| wigToBigWig -verbose=2 stdin /hive/data/genomes/hg38/chrom.sizes \
+	phyloP7way.bw) > bigWig.log 2>&1 &
+    egrep "real|VmPeak" bigWig.log
+    # pid=76577: VmPeak:    33106320 kB
+    #  real    42m53.038s
+
+
+    bigWigInfo phyloP7way.bw
+# version: 4
+# isCompressed: yes
+# isSwapped: 0
+# primaryDataSize: 3,759,451,708
+# primaryIndexSize: 92,579,900
+# zoomLevels: 10
+# chromCount: 353
+# basesCovered: 2,898,191,577
+# mean: 0.074472
+# min: -5.220000
+# max: 1.062000
+# std: 0.545945
+
+    #	encode those files into wiggle data
+    time (zcat downloads/phyloP7way.wigFix.gz \
+	| wigEncode stdin phyloP7way.wig phyloP7way.wib) &
+    #   Converted stdin, upper limit 1.06, lower limit -5.22
+    #   real    16m11.861s
+
+
+    du -hsc *.wi?
+    #   47M     phyloP7way.wib
+    #   12M     phyloP7way.wig
+    #   58M     total
+
+    # Load gbdb and database with wiggle.
+    ln -s `pwd`/phyloP7way.wib /gbdb/hg38/multiz7way/phyloP7way.wib
+    nice hgLoadWiggle -pathPrefix=/gbdb/hg38/multiz7way hg38 \
+	phyloP7way phyloP7way.wig
+
+    # use to set trackDb.ra entries for wiggle min and max
+    # and verify table is loaded correctly
+
+    wigTableStats.sh hg38 phyloP7way
+# db.table      min max mean count sumData
+# hg38.phyloP7way -5.22 1.062 0.0744721 2898191577 2.15834e+08
+#       stdDev viewLimits
+#     0.545945 viewLimits=-2.65525:1.062
+
+    #	that range is: 5.22+1.062 = 6.282 for hBinSize=0.006282
+
+    #  Create histogram to get an overview of all the data
+    time nice -n +19 hgWiggle -doHistogram \
+	-hBinSize=0.006282 -hBinCount=1000 -hMinVal=-5.22 -verbose=2 \
+	    -db=hg38 phyloP7way > histogram.data 2>&1
+    #   real    2m55.843s
+
+
+    # find out the range for the 2:5 graph
+    grep -v chrom histogram.data | grep "^[0-9]" | ave -col=5 stdin
+# Q1 0.000001
+# median 0.000060
+# Q3 0.000656
+# average 0.001022
+# min 0.000000
+# max 0.065461
+# count 978
+# total 0.999982
+# standard deviation 0.004157
+
+    #	create plot of histogram:
+    cat << '_EOF_' | gnuplot > histo.png
+set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
+set size 1.4, 0.8
+set key left box
+set grid noxtics
+set grid ytics
+set title " Human hg38 Histogram phyloP7way track"
+set xlabel " phyloP7way score"
+set ylabel " Relative Frequency"
+set y2label " Cumulative Relative Frequency (CRF)"
+set y2range [0:1]
+set y2tics
+set yrange [0:0.02]
+
+plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
+        "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
+'_EOF_'
+    #	<< happy emacs
+
+    display histo.png &
+
+#############################################################################
+# construct download files for 7-way (DONE - 2014-06-05 - Hiram)
+    mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/multiz7way
+    mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/phastCons7way
+    mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/phyloP7way
+    mkdir /hive/data/genomes/hg38/bed/multiz7way/downloads
+    cd /hive/data/genomes/hg38/bed/multiz7way/downloads
+    mkdir multiz7way phastCons7way phyloP7way
+    cd multiz7way
+    time cp -p ../../anno/hg38.7way.maf .
+    #   real    0m55.984s
+    time gzip *.maf
+    #   real    46m53.149s
+
+    ln -s ../../hg38.7way.nh .
+    ln -s ../../hg38.7way.commonNames.nh .
+    time md5sum *.nh *.maf.gz > md5sum.txt
+    #   real    1m55.317s
+    ln -s `pwd`/* \
+        /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/multiz7way
+
+    du -hsc *.maf.gz ../../anno/hg38.7way.maf
+    #  3.5G    hg38.7way.maf.gz
+    #   17G     ../../anno/hg38.7way.maf
+
+    #####################################################################
+    cd /hive/data/genomes/hg38/bed/multiz7way/downloads/phastCons7way
+
+    ln -s ../../cons/all/downloads/phastCons7way.wigFix.gz \
+        ./hg38.phastCons7way.wigFix.gz
+    ln -s ../../cons/all/phastCons7way.bw ./hg38.phastCons7way.bw
+    ln -s ../../cons/all/all.mod ./hg38.phastCons7way.mod
+    time md5sum *.gz *.mod *.bw > md5sum.txt
+    #   real    0m37.384s
+    # obtain the README.txt from petMar2/phastCons7way and update for this
+    #   situation
+    ln -s `pwd`/*.gz `pwd`/*.mod `pwd`/*.bw `pwd`/*.txt \
+      /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/phastCons7way
+
+    #####################################################################
+    cd /hive/data/genomes/hg38/bed/multiz7way/downloads/phyloP7way
+
+    ln -s ../../consPhyloP/all/downloads/phyloP7way.wigFix.gz \
+        ./hg38.phyloP7way.wigFix.gz
+    ln -s ../../consPhyloP/run.phyloP/all.mod hg38.phyloP7way.mod
+    ln -s ../../consPhyloP/all/phyloP7way.bw hg38.phyloP7way.bw
+
+    time md5sum *.mod *.bw *.gz > md5sum.txt
+    #   real    0m29.431s
+
+    # obtain the README.txt from geoFor1/phyloP7way and update for this
+    #   situation
+    ln -s `pwd`/* \
+      /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/phyloP7way
+
+    ###########################################################################
+    ## create upstream knownGene maf files (geneTbl="knownGene" below)
+    cd /hive/data/genomes/hg38/bed/multiz7way/downloads/multiz7way
+    # bash script
+#!/bin/sh
+export geneTbl="knownGene"
+for S in 1000 2000 5000
+do
+    echo "making upstream${S}.maf"
+    featureBits hg38 ${geneTbl}:upstream:${S} -fa=/dev/null -bed=stdout \
+        | perl -wpe 's/_up[^\t]+/\t0/' | sort -k1,1 -k2,2n \
+        | /cluster/bin/$MACHTYPE/mafFrags hg38 multiz7way \
+                stdin stdout \
+                -orgs=/hive/data/genomes/hg38/bed/multiz7way/species.list \
+        | gzip -c > upstream${S}.${geneTbl}.maf.gz
+    echo "done upstream${S}.${geneTbl}.maf.gz"
+done
+    #   real    60m16.631s
+
+    md5sum upstream*.gz >> md5sum.txt
+
+    # some other symlinks were already made above
+    # obtain the README.txt from geoFor1/multiz7way and update for this
+    #   situation
+    ln -s `pwd`/upstream*.gz README.txt \
+        /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/multiz7way
+
+#############################################################################
+# hgPal downloads (DONE - 2014-06-06 - Hiram)
+#   FASTA from 7-way for knownGene, refGene and knownCanonical
+
+    ssh hgwdev
+    screen -S hg38HgPal
+    mkdir /hive/data/genomes/hg38/bed/multiz7way/pal
+    cd /hive/data/genomes/hg38/bed/multiz7way/pal
+    cat ../species.list | tr '[ ]' '[\n]' > order.list
+
+    export mz=multiz7way
+    export gp=knownGene
+    export db=hg38
+    export I=0
+    mkdir exonAA exonNuc
+    for C in `sort -nk2 ../../../chrom.sizes | cut -f1`
+    do
+        I=`echo $I | awk '{print $1+1}'`
+	echo "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.list stdout | gzip -c > exonNuc/$C.exonNuc.fa.gz &"
+	echo "mafGene -chrom=$C -exons $db $mz $gp order.list stdout | gzip -c > exonAA/$C.exonAA.fa.gz &"
+        if [ $I -gt 6 ]; then
+            echo "date"
+            echo "wait"
+            I=0
+        fi
+    done > $gp.jobs
+    echo "date" >> $gp.jobs
+    echo "wait" >> $gp.jobs
+
+    time ./$gp.jobs > $gp.jobs.log 2>&1 &
+    #   real    28m46.919s
+
+    time zcat exonAA/*.gz | gzip -c > $gp.$mz.exonAA.fa.gz
+    #   real    0m23.798s
+    time zcat exonNuc/*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz
+    #   real    1m28.197s
+
+    export mz=multiz7way
+    export gp=knownGene
+    export db=hg38
+    export pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
+    mkdir -p $pd
+    md5sum *.fa.gz > md5sum.txt
+    ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
+    ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
+    ln -s `pwd`/md5sum.txt $pd/
+
+    rm -rf exonAA exonNuc
+
+    ### need other gene track alignments also
+    # running up refGene
+    cd /hive/data/genomes/hg38/bed/multiz7way/pal
+    export mz=multiz7way
+    export gp=refGene
+    export db=hg38
+    export I=0
+    mkdir exonAA exonNuc
+    for C in `sort -nk2 ../../../chrom.sizes | cut -f1`
+    do
+        I=`echo $I | awk '{print $1+1}'`
+	echo "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.list stdout | gzip -c > exonNuc/$C.exonNuc.fa.gz &"
+	echo "mafGene -chrom=$C -exons $db $mz $gp order.list stdout | gzip -c > exonAA/$C.exonAA.fa.gz &"
+        if [ $I -gt 6 ]; then
+            echo "date"
+            echo "wait"
+            I=0
+        fi
+    done > $gp.jobs
+    echo "date" >> $gp.jobs
+    echo "wait" >> $gp.jobs
+
+    time sh -x $gp.jobs > $gp.jobs.log 2>&1
+    #   real    15m15.424s
+
+    export mz=multiz7way
+    export gp=refGene
+    export db=hg38
+    time zcat exonAA/*.gz | gzip -c > $gp.$mz.exonAA.fa.gz
+    #   real    0m23.119s
+    time zcat exonNuc/*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz
+    #   real    1m15.547s
+
+    du -hsc exonAA exonNuc refGene*.fa.gz
+    #  59M     exonAA
+    #  101M    exonNuc
+    #  59M     refGene.multiz7way.exonAA.fa.gz
+    #  101M    refGene.multiz7way.exonNuc.fa.gz
+    #  317M    total
+
+    rm -rf exonAA exonNuc
+
+    # we're only distributing exons at the moment
+    export mz=multiz7way
+    export gp=refGene
+    export db=hg38
+    export pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
+    mkdir -p $pd
+    md5sum *.fa.gz > md5sum.txt
+    ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
+    ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
+    ln -s `pwd`/md5sum.txt $pd/
+
+    ### And knownCanonical
+    cd /hive/data/genomes/hg38/bed/multiz7way/pal
+    export mz=multiz7way
+    export gp=knownCanonical
+    export db=hg38
+    mkdir exonAA exonNuc ppredAA ppredNuc knownCanonical
+
+    cut -f1 ../../../chrom.sizes | while read C
+    do
+        echo $C
+	hgsql hg38 -N -e "select chrom, chromStart, chromEnd, transcript from knownCanonical where chrom='$C'" > knownCanonical/$C.known.bed
+    done
+
+    ls knownCanonical/*.known.bed | while read F
+    do
+      if [ -s $F ]; then
+         echo $F | sed -e 's#knownCanonical/##; s/.known.bed//'
+      fi
+    done | while read C
+    do
+	echo "date"
+	echo "mafGene -geneBeds=knownCanonical/$C.known.bed  $db $mz knownGene order.list stdout | \
+	    gzip -c > ppredAA/$C.ppredAA.fa.gz"
+	echo "mafGene -geneBeds=knownCanonical/$C.known.bed -noTrans $db $mz knownGene order.list stdout | \
+	    gzip -c > ppredNuc/$C.ppredNuc.fa.gz"
+	echo "mafGene -geneBeds=knownCanonical/$C.known.bed -exons -noTrans $db $mz knownGene order.list stdout | \
+	    gzip -c > exonNuc/$C.exonNuc.fa.gz"
+	echo "mafGene -geneBeds=knownCanonical/$C.known.bed -exons $db $mz knownGene order.list stdout | \
+	    gzip -c > exonAA/$C.exonAA.fa.gz"
+    done > $gp.$mz.jobs
+
+    time sh -x $gp.$mz.jobs > $gp.$mz.job.log 2>&1
+    # real    72m58.133s
+
+    rm *.known.bed
+    mz=multiz7way
+    gp=knownCanonical
+    db=hg38
+    zcat exonAA/c*.gz | gzip -c > $gp.$mz.exonAA.fa.gz &
+    zcat exonNuc/c*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz &
+    zcat ppredAA/c*.gz | gzip -c > $gp.$mz.ppredAA.fa.gz &
+    zcat ppredNuc/c*.gz | gzip -c > $gp.$mz.ppredNuc.fa.gz
+
+    rm -rf exonAA exonNuc ppredAA ppredNuc
+
+    mz=multiz7way
+    gp=knownCanonical
+    db=hg38
+    pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
+    mkdir -p $pd
+    ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
+    ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
+    cd  $pd
+    md5sum *.exon*.fa.gz > md5sum.txt
+
+#############################################################################
+# wiki page for 7-way (DONE - 2014-06-04 - Hiram)
+    mkdir /hive/users/hiram/bigWays/hg38.7way
+    cd /hive/users/hiram/bigWays
+    echo "hg38" > hg38.7way/ordered.list
+    awk '{print $1}' /hive/data/genomes/hg38/bed/multiz7way/7way.distances.txt \
+       >> hg38.7way/ordered.list
+
+    # sizeStats.sh catches up the cached measurements required for data
+    # in the tables.  They may already be done.
+    ./sizeStats.sh hg38.7way/ordered.list
+    # dbDb.sh constructs hg38.7way/Hg38_7-way_conservation_alignment.html
+    ./dbDb.sh hg38 7way
+    # sizeStats.pl constructs hg38.7way/Hg38_7-way_Genome_size_statistics.html
+    ./sizeStats.pl hg38 7way
+
+    # defCheck.pl constructs Hg38_7-way_conservation_lastz_parameters.html
+    ./defCheck.pl hg38 7way
+
+    # this constructs the html pages in hg38.7way/:
+# -rw-rw-r-- 1 4153 Jun  5 11:03 Hg38_7-way_conservation_alignment.html
+# -rw-rw-r-- 1 5833 Jun  5 11:04 Hg38_7-way_Genome_size_statistics.html
+# -rw-rw-r-- 1 3854 Jun  5 11:04 Hg38_7-way_conservation_lastz_parameters.html
+
+    # add those pages to the genomewiki.  Their page names are the
+    # names of the .html files without the .html:
+#  Hg38_7-way_conservation_alignment
+#  Hg38_7-way_Genome_size_statistics
+#  Hg38_7-way_conservation_lastz_parameters
+
+    # when you view the first one you enter, it will have links to the
+    # missing two.
+
+#############################################################################
+# GRC Incident database (DONE - 2014-06-14 - Hiram)
+    # this procedure is run as a cron job in Hiram's account:
+
+    #	33 09 * * * /hive/data/outside/grc/incidentDb/runUpdate.sh makeItSo
+
+    # data comes from: ftp://ftp.ncbi.nlm.nih.gov/pub/grc/
+    # processed by /hive/data/outside/grc/incidentDb/grcUpdate.sh
+
+    # the table in the dataBase is: grcIncidentDb
+    # which is the URL to the bb file, a single row:
+    # http://genomewiki.ucsc.edu/images/7/7f/Hg38.grcIncidentDb.bb
+
+#############################################################################
+# RepeatMasker Visualization track (DONE - 2014-07-25 - Hiram)
+    mkdir /hive/data/genomes/hg38/bed/rmskJoined
+    cd /hive/data/genomes/hg38/bed/rmskJoined
+
+    ln -s ../repeatMasker/hg38.sorted.fa.out .
+    ln -s ../repeatMasker/hg38.fa.align.gz .
+
+    # working on fixing this script for the next release of RM
+    /scratch/data/RepeatMasker140131/util/nextVerRmToUCSCTables.pl \
+            -out hg38.sorted.fa.out -align hg38.fa.align.gz
+
+    hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/rmskJoined.sql \
+        -renameSqlTable -verbose=4 -tab \
+            -type=bed4+9 -as=$HOME/kent/src/hg/lib/rmskJoined.as hg38 \
+                rmskJoinedBaseline hg38.sorted.fa.join.bed \
+                    > loadJoined.log 2>&1
+
+    hgLoadSqlTab hg38 rmskAlignBaseline \
+        /cluster/home/hiram/kent/src/hg/lib/rmskAlign.sql \
+            hg38.fa.align.tsv > loadAlign.log 2>&1
+
+    hgLoadOutJoined -verbose=2 hg38 hg38.sorted.fa.out > loadOut.log 2>&1
+
+    featureBits -countGaps hg38 rmskJoinedBaseline
+    #    2716777279 bases of 3209286105 (84.654%) in intersection
+
+##############################################################################
+# LASTZ Macaca Mulatta RheMac2 (DONE - 2014-07-13 - braney)
+    mkdir /hive/data/genomes/hg38/bed/lastzRheMac2.2014-07-11
+    cd /hive/data/genomes/hg38/bed/lastzRheMac2.2014-07-11
+
+    # best to always specify an exact path to lastz so we know which one is used
+    # lastz default parameters are human-mouse parameters
+
+    cat << '_EOF_' > DEF
+# human vs macaca mulatta
+BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
+# maximum M allowed with lastz is only 254
+BLASTZ_M=254
+BLASTZ_Q=/scratch/data/blastz/human_chimp.v2.q
+BLASTZ_O=600
+BLASTZ_E=150
+# other parameters from panTro2 vs hg18 lastz on advice from Webb
+BLASTZ_K=4500
+BLASTZ_Y=15000
+BLASTZ_T=2
+
+# TARGET: Human Hg38
+SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit
+SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes
+SEQ1_CTGDIR=/hive/data/genomes/hg38/hg38.contigs.2bit
+SEQ1_CTGLEN=/hive/data/genomes/hg38/hg38.contigs.chrom.sizes
+SEQ1_LIFT=/hive/data/genomes/hg38/jkStuff/hg38.contigs.lift
+SEQ1_CHUNK=20000000
+SEQ1_LAP=10000
+
+# QUERY: Macaca Mulatta RheMac2
+SEQ2_DIR=/scratch/data/rheMac2/rheMac2.2bit
+SEQ2_LEN=/scratch/data/rheMac2/chrom.sizes
+SEQ2_CHUNK=20000000
+SEQ2_LAP=0
+SEQ2_IN_CONTIGS=0
+
+BASE=/hive/data/genomes/hg38/bed/lastzRheMac2.2014-07-11
+TMPDIR=/dev/shm
+'_EOF_'
+    # << happy emacs
+    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl \
+        `pwd`/DEF \
+        -syntenicNet -fileServer=hgwdev \
+	-chainMinScore=5000 -chainLinearGap=medium \
+        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
+    #  Elapsed time: 141m36s
+    cat fb.hg38.chainRheMac2Link.txt
+    # 2455106923 bases of 3049335806 (80.513%) in intersection
+
+    #   running the swap
+    mkdir /hive/data/genomes/rheMac2/bed/blastz.hg38.swap
+    cd /hive/data/genomes/rheMac2/bed/blastz.hg38.swap
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+        /hive/data/genomes/hg38/bed/lastzRheMac2.2014-07-11/DEF \
+        -swap -syntenicNet -chainMinScore=5000 -chainLinearGap=medium \
+        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
+    # 83m26.095s
+    cat fb.rheMac2.chainHg38Link.txt
+    # 2313950599 bases of 2646704109 (87.428%) in intersection
+#
+
+#########################################################################
+# LASTZ Chlorocebus sabaeus  (DONE - 2014-07-13 - braney)
+    mkdir /hive/data/genomes/hg38/bed/lastzChlSab2.2014-07-11
+    cd /hive/data/genomes/hg38/bed/lastzChlSab2.2014-07-11
+
+    # best to always specify an exact path to lastz so we know which one is used
+    # lastz default parameters are human-mouse parameters
+
+    cat << '_EOF_' > DEF
+# human vs Chlorocebus sabaeus
+BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
+# maximum M allowed with lastz is only 254
+BLASTZ_M=254
+BLASTZ_Q=/scratch/data/blastz/human_chimp.v2.q
+BLASTZ_O=600
+BLASTZ_E=150
+# other parameters from panTro2 vs hg18 lastz on advice from Webb
+BLASTZ_K=4500
+BLASTZ_Y=15000
+BLASTZ_T=2
+
+
+# TARGET: Human Hg38
+SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit
+SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes
+SEQ1_CTGDIR=/hive/data/genomes/hg38/hg38.contigs.2bit
+SEQ1_CTGLEN=/hive/data/genomes/hg38/hg38.contigs.chrom.sizes
+SEQ1_LIFT=/hive/data/genomes/hg38/jkStuff/hg38.contigs.lift
+SEQ1_CHUNK=20000000
+SEQ1_LAP=10000
+
+# QUERY Chlorocebus sabaeus chlSab2
+SEQ2_DIR=/scratch/data/chlSab2/chlSab2.2bit
+SEQ2_LEN=/scratch/data/chlSab2/chrom.sizes
+SEQ2_CHUNK=20000000
+SEQ2_LAP=0
+SEQ2_IN_CONTIGS=0
+
+BASE=/hive/data/genomes/hg38/bed/lastzChlSab2.2014-07-11
+TMPDIR=/dev/shm
+'_EOF_'
+    # << happy emacs
+    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl \
+        `pwd`/DEF \
+        -syntenicNet -fileServer=hgwdev \
+	-chainMinScore=5000 -chainLinearGap=medium \
+        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
+    # Elapsed time: 142m4s
+    cat fb.hg38.chainChlSab2Link.txt
+    # 2573435303 bases of 3049335806 (84.393%) in intersection
+
+    #   running the swap
+    mkdir /hive/data/genomes/chlSab2/bed/blastz.hg38.swap
+    cd /hive/data/genomes/chlSab2/bed/blastz.hg38.swap
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+        /hive/data/genomes/hg38/bed/lastzChlSab2.2014-07-11/DEF \
+        -swap -syntenicNet -chainMinScore=5000 -chainLinearGap=medium \
+        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
+    # 88m48.411s
+    cat fb.chlSab2.chainHg38Link.txt
+    # 2429053010 bases of 2752019208 (88.264%) in intersection
+
+#########################################################################
+# SEGMENTAL DUPLICATIONS (DONE - 2014-08-13 - Hiram)
+    # redmine issue: refs #13580
+
+    # file received in email from Archana Natarajan Raja (araja at uw.edu)
+    mkdir /hive/data/genomes/hg38/bed/genomicSuperDups
+    cd /hive/data/genomes/hg38/bed/genomicSuperDups
+# -rw-r--r-- 1 16478617 Aug 11 16:18 GenomicSuperDup.tab
+
+    # no longer filtering items smaller than 1,000 bases, see note
+    # in redmine issue refs #13580
+# While the size of the 24 alignments are less than 1000 bases , the size of
+# their pairs to which they align are always >1000, you can confirm this by
+# looking at the value in column 22 in your table (alignB -ucsc format), will
+# always be >1000 bp . We are seeing this only now because there are lots of
+# new and resolved duplications added to hg38. Hence , I would recommend not
+# filtering these items and uploading the current set as is.
+
+    # there is no chrEBV in the browser:
+    grep -v chrEBV GenomicSuperDup.tab | sed -e 's/\t_\t/\t-\t/;' \
+      | hgLoadBed hg38 genomicSuperDups stdin \
+	-sqlTable=$HOME/kent/src/hg/lib/genomicSuperDups.sql
+    #  Read 69894 elements of size 29 from stdin
+
+    checkTableCoords  hg38 genomicSuperDups
+    # <silence>  (the chrEBV was found with this check)
+
+    featureBits -countGaps hg38 genomicSuperDups
+    # 175429664 bases of 3209286105 (5.466%) in intersection
+
+    featureBits -countGaps hg19 genomicSuperDups
+    #  166092393 bases of 3137161264 (5.294%) in intersection
+    featureBits -countGaps hg18 genomicSuperDups
+    #  159204446 bases of 3107677273 (5.123%) in intersection
+
+    featureBits -countGaps mm10 genomicSuperDups
+    # 214917441 bases of 2730871774 (7.870%) in intersection
+    featureBits -countGaps mm9 genomicSuperDups
+    # 208214567 bases of 2725765481 (7.639%) in intersection
+
+##############################################################################
+# cloneEnds (DONE - 2014-08-14 - Hiram)
+
+    mkdir /hive/data/genomes/hg38/bed/cloneEnds
+    cd /hive/data/genomes/hg38/bed/cloneEnds
+
+    # fetch the NCBI INSDC name correspondence file:
+    rsync -a -P rsync://ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/All/GCF_000001405.26.assembly.txt ./
+
+    # fetch the clone reports
+    mkdir reports
+    rsync -a -P \
+rsync://ftp.ncbi.nih.gov/repository/clone/reports/Homo_sapiens/*.GCF_000001405.26.106.*.gff \
+       ./reports/
+
+    # script to establish refSeq to UCSC chrom names:
+
+    cat << '_EOF_' > refSeqNames.pl
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+open (FH, "<GCF_000001405.26.assembly.txt") or die "can not read GCF_000001405.26.assembly.txt";
+while (my $line = <FH>) {
+  chomp $line;
+  next if ($line =~ m/^#/);
+  my @a = split('\t', $line);
+  my $chrN = $a[2];
+  my $refSeq = $a[6];
+  my $contig = $a[4];
+  my $type = $a[1];
+  next if (!defined $type);
+  next if (!defined $refSeq);
+  next if (!defined $contig);
+  my $suffix = "";
+  if ($type eq "alt-scaffold") {
+     $suffix = "_alt";
+  } elsif ($type eq "unlocalized-scaffold") {
+     $suffix = "_random";
+  } elsif ($type eq "unplaced-scaffold") {
+     $chrN = "Un";
+  }
+  $chrN = "M" if ($chrN eq "MT");
+  if ($a[0] =~ m/_/) {
+    $contig =~ s/\./v/;
+    printf "%s\tchr%s_%s%s\n", $refSeq, $chrN, $contig, $suffix;
+  } else {
+    printf "%s\tchr%s\n", $refSeq, $chrN;
+  }
+}
+close (FH);
+'_EOF_'
+    # << happy emacs
+
+    chmod +x refSeqNames.pl
+
+    ./refSeqNames.pl > refSeq.ucscName.tab
+
+    # establish full library list:
+    ls reports/*.GCF_000001405.26.106.*.gff | sed -e 's#reports/##' \
+       | cut -d"." -f1 | sort -u > library.list.txt
+
+    # a script to scan the GFF files, with the refSeq.ucscName.tab
+    # name correspondence to construct bed files
+
+    cat << '_EOF_' > hg38.pl
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+my $argc = scalar(@ARGV);
+
+if ($argc < 1) {
+  printf STDERR "usage: ./hg38.pl <report.gff> [moreReports.gff]\n";
+  exit 255;
+}
+
+my %refSeqToUcsc;   # key is refSeq name, value is UCSC chrom name
+open (FH, "<refSeq.ucscName.tab") or die "can not read refSeq.ucscName.tab";
+while (my $line = <FH>) {
+  chomp $line;
+  my ($refSeq, $ucsc) = split('\t', $line);
+  $refSeqToUcsc{$refSeq} = $ucsc;
+}
+close (FH);
+
+my %chromSizes;    # key is UCSC chrom name, value is chrom size
+open (FH, "</hive/data/genomes/hg38/chrom.sizes") or die "can not read hg38/chrom.sizes";
+while (my $line = <FH>) {
+  chomp $line;
+  my ($chr, $size) = split('\t', $line);
+  $chromSizes{$chr} = $size;
+}
+close (FH);
+
+while (my $file = shift) {
+my %starts;   # key is parent ID, value is "chromStart\tchromEnd" of the clone_insert_start feature
+my %ends;	# key is parent ID, value is "chromStart\tchromEnd" of the clone_insert_end feature
+my %parents;	# key is parent ID, value is 1 to signify exists
+my %endNames;   # key is parent ID, value is the Name of the parent clone_insert
+
+printf STDERR "# processing $file\n";
+
+open (FH, "<$file") or die "can not read $file";
+while (my $line = <FH>) {
+  chomp $line;
+  next if ($line=~ m/^#/);
+  my @a = split('\t', $line);
+  next if (scalar(@a) < 1);
+  my $contig = $a[0];
+  $contig =~ s/ref.//;
+  $contig =~ s/\|//;
+  my $ucscChr = $refSeqToUcsc{$contig};
+  if (!defined($ucscChr)) {
+    printf STDERR "# ERR: contig not in refSeqToUcsc: '$contig'\n";
+    next;
+  }
+  next if (! exists($chromSizes{$ucscChr}));
+  my $chromSize = $chromSizes{$ucscChr};
+  my $chromStart = $a[3] - 1;
+  my $chromEnd = $a[4];
+  if ($chromStart > $chromSize) {
+    printf STDERR "# warning chromStart over size $ucscChr $chromStart $chromEnd\n";
+    $chromStart = $chromSize-1;
+  }
+  if ($chromEnd > $chromSize) {
+    my $overRun = $chromEnd - $chromSize;
+    printf STDERR "# warning chromEnd over size by $overRun -> $ucscChr $chromStart $chromEnd\n";
+    $chromEnd = $chromSize;
+  }
+  my $id="notFound";
+  my $name="notFound";
+  my $parent="notFound";
+  my @b = split(';', $a[8]);
+  for (my $i = 0; $i < scalar(@b); ++$i) {
+     my ($tag, $value) = split('=', $b[$i]);
+     if ($tag eq "ID") {
+        $id = $value;
+        if ($id !~ m/-/) {
+          if (exists($parents{$id})) {
+            printf STDERR "# WARN: duplicate parent: $id";
+          } else {
+            $parents{$id} = $ucscChr;
+          }
+        }
+     } elsif ($tag eq "Parent") {
+        $parent = $value;
+     } elsif ($tag eq "Name") {
+        $name = $value;
+     }
+  }
+  my $type="notFound";
+  my $insertType = $a[2];
+  if ($insertType =~ m/clone_insert_start/) {
+     $type = "start";
+     if ($parent eq "notFound") {
+       printf STDERR "# ERR: can not find parent for start $name Ttype $id\n";
+     } else {
+       if (!exists($parents{$parent})) {
+         printf STDERR "# ERR: start found $name  with no parent $parent declared\n";
+       } elsif (exists($starts{$parent})) {
+         printf STDERR "# ERR: duplicate start for $parent\n";
+       } elsif ($ucscChr eq $parents{$parent}) {
+         $starts{$parent} = sprintf("%s\t%s", $chromStart, $chromEnd);
+       } else {
+         printf STDERR "# ERR: start on different chrom $ucscChr than parent $parent $parents{$parent}\n";
+       }
+     }
+  } elsif ($insertType =~ m/clone_insert_end/) {
+     $type = "end";
+     if ($parent eq "notFound") {
+       printf STDERR "# ERR: can not find parent for end $name Ttype $id\n";
+     } else {
+       if (!exists($parents{$parent})) {
+         printf STDERR "# ERR: end found $name  with no parent $parent declared\n";
+       } elsif (exists($ends{$parent})) {
+         printf STDERR "# ERR: duplicate end for $parent\n";
+       } elsif ($ucscChr eq $parents{$parent}) {
+         $ends{$parent} = sprintf("%s\t%s", $chromStart, $chromEnd);
+       } else {
+         printf STDERR "# ERR: end on different chrom $ucscChr than parent $parent $parents{$parent}\n";
+       }
+     }
+  } elsif ($insertType =~ m/clone_insert/) {
+     $type = "insert";
+     $endNames{$id} = $name;
+  }
+  $name =~ s/gi\|//g;
+  $id =~ s/gi\|//g;
+  printf STDERR "%s\t%d\t%d\t%s_%s_%s\t0\t%s\n", $ucscChr, $chromStart, $chromEnd, $name, $type, $id, $a[6];
+}	# while (my $line = <FH>)
+
+close (FH);
+
+foreach my $parent (keys %parents) {
+  if (! exists($starts{$parent}) ) {
+    printf STDERR "# ERR: no start for $parent\n";
+  } elsif (! exists($ends{$parent}) ) {
+    printf STDERR "# ERR: no end for $parent\n";
+  } else {
+    my $strand = "+";
+    my $chrStart = 0;
+    my $chrEnd = 0;
+    my $blockStart = 0;
+    my ($sStart, $sEnd) = split('\t', $starts{$parent});
+    my ($eStart, $eEnd) = split('\t', $ends{$parent});
+    my $startSize = $sEnd - $sStart;
+    my $endSize = $eEnd - $eStart;
+    if ($eStart < $sStart) {
+      $chrStart = $eStart;
+      $chrEnd = $sEnd;
+      $blockStart = $sStart - $chrStart;
+      $strand = "-";
+      $startSize = $eEnd - $eStart;
+      $endSize = $sEnd - $sStart;
+    } else {
+      $chrStart = $sStart;
+      $chrEnd = $eEnd;
+      $blockStart = $eStart - $chrStart;
+    }
+    if ($startSize > $blockStart) {
+      printf STDERR "# startSize > blockStart $endNames{$parent}\n";
+    } else {
+      printf "%s\t%d\t%d\t%s\t0\t%s\t%d\t%d\t0\t2\t%d,%d\t0,%d\n", $parents{$parent}, $chrStart, $chrEnd, $endNames{$parent}, $strand, $chrStart, $chrEnd, $startSize, $endSize, $blockStart;
+    }
+  }
+}
+}
+'_EOF_'
+    # << happy emacs
+
+    chmod +x hg38.pl
+
+    # process GFF files into bed files into separateLibs/ directory
+for L in `cat library.list.txt`
+do
+   export destDir="separateLibs/${L}"
+   echo "working: ${L}" 1>&1   # NOTE(review): "1>&1" is a no-op; "1>&2" (stderr) was presumably intended
+   mkdir -p "${destDir}"
+   ./hg38.pl reports/${L}.GCF_000001405.26.106.*.gff \
+       2> ${destDir}/tmp.bed6 | sort -k1,1 -k2,2n > ${destDir}/hg38.${L}.bed
+   sort -k1,1 -k2,2n ${destDir}/tmp.bed6 > ${destDir}/hg38.${L}.items.bed6
+done
+
+    # use only those libraries with more than 20,000 clone ends
+    wc -l separateLibs/*/*.bed | sort -n | grep -v total | awk '$1 > 20000' \
+        | sed -e 's#.*separateLibs/##; s#/.*##' > libs.over20K.list
+
+    # note those libraries with less than 20,000 clone ends
+    wc -l separateLibs/*/*.bed | grep -v total | awk '$1 < 20000' | sed -e 's#.*separateLibs/##; s#/.*##' > libs.under20K.list
+
+    # filter out bad ends, length must be <= median size times three
+    cat libs.over20K.list | while read D
+do
+   if [ ! -s separateLibs/${D}/lengths.txt ]; then
+      awk '{print $3-$2}' separateLibs/${D}/hg38.${D}.bed \
+        > separateLibs/${D}/lengths.txt
+   fi
+   median3X=`ave separateLibs/${D}/lengths.txt | grep median | awk '{printf "%d", $2*3}'`
+   awk '($3-$2) < '$median3X'' separateLibs/${D}/hg38.${D}.bed > separateLibs/${D}/hg38.median3X.bed
+   awk '($3-$2) >= '$median3X'' separateLibs/${D}/hg38.${D}.bed > separateLibs/${D}/hg38.badMap.bed
+   before=`cat separateLibs/${D}/hg38.${D}.bed | wc -l`
+   after=`cat separateLibs/${D}/hg38.median3X.bed | wc -l`
+   dropped=`echo $before $after | awk '{print $1-$2}'`
+   perCent=`echo $dropped $before | awk '{printf "%.2f", 100*'$dropped/$before'}'`
+   echo "$D $before - $after = $dropped -> % $perCent dropped"
+done
+
+#  ABC20 24692 - 24474 = 218 -> % 0.88 dropped
+#  RP11 86660 - 85903 = 757 -> % 0.87 dropped
+#  CTD 95853 - 94941 = 912 -> % 0.95 dropped
+#  CH17 105618 - 105060 = 558 -> % 0.53 dropped
+#  ABC21 182154 - 180973 = 1181 -> % 0.65 dropped
+#  ABC22 189939 - 188743 = 1196 -> % 0.63 dropped
+#  COR02 208263 - 206782 = 1481 -> % 0.71 dropped
+#  ABC18 325080 - 322904 = 2176 -> % 0.67 dropped
+#  ABC27 334178 - 331822 = 2356 -> % 0.71 dropped
+#  ABC24 398944 - 395776 = 3168 -> % 0.79 dropped
+#  ABC23 436965 - 433896 = 3069 -> % 0.70 dropped
+#  ABC16 452220 - 449101 = 3119 -> % 0.69 dropped
+#  COR2A 583008 - 578578 = 4430 -> % 0.76 dropped
+#  WI2 587165 - 582843 = 4322 -> % 0.74 dropped
+#  ABC7 649297 - 644071 = 5226 -> % 0.80 dropped
+#  ABC11 729962 - 724864 = 5098 -> % 0.70 dropped
+#  ABC9 755994 - 750648 = 5346 -> % 0.71 dropped
+#  ABC12 777816 - 771827 = 5989 -> % 0.77 dropped
+#  ABC10 787969 - 781331 = 6638 -> % 0.84 dropped
+#  ABC13 810822 - 803589 = 7233 -> % 0.89 dropped
+#  ABC14 845573 - 839126 = 6447 -> % 0.76 dropped
+#  ABC8 1204275 - 1192784 = 11491 -> % 0.95 dropped
+
+   # loading the median3X files
+for L in `cat libs.over20K.list`
+do
+    echo $L 1>&2
+    hgLoadBed -type=bed12 hg38 cloneEnd${L} \
+       separateLibs/${L}/hg38.median3X.bed \
+        > separateLibs/loadBed.${L}.log 2>&1
+done
+
+   # loading the dropped ends:
+   mkdir /hive/data/genomes/hg38/bed/cloneEnds/droppedTooBig
+   # link them to here
+   cat ../libs.over20K.list | while read L
+do
+  ln -s ../separateLibs/${L}/hg38.badMap.bed ${L}.badMap.bed
+done
+  # then load
+  hgLoadBed -type=bed12 hg38 cloneEndbadEnds *.badMap.bed
+
+    # construct multiple mapped ends:
+for L in `cat libs.over20K.list`
+do
+    cat separateLibs/${L}/hg38.median3X.bed
+done | sort -k4 > allEnds.bed
+
+    cut -f4 allEnds.bed | sort | uniq -c | sort -rn > allEnds.names.count.txt
+
+    awk '$1 > 1' allEnds.names.count.txt | awk '{print $2}' \
+       | sort > multiples.names.txt
+
+    join -t'	' -o "2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8,2.9,2.10,2.11,2.12" \
+       -2 4 multiples.names.txt allEnds.bed | sort -k1,1 -k2,2n \
+           > allEnds.multiple.locations.bed
+
+    hgLoadBed -type=bed12 hg38 cloneEndmultipleMaps \
+        allEnds.multiple.locations.bed > load.multipleMaps.log 2>&1
+
+    awk '$6 == "+"' allEnds.bed | sort -k1,1 -k2,2n \
+      | bedItemOverlapCount hg38 stdin > allEnds.forward.bedGraph
+
+    awk '$6 == "-"' allEnds.bed | sort -k1,1 -k2,2n \
+      | bedItemOverlapCount hg38 stdin > allEnds.reverse.bedGraph
+
+    bedGraphToBigWig allEnds.forward.bedGraph \
+       /hive/data/genomes/hg38/chrom.sizes \
+         cloneEndcoverageForward.bw
+
+    bedGraphToBigWig allEnds.reverse.bedGraph \
+       /hive/data/genomes/hg38/chrom.sizes \
+          cloneEndcoverageReverse.bw
+
+    mkdir /gbdb/hg38/bbi/cloneEnd
+    ln -s `pwd`/cloneEndcoverageForward.bw /gbdb/hg38/bbi/cloneEnd
+    ln -s `pwd`/cloneEndcoverageReverse.bw /gbdb/hg38/bbi/cloneEnd
+
+    hgBbiDbLink hg38 cloneEndcoverageForward \
+        /gbdb/hg38/bbi/cloneEnd/cloneEndcoverageForward.bw
+    hgBbiDbLink hg38 cloneEndcoverageReverse \
+        /gbdb/hg38/bbi/cloneEnd/cloneEndcoverageReverse.bw
+
+    ### Fixup the scores to indicate how many multiple mappings as mentioned
+    ### in the hg19 bacEnds description page: one mapping: score = 1000
+    ### multiple mappings: score = 1500/count
+    ### the sort | uniq -c | awk does this score calculation with the name
+    ###   in column 1
+    ### The join puts the existing table together with those scores
+    ### DONE - 2015-06-18 - Hiram
+
+    mkdir /hive/data/genomes/hg38/bed/cloneEnds/addCounts
+    cd /hive/data/genomes/hg38/bed/cloneEnds/addCounts
+    mkdir score withScore noScore
+    for table in cloneEndABC10 cloneEndABC11 cloneEndABC12 cloneEndABC13 \
+cloneEndABC14 cloneEndABC16 cloneEndABC18 cloneEndABC20 cloneEndABC21 \
+cloneEndABC22 cloneEndABC23 cloneEndABC24 cloneEndABC27 cloneEndABC7 \
+cloneEndABC8 cloneEndABC9 cloneEndCH17 cloneEndCOR02 cloneEndCOR2A \
+cloneEndCTD cloneEndRP11 cloneEndWI2 cloneEndbadEnds cloneEndmultipleMaps
+do
+  hgsql -N -e "select name from $table;" hg38 | sort | uniq -c |
+      awk '{ if (1 == $1) {printf "%s\t1000\n", $2} else {printf "%s\t%d\n", $2, 1500/$1} }' \
+         | sort > score/hg38.$table.score.tab
+  hgsql -N -e "select * from $table order by name;" hg38 \
+      | sort -k5 > noScore/hg38.$table.tab
+  join -t'^I' -1 5 noScore/hg38.$table.tab score/hg38.$table.score.tab \
+  | awk '{printf "%d\t%s\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t%d\t%d\t%s\t%s\n", $2,$3,$4,$5,$1,$14,$7,$8,$9,$10,$11,$12,$13}' \
+    | sort -k2,2 -k3,3n > withScore/hg38.$table.withScore.tab
+  hgsql -e "delete from $table;" hg38
+  hgsql -e "load data local infile \"withScore/hg38.$table.withScore.tab\" into table $table;" hg38
+done
+
+##############################################################################
+# SIB Transcriptome (DONE 2014-08-27 Steve)
+
+    # Create working directory and download data from where Christian
+    # Iseli (christian.iseli@unil.ch) put it, and unpack.
+    mkdir -p /hive/data/genomes/hg38/bed/sibTranscriptome
+    cd /hive/data/genomes/hg38/bed/sibTranscriptome
+    wget --timestamping http://ludwig-sun1.unil.ch/~chris/HTr.gtf.gz
+    wget --timestamping http://ludwig-sun1.unil.ch/~chris/txg.tar.gz
+
+    tar -zxvf txg.tar.gz
+
+    zcat HTr.gtf.gz | ldHgGene hg38 sibGene stdin
+    # Reading stdin
+    # Read 208508 transcripts in 2824960 lines in 1 files
+    # 208508 groups 25 seqs 1 sources 2 feature types
+    # 208508 gene predictions
+
+    # Do a little data cleanup and transformation and load splice graphs
+    # into database.
+    sed 's/altGraphX/sibTxGraph/' ~/kent/src/hg/lib/altGraphX.sql > sibTxGraph.sql
+    cat txg/*.txg | txgToAgx stdin stdout | hgLoadBed -notItemRgb \
+      -sqlTable=sibTxGraph.sql hg38 sibTxGraph stdin
+    # Reading stdin
+    # Read 47817 elements of size 18 from stdin
+    # Sorted
+    # Creating table definition for sibTxGraph from sql: sibTxGraph.sql
+    # Saving bed.tab
+    # Loading hg38
+
+    # Create sibAltEvents track for analyzed alt-splices.
+    # Not on RR for hg18 and hg19, so do not push it out
+    cat txg/*.txg | txgAnalyze stdin /cluster/data/hg38/hg38.2bit sibAltEvents.bed
+    awk '$2 >= 0' sibAltEvents.bed | sort | uniq > foo.bed
+    hgLoadBed hg38 sibAltEvents foo.bed
+    # Reading foo.bed
+    # Read 452436 elements of size 6 from foo.bed
+    # Sorted
+    # Creating table definition for sibAltEvents, bedSize: 6
+    # Saving bed.tab
+    # Loading hg38
+
+    # push sibGene and sibTxGraph for hg38
+
+############################################################################
+# Orangutan Lastz run (DONE - 2014-05-27 - Hiram)
+    screen -S hg38PonAbe2      # use a screen to manage this longish running job
+    mkdir /hive/data/genomes/hg38/bed/lastzPonAbe2.2014-09-02
+    cd /hive/data/genomes/hg38/bed/lastzPonAbe2.2014-09-02
+
+    # always set the BLASTZ program so we know what version was used
+    cat << '_EOF_' > DEF
+# human vs orangutan
+BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
+BLASTZ_O=600
+BLASTZ_E=150
+# maximum M allowed with lastz is only 254
+BLASTZ_M=254
+
+BLASTZ_T=2
+BLASTZ_Y=15000
+BLASTZ_K=4500
+BLASTZ_Q=/scratch/data/blastz/human_chimp.v2.q
+#    A    C    G    T
+#    90 -330 -236 -356
+#  -330  100 -318 -236
+#  -236 -318  100 -330
+#  -356 -236 -330   90
+
+# TARGET: Human Hg38
+SEQ1_DIR=/scratch/data/hg38/hg38.2bit
+SEQ1_LEN=/scratch/data/hg38/chrom.sizes
+SEQ1_CHUNK=20000000
+SEQ1_LAP=10000
+SEQ1_IN_CONTIGS=0
+
+# QUERY: Orangutan PonAbe2
+SEQ2_DIR=/hive/data/genomes/ponAbe2/ponAbe2.2bit
+SEQ2_LEN=/hive/data/genomes/ponAbe2/chrom.sizes
+SEQ2_CHUNK=10000000
+SEQ2_LAP=0
+SEQ2_LIMIT=100
+SEQ2_IN_CONTIGS=0
+
+BASE=/hive/data/genomes/hg38/bed/lastzPonAbe2.2014-09-02
+TMPDIR=/dev/shm
+'_EOF_'
+
+    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
+        -chainMinScore=5000 -chainLinearGap=medium \
+        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
+        -syntenicNet) > do.log 2>&1
+    # real    144m46.575s
+    cat fb.hg38.chainPonAbe2Link.txt
+    # 2719618310 bases of 3049335806 (89.187%) in intersection
+
+    # filter with doRecipBest.pl
+    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` \
+        hg38 ponAbe2) > rbest.log 2>&1
+    # real    60m1.060s
+    time (doRecipBest.pl -load -continue=load -workhorse=hgwdev \
+	-buildDir=`pwd` hg38 ponAbe2) > loadRBest.log 2>&1 &
+    # real    3m35.834s
+
+    cat fb.hg38.chainRBestPonAbe2Link.txt
+    # 2538296592 bases of 3049335806 (83.241%) in intersection
+
+    # running the swap
+    mkdir /hive/data/genomes/ponAbe2/bed/blastz.hg38.swap
+    cd /hive/data/genomes/ponAbe2/bed/blastz.hg38.swap
+    time (doBlastzChainNet.pl -verbose=2 \
+        -swap /hive/data/genomes/hg38/bed/lastzPonAbe2.2014-09-02/DEF \
+        -chainMinScore=5000 -chainLinearGap=medium \
+        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
+        -syntenicNet) > swap.log 2>&1
+    # real    102m27.866s
+    cat fb.ponAbe2.chainHg38Link.txt
+    #  2773568958 bases of 3093572278 (89.656%) in intersection
+
+    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` \
+        ponAbe2 hg38) > rbest.log 2>&1
+    # real    78m47.312s
+
+
+
+
+#############################################################################
+# Add chrX alts to par (DONE 2014-10-14 angie)
+# Thanks to Hiram for pointing out that intersecting chrX positions in
+# altLocations and par shows whether a chrX alt overlaps a PAR.
+    cd /hive/data/genomes/hg38/bed/par
+    hgsql hg38 -e 'select * from altLocations where chrom = "chrX"'
+#+-----+-------+------------+----------+---------------------+
+#| bin | chrom | chromStart | chromEnd | name                |
+#+-----+-------+------------+----------+---------------------+
+#|  73 | chrX  |     319337 |   601516 | chrX_KI270880v1_alt |
+#|  73 | chrX  |     326487 |   601516 | chrX_KI270913v1_alt |
+#| 149 | chrX  |   79965153 | 80097082 | chrX_KI270881v1_alt |
+#+-----+-------+------------+----------+---------------------+
+    hgsql hg38 -e 'select * from par where chrom = "chrX"'
+#+-----+-------+------------+-----------+------+
+#| bin | chrom | chromStart | chromEnd  | name |
+#+-----+-------+------------+-----------+------+
+#|   9 | chrX  |      10000 |   2781479 | PAR1 |
+#| 221 | chrX  |  155701382 | 156030895 | PAR2 |
+#+-----+-------+------------+-----------+------+
+    # chrX_KI270880v1_alt and chrX_KI270913v1_alt are entirely contained in PAR1;
+    # chrX_KI270881v1_alt is not in either PAR.
+    hgsql hg38 -e 'select chrom,size from chromInfo \
+                     where chrom in ("chrX_KI270880v1_alt", "chrX_KI270913v1_alt");'
+#+---------------------+--------+
+#| chrom               | size   |
+#+---------------------+--------+
+#| chrX_KI270880v1_alt | 284869 |
+#| chrX_KI270913v1_alt | 274009 |
+#+---------------------+--------+
+    # Process that into bed4 with name=PAR1:
+    hgsql hg38 -NBe 'select chrom, 0, size, "PAR1" from chromInfo \
+                       where chrom in ("chrX_KI270880v1_alt", "chrX_KI270913v1_alt");' \
+      >> hg38Par.bed4
+    hgLoadBed hg38 par hg38Par.bed4
+    checkTableCoords hg38 par
+
+
+#############################################################################
+# LASTZ Cow bosTau8 (DONE - 2014-10-15 - Steve)
+    mkdir /hive/data/genomes/hg38/bed/lastzBosTau8.2014-10-15
+    cd /hive/data/genomes/hg38/bed/lastzBosTau8.2014-10-15
+
+    cat << '_EOF_' > DEF
+# human vs cow
+# maximum M allowed with lastz is only 254
+BLASTZ_M=254
+
+# TARGET: Human hg38
+SEQ1_DIR=/scratch/data/hg38/hg38.2bit
+SEQ1_LEN=/scratch/data/hg38/chrom.sizes
+SEQ1_CHUNK=10000000
+SEQ1_LAP=10000
+
+# QUERY: Cow bosTau8
+SEQ2_DIR=/hive/data/genomes/bosTau8/bosTau8.2bit
+SEQ2_LEN=/hive/data/genomes/bosTau8/chrom.sizes
+SEQ2_CHUNK=10000000
+SEQ2_LAP=0
+
+
+BASE=/hive/data/genomes/hg38/bed/lastzBosTau8.2014-10-15
+TMPDIR=/scratch/tmp
+'_EOF_'
+    # << happy emacs
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+        `pwd`/DEF \
+        -syntenicNet \
+        -noLoadChainSplit \
+        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
+        -chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1
+    # real    602m37.523s
+    cat fb.hg38.chainBosTau8Link.txt
+    # 1401921010 bases of 3049335806 (45.975%) in intersection
+    # Create link
+    cd /hive/data/genomes/hg38/bed
+    ln -s  lastzBosTau8.2014-10-15 lastz.bosTau8
+
+    #   running the swap
+    mkdir /hive/data/genomes/bosTau8/bed/blastz.hg38.swap
+    cd /hive/data/genomes/bosTau8/bed/blastz.hg38.swap
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+        /hive/data/genomes/hg38/bed/lastzBosTau8.2014-10-15/DEF \
+        -swap  -syntenicNet \
+        -noLoadChainSplit \
+        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
+        -chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1
+    #   real     116m32.121s
+    cat fb.bosTau8.chainHg38Link.txt
+    #   1336307377 bases of 2649307237 (50.440%) in intersection
+    cd /hive/data/genomes/bosTau8/bed
+    ln -s blastz.hg38.swap lastz.hg38
+
+############################################################################
+# NCBI ClinVar (new version -DONE - 2014-11-08 - Max)
+# see hg19.txt
+#########################################################################
+
+########################################################################
+# CNV Developmental Delay track (2014-11-21 Steve)
+
+    mkdir /hive/data/genomes/hg38/bed/cnvDevDelay
+    cd /hive/data/genomes/hg38/bed/cnvDevDelay
+
+wget --timestamping 'ftp://ftp.ncbi.nlm.nih.gov/pub/dbVar/data/Homo_sapiens/by_study/nstd100_Coe_et_al_2014/gvf/nstd100_Coe_et_al_2014.GRCh38.remap.all.germline.ucsc.gvf.gz'
+wget --timestamping 'ftp://ftp.ncbi.nlm.nih.gov/pub/dbVar/data/Homo_sapiens/by_study/nstd54_Cooper_et_al_2011/gvf/nstd54_Cooper_et_al_2011.GRCh38.remap.all.germline.ucsc.gvf.gz'
+
+cp /kent/src/hg/utils/automation/gvfToBed8Attrs.pl .
+mv gvfToBed8Attrs.pl gvfToBed8AttrsCase.pl
+cp gvfToBed8AttrsCase.pl gvfToBed8AttrsControl100.pl
+cp gvfToBed8AttrsCase.pl gvfToBed8AttrsControl54.pl
+
+# made three local copies of Angie's gvf conversion script - one to include
+# only case individuals from nstd100, one to include only control individuals
+# from nstd100 and one to include only control individuals from nstd54
+
+# had to add an additional elsif statement to the nstd100 scripts to filter
+# based on sample_name field:
+
+#  } elsif ($tag eq "sample_name") {
+#    $sample_name = $val;
+#  }
+
+# added line 33/35 to each file:
+
+# next if ($sample_name eq "Unknown"); # keep only "case" individuals from nstd100
+# next if ($sample_name ne "Unknown"); # keep only "control" individuals from nstd100
+# next if ($phenotype ne "not_reported"); # keep only "control" individuals from nstd54
+
+zcat nstd100_Coe_et_al_2014.GRCh38.remap.all.germline.ucsc.gvf.gz | gvfToBed8AttrsCase.pl > cnvDevDelayAllCase.bed
+zcat nstd100_Coe_et_al_2014.GRCh38.remap.all.germline.ucsc.gvf.gz | gvfToBed8AttrsControl100.pl > cnvDevDelayAllControl.bed
+zcat nstd54_Cooper_et_al_2011.GRCh38.remap.all.germline.ucsc.gvf.gz | gvfToBed8AttrsControl54.pl >> cnvDevDelayAllControl.bed
+
+# GRCh38 data from dbVar had different naming scheme for alternate chromosomes
+# (e.g., chr1|NT_187515.1 instead of chr1_KI270762v1_alt), so needed to write
+# a script to substitute the correct UCSC names
+
+    cat << '_EOF_' > chromXref.pl
+#!/usr/bin/env perl
+# chromXref.pl: rewrite dbVar-style alt/random chrom names in a file
+# (e.g. "chr1|NT_187515.1") to UCSC names (e.g. "chr1_KI270762v1_alt").
+# Reads the accession mapping from hg38.xref in the current directory.
+# Usage: ./chromXref.pl <infile> <outfile>
+
+use strict;
+use warnings;
+
+# Print usage to stderr.  (The missing trailing semicolon is legal Perl
+# for the final statement of a block.)
+sub usage() {
+  printf STDERR "usage: ./chromXref.pl <infile> <outfile>\n"
+}
+
+my $argc = scalar(@ARGV);
+
+if ($argc != 2) {
+  usage;
+  exit 255;
+}
+
+# hg38.xref columns (from GCF_000001405.26.assembly.txt, built below):
+# sequence-role, chromosome, GenBank accession, RefSeq accession.
+open (file1, "<hg38.xref") or die "cannot read hg38.xref";
+
+# accArray[$i][0] = UCSC-style name suffix, accArray[$i][1] = RefSeq acc
+my @accArray = ();
+my $i = 0;
+while (my $line = <file1>) {
+  chomp($line);
+  my ($type, $chr, $acc1, $acc2) = split('\t', $line);
+  # keep only the part before '-', e.g. "alt-scaffold" -> "alt"
+  ($type, undef) = split('-', $type);
+  # split "KI270762.1" into base accession and version number
+  ($acc1, my $version) = split('\.', $acc1);
+  if ($type eq "unlocalized") {
+    $type = "random";
+  }
+  # e.g. "_KI270762v1_alt"; the leading "chrN" is already present in the
+  # input (before the '|'), so only the suffix is substituted below.
+  my $ucscAcc = "_" . $acc1 . "v" . $version . "_" . $type;
+  $accArray[$i][0] = $ucscAcc;
+  $accArray[$i][1] = $acc2;
+  $i++;
+}
+
+close (file1);
+
+# NOTE(review): the die message for the output handle also says "cannot
+# read"; "cannot write" would be accurate.
+open (file2, "<$ARGV[0]") or die "cannot read $ARGV[0]";
+open (file3, ">$ARGV[1]") or die "cannot read $ARGV[1]";
+# undef the input record separator so the next read slurps the whole file
+local $/;
+my $fileContents = <file2>;
+# Globally replace each "|<RefSeq acc>" with its UCSC suffix, turning
+# "chr1|NT_187515.1" into "chr1_KI270762v1_alt".
+# NOTE(review): the accession is interpolated into the regex unescaped,
+# so its '.' matches any character -- harmless for these accessions.
+for ($i = 0; $i < scalar(@accArray); $i++) {
+  my $temp1 = $accArray[$i][1];
+  my $temp2 = $accArray[$i][0];
+  if ($fileContents =~ m/\|$temp1/) {
+    $fileContents =~ s/\|$temp1/$temp2/g;
+  }
+}
+
+print file3 $fileContents;
+close (file2);
+close (file3);
+'_EOF_'
+    # << happy emacs
+
+cp /hive/data/genomes/hg38/genbank/GCF_000001405.26.assembly.txt .
+
+cat GCF_000001405.26.assembly.txt | grep -v '^#\|assembled\|unplaced' | awk '{print $2 "\t" $3 "\t" $5 "\t" $7}' > hg38.xref
+
+chromXref.pl cnvDevDelayAllCase.bed cnvDevDelayAllCaseUcsc.bed
+chromXref.pl cnvDevDelayAllControl.bed cnvDevDelayAllControlUcsc.bed
+
+hgLoadBed -tab -renameSqlTable -sqlTable=$HOME/kent/src/hg/lib/bed8Attrs.sql \
+        -allowStartEqualEnd hg38 cnvDevDelayCase cnvDevDelayAllCaseUcsc.bed
+
+hgLoadBed -tab -renameSqlTable -sqlTable=$HOME/kent/src/hg/lib/bed8Attrs.sql \
+        -allowStartEqualEnd hg38 cnvDevDelayControl cnvDevDelayAllControlUcsc.bed
+
+    checkTableCoords hg38 cnvDevDelayCase
+    checkTableCoords hg38 cnvDevDelayControl
+
+
+#########################################################################
+# RETROFINDER RETROPOSED GENES ucscRetro track VERSION 9
+# (2015-01-12 - 2015-01-20, hartera, DONE)
+ssh hgwdev
+mkdir -p /hive/groups/gencode/pseudogenes/retroFinder/hg38.20150112
+cd /hive/groups/gencode/pseudogenes/retroFinder/hg38.20150112
+
+cat << '_EOF_' > DEF
+
+RETRO_OPTIONS="-verbose=4 -minAli=0.98 -nearTop=0.005 "
+VERSION=9
+RUNDATE="2015-01-12"
+DB=hg38
+SCORETHRESH=550
+GENOMENAME='Homo sapiens'
+GBDB=hg
+DATE=20150112
+RUNDIR=/hive/groups/gencode/pseudogenes/retroFinder/$DB.$DATE
+BINDIR=/hive/users/hartera/GencodeWG/retroFinder/branches/version2/bin
+KENTDIR=/cluster/home/hartera/kent
+KENTBINDIR=/cluster/home/hartera/bin/x86_64
+MRNABASE=/hive/data/genomes/$DB/bed/mrnaBlastz.$VERSION
+TMPMRNA=$RUNDIR/mrnaBlastz/$DB
+TMPEST=$RUNDIR/est/$DB
+USEALTSEQS=0
+EST=all_est
+SPLICED_EST=intronEst
+SPLIT_EST=0
+SPLIT_SPLICED_EST=0
+LASTZPROG=/cluster/bin/penn/x86_64/lastz
+SCRIPT=/hive/users/hartera/GencodeWG/retroFinder/branches/version2/src/pipeline
+GENOME=/hive/data/genomes
+RETRODIR=$GENOME/$DB/bed/retro
+BASE=$RUNDIR/retro
+OUTDIR=${BASE}/version${VERSION}/${DB}
+RESULT=$OUTDIR/result
+RESULTSPLIT=$OUTDIR/resultSplit
+LOG=$OUTDIR/log
+OUT=$OUTDIR/out
+OVERLAPDIR=$OUTDIR/run.o
+TABLE=ucscRetroInfo$VERSION
+ORTHOTABLE=ucscRetroOrtho$VERSION
+ALIGN=ucscRetroAli$VERSION
+LOCAL=/scratch/data/$DB
+TWOBIT=$GENOME/$DB/$DB.2bit
+RMSK=rmsk
+NET1=netMm10
+NET2=netCanFam3
+NET3=netRheMac3
+# these two nets determine which retros are classified as ancient,
+# use two farthest nets
+ANCIENT1=netMm10
+ANCIENT2=netCanFam3
+GENE1=knownGene
+GENE2=refGene
+GENE3=wgEncodeGencodeCompV19
+CLUSTER=ku
+SPECIES="hg38 mm10"
+ROOTDIR="/cluster/home/hartera/public_html/retro/hg38Jun14"
+WEBROOT=$ROOTDIR/retro.$VERSION
+WEBSERVER=http://hgwdev-hartera.soe.ucsc.edu
+SHUFFLEDIR=shuffle
+SHUFFLEROOT=$WEBROOT/$SHUFFLEDIR
+DUPDIR=dups
+DUPROOT=$WEBROOT/$DUPDIR
+AGEDIR=age
+AGEROOT=$WEBROOT/$AGEDIR
+EXPDIR=exp
+GENEPFAM=knownGene
+PFAM=knownToPfam
+PFAMIDFIELD=name
+PFAMDOMAIN=value
+ALTSPICE=
+#ALTSPLICE=sibTxGraph
+SPLITBYAGE=$SCRIPT/splitRetrosByAge
+PDB=proteins140122
+#ARRAY=gnfAtlas2
+#AFFYPROBE="affyU133A,affyGnf1h"
+#ARRAYMEDIAN=hgFixed.gnfHumanAtlas2Median
+#ARRAYRATIO=hgFixed.gnfHumanAtlas2AllRatio
+#ARRAYABS=hgFixed.gnfHumanAtlas2All
+#ARRAYEXP=hgFixed.gnfHumanAtlas2MedianExps
+#ARRAYEXPALL=hgFixed.gnfHumanAtlas2AllExps
+#ARRAYLOOKUP=knownToGnfAtlas2
+#ARRAYPSLS="/hive/data/genomes/hg19/bed/geneAtlas2/affyU133A.psl /hive/data/genomes/hg19/bed/geneAtlas2/affyGnf1h.psl"
+'_EOF_'
+    # << happy emacs
+chmod +x DEF
+
+mkdir -p /hive/data/genomes/hg38/bed/retro
+mkdir -p /hive/data/genomes/hg38/bed/mrnaBlastz.9
+mkdir -p /hive/groups/gencode/pseudogenes/retroFinder/hg38.20150112/mrnaBlastz
+cd /hive/groups/gencode/pseudogenes/retroFinder/hg38.20150112/mrnaBlastz
+cp ../DEF .
+
+# Create S1.len file from chrom.sizes without random chroms or chrM;
+# there are many alt loci also
+# in hg38 that were not in hg19 so 285 chroms total.
+cat /hive/data/genomes/hg38/chrom.sizes | grep -v random \
+   | grep -v chrUn | grep -v chrM > S1.len
+cp S1.len /hive/data/genomes/hg38/bed/mrnaBlastz.9
+
+screen
+# Run steps 1 to 5 of RetroFinder pipeline from scripts in CCDS SVN source tree:
+retroFinder/branches/version2/src/pipeline/ucscStep1.sh DEF
+# check cluster jobs on ku
+retroFinder/branches/version2/src/pipeline/ucscStep2.sh DEF
+retroFinder/branches/version2/src/pipeline/ucscStep3.sh DEF
+#check cluster jobs on ku
+retroFinder/branches/version2/src/pipeline/ucscStep4.sh DEF
+#check cluster jobs on ku
+    # Load the track
+retroFinder/branches/version2/src/pipeline/ucscStep5.sh DEF
+cd /hive/groups/gencode/pseudogenes/retroFinder/hg38.20150112/retro/version9/hg38
+retroFinder/branches/version2/src/pipeline/filterMrna.sh
+retroFinder/branches/version2/src/pipeline/filterEst.sh
+# Check cluster jobs on ku
+retroFinder/branches/version2/src/pipeline/analyseExpress.sh
+# Check cluster jobs on ku
+#added ucscRetroAli9 to kent/src/hg/makeDb/human/hg38/trackDb.ra
+# copied
+# /hive/groups/gencode/pseudogenes/retroFinder/hg38.20150112/retro/version9/hg38/trackDb.retro
+# entry to kent/src/hg/makeDb/trackDb/human/hg38/trackDb.ra and edited it to
+# remove the full date and add:
+# dataVersion Jan. 2015
+# Scripts copied ucscRetroAli9.psl, ucscRetroInfo9.bed and ucscRetroCds9.tab
+# to /hive/data/genomes/hg38/bed/retro/
+
+##########
+# Make dbVar chrom to UCSC chrom lift file
+#  DONE braney 2/12/15
+cd /cluster/data/hg38/jkStuff
+sort /cluster/data/hg38/chrom.sizes > tmpChrom
+grep -v '^#\|assembled' /hive/data/genomes/hg38/genbank/GCF_000001405.26.assembly.txt | awk 'BEGIN {OFS="\t"} {print "chr" $3 "_" $5 "_" $2, "chr" $3 "|"$7}' | sed 's/-scaffold//' | sed 's/unlocalized/random/' | sed 's/_unplaced//' | sed 's/chrna/chrUn/g' | sed 's/\./v/'  | sort | join /dev/stdin tmpChrom | awk 'BEGIN {OFS="\t"} {print 0, $2, $3, $1, $3}'  > dbVar.lift
+awk 'BEGIN {OFS="\t"} {print 0, $1, $2, $1, $2}' /cluster/data/hg38/chrom.sizes >> dbVar.lift
+rm tmpChrom
+
+#########################################################################
+# UCSC to RefSeq name correspondence (DONE - 2015-04-13 - Hiram)
+
+    mkdir /hive/data/genomes/hg38/bed/ucscToRefSeq
+    cd /hive/data/genomes/hg38/bed/ucscToRefSeq
+
+    # columns 5 and 7 are the INSDC and RefSeq names
+
+    grep -v "^#" ../../genbank/GCF_000001405.26.assembly.txt \
+      | awk -F'\t' '{printf "%s\t%s\n", $5,$7}'  | sort > insdc.refSeq.tab
+
+    hgsql -N -e 'select name,chrom,chromStart,chromEnd from ucscToINSDC;' hg38 \
+      | sort > insdc.ucsc.tab
+
+    join insdc.ucsc.tab insdc.refSeq.tab | tr '[ ]' '[\t]' \
+       | cut -f2- > ucsc.refSeq.tab
+
+
+    export chrSize=`cut -f1 ucsc.refSeq.tab | awk '{print length($0)}' | sort -n | tail -1`
+    sed -e "s/21/$chrSize/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \
+       | sed -e 's/INSDC/RefSeq/g;' > ucscToRefSeq.sql
+    hgLoadSqlTab hg38 ucscToRefSeq ./ucscToRefSeq.sql ucsc.refSeq.tab
+
+    checkTableCoords  hg38 -table=ucscToRefSeq
+
+#########################################################################
+#CREATE MICROSAT TRACK (DONE - 2015-05-22 - Hiram)
+    ssh hgwdev
+    mkdir /cluster/data/hg38/bed/microsat
+    cd /cluster/data/hg38/bed/microsat
+
+    awk '($5==2 || $5==3) && $6 >= 15 && $8 == 100 && $9 == 0 {printf("%s\t%s\t%s\t%dx%s\n", $1, $2, $3, $6, $16);}' \
+       ../simpleRepeat/simpleRepeat.bed > microsat.bed
+
+    hgLoadBed hg38 microsat microsat.bed
+
+#############################################################################
+# ENCODE Regulatory tracks  (Kate & Chris)
+
+# see reg.txt
+#########################################################################
+# GWIPS-viz Ribo-seq - (DONE - 2016-02-05 - Steve)
+# contact Audrey Michel (audreymannion@gmail.com)
+# redmine #16765
+
+obtained bigWig file from shared Google drive
+https://drive.google.com/a/soe.ucsc.edu/folderview?id=0B_xvV_5tXzOGQ1h5NEh4bnhNTDg&usp=sharing_eid
+
+mkdir /hive/data/genomes/hg38/bed/gwipsvizRiboseq
+cp Global_RiboProElong.10_02_2016.bw /hive/data/genomes/hg38/bed/gwipsvizRiboseq/gwipsvizRiboseq.bw
+
+mkdir /gbdb/hg38/bbi/gwipsvizRiboseq
+cd /gbdb/hg38/bbi/gwipsvizRiboseq
+ln -s /hive/data/genomes/hg38/bed/gwipsvizRiboseq/gwipsvizRiboseq.bw gwipsvizRiboseq.bw
+
+hgsql hg38
+create table gwipsvizRiboseq select * from gc5BaseBw;
+update gwipsvizRiboseq set fileName="/gbdb/hg38/bbi/gwipsvizRiboseq/gwipsvizRiboseq.bw" where fileName="/gbdb/hg38/bbi/gc5BaseBw/gc5Base.bw";
+
+#########################################################################
+# COSMIC v81 DONE Chris Eisenhart 2017-05-11
+# Make a new COSMIC track for hg38
+mkdir /hive/data/outside/cosmic/hg38/v81
+cd /hive/data/outside/cosmic/hg38/v81
+
+# Get the new data
+sftp ceisenha@ucsc.edu@sftp-cancer.sanger.ac.uk
+# Login to SFTP server then run these commands
+get /files/grch38/cosmic/v81/CosmicMutantExport.tsv.gz
+
+# Remove the 'NS' fields, search for the \t after to exclude the E'NS'ST transcripts.
+zcat CosmicMutantExport.tsv.gz | sed 's/NS\t/\t/g' > cosMut.tsv
+
+# Use a script to convert to bed format.
+cosmicToBed cosMut.tsv cosMut.bed
+# This many lines were skipped, 131597 for not having genomic coordinate
+
+# Sort and convert to big bed using the .as file.
+sort -k1,1 -k2,2n cosMut.bed > sCosMut.bed
+bedToBigBed -type=bed4+31 -as=cosmicNew.as sCosMut.bed /hive/data/genomes/hg38/chrom.sizes cosMutHg38V81.bb -tab -extraIndex=name,cosmLabel
+
+# Link it up so the outside world can see it.
+cd /gbdb/hg38/cosmic/
+ln -s /hive/data/outside/cosmic/hg38/v81/cosMutHg38V81.bb .
+#########################################################################
+# hoffmanMappability hub import (2 super tracks) DONE Chris Eisenhart 2017-05-16
+mkdir /hive/data/outside/hoffmanMappability/hg38
+cd /hive/data/outside/hoffmanMappability/hg38
+wget https://www.pmgenomics.ca/hoffmanlab/proj/bismap/trackhub/hg38/trackDb.txt
+# Get the trackDb file
+importTrackHub trackDb.txt hofMap.ra /gbdb/hg38/hoffmanMappability/ --trackDbPath=$HOME/kent/src/hg/makeDb/trackDb/human/hg38/ --test
+# Check that the commands are what we want, then run for real
+importTrackHub trackDb.txt hofMap.ra /gbdb/hg38/hoffmanMappability/ --trackDbPath=$HOME/kent/src/hg/makeDb/trackDb/human/hg38/
+# View the .ra file to make sure things are ok, here changed the groups to map,
+# added the alpha tags, and removed the 'show' from 'superTrack on show'
+cp hofMap.ra ~/kent/src/hg/makeDb/trackDb/human/hg38
+# Include hofMap.ra in the trackDb.ra file
+
+# the importTrackHub failed on redirection, fetch all the files manually:
+# 2017-09-15 - Hiram
+
+cd /hive/data/outside/hoffmanMappability/hg38
+
+grep bigDataUrl trackDb.txt | awk '{print $NF}' | sed -e 's#https://www.pmgenomics.ca/hoffmanlab/proj/bismap/trackhub/hg38/##;' | while read F
+do
+  echo $F
+  rm -f $F
+  wget --timestamping "https://www.pmgenomics.ca/hoffmanlab/proj/bismap/trackhub/hg38/${F}"
+done
+    # real    29m40.429s
+
+#########################################################################
+# tcgaExpr super track Chris Eisenhart, DONE, 2017-05-17
+# tcgaTranscExpr
+# TCGA transcript level expression barChart track, from TOIL pipeline recompute (John Vivian)
+# biorxiv.org/content/biorxiv/early/2016/07/07/062497.full.pdf
+mkdir /hive/data/outside/tcgaBarcharts/
+mkdir /hive/data/outside/tcgaBarcharts/transcripts
+cd /hive/data/outside/tcgaBarcharts/transcripts
+
+# Get all the meta data
+cp ~max/projects/cirm/datasetPages/tcgaGtex/tcgaMeta.tab .
+# Cut out the meta data the script wants, sample name and group.
+cut -f 1,5 tcgaMeta.tab | sed 's/ /_/g' > tcgaLargeSamples.tsv
+
+# Get and clean the matrix
+cp ~max/projects/cirm/datasetPages/tcgaGtex/tcga.tpm.tab .
+# Clean up the transcript names (remove the .#)
+cut -f 1 tcga.tpm.tab | cut -f 1 -d "." > tcgaTranscripts.txt
+cut -f 2- tcga.tpm.tab > tcgaTpmValues.tsv
+paste tcgaTranscripts.txt tcgaTpmValues.tsv > tcgaMatrix.tsv
+
+# Build a coordinate map
+hgsql hg38 -e "select * from ensGene" | cut -f 2- | sort > ensGene
+hgsql hg38 -e "select * from ensemblToGeneName" | sort >  ensemblToGeneName
+join ensGene ensemblToGeneName | awk '{print $2"\t"$4"\t"$5"\t"$1"\t0\t"$3"\t"$16}' > coord.bed
+
+# Use the meta data, matrix, and coordinate map to generate a barchart bed
+time expMatrixToBarchartBed tcgaLargeSamples.tsv tcgaMatrix.tsv coord.bed tcgaTransExp.bed --groupOrder tcgaGroupOrder.txt
+
+# NOTE: Use the header line of the bed file to populate the barChartBars field in the trackDb.
+# The order of the labels in the barChartBars field should match the order of the labels in the
+# expScores column in the bed file header.
+
+# Sort and convert into a bigBed file.
+sort -k1,1 -k2,2n tcgaTransExp.bed > sortedTcgaTransExp.bed
+bedToBigBed -type=bed6+5 -as=$HOME/kent/src/hg/lib/barChartTranscExp.as sortedTcgaTransExp.bed /hive/data/genomes/hg38/chrom.sizes tcgaTransExp.bb
+
+# Link the files into gbdb
+cd /gbdb/hgFixed/human/expMatrix
+ln -s /hive/data/outside/tcgaBarcharts/transcripts/tcgaLargeSamples.tsv tcgaLargeSamples.tab
+ln -s /hive/data/outside/tcgaBarcharts/transcripts/tcgaMatrix.tsv tcgaMatrix.tab
+ln -s /hive/data/outside/tcgaBarcharts/transcripts/tcgaTransExp.bb .
+
+###########3
+# Reload bigBed with a schema that will be shared with genes track, to support
+# configuration as subtracks in a composite
+# (2007-08-30 kate)
+cd /hive/data/outside/tcgaBarcharts/transcripts
+bedToBigBed -type=bed6+5 -as=$HOME/kent/src/hg/lib/barChartGeneExpr.as sortedTcgaTransExp.bed /hive/data/genomes/hg38/chrom.sizes tcgaTranscExpr.hg38.bb
+mkdir /gbdb/hg38/tcga
+ln -s `pwd`/tcgaTranscExpr.hg38.bb /gbdb/hg38/tcga/tcgaTranscExpr.bb
+
+# TCGA gene level expression barChart track, from TOIL pipeline recompute (John Vivian)
+# tcgaGeneExpr
+mkdir ../genes
+cd ../genes
+
+# Get the gene matrix.
+cp ~max/projects/cirm/datasetPages/tcgaGtex/tcga.geneTpm.tab .
+
+# Make a coordinate file, the genes in gtexGeneModelV6 have .# versions which are
+# removed with the temp fils.
+hgsql hg38 -e "select * from hg38.gtexGeneModelV6" | awk '{print $3"\t"$5"\t"$6"\t"$2"\t0\t"$4"\t"$2}' > coord6+1.bed.temp
+cut -f 4 coord6+1.bed.temp | cut -f 1 -d "." > foo
+cut -f 1-3 coord6+1.bed.temp > foo2
+paste foo2 foo > foo3
+cut -f 5- coord6+1.bed.temp > foo4
+paste foo3 foo4 > coord6+1.bed
+# This bed file didn't have the right gene names (ENS rather than Hugo), fix it.
+hgsql hg38 -e "select * From knownCanonical" > foo
+wc foo
+cut -f 6 foo | cut -f 1 -d "."
+cut -f 6 foo | cut -f 1 -d "." > foo2
+head foo
+cut -f 1-3 foo > foo3
+paste foo2 foo3 > foo4
+cut -f 4- coord6+1.bed > foo5
+join <(sort foo5) <(sort foo4) | awk '{print $5"\t"$6"\t"$7"\t"$1"\t0\t"$3"\t"$4}' > coord6+1.3.bed
+
+# Generate the bed file, can use the same transcript file
+time expMatrixToBarchartBed ../transcripts/tcgaLargeSamples.tsv tcga.geneTpm.tab coord6+1.3.bed tcgaGeneExp.bed --groupOrder=../transcripts/tcgaGroupOrder.txt
+
+# Convert to big bed
+sort -k1,1 -k2,2n tcgaGeneExp.bed > sortedTcgaGeneExp.bed
+bedToBigBed -type=bed6+5 -as=$HOME/kent/src/hg/lib/barChartGeneExp.as sortedTcgaGeneExp.bed /hive/data/genomes/hg38/chrom.sizes tcgaGeneExp.bb
+
+# Link to gbdb
+cd /gbdb/hgFixed/human/expMatrix
+ln -s /hive/data/outside/tcgaBarcharts/genes/tcgaGeneExp.bb .
+ln -s /hive/data/outside/tcgaBarcharts/genes/tcga.geneTpm.tab tcgaGeneMatrix.tab
+
+###########3
+# Reload bigBed with a schema that will be shared with transcript track, to support
+# configuration as subtracks in a composite
+# Apparently Chris actually loaded the #3 file (added gene names, adjusted end coord apparently)
+# (2007-08-30 kate)
+cd /hive/data/outside/tcgaBarcharts/genes
+bedToBigBed -type=bed6+5 -as=$HOME/kent/src/hg/lib/barChartGeneExpr.as sortedTcgaGeneExp3.bed /hive/data/genomes/hg38/chrom.sizes tcgaGeneExpr.hg38.bb
+mkdir /gbdb/hg38/tcga
+ln -s `pwd`/tcgaGeneExpr.hg38.bb /gbdb/hg38/tcga/tcgaGeneExpr.bb
+
+#########################################################################
+# gtexTransExp Chris Eisenhart, done, 2017-05-23
+# TCGA transcript level RNA-seq, from TOIL pipeline recompute (John Vivian)
+# biorxiv.org/content/biorxiv/early/2016/07/07/062497.full.pdf
+mkdir /hive/data/outside/gtex/barChartTrack
+cd /hive/data/outside/gtex/barChartTrack
+
+# Seems John included some TCGA data (CML) in the GTEx matrix and samples, the cleaning steps remove this.
+# Make a clean sample file
+cat ../johnVivianRecompute/sraToSample.txt | sed 's/ male /\tmale\t/g' | sed 's/ female /\tfemale\t/g' | cut -f 3 | sed 's/ - /-/g' | sed 's/ /_/g' > gtexSampleGroups.txt
+cat ../johnVivianRecompute/sraToSample.txt | cut -f 1 -d " " > gtexSampleNames.txt
+paste gtexSampleNames.txt gtexSampleGroups.txt > gtexSamples.txt
+# NOTE(review): created as gtexSamples.txt above but read as gtexSamples.tsv
+# here and at the later sort step -- presumably renamed in between; verify.
+grep -v '(CML)' gtexSamples.tsv > cleanGtexSamples.tsv
+
+# Make a clean matrix
+cut -f 1 ../johnVivianRecompute/gtex.tpm.tab | cut -f 1 -d "." > gtexTranscripts.txt
+cut -f 2- ../johnVivianRecompute/gtex.tpm.tab > gtexTpmValues.tsv
+paste gtexTranscripts.txt gtexTpmValues.tsv > gtexMatrix.tsv
+rowsToCols gtexMatrix.tsv tspsdGtexMatrix.tsv
+sort tspsdGtexMatrix.tsv > sortedTspsdGtexMatrix.tsv
+grep -v '(CML)' gtexSamples.tsv | cut -f 1 | sed 's/Run_s/#transcript/g' | sort > sortedCleanGtexSamples.tsv
+join sortedCleanGtexSamples.tsv sortedTspsdGtexMatrix.tsv > cleanTspsdGtexMatrix.tsv
+rowsToCols cleanTspsdGtexMatrix.tsv cleanGtexMatrix.tsv
+
+# Build a coordinate map
+hgsql hg38 -e "select * from ensGene" | cut -f 2- | sort > ensGene
+hgsql hg38 -e "select * from ensemblToGeneName" | sort >  ensemblToGeneName
+join ensGene ensemblToGeneName | awk '{print $2"\t"$4"\t"$5"\t"$1"\t0\t"$3"\t"$16}' > coord.bed
+# NOTE: CHRISL10-05-2021 - the above ensGene steps weren't actually done or the files were removed,
+# there was a coord.tsv which I used instead so the below re-run could work
+tawk '{print $1,$2,$3,$4,0,$5,$6}' coord.tsv > coord.bed
+# END CHRISL10-05-2021 NOTE)
+
+# Get the gtex ordering
+hgsql hgFixed -e "select * from gtexTissue" | cut -f 3 | sed 's/ - /-/g' | sed 's/ /_/g' | sed '1D' > gtexGroupOrder.txt
+
+# Use the meta data, matrix, and coordinate map to generate a barchart bed
+# NOTE: CHRISL10-05-2021 - re-ran this step to fix float parsing bug:
+time expMatrixToBarchartBed cleanGtexSamples.tsv cleanGtexMatrix.tsv coord.bed gtexTransExp.bed --groupOrderFile gtexGroupOrder.txt
+
+# NOTE: Use the header line of the bed file to populate the barChartBars field in the trackDb.
+# The order of the labels in the barChartBars field should match the order of the labels in the
+# expScores column in the bed file header.
+
+# Sort and convert into a bigBed file.
+sort -k1,1 -k2,2n gtexTransExp.bed > sortedGtexTransExp.bed
+# NOTE: CHRISL10-05-2021 - re-ran bedToBigBed step with correct file names
+bedToBigBed -as=$HOME/kent/src/hg/lib/barChartBed.as -type=bed6+5 sortedGtexTransExp.bed /hive/data/genomes/hg38/chrom.sizes gtexTranscExpr.bb
+
+# Link the files into gbdb
+cd /gbdb/hgFixed/human/expMatrix
+ln -s /hive/data/outside/gtex/barChartTrack/cleanGtexSamples.tsv cleanGtexSamples.tab
+ln -s /hive/data/outside/gtex/barChartTrack/cleanGtexMatrix.tsv cleanGtexMatris.tab
+
+# (2007-08-30 kate)
+cd /gbdb/hg38/gtex
+ln -s /hive/data/outside/gtex/barChartTrack/gtexTranscExpr.bb .
+
+#########################################################################
+# LASTZ human/hg38 vs. Zebrafish /danRer11
+#	(DONE - 2017-06-12 - Chris)
+
+    mkdir /hive/data/genomes/hg38/bed/lastzDanRer11.2017-06-12
+    cd /hive/data/genomes/hg38/bed/lastzDanRer11.2017-06-12
+
+    printf '# human vs zebrafish danRer11
+BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
+BLASTZ_M=254
+
+# TARGET: human hg38
+SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit
+SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes
+SEQ1_CTGDIR=/hive/data/genomes/hg38/hg38.contigs.2bit
+SEQ1_CTGLEN=/hive/data/genomes/hg38/hg38.contigs.chrom.sizes
+SEQ1_LIFT=/hive/data/genomes/hg38/jkStuff/hg38.contigs.lift
+SEQ1_CHUNK=40000000
+SEQ1_LIMIT=20
+SEQ1_LAP=10000
+
+# QUERY: zebrafish danRer11
+SEQ2_DIR=/hive/data/genomes/danRer11/danRer11.2bit
+SEQ2_LEN=/hive/data/genomes/danRer11/chrom.sizes
+SEQ2_CHUNK=20000000
+SEQ2_LIMIT=200
+SEQ2_LAP=0
+
+BASE=/hive/data/genomes/hg38/bed/lastzDanRer11.2017-06-12
+TMPDIR=/dev/shm
+' > DEF
+
+    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
+        -chainMinScore=3000 -chainLinearGap=medium \
+          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
+            -noDbNameCheck -syntenicNet) > do.log 2>&1
+    # real    3327m39.074s
+
+	cat fb.hg38.chainDanRer11Link.txt
+    # 41036733 bases of 3049335806 (1.346%) in intersection
+
+    # (stray paste, inconsistent with the fb figure above -- likely from
+    # another run:)
+    # 973293331 bases of 3049335806 (31.918%) in intersection
+
+    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` hg38 danRer11) \
+       > rbest.log 2>&1 &
+
+    # and for the swap:
+    mkdir /hive/data/genomes/danRer11/bed/blastz.hg38.swap
+    cd /hive/data/genomes/danRer11/bed/blastz.hg38.swap
+
+    time (doBlastzChainNet.pl -verbose=2 \
+      /hive/data/genomes/hg38/bed/lastzDanRer11.2017-06-12/DEF \
+        -swap -chainMinScore=3000 -chainLinearGap=medium \
+          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
+            -noDbNameCheck -syntenicNet) > swap.log 2>&1
+	#  real	39m24.916s
+
+    cat fb.danRer11.chainHg38Link.txt
+    # 47869194 bases of 1674677181 (2.858%) in intersection
+
+    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` danRer11 hg38) \
+       > rbest.log 2>&1 &
+    # real	638m45.337s
+# (stray '_EOF_' marker -- no matching heredoc opened in this section)
+#########################################################################
+# refSeqFuncElems NCBI refSeq functional elements, REDONE 2017-11-29 Angie
+# previously done 2017-08-01 by Chris E
+
+mkdir /hive/data/genomes/hg38/bed/refSeqFuncElems.2017-11-29
+cd /hive/data/genomes/hg38/bed/refSeqFuncElems.2017-11-29
+
+# NOTE FOR NEXT TIME: instead of using interim GFF, in the future these annotations might be
+# folded into the same main release GFF3 from which the ncbiRefSeq* tables are extracted by
+# doNcbiRefSeq.pl.
+wget ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/GFF_interim/interim_GRCh38.p11_top_level_2017-06-27.gff3.gz
+
+# Get mapping of RefSeq NC_* chromosome accs (and NT_*, NW_*) to hg38 chrom names
+hgsql hg38 -NBe 'select alias, chrom from chromAlias where source = "refseq" order by alias' \
+> refSeqToChrom.tab
+cut -f 2 refSeqToChrom.tab | sed -e 's/^/^/' > chrom.tab
+
+# Use Terence Murphy's list of feature types (and the multi-type attribute regulatory_class)
+# to identify Functional Elements and swap in hg38 chrom names.
+# Use subColumn -miss so it doesn't quit when it sees a patch contig that doesn't map to an
+# hg38 chrom.  Use grep -f chrom.tab to filter out patch contig annotations.
+zcat interim_GRCh38.p11_top_level_2017-06-27.gff3.gz \
+| grep -P "(\t(CAAT_signal|GC_rich_promoter_region|TATA_box|enhancer|insulator|locus_control_region|mobile_genetic_element|origin_of_replication|promoter|protein_binding_site|recombination_feature|regulatory_region|repeat_region|sequence_feature|sequence_secondary_structure|silencer|stem_loop)\t|regulatory_class=)" \
+| subColumn -miss=/dev/null 1 stdin refSeqToChrom.tab stdout \
+| grep -f chrom.tab > funcElems.gff
+wc -l funcElems.gff
+#5756 funcElems.gff
+
+# Transform GFF to BED+
+~/kent/src/hg/utils/automation/parseRefSeqFuncElems funcElems.gff /dev/stdout \
+| sort -k1,1 -k2n,2n > refSeqFuncElems.bed
+wc -l refSeqFuncElems.bed
+#5756 refSeqFuncElems.bed
+
+# Make bigBed and link from /gbdb
+bedToBigBed -tab -type=bed9+7 -as=$HOME/kent/src/hg/lib/refSeqFuncElems.as \
+  refSeqFuncElems.bed /hive/data/genomes/hg38/chrom.sizes refSeqFuncElems.bb
+rm -f /gbdb/hg38/ncbiRefSeq/refSeqFuncElems.bb
+ln -s `pwd`/refSeqFuncElems.bb /gbdb/hg38/ncbiRefSeq/
+
+###################################################################
+# cosmicRegions (DONE 2017-08-03 Chris)
+# Make a new COSMIC track for hg38 v82
+mkdir /hive/data/outside/cosmic/hg38/v82
+cd /hive/data/outside/cosmic/hg38/v82
+
+# Get the new data
+sftp ceisenha@ucsc.edu@sftp-cancer.sanger.ac.uk
+# Login to SFTP server then run these commands
+get /files/grch38/cosmic/v82/CosmicMutantExport.tsv.gz
+
+# Remove the 'NS' fields, search for the \t after to exclude the E'NS'ST transcripts.
+zcat CosmicMutantExport.tsv.gz | sed 's/NS\t/\t/g' > cosMut.tsv
+
+# Use a script to convert to bed format.
+cosmicToBed cosMut.tsv cosMut.bed
+# This many lines were skipped, 134601 for not having genomic coordinate
+
+# Sort and convert to big bed using the .as file.
+sort -k1,1 -k2,2n cosMut.bed > sCosMut.bed
+bedToBigBed -type=bed8+31 -as=cosmicNew.as sCosMut.bed /hive/data/genomes/hg38/chrom.sizes cosMutHg38V82.bb -tab -extraIndex=name,cosmLabel
+
+
+# Link it up so the outside world can see it.
+cd /gbdb/hg38/cosmic/
+ln -s /hive/data/outside/cosmic/hg38/v82/cosMutHg38V82.bb .
+
+#########################################################################
+# RepeatMasker Visualization track update (DONE - 2018-05-04 - ChrisL)
+    screen -S rmskJoined.2018-05-04
+    mkdir /hive/data/genomes/hg38/bed/rmskJoined.2018-05-04
+    cd /hive/data/genomes/hg38/bed/rmskJoined.2018-05-04
+
+    ln -s ../repeatMasker/hg38.sorted.fa.out .
+    ln -s ../repeatMasker/hg38.fa.align.gz .
+
+    # this script points to the most recent RepeatMasker version:
+    time (/scratch/data/RepeatMasker/util/rmToUCSCTables.pl \
+        -out hg38.sorted.fa.out -align hg38.fa.align.gz) > do.log 2>&1 &
+
+    # no differences, forgot to remake rmsk files
+    # so instead remake the rmsk track and try again
+    mkdir /hive/data/genomes/hg38/bed/repeatMasker.2018-05-04
+    cd /hive/data/genomes/hg38/bed/repeatMasker.2018-05-04
+
+    # remake the sorted.fa.out and fa.align.gz, stop after masking
+    # so rmsk table isn't overwritten
+    time (doRepeatMasker.pl -stop=mask -useHMMER -bigClusterHub=ku \
+       -workhorse=hgwdev -dbHost=hgwdev -buildDir=`pwd` hg38) > mask.log 2>&1 &
+    # RepeatMasker bug?: Undefined id, line 1440295 of input:
+    #    10  26.1  0.0  0.0  chr13     114292339 114292382   (71946) C  L1P4           LINE/L1               (17) 6149   6106
+    # RepeatMasker bug?: Undefined id, line 3529762 of input:
+    #   992   2.3  0.5  0.0  chr3      180461254 180462048 (17833511) C  L1PA3          LINE/L1                (3) 6152   5354
+    # RepeatMasker bug?: Undefined id, line 3529763 of input:
+    #  1153   3.2  0.2  0.0  chr3      180462043 180463006 (17832553) +  L1PA3          LINE/L1               4392 5357  (789)
+    # RepeatMasker bug?: Undefined id, line 5303571 of input:
+    #   220  22.5  0.0 17.7  chr9      105798076 105799127 (32595590) C  SATR2          Satellite              (4)  866      1
+    # real    643m17.617s
+
+    # get rid of the missing id items:
+    grep -v "114292339 114292382\|180461254 180462048\|180462043 180463006\|105798076 105799127" \
+        hg38.fa.out > clean.hg38.fa.out
+    mv clean.hg38.fa.out hg38.fa.out
+
+    # finish the last step of doCat.csh:
+    /cluster/bin/scripts/extractNestedRepeats.pl hg38.fa.out | sort -k1,1 -k2,2n > hg38.nestedRepeats.bed
+
+    cd /hive/data/genomes/hg38/bed/rmskJoined.2018-05-04
+
+    rm hg38.sorted.fa.out
+    rm hg38.fa.align.gz
+    rm *.tsv
+    ln -s ../repeatMasker.2018-05-04/hg38.sorted.fa.out .
+    ln -s ../repeatMasker.2018-05-04/hg38.fa.align.gz .
+
+    # and then re-run
+    time (/scratch/data/RepeatMasker/util/rmToUCSCTables.pl \
+        -out hg38.sorted.fa.out -align hg38.fa.align.gz) > rerun.log 2>&1 &
+    # real    141m7.268s
+
+    # confirm the counts are different from the previous version:
+    # wc -l ../rmskJoined/hg38.fa.align.tsv ../rmskJoined/hg38.sorted.fa.join.bed ../rmskJoined/hg38.sorted.fa.out.tsv
+   7203858 ../rmskJoined/hg38.fa.align.tsv
+   4607727 ../rmskJoined/hg38.sorted.fa.join.bed
+   5520118 ../rmskJoined/hg38.sorted.fa.out.tsv
+  17331703 total
+    # wc -l *.tsv
+   7227245 hg38.fa.align.tsv
+   4828114 hg38.sorted.fa.join.tsv
+   5916189 hg38.sorted.fa.out.tsv
+  17971548 total
+
+    hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/rmskJoined.sql \
+        -renameSqlTable -verbose=4 -tab \
+            -type=bed4+9 -as=$HOME/kent/src/hg/lib/rmskJoined.as hg38 \
+                rmskJoinedCurrent hg38.sorted.fa.join.tsv \
+                    > loadJoined.log 2>&1
+
+    hgLoadSqlTab hg38 rmskAlignCurrent \
+        /cluster/home/chmalee/kent/src/hg/lib/rmskAlign.sql \
+            hg38.fa.align.tsv > loadAlign.log 2>&1
+
+    hgLoadOutJoined -verbose=2 -table=rmskOutCurrent hg38 hg38.sorted.fa.out > loadOut.log 2>&1
+
+    featureBits -countGaps hg38 rmskJoinedCurrent
+    # 2796899855 bases of 3209286105 (87.150%) in intersection
+#########################################################################
+# Hi-C Visualization based on Krietenstein 2019 (DONE - 2019-10-07 - Jonathan)
+mkdir -p /hive/data/genomes/hg38/bed/hic
+cd /hive/data/genomes/hg38/bed/hic
+
+# Files are located on 4D Nucleome (data.4dnucleome.org).  The URL for the paper on that
+# site is https://data.4dnucleome.org/publications/b13590b2-a341-4e5e-ad5e-72e233b32e9d/.
+# The four file IDs downloaded below are for contact matrix .hic files created for
+# different cell-line/protocol combinations
+wget 'https://data.4dnucleome.org/files-processed/4DNFI2TK7L2F/@@download/4DNFI2TK7L2F.hic' # H1-hESC Micro-C XL
+wget 'https://data.4dnucleome.org/files-processed/4DNFIQYQWPF5/@@download/4DNFIQYQWPF5.hic' # H1-hESC in situ
+wget 'https://data.4dnucleome.org/files-processed/4DNFI18Q799K/@@download/4DNFI18Q799K.hic' # HFFc6 Micro-C XL
+wget 'https://data.4dnucleome.org/files-processed/4DNFIFLJLIS5/@@download/4DNFIFLJLIS5.hic' # HFFc6 in situ
+
+printf "All files were downloaded from the 4D Nucleome Data Portal at data.4dnucleome.org.
+These are processed contact matrices from Krietenstein et al. (2019) Ultrastructural details
+of mammalian chromosome architecture. (https://www.biorxiv.org/content/10.1101/639922v1).
+
+4DNFI2TK7L2F.hic - Micro-C XL data set on H1-hESC
+4DNFIQYQWPF5.hic - in situ Hi-C data set on H1-hESC
+4DNFI18Q799K.hic - Micro-C  XL data set on HFFc6
+4DNFIFLJLIS5.hic - in situ Hi-C data set on HFFc6" > README.txt
+
+mkdir -p /gbdb/hg38/bbi/hic
+cd /gbdb/hg38/bbi/hic
+ln -s /hive/data/genomes/hg38/bed/hic/* .
+
+
+#########################################################################
+# LASTZ Self/hg38 (DONE 2020-02-11 - Angie)
+    # RM #24695
+    # Re-run with updated process to include pslDropOverlap .
+    # Use "contigs" from previous run lastzSelf.2014-01-25/hg38.self.2bit
+
+    screen -S hg38Self -t hg38Self
+    mkdir /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27
+    cd /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27
+    cat << _EOF_ > DEF
+# human vs human with mouse defaults
+BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
+
+# TARGET: Human hg38
+SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit
+SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes
+SEQ1_CTGDIR=/hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/hg38.self.2bit
+SEQ1_CTGLEN=/hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/hg38.self.chrom.sizes
+SEQ1_LIFT=/hive/data/genomes/hg38/jkStuff/hg38.contigs.lift
+SEQ1_CHUNK=20000000
+SEQ1_LAP=10000
+
+# QUERY: Human hg38
+SEQ2_DIR=/hive/data/genomes/hg38/hg38.2bit
+SEQ2_LEN=/hive/data/genomes/hg38/chrom.sizes
+SEQ2_CTGDIR=/hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/hg38.self.2bit
+SEQ2_CTGLEN=/hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/hg38.self.chrom.sizes
+SEQ2_LIFT=/hive/data/genomes/hg38/jkStuff/hg38.contigs.lift
+SEQ2_CHUNK=20000000
+SEQ2_LAP=0
+
+BASE=/hive/data/genomes/hg38/bed/lastzSelf.2020-01-27
+TMPDIR=/dev/shm
+_EOF_
+
+    # NOTE FOR NEXT TIME: use -chainMinScore=10000 (at least), not 3000
+
+    ~/kent/src/hg/utils/automation/doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
+        -chainMinScore=3000 -chainLinearGap=medium -syntenicNet \
+        -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+        -stop=net >& do.log &
+    tail -f do.log
+
+
+    # After two days, 4 jobs are running, one of which (part014.lst vs itself) crashed with
+    # out-of-mem error.  After 3 days, 3 jobs completed but part014.lst runs lastz out of mem.
+    # Split part014.lst up into components, run on hgwdev (more mem).
+    mkdir /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/run.blastz/part014
+    cd /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/run.blastz/part014
+    mkdir psl
+    cp /dev/null jobList
+    for t in $(cat ../tParts/part014.lst); do
+      tBase=$(basename $t)
+      for q in $(cat ../tParts/part014.lst); do
+        qBase=$(basename $q)
+        echo "$HOME/kent/src/hg/utils/automation/blastz-run-ucsc -outFormat psl -dropSelf $t $q ../../DEF {check out exists psl/${tBase}_${qBase}.psl }" >> jobList
+      done
+    done
+    para create jobList
+    para try, check, push, etc,
+    # 94 of the jobs ran for 12s or less.  The other 6 are chr{X_Y}_00 vs. self & each other,
+    # chr13_16 vs self and chr16_03 vs self.  All but chr16_03 vs self completed in < 6 minutes.
+#Completed: 99 of 100 jobs
+#Crashed: 1 jobs
+#CPU time in finished jobs:       1559s      25.98m     0.43h    0.02d  0.000 y
+#IO & Wait Time:                   248s       4.14m     0.07h    0.00d  0.000 y
+#Average job time:                  18s       0.30m     0.01h    0.00d
+#Longest finished job:             321s       5.35m     0.09h    0.00d
+#Submission to last job:         94681s    1578.02m    26.30h    1.10d
+
+    # Dang, chr16_03 vs. self still runs out of mem even on hgwdev.
+    mkdir /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/run.blastz/part014/chr16_03
+    cd /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/run.blastz/part014/chr16_03
+    twoBitToFa /hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/hg38.self.2bit:chr16_03:0-1689648 \
+      chr16_03.fa
+    faSplit -lift=chr16_03.lift size chr16_03.fa 169000 chr16_03_split_
+    faToTwoBit chr16_03_split_*.fa chr16_03_split.2bit
+    twoBitInfo chr16_03_split.2bit stdout | sort -k2nr > chr16_03_split.sizes
+    sed -re 's@CTGDIR.*@CTGDIR=/hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/run.blastz/part014/chr16_03/chr16_03_split.2bit@;
+             s@CTGLEN.*@CTGLEN=/hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/run.blastz/part014/chr16_03/chr16_03_split.sizes@;' \
+      ../../../DEF > DEF.split
+    mkdir psl
+    cwd=$(pwd)
+    while read tBase tSize; do
+      while read qBase qSize; do
+        echo "$HOME/kent/src/hg/utils/automation/blastz-run-ucsc -outFormat psl -dropSelf $cwd/chr16_03_split.2bit:$tBase:0-$tSize $cwd/chr16_03_split.2bit:$qBase:0-$qSize DEF.split {check out exists psl/${tBase}_${qBase}.psl}"
+      done < chr16_03_split.sizes
+    done < chr16_03_split.sizes > jobList
+    para create jobList
+    para try, check, push, etc,
+#Completed: 100 of 100 jobs
+#CPU time in finished jobs:     142614s    2376.89m    39.61h    1.65d  0.005 y
+#IO & Wait Time:                   167s       2.79m     0.05h    0.00d  0.000 y
+#Average job time:                1428s      23.80m     0.40h    0.02d
+#Longest finished job:           22861s     381.02m     6.35h    0.26d
+#Submission to last job:         22874s     381.23m     6.35h    0.26d
+    # 6 hours for chr16_03_split_00 vs. itself.  ~4.5h for _09 vs _00.
+    cat psl/*.psl \
+    | liftUp -nohead -type=.psl stdout \
+        chr16_03.lift error stdin \
+    | liftUp -nohead -type=.psl -pslQ \
+        ../psl/hg38.self.2bit:chr16_03:0-1689648_hg38.self.2bit:chr16_03:0-1689648.psl \
+        chr16_03.lift error stdin
+
+    cd ..
+    cat psl/* > ../../psl/part014.lst/part014.lst_part014.lst.psl
+
+    # Make run.time file or doBlastzChainNet.pl won't continue:
+    cd /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/run.blastz
+    para time >& run.time
+
+    # Resume doBlastzChainNet.pl:
+    cd /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27
+    ~/kent/src/hg/utils/automation/doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
+        -chainMinScore=3000 -chainLinearGap=medium -syntenicNet \
+        -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+        -continue=cat -stop=net >& do2.log &
+    tail -f do2.log
+#Batch failed after 4 tries on chain.csh part016.lst chain/part016.lst.chain
+#Command failed:
+#ssh -x -o 'StrictHostKeyChecking = no' -o 'BatchMode = yes' hgwdev nice /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/axtChain/run/doChainRun.csh
+
+    cd /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/axtChain/run
+    para problems
+    # mostly these:
+#errAbort re-entered due to out-of-memory condition. Exiting.
+    # one job made it through errAbort:
+#needLargeMem: Out of memory - request size 564838920 bytes, errno: 12
+    para time
+#Completed: 59 of 68 jobs
+#Crashed: 9 jobs
+#CPU time in finished jobs:      24727s     412.12m     6.87h    0.29d  0.001 y
+#IO & Wait Time:                     0s       0.00m     0.00h    0.00d  0.000 y
+#Average job time:                 409s       6.82m     0.11h    0.00d
+#Longest finished job:            2350s      39.17m     0.65h    0.03d
+#Submission to last job:          2462s      41.03m     0.68h    0.03d
+    para crashed
+#chain.csh part012.lst {check out line+ chain/part012.lst.chain}
+#chain.csh part017.lst {check out line+ chain/part017.lst.chain}
+#chain.csh part016.lst {check out line+ chain/part016.lst.chain}
+#chain.csh part015.lst {check out line+ chain/part015.lst.chain}
+#chain.csh part014.lst {check out line+ chain/part014.lst.chain}
+#chain.csh hg38.self.2bit:chr1_10: {check out line+ chain/hg38.self.2bit:chr1_10:.chain}
+#chain.csh hg38.self.2bit:chr10_05: {check out line+ chain/hg38.self.2bit:chr10_05:.chain}
+#chain.csh hg38.self.2bit:chr7_00: {check out line+ chain/hg38.self.2bit:chr7_00:.chain}
+
+    # Run the jobs outside of parasol (~11h):
+    csh -efx chain.csh part012.lst chain/part012.lst.chain &
+    csh -efx chain.csh part017.lst chain/part017.lst.chain &
+    csh -efx chain.csh part016.lst chain/part016.lst.chain &
+    csh -efx chain.csh part015.lst chain/part015.lst.chain &
+    csh -efx chain.csh part014.lst chain/part014.lst.chain &
+    csh -efx chain.csh hg38.self.2bit:chr1_10: chain/hg38.self.2bit:chr1_10:.chain &
+    csh -efx chain.csh hg38.self.2bit:chr10_05: chain/hg38.self.2bit:chr10_05:.chain &
+    csh -efx chain.csh hg38.self.2bit:chr7_00: chain/hg38.self.2bit:chr7_00:.chain &
+    csh -efx chain.csh hg38.self.2bit:chr16_08: chain/hg38.self.2bit:chr16_08:.chain &
+
+    # Resume doBlastzChainNet.pl again:
+    cd /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27
+    ~/kent/src/hg/utils/automation/doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
+        -chainMinScore=3000 -chainLinearGap=medium -syntenicNet \
+        -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+        -continue=chainMerge -stop=net >& do3.log &
+    tail -f do3.log
+# *** All done !  Elapsed time: 19m11s
+
+    # Load track w/new name chainSelfRedo to compare to existing chainSelf:
+    hgLoadChain -normScore -tIndex hg38 chainSelfRedo axtChain/hg38.hg38.all.chain.gz
+
+    # No idea why but somehow the liftUp seems not to have worked for part012 and part017,
+    # so the all.chain had chr22_31, chr8_01 etc.  :b  run again again.
+    cd /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/axtChain/run
+    mv chain/part012.lst.chain{,.bak}
+    mv chain/part017.lst.chain{,.bak}
+    csh -efx chain.csh part012.lst chain/part012.lst.chain >& part012.log &
+    csh -efx chain.csh part017.lst chain/part017.lst.chain >& part017.log &
+    # Those completed successfully.  Dunno why the earlier ones didn't get lifted.
+    cd ..
+    mv hg38.hg38.all{,.oopsPartUnlifted}.chain.gz
+    # Reconstruct hg38.hg38.all.chain.gz (the chainMerge step is just this command):
+    find /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/axtChain/run/chain -name "*.chain" \
+    | chainMergeSort -inputList=stdin \
+    | nice gzip -c \
+      > /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/axtChain/hg38.hg38.all.chain.gz
+
+    # NOTE FOR NEXT TIME: this filtering step will be unnecessary when -minScore=10000 is used
+    # from the beginning.
+    # Filter to minScore of 10000 (too much fluff with -minScore=3000) per Jim (see #24695)
+    cd /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/axtChain
+    mv hg38.hg38.all.chain.gz hg38.hg38.all.unfiltered.chain.gz
+    chainFilter hg38.hg38.all.unfiltered.chain.gz -minScore=10000 \
+    | gzip -c > hg38.hg38.all.chain.gz
+    hgLoadChain -normScore -tIndex hg38 chainSelfRedo hg38.hg38.all.chain.gz
+    checkTableCoords hg38 chainSelfRedo
+
+    # Rename to chainSelf and update lastz symlinks and downloads
+    hgsql hg38 -e 'drop table chainSelf; drop table chainSelfLink;
+                   rename table chainSelfRedo to chainSelf;
+                   rename table chainSelfRedoLink to chainSelfLink;'
+    cd /hive/data/genomes/hg38/bed
+    rm lastz.self lastz.hg38
+    ln -s lastzSelf.2020-01-27 lastz.self
+    ln -s lastzSelf.2020-01-27 lastz.hg38
+    cd /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/axtChain
+    cp /hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/axtChain/README.txt .
+    $EDITOR README.txt
+    md5sum hg38.hg38.all.chain.gz > md5sum.txt
+    # Make sure that the old download dir has only symlinks, no real files, then remove and rebuild.
+    ls -lR /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/vsSelf/
+    rm -r /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/vsSelf/
+    mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/vsSelf/
+    cd /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/vsSelf/
+    ln -s /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/axtChain/{README.txt,hg38.hg38.all.chain.gz,md5sum.txt} .
+
+
+#########################################################################
+# NCBI ReMap alignments (DONE 2020-02-11 Angie)
+# RM 24449
+    mkdir /hive/data/genomes/hg38/bed/chainHg19ReMap
+    cd /hive/data/genomes/hg38/bed/chainHg19ReMap
+    wget ftp://ftp.ncbi.nlm.nih.gov/pub/remap/Homo_sapiens/current/GCF_000001405.39_GRCh38.p13/GCF_000001405.25_GRCh37.p13/GCF_000001405.39-GCF_000001405.25.gff
+    # We will need to substitute all the RefSeq chrom and contig IDs with our own names.
+    # The same alt contig can appear in both assemblies with the same name, so replace
+    # hg19 names at the beginning of the line and hg38 names after "Target=".
+    hgsql hg19 -NBe 'select alias, chrom from chromAlias where find_in_set("refseq", source)' \
+    | sed -re 's/\./\\./;' \
+    | awk '{print "s/^" $1 "\\b/" $2 "/;";}' \
+      > hg38.hg19.chromAlias.sed
+    hgsql hg38 -NBe 'select alias, chrom from chromAlias where find_in_set("refseq", source)' \
+    | sed -re 's/\./\\./;' \
+    | awk '{print "s/Target=" $1 "\\b/Target=" $2 "/;";}' \
+      >> hg38.hg19.chromAlias.sed
+
+    # There are some GRCh38.p13 sequences that we have not yet imported into hg38 -- use -dropT.
+    sed -f hg38.hg19.chromAlias.sed GCF_000001405.39-GCF_000001405.25.gff \
+    | gff3ToPsl -dropT /hive/data/genomes/{hg19,hg38}/chrom.sizes stdin stdout \
+    | pslPosTarget stdin stdout \
+    | sort -k14,14 -k16n,16n > remap.hg38.hg19.psl
+
+    # Convert to chain for browser display.  Some of the remap chains have minScore < 1000 and
+    # by default would be dropped by chainScore... use -minScore=0 to prevent that.
+    time pslToChain remap.hg38.hg19.psl stdout \
+    | chainScore -minScore=0 stdin /hive/data/genomes/{hg38/hg38.2bit,hg19/hg19.2bit} \
+        remap.hg38.hg19.chain
+#real    9m31.900s
+#user    9m1.624s
+#sys     0m20.863s
+    hgLoadChain hg38 -tIndex chainHg19ReMap remap.hg38.hg19.chain
+#Loading 5315 chains into hg38.chainHg19ReMap
+    time axtChain -psl -linearGap=medium -verbose=0 remap.hg38.hg19.psl \
+      /hive/data/genomes/hg38/hg38.2bit /hive/data/genomes/hg19/hg19.2bit \
+      remap.axtChain.hg38.hg19.chain
+#real    2m26.333s
+#user    2m4.237s
+#sys     0m22.071s
+    hgLoadChain hg38 -tIndex chainHg19ReMapAxtChain remap.axtChain.hg38.hg19.chain
+#Loading 2115 chains into hg38.chainHg19ReMapAxtChain
+
+###################################################
+#Agilent SNP/CNV arrays 3/11/21
+#Downloaded by web browser
+cd /hive/data/genomes/hg38/bed/agilentProbes/genetiSureCyto
+fetchChromSizes hg38 > hg38.chrom.sizes
+bedSort hg38.GenetiSure_Cyto_CGH+SNP_Microarray_4x180K_085591_D_BED_20200302.bed hg38.GenetiSure_Cyto_CGH+SNP_Microarray_4x180K_085591_D_BED_20200302.bed
+uniq hg38.GenetiSure_Cyto_CGH+SNP_Microarray_4x180K_085591_D_BED_20200302.bed >hg38.GenetiSure_Cyto_CGH+SNP_Microarray_4x180K_085591_D_BED_20200302.uniq.bed
+bedToBigBed hg38.GenetiSure_Cyto_CGH+SNP_Microarray_4x180K_085591_D_BED_20200302.uniq.bed hg38.chrom.sizes hg38.GenetiSure_Cyto_CGH+SNP_Microarray_4x180K_085591_D_BED_20200302.bb
+bedSort hg38.GenetiSure_Cyto_CGH_Microarray_8x60K_085590_D_BED_20200302.bed hg38.GenetiSure_Cyto_CGH_Microarray_8x60K_085590_D_BED_20200302.bed
+uniq hg38.GenetiSure_Cyto_CGH_Microarray_8x60K_085590_D_BED_20200302.bed > hg38.GenetiSure_Cyto_CGH_Microarray_8x60K_085590_D_BED_20200302.uniq.bed
+bedToBigBed hg38.GenetiSure_Cyto_CGH_Microarray_8x60K_085590_D_BED_20200302.uniq.bed hg38.chrom.sizes hg38.GenetiSure_Cyto_CGH_Microarray_8x60K_085590_D_BED_20200302.bb
+bedSort hg38.GenetiSure_Cyto_CGH_Microarray_4x180K_085589_D_BED_20200302.bed hg38.GenetiSure_Cyto_CGH_Microarray_4x180K_085589_D_BED_20200302.bed
+uniq hg38.GenetiSure_Cyto_CGH_Microarray_4x180K_085589_D_BED_20200302.bed > hg38.GenetiSure_Cyto_CGH_Microarray_4x180K_085589_D_BED_20200302.uniq.bed
+bedToBigBed hg38.GenetiSure_Cyto_CGH_Microarray_4x180K_085589_D_BED_20200302.uniq.bed hg38.chrom.sizes hg38.GenetiSure_Cyto_CGH_Microarray_4x180K_085589_D_BED_20200302.bb
+mkdir -p /gbdb/hg38/snpCnvArrays/agilent
+cd /gbdb/hg38/snpCnvArrays/agilent
+ln -s /hive/data/genomes/hg38/bed/agilentProbes/genetiSureCyto/hg38.GenetiSure_Cyto_CGH+SNP_Microarray_4x180K_085591_D_BED_20200302.bb
+ln -s /hive/data/genomes/hg38/bed/agilentProbes/genetiSureCyto/hg38.GenetiSure_Cyto_CGH_Microarray_8x60K_085590_D_BED_20200302.bb
+ln -s /hive/data/genomes/hg38/bed/agilentProbes/genetiSureCyto/hg38.GenetiSure_Cyto_CGH_Microarray_4x180K_085589_D_BED_20200302.bb
+vi ~/kent/src/hg/makeDb/trackDb/human/hg38/trackDb.ra
+
+#########################################################################
+# DECIPHER CNV & SNV - initial build (DONE 2022-04-08 Jonathan)
+# RM 29130
+
+cd /hive/data/genomes/outside/otto/decipher
+mkdir 2022-04-05
+cd 2022-04-05
+
+# manually fetch decipher-variants-grch38-2022-04-03.bed from DECIPHER
+../buildDecipher decipher-variants-grch38-2022-04-03.bed
+
+for i in `cat ../decipher.tables`
+        do
+        n=$i"New"
+        o=$i"Old"
+        hgsqlSwapTables hg38 $n $i $o -dropTable3
+        done
+
+mkdir -p /gbdb/hg38/decipher
+cd /gbdb/hg38/decipher
+ln -s /hive/data/outside/otto/decipher/2022-04-05/decipherCnv.bb .
+
+#########################################################################
+# COSMIC (DONE 07-11-2023)
+# RM 29625
+
+#Fetch file
+cd /hive/data/outside/cosmic/hg38/v98/
+wget 'https://cog.sanger.ac.uk/cosmic/GRCh38/ucsc/v98/ucsc_export.bed.gz?AWSAccessKeyId=KRV7P7QR9DL41J9EWGA2&Expires=1686847188&Signature=4YV3CuFKudxIhqVdWAaCe0CMAiY%3D' -O ucsc_export.bed.gz
+wget 'https://cog.sanger.ac.uk/cosmic/GRCh38/ucsc/v98/ucsc_export.bed.gz?AWSAccessKeyId=KRV7P7QR9DL41J9EWGA2&Expires=1687525456&Signature=jBdJOlOOaqmMWNnOtJUyNRptVj4%3D'
+mv ucsc_export.bed.gz\?AWSAccessKeyId\=KRV7P7QR9DL41J9EWGA2\&Expires\=1687525456\&Signature\=jBdJOlOOaqmMWNnOtJUyNRptVj4\= ucsc_export.bed.gz
+
+#Reorder the columns to conform to bed 6+3
+zcat ucsc_export.bed.gz | awk -F'\t' -v OFS="\t" '{ print $1, $2, $3, $7, 0, $6, $4, $5, $8 }' | sort -k1,1 -k2,2n > cosmic.bed
+
+#Tiny bit of python to identify the broken lines in the file where chromStart > chromEnd
+
+#for line in myFile:
+#    newLine = line.split("\t")
+#    if int(newLine[1]) > int(newLine[2]):
+#        print(line)
+#        n+=1
+#print(n)
+
+#remove those broken records from the file
+cat cosmic.bed | grep -vf badRecords.bed > cosmic.fixed.bed
+
+#subtract 1 from chromStart to conform to bed format for all the items that have the same start and end position
+
+cat cosmic.fixed.bed | awk 'BEGIN {OFS="\t"} {
+if ($2 == $3)
+        print $1,$2-1,$3,$4,$5,$6,$7,$8,$9;
+else
+        print $0;
+}' > cosmic.fixedPos.bed
+
+bedToBigBed -type=bed6+3 -as=/hive/data/outside/cosmic/hg38/v98/cosmic.as /hive/data/outside/cosmic/hg38/v98/cosmic.fixedPos.bed /hive/data/genomes/hg38/chrom.sizes /hive/data/outside/cosmic/hg38/v98/cosmic.bb -tab
+
+#make symlink
+ln -s /hive/data/outside/cosmic/hg38/v98/cosmic.bb /gbdb/hg38/cosmic/cosmic.bb
+
+#This data has since been updated, see new makedoc doc/hg38/cosmicV98.txt and rm #32430
+
+##############################################################################
+# LIFTOVER TO GCA_018873775.2_hg01243.v3.0 (DONE - 2023-08-13 - Hiram)
+    ssh hgwdev
+    # going to need an ooc for hg38.p14.2bit
+    cd /hive/data/genomes/hg38
+    time blat hg38.p14.2bit /dev/null /dev/null -tileSize=11 \
+      -makeOoc=hg38.p14.ooc -repMatch=1024
+    # Wrote 36808 overused 11-mers to hg38.p14.ooc
+    # real    0m50.753s
+
+    # and ooc for this GenArk hub
+    cd /hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0
+  time blat GCA_018873775.2_hg01243.v3.0.2bit /dev/null /dev/null -tileSize=11 \
+      -makeOoc=GCA_018873775.2_hg01243.v3.0.ooc -repMatch=1024
+# Wrote 39087 overused 11-mers to GCA_018873775.2_hg01243.v3.0.ooc
+# real    0m49.426s
+
+  mkdir /hive/data/genomes/hg38/bed/blat.GCA_018873775.2_hg01243.v3.0.2023-08-13
+    cd /hive/data/genomes/hg38/bed/blat.GCA_018873775.2_hg01243.v3.0.2023-08-13
+
+    doSameSpeciesLiftOver.pl -verbose=2 -buildDir=`pwd` \
+        -debug -bigClusterHub=hgwdev -dbHost=hgwdev -workhorse=hgwdev \
+        -target2Bit=/hive/data/genomes/hg38/hg38.2bit \
+        -targetSizes=/hive/data/genomes/hg38/chrom.sizes \
+ -query2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.2bit \
+ -querySizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.chrom.sizes \
+        -ooc=/hive/data/genomes/hg38/hg38.p14.ooc \
+         hg38 GCA_018873775.2
+
+    # trying -ram=6g to get full use of hgwdev kluster nodes
+    time (~/kent/src/hg/utils/automation/doSameSpeciesLiftOver.pl \
+        -verbose=2 -buildDir=`pwd` \
+        -bigClusterHub=hgwdev -dbHost=hgwdev -workhorse=hgwdev \
+        -target2Bit=/hive/data/genomes/hg38/hg38.2bit \
+        -targetSizes=/hive/data/genomes/hg38/chrom.sizes \
+ -query2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.2bit \
+ -querySizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.chrom.sizes \
+        -ooc=/hive/data/genomes/hg38/hg38.p14.ooc \
+         hg38 GCA_018873775.2) > doLiftOverToGCA_018873775.2.log 2>&1
+    # real    12654m58.134s
+
+    # broken after the alignment was done, with the parasol endless loop
+    # error message in the log file:
+    #  select failure in rudp: Invalid argument
+    # killed that, cleaned the 4Tb log file, and gave up on this alignment
+    # since the lastz/chain/net is much better
+
+    # see if the liftOver menus function in the browser from hg38
+    #    to GCA_018873775.2
+
+##############################################################################
+# LIFTOVER GCA_018873775.2_hg01243.v3.0 to hg38 (DONE - 2023-08-13 - Hiram)
+    ssh hgwdev
+
+    mkdir /hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/trackData/blat.hg38.2023-08-13
+    cd /hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/trackData/blat.hg38.2023-08-13
+
+    doSameSpeciesLiftOver.pl -verbose=2 -buildDir=`pwd` \
+        -debug -bigClusterHub=hgwdev -dbHost=hgwdev -workhorse=hgwdev \
+ -target2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.2bit \
+ -targetSizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.chrom.sizes \
+        -query2Bit=/hive/data/genomes/hg38/hg38.2bit \
+        -querySizes=/hive/data/genomes/hg38/chrom.sizes \
+        -ooc=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.ooc \
+         GCA_018873775.2 hg38
+
+    # trying -ram=6g to get full use of hgwdev kluster nodes
+    time (~/kent/src/hg/utils/automation/doSameSpeciesLiftOver.pl \
+        -verbose=2 -buildDir=`pwd` \
+        -bigClusterHub=hgwdev -dbHost=hgwdev -workhorse=hgwdev \
+ -target2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.2bit \
+ -targetSizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.chrom.sizes \
+        -query2Bit=/hive/data/genomes/hg38/hg38.2bit \
+        -querySizes=/hive/data/genomes/hg38/chrom.sizes \
+        -ooc=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.ooc \
+         GCA_018873775.2 hg38) > doLiftOverToHg38.log 2>&1
+
+    # broken after the alignment was done, with the parasol endless loop
+    # error message in the log file:
+    #  select failure in rudp: Invalid argument
+    # killed that, cleaned the 4Tb log file, and gave up on this alignment
+    # since the lastz/chain/net is much better
+    # real    193m24.137s
+
+    # see if the liftOver menus function in the browser from GCA_018873775.2
+    #    to hg38
+
+##############################################################################
+# LIFTOVER TO GCA_018503275.1_NA19240.pri.mat.f1_v2 (TBD - 2023-08-14 - Hiram)
+    ssh hgwdev
+
+    # ooc for this GenArk hub
+    cd /hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2
+  time blat GCA_018503275.1_NA19240.pri.mat.f1_v2.2bit /dev/null /dev/null \
+      -tileSize=11 -repMatch=1024 \
+      -makeOoc=GCA_018503275.1_NA19240.pri.mat.f1_v2.11.ooc
+  # Wrote 35866 overused 11-mers to GCA_018503275.1_NA19240.pri.mat.f1_v2.11.ooc
+    # real    0m32.298s
+
+  mkdir /hive/data/genomes/hg38/bed/blat.GCA_018503275.1_NA19240.pri.mat.f1_v2.2023-08-14
+  cd /hive/data/genomes/hg38/bed/blat.GCA_018503275.1_NA19240.pri.mat.f1_v2.2023-08-14
+
+    ~/kent/src/hg/utils/automation/doSameSpeciesLiftOver.pl -verbose=2 \
+        -buildDir=`pwd` -ram=4g -chainRam=16g \
+        -debug -bigClusterHub=hgwdev -dbHost=hgwdev -workhorse=hgwdev \
+        -target2Bit=/hive/data/genomes/hg38/hg38.2bit \
+        -targetSizes=/hive/data/genomes/hg38/chrom.sizes \
+ -query2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.2bit \
+ -querySizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.chrom.sizes \
+        -ooc=/hive/data/genomes/hg38/hg38.p14.ooc \
+         hg38 GCA_018503275.1
+
+    # trying -ram=4g to get full use of hgwdev kluster nodes
+    time (~/kent/src/hg/utils/automation/doSameSpeciesLiftOver.pl \
+        -verbose=2 -buildDir=`pwd` -ram=4g -chainRam=16g \
+        -bigClusterHub=hgwdev -dbHost=hgwdev -workhorse=hgwdev \
+        -target2Bit=/hive/data/genomes/hg38/hg38.2bit \
+        -targetSizes=/hive/data/genomes/hg38/chrom.sizes \
+ -query2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.2bit \
+ -querySizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.chrom.sizes \
+        -ooc=/hive/data/genomes/hg38/hg38.p14.ooc \
+         hg38 GCA_018503275.1) > doLiftOverToGCA_018503275.1.log 2>&1
+    # real    11370m18.026s
+
+    # broken after the alignment was done, with the parasol endless loop
+    # error message in the log file:
+    #  select failure in rudp: Invalid argument
+    # killed that, cleaned the 4Tb log file, and gave up on this alignment
+    # since the lastz/chain/net is much better
+    # -rw-rw-r-- 1 4363949695640 Aug 22 09:16 doLiftOverToGCA_018503275.1.log
+
+    # see if the liftOver menus function in the browser from hg38
+    #    to GCA_018503275.1
+
+##############################################################################
+# LIFTOVER GCA_018503275.1_NA19240.pri.mat.f1_v2 to hg38 (DONE - 2023-08-14 - Hiram)
+    ssh hgwdev
+
+    mkdir /hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/trackData/blat.hg38.2023-08-14
+    cd /hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/trackData/blat.hg38.2023-08-14
+
+    ~/kent/src/hg/utils/automation/doSameSpeciesLiftOver.pl -verbose=2 \
+        -buildDir=`pwd` -ram=4g -chainRam=16g \
+        -debug -bigClusterHub=hgwdev -dbHost=hgwdev -workhorse=hgwdev \
+ -target2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.2bit \
+ -targetSizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.chrom.sizes \
+        -query2Bit=/hive/data/genomes/hg38/hg38.2bit \
+        -querySizes=/hive/data/genomes/hg38/chrom.sizes \
+        -ooc=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.11.ooc \
+         GCA_018503275.1 hg38
+
+    time (~/kent/src/hg/utils/automation/doSameSpeciesLiftOver.pl -verbose=2 \
+        -buildDir=`pwd` -ram=4g -chainRam=16g \
+        -bigClusterHub=hgwdev -dbHost=hgwdev -workhorse=hgwdev \
+ -target2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.2bit \
+ -targetSizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.chrom.sizes \
+        -query2Bit=/hive/data/genomes/hg38/hg38.2bit \
+        -querySizes=/hive/data/genomes/hg38/chrom.sizes \
+        -ooc=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.11.ooc \
+         GCA_018503275.1 hg38) > liftOverToHg38.log 2>&1
+    # real    5082m17.500s
+
+    # this is interesting, this alignment completed and actually has good
+    # coverage:
+    cat fb.GCA_018503275.1.chain.Hg38Link.txt
+    # 2928654519 bases of 3032066086 (96.589%) in intersection
+
+    # see if the liftOver menus function in the browser from GCA_018503275.1
+    #    to hg38
+
+##############################################################################
+## update grp table add new row for HPRC (DONE - 2023-08-29 - Hiram)
+## existing structure:
+
+    hgsql -e 'desc grp;' hg38
+
++-----------------+-----------+------+-----+---------+-------+
+| Field           | Type      | Null | Key | Default | Extra |
++-----------------+-----------+------+-----+---------+-------+
+| name            | char(255) | NO   | PRI |         |       |
+| label           | char(255) | NO   |     |         |       |
+| priority        | float     | NO   |     | 0       |       |
+| defaultIsClosed | int(11)   | YES  |     | NULL    |       |
++-----------------+-----------+------+-----+---------+-------+
+
+    #  add one new row:
+    hgsql hg38 \
+      -e "INSERT INTO grp VALUES ('hprc', 'Human Pangenome - HPRC', 3.6, 0);"
+
+    # resulting table:
+
+    hgsql -e 'select * from grp order by priority;' hg38
++------------+------------------------------------+----------+-----------------+
+| name       | label                              | priority | defaultIsClosed |
++------------+------------------------------------+----------+-----------------+
+| user       | Custom Tracks                      |        1 |               0 |
+| remc       | Reference Epigenome Mapping Center |      1.2 |               1 |
+| map        | Mapping and Sequencing             |        2 |               0 |
+| genes      | Genes and Gene Predictions         |        3 |               0 |
+| phenDis    | Phenotype and Literature           |      3.4 |               0 |
+| pub        | Literature                         |      3.5 |               0 |
+| hprc       | Human Pangenome - HPRC             |      3.6 |               0 |
+| covid      | COVID-19                           |      3.6 |               0 |
+| singleCell | Single Cell RNA-seq                |      3.7 |               0 |
+| rna        | mRNA and EST                       |        4 |               0 |
+| expression | Expression                         |      4.5 |               0 |
+| regulation | Regulation                         |        5 |               0 |
+| compGeno   | Comparative Genomics               |        6 |               0 |
+| varRep     | Variation                          |        7 |               0 |
+| rep        | Repeats                            |        8 |               0 |
+| x          | Experimental                       |       10 |               1 |
++------------+------------------------------------+----------+-----------------+
+
+##############################################################################
+# Affy CytoScan HD track, refs #32856  (2024-01-23 Gerardo)
+cd /hive/data/genomes/hg38/bed/
+mkdir genotypeArrays; cd genotypeArrays
+#The user sent Gerardo a direct email with a shared folder link. Gerardo downloaded the bed files and made them available on dev.
+#The user provided two bed files (https://hgwdev-gperez2.gi.ucsc.edu/~gperez2/mlq/mlq_32791/). Gerardo used the version 2 bed file for the track.
+wget https://hgwdev-gperez2.gi.ucsc.edu/~gperez2/mlq/mlq_32791/CytoScanHD_Accel_Array.na36.bed.zip
+unzip CytoScanHD_Accel_Array.na36.bed.zip
+# Removed header and sorted the file
+grep -v 'track' CytoScanHD_Accel_Array.na36.bed | bedSort stdin stdout > affyCytoScanHD.bed
+bedToBigBed -tab -type=bed12 affyCytoScanHD.bed https://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.chrom.sizes affyCytoScanHD.bb 
+cd /gbdb/hg38
+mkdir genotypeArrays; cd genotypeArrays
+# Making symlink for big file and raw bed file
+ln -s /hive/data/genomes/hg38/bed/genotypeArrays/affyCytoScanHD.bb
+ln -s /hive/data/genomes/hg38/bed/genotypeArrays/CytoScanHD_Accel_Array.na36.bed.zip
+cd ~/kent/src/hg/makeDb/trackDb/human/hg38
+vi trackDb.ra
+
+##############################################################################
+# LASTZ Human Hg38 vs. California sea lion GCF_009762305.2
+#    (DONE - 2024-03-06 - jairo)
+
+    mkdir /hive/data/genomes/hg38/bed/lastzGCF_009762305.2.2024-03-06
+    cd /hive/data/genomes/hg38/bed/lastzGCF_009762305.2.2024-03-06
+
+    printf '# California sea lion GCF_009762305.2 vs. Human Hg38
+BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz
+
+# TARGET: Human  hg38
+SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit
+SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes
+SEQ1_CHUNK=20000000
+SEQ1_LAP=10000
+SEQ1_LIMIT=40
+
+# QUERY: California sea lion 2020-07-14 GCF_009762305.2_mZalCal1.pri.v2
+SEQ2_DIR=/hive/data/genomes/asmHubs/GCF/009/762/305/GCF_009762305.2/GCF_009762305.2.2bit
+SEQ2_LEN=/hive/data/genomes/asmHubs/GCF/009/762/305/GCF_009762305.2/GCF_009762305.2.chrom.sizes.txt
+SEQ2_CHUNK=20000000
+SEQ2_LAP=0
+SEQ2_LIMIT=100
+
+BASE=/hive/data/genomes/hg38/bed/lastzGCF_009762305.2.2024-03-06
+TMPDIR=/dev/shm
+
+' > DEF
+
+    time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \
+       -qAsmId GCF_009762305.2_mZalCal1.pri.v2 -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+        -chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
+    grep -w real do.log | sed -e 's/^/    # /;'
+    # real      1018m28.119s
+
+    sed -e 's/^/    # /;' fb.hg38.chainGCF_009762305.2Link.txt
+    # 1633315994 bases of 3299210039 (49.506%) in intersection
+    sed -e 's/^/    # /;' fb.hg38.chainSynGCF_009762305.2Link.txt
+    # 1564193911 bases of 3299210039 (47.411%) in intersection
+
+    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
+       \
+      -query2Bit="/hive/data/genomes/asmHubs/GCF/009/762/305/GCF_009762305.2/GCF_009762305.2.2bit" \
+-querySizes="/hive/data/genomes/asmHubs/GCF/009/762/305/GCF_009762305.2/GCF_009762305.2.chrom.sizes.txt" \
+        hg38 GCF_009762305.2) > rbest.log 2>&1
+
+    grep -w real rbest.log | sed -e 's/^/    # /;'
+    # real      303m36.739s
+
+    sed -e 's/^/    # /;' fb.hg38.chainRBest.GCF_009762305.2.txt
+    # 1461974620 bases of 3299210039 (44.313%) in intersection
+
+    ### and for the swap
+
+    cd /hive/data/genomes/asmHubs/allBuild/GCF/009/762/305/GCF_009762305.2_mZalCal1.pri.v2/trackData/blastz.hg38.swap
+
+   time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -swap -verbose=2 \
+   -qAsmId GCF_009762305.2_mZalCal1.pri.v2 /hive/data/genomes/hg38/bed/lastzGCF_009762305.2.2024-03-06/DEF -swapDir=`pwd` \
+  -syntenicNet -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+    -chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
+
+    grep -w real swap.log | sed -e 's/^/    # /;'
+    # real      103m25.220s
+
+    sed -e 's/^/    # /;' fb.GCF_009762305.2.chainHg38Link.txt
+    # 1493183463 bases of 2409685272 (61.966%) in intersection
+    sed -e 's/^/    # /;' fb.GCF_009762305.2.chainSynHg38Link.txt
+    # 1457122207 bases of 2409685272 (60.469%) in intersection
+    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
+    \
+   -target2bit="/hive/data/genomes/asmHubs/GCF/009/762/305/GCF_009762305.2/GCF_009762305.2.2bit" \
+-targetSizes="/hive/data/genomes/asmHubs/GCF/009/762/305/GCF_009762305.2/GCF_009762305.2.chrom.sizes.txt" \
+   GCF_009762305.2 hg38) > rbest.log 2>&1
+
+    grep -w real rbest.log | sed -e 's/^/    # /;'
+    # real      286m31.189s
+
+    sed -e 's/^/    # /;' fb.GCF_009762305.2.chainRBest.Hg38.txt
+    # 1461710350 bases of 2409685272 (60.660%) in intersection
+
+##############################################################################
+# hg38.chromAlias.bb was incorrectly built without the -extraIndex indexes, so
+# indexed name lookups on it did not work; rebuilt with indexes 2024-04-08 markd
+
+cd /hive/data/genomes/hg38/goldenPath/bigZips/initial
+mv hg38.chromAlias.bb  hg38.chromAlias.noindexes.bb
+bigBedInfo -asOut hg38.chromAlias.noindexes.bb >hg38.chromAlias.as
+bigBedToBed hg38.chromAlias.noindexes.bb  hg38.chromAlias.bed
+bedToBigBed -tab -type=bed3+ -as=hg38.chromAlias.as hg38.chromAlias.bed -sizesIs2Bit  -extraIndex=ucsc,assembly,ensembl,genbank,refseq hg38.2bit hg38.chromAlias.bb
+
+##############################################################################
+
+# ENCODE 4 TF rPeak Clusters - RM #34930 - Lou 12/19/24
+
+mkdir /hive/data/genomes/hg38/bed/ENCODEv4TFrPeaks
+cd /hive/data/genomes/hg38/bed/ENCODEv4TFrPeaks
+hubClone -download https://users.wenglab.org/gaomingshi/TF.rpeak.test.txt
+ln -s /hive/data/genomes/hg38/bed/ENCODEv4TFrPeaks/no_trim.TF_name.rPeaks.bb /gbdb/hg38/bbi/ENCODE4/TFrPeakClusters.bb
+ln -s /hive/data/genomes/hg38/bed/ENCODEv4TFrPeaks/no_trim.TF_name.decorator.bb /gbdb/hg38/bbi/ENCODE4/TFrPeakClustersDecorator.bb
+# Then just moved the files to the ENCODEv4TFrPeaks dir, moved/tweaked HTML and trackDb
 
-# alphaMissense ticket #32269 (Jeltje, Jan 2025)
-mkdir -p /hive/data/genomes/hg38/bed/alphaMissense/
-cd /hive/data/genomes/hg38/bed/alphaMissense
-wget https://storage.googleapis.com/dm_alphamissense/AlphaMissense_hg38.tsv.gz
-time python ~/kent/src/hg/makeDb/outside/alphaMissense/alphaMissenseToWig.py AlphaMissense_hg38.tsv.gz
-wigToBigWig a.wig ../../chrom.sizes a.bw &
-wigToBigWig c.wig ../../chrom.sizes c.bw &
-wigToBigWig g.wig ../../chrom.sizes g.bw &
-wigToBigWig t.wig ../../chrom.sizes t.bw &
-wait
-
-##Colors were added using the script
-#kent/src/hg/makeDb/scripts/wigColorByColors/makeWigColorByRevelCadd.py