ee98569fb749e16cf3e4601a7cef331432d062f8
jeltje.van.baren
  Tue Jan 21 10:25:19 2025 -0800
adding alphaMissense

diff --git src/hg/makeDb/doc/hg38/hg38.txt src/hg/makeDb/doc/hg38/hg38.txt
index 1f3dc1234db..20b6e2a715a 100644
--- src/hg/makeDb/doc/hg38/hg38.txt
+++ src/hg/makeDb/doc/hg38/hg38.txt
@@ -1,7390 +1,14 @@
-# for emacs: -*- mode: sh; -*-
-
-# This file describes how we made the browser database on
-# NCBI build 38 (December 2013 freeze) aka:
-#	GRCh38 - Genome Reference Consortium Human Reference 38
-#	Assembly Accession: GCA_000001405.2
-
-#############################################################################
-## Download sequence - DONE - 2013-12-24
-    mkdir /hive/data/genomes/hg38
-    mkdir /hive/data/genomes/hg38/genbank
-    cd /hive/data/genomes/hg38/genbank
-    time rsync -a -P rsync://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Homo_sapiens/GRCh38/ ./
-# sent 19643 bytes  received 4914689807 bytes  4490369.53 bytes/sec
-# total size is 4914019581  speedup is 1.00
-
-# real    18m14.497s
-
-#############################################################################
-## convert to UCSC names - DONE - 2013-12-24
-#  with this release, NCBI has adopted a naming convention that is similar
-#  to UCSC.  The delivered sequence with these names can be found in:
-#  /hive/data/genomes/hg38/genbank/seqs_for_alignment_pipelines/
-#
-#  The following scripts reproduce this naming scheme from the separate
-#  files in the release
-#
-    mkdir /hive/data/genomes/hg38/ucsc
-    cat << '_EOF_' > ucscCompositeAgp.pl
-#!/bin/env perl
-
-use strict;
-use warnings;
-
-my %accToChr;
-
-open (FH, "<../genbank/Primary_Assembly/assembled_chromosomes/chr2acc") or
-        die "can not read Primary_Assembly/assembled_chromosomes/chr2acc";
-while (my $line = <FH>) {
-    next if ($line =~ m/^#/);
-    chomp $line;
-    my ($chrN, $acc) = split('\s+', $line);
-    $accToChr{$acc} = $chrN;
-}
-close (FH);
-
-foreach my $acc (keys %accToChr) {
-    my $chrN =  $accToChr{$acc};
-    print "$acc $accToChr{$acc}\n";
-    open (FH, "zcat ../genbank/Primary_Assembly/assembled_chromosomes/AGP/chr${chrN}.comp.agp.gz|") or die "can not read chr${chrN}.comp.agp.gz";
-    open (UC, ">chr${chrN}.agp") or die "can not write to chr${chrN}.agp";
-    while (my $line = <FH>) {
-        if ($line =~ m/^#/) {
-            print UC $line;
-        } else {
-            $line =~ s/^$acc/chr${chrN}/;
-            print UC $line;
-        }
-    }
-    close (FH);
-    close (UC);
-    open (FH, "zcat ../genbank/Primary_Assembly/assembled_chromosomes/FASTA/chr${chrN}.fa.gz|") or die "can not read chr${chrN}.fa.gz";
-    open (UC, ">chr${chrN}.fa") or die "can not write to chr${chrN}.fa";
-    while (my $line = <FH>) {
-        if ($line =~ m/^>/) {
-            printf UC ">chr${chrN}\n";
-        } else {
-            print UC $line;
-        }
-    }
-    close (FH);
-    close (UC);
-}
-'_EOF_'
-    # << happy emacs
-    chmod +x ucscCompositeAgp.pl
-
-    cat << '_EOF_' > unlocalized.pl
-#!/bin/env perl
-
-use strict;
-use warnings;
-
-my %accToChr;
-my %chrNames;
-
-open (FH, "<../genbank/Primary_Assembly/unlocalized_scaffolds/unlocalized.chr2scaf") or
-        die "can not read Primary_Assembly/unlocalized_scaffolds/unlocalized.chr2scaf";
-while (my $line = <FH>) {
-    next if ($line =~ m/^#/);
-    chomp $line;
-    my ($chrN, $acc) = split('\s+', $line);
-    $acc =~ s/\./v/;
-    $accToChr{$acc} = $chrN;
-    $chrNames{$chrN} += 1;
-}
-close (FH);
-
-foreach my $chrN (keys %chrNames) {
-    my $agpFile =  "../genbank/Primary_Assembly/unlocalized_scaffolds/AGP/chr$chrN.unlocalized.scaf.agp.gz";
-    my $fastaFile =  "../genbank/Primary_Assembly/unlocalized_scaffolds/FASTA/chr$chrN.unlocalized.scaf.fa.gz";
-    open (FH, "zcat $agpFile|") or die "can not read $agpFile";
-    open (UC, ">chr${chrN}_random.agp") or die "can not write to chr${chrN}_random.agp";
-    while (my $line = <FH>) {
-        if ($line =~ m/^#/) {
-            print UC $line;
-        } else {
-            chomp $line;
-            my (@a) = split('\t', $line);
-            my $acc = $a[0];
-            $acc =~ s/\./v/;
-            die "ERROR: chrN $chrN not correct for $acc"
-                if ($accToChr{$acc} ne $chrN);
-            my $ucscName = "chr${chrN}_${acc}_random";
-            printf UC "%s", $ucscName;
-            for (my $i = 1; $i < scalar(@a); ++$i) {
-                printf UC "\t%s", $a[$i];
-            }
-            printf UC "\n";
-        }
-    }
-    close (FH);
-    close (UC);
-    printf "chr%s\n", $chrN;
-    open (FH, "zcat $fastaFile|") or die "can not read $fastaFile";
-    open (UC, ">chr${chrN}_random.fa") or die "can not write to chr${chrN}_random.fa";
-    while (my $line = <FH>) {
-        if ($line =~ m/^>/) {
-            chomp $line;
-            my $acc = $line;
-            $acc =~ s/.*gb\|//;
-            $acc =~ s/. Homo.*//;
-            $acc =~ s/\./v/;
-            die "ERROR: chrN $chrN not correct for $acc"
-                if ($accToChr{$acc} ne $chrN);
-            my $ucscName = "chr${chrN}_${acc}_random";
-            printf UC ">$ucscName\n";
-        } else {
-            print UC $line;
-        }
-    }
-    close (FH);
-    close (UC);
-}
-'_EOF_'
-    # << happy emacs
-    chmod +x unlocalized.pl
-
-    cat << '_EOF_' > unplaced.pl
-#!/bin/env perl
-
-use strict;
-use warnings;
-
-my $agpFile =  "../genbank/Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz";
-my $fastaFile =  "../genbank/Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz";
-open (FH, "zcat $agpFile|") or die "can not read $agpFile";
-open (UC, ">chrUn.agp") or die "can not write to chrUn.agp";
-while (my $line = <FH>) {
-    if ($line =~ m/^#/) {
-        print UC $line;
-    } else {
-        $line =~ s/\./v/;
-        printf UC "chrUn_%s", $line;
-    }
-}
-close (FH);
-close (UC);
-
-open (FH, "zcat $fastaFile|") or die "can not read $fastaFile";
-open (UC, ">chrUn.fa") or die "can not write to chrUn.fa";
-while (my $line = <FH>) {
-    if ($line =~ m/^>/) {
-        chomp $line;
-        $line =~ s/.*gb\|//;
-        $line =~ s/. Homo.*//;
-        $line =~ s/\./v/;
-        printf UC ">chrUn_$line\n";
-    } else {
-        print UC $line;
-    }
-}
-close (FH);
-close (UC);
-'_EOF_'
-    # << happy emacs
-    chmod +x unplaced.pl
-
-    cat << '_EOF_' > altSequence.pl
-#!/usr/bin/env perl
-
-use strict;
-use warnings;
-use File::Basename;
-
-open (AG, ">chrAlt.agp") or die "can not write to chrAlt.agp";
-open (FA, ">chrAlt.fa") or die "can not write to chrAlt.fa";
-open (FH, "find ../genbank/ALT* -type f | grep alt_scaffold_placement.txt|") or die "can not find alt_scaffold_placement.txt files";
-while (my $file = <FH>) {
-  chomp $file;
-  my $dirName = dirname($file);
-  my $agpFile = "$dirName/AGP/alt.scaf.agp.gz";
-  my $fastaFile = "$dirName/FASTA/alt.scaf.fa.gz";
-  # key is genbank acc name, value is UCSC chr name
-  my %nameDelta;
-#  printf STDERR "# %s\n", $file;
-  open (AL, "<$file") or die "can not read $file";
-  while (my $line = <AL>) {
-     next if ($line =~ m/^#/);
-     chomp $line;
-     my ($alt_asm_name, $prim_asm_name, $alt_scaf_name, $alt_scaf_acc,
-          $parent_type, $parent_name, $parent_acc, $region_name, $ori,
-           $alt_scaf_start, $alt_scaf_stop, $parent_start, $parent_stop,
-            $alt_start_tail, $alt_stop_tail) = split('\t', $line);
-     my $ucscAcc = $alt_scaf_acc;
-     $ucscAcc =~ s/\./v/;
-     my $ucscName = sprintf("chr%s_%s_alt", $parent_name, $ucscAcc);
-     printf "%s %s\n", $alt_scaf_acc, $ucscName;
-     if (exists ($nameDelta{$alt_scaf_acc})) {
-         die "duplicate name incorrect ? $alt_scaf_acc $nameDelta{$alt_scaf_acc} ne $ucscName" if ($nameDelta{$alt_scaf_acc} ne $ucscName);
-     } else {
-         $nameDelta{$alt_scaf_acc} = $ucscName;
-     }
-  }
-  close (AL);
-  open (AL, "zcat $agpFile|") or die "can not read $agpFile";
-  while (my $line = <AL>) {
-     if ($line =~ m/^#/) {
-       print AG "$line";
-     } else {
-       my ($acc, $rest) = split('\t', $line, 2);
-       die "can not find ucsc name for $acc" if (!exists($nameDelta{$acc}));
-       printf AG "%s\t%s", $nameDelta{$acc}, $rest;
-     }
-  }
-  close (AL);
-  open (AL, "zcat $fastaFile|") or die "can not read $fastaFile";
-  while (my $line = <AL>) {
-     chomp $line;
-     if ($line =~ m/^>/) {
-       $line =~ s/.*gb.//;
-       $line =~ s/. Homo.*//;
-       die "can not find ucsc name for $line" if (!exists($nameDelta{$line}));
-       printf FA ">%s\n", $nameDelta{$line};
-     } else {
-       printf FA "%s\n", $line;
-     }
-  }
-  close (AL);
-}
-close (FH);
-close (AG);
-close (FA);
-'_EOF_'
-    # << happy emacs
-    chmod +x altSequence.pl
-
-    ./ucscCompositeAgp.pl
-    ./unlocalized.pl
-    ./unplaced.pl
-    ./altSequence.pl
-
-    # temporarily verify the fasta and AGP are complete and compatible
-    faToTwoBit chr*.fa hg38.test.2bit
-    cat chr*.agp > hg38.agp
-    checkAgpAndFa hg38.agp hg38.test.2bit 2>&1 | tail -1
-# All AGP and FASTA entries agree - both files are valid
-
-    rm -f hg38.agp hg38.test.2bit
-
-    # comparing faCounts of this 2bit file and the sequences delivered
-    # in genbank/seqs_for_alignment_pipelines/
-    # result in the exact same sequence
-
-#############################################################################
-## initial db build - DONE - 2013-12-24 - Hiram
-
-    cd /hive/data/genomes/hg38
-    cat << '_EOF_' > hg38.config.ra
-# Config parameters for makeGenomeDb.pl:
-db hg38
-scientificName Homo sapiens
-commonName Human
-assemblyDate Dec. 2013
-assemblyLabel GRCh38 Genome Reference Consortium Human Reference 38 (GCA_000001405.2)
-assemblyShortLabel GRCh38
-orderKey 13
-mitoAcc none
-fastaFiles /hive/data/genomes/hg38/ucsc/chr*.fa
-agpFiles /hive/data/genomes/hg38/ucsc/chr*.agp
-# qualFiles /dev/null
-dbDbSpeciesDir human
-photoCreditURL http://www.cbse.ucsc.edu/
-photoCreditName Graphic courtesy of CBSE
-ncbiGenomeId 51
-ncbiAssemblyId 883148
-ncbiAssemblyName GRCh38
-ncbiBioProject 31257
-genBankAccessionID GCA_000001305.2
-taxId   9606
-'_EOF_'
-    # << happy emacs
-
-    # step wise to first verify AGP and Fasta files
-    time makeGenomeDb.pl -stop=agp hg38.config.ra > agp.log 2>&1
-
-    # looking good, continue:
-    time makeGenomeDb.pl -continue=db hg38.config.ra > db.log 2>&1
-
-    # add the files produced by the trackDb build to the source tree
-
-    # this path is fixed in the makeGenomeDb.pl for next time
-    # honor new convention for bbi location files:
-    cd /gbdb/hg38/bbi
-    mkdir gc5BaseBw
-    mv gc5Base.bw gc5BaseBw
-    cd gc5BaseBw
-    # before
-    hgsql -e 'select * from gc5BaseBw;' hg38
-# +---------------------------+
-# | fileName                  |
-# +---------------------------+
-# | /gbdb/hg38/bbi/gc5Base.bw |
-# +---------------------------+
-    # and fixed
-    hgBbiDbLink hg38 gc5BaseBw `pwd`/gc5Base.bw
-    hgsql -e 'select * from gc5BaseBw;' hg38
-# +-------------------------------------+
-# | fileName                            |
-# +-------------------------------------+
-# | /gbdb/hg38/bbi/gc5BaseBw/gc5Base.bw |
-# +-------------------------------------+
-
-#############################################################################
-## RepeatMasker with CrossMatch - DONE - 2013-12-24,27 - Hiram
-    mkdir /hive/data/genomes/hg38/bed/repeatMaskerCM
-    cd /hive/data/genomes/hg38/bed/repeatMaskerCM
-    # running this step wise so it can be loaded into its own table
-    time doRepeatMasker.pl -stop=mask -bigClusterHub=ku \
-       -workhorse=hgwdev -dbHost=hgwdev -buildDir=`pwd` hg38 > mask.log 2>&1
-    # real    3443m13.026s
-# RepeatMasker version June 20 2013 open-4.0.3
-# Search Engine: cross-match version 1.090518
-# RepeatMasker Database: 20130422
-
-    # take the install script from this -debug run and alter it to load
-    # the table into rmskCM
-    time doRepeatMasker.pl -continue=install -stop=install -debug \
-       -bigClusterHub=ku -workhorse=hgwdev -dbHost=hgwdev -buildDir=`pwd` hg38
-    cat fb.hg38.rmskCM.txt
-    # 1586326530 bases of 3209286105 (49.429%) in intersection
-
-    # profile of repeat elements:
-#  1852545 rmskClass/SINE.tab
-#  1570523 rmskClass/LINE.tab
-#   748597 rmskClass/LTR.tab
-#   703682 rmskClass/Simple_repeat.tab
-#   499108 rmskClass/DNA.tab
-#   102856 rmskClass/Low_complexity.tab
-#     7962 rmskClass/Satellite.tab
-#     5750 rmskClass/Retroposon.tab
-#     5667 rmskClass/LTR?.tab
-#     5622 rmskClass/Unknown.tab
-#     4516 rmskClass/snRNA.tab
-#     3294 rmskClass/DNA?.tab
-#     2026 rmskClass/tRNA.tab
-#     1840 rmskClass/rRNA.tab
-#     1784 rmskClass/RC.tab
-#     1672 rmskClass/srpRNA.tab
-#     1420 rmskClass/scRNA.tab
-#      704 rmskClass/RNA.tab
-#      411 rmskClass/RC?.tab
-#       38 rmskClass/SINE?.tab
-
-    # using this RM result with trfMask for the final masked sequence
-    cd /hive/data/genomes/hg38
-    twoBitMask hg38.rmskCM.2bit -add bed/simpleRepeat/trfMask.bed hg38.2bit
-    twoBitToFa hg38.2bit stdout | faSize stdin > faSize.hg38.2bit.txt
-# 3209286105 bases (159970322 N's 3049315783 real 1460684798 upper 1588630985 lower) in 455 sequences in 1 files
-# %49.50 masked total, %52.10 masked real
-
-    featureBits -countGaps hg38 rmskCM '!rmskHmmer' -bed=crossMatchUnique.bed
-    # 24868153 bases of 3209286105 (0.775%) in intersection
-    hgLoadBed hg38 crossMatchUnique crossMatchUnique.bed
-    # Read 2352219 elements of size 4 from crossMatchUnique.bed
-
-#############################################################################
-## repeating RepeatMasker Blastn run (DONE - 2014-01-07 - Hiram)
-    mkdir /hive/data/genomes/hg38/bed/rmskBlastn
-    cd /hive/data/genomes/hg38/bed/rmskBlastn
-
-    time $HOME/kent/src/hg/utils/automation/doRepeatMasker.pl \
-      -useRMBlastn -bigClusterHub=ku -workhorse=hgwdev -dbHost=hgwdev \
-        -stop=mask -buildDir=`pwd` hg38 > mask.log
-    # real    203m33.670s
-
-# 3209286105 bases (159970322 N's 3049315783 real 1491207906 upper 1558107877 lower) in 455 sequences in 1 files
-# %48.55 masked total, %51.10 masked real
-
-    # install step with debug so the script can be altered to load into
-    # a specific rmskBlastn table:
-
-    $HOME/kent/src/hg/utils/automation/doRepeatMasker.pl \
-      -useRMBlastn -bigClusterHub=ku -workhorse=hgwdev -dbHost=hgwdev \
-        -continue=install -debug -buildDir=`pwd` hg38
-
-#############################################################################
-## repeating RepeatMasker cross-match run (DONE - 2014-01-07 - Hiram)
-    mkdir /hive/data/genomes/hg38/bed/rmskCM
-    cd /hive/data/genomes/hg38/bed/rmskCM
-
-    # missed recording stderr ....  forgot the 2>&1
-    time $HOME/kent/src/hg/utils/automation/doRepeatMasker.pl \
-      -bigClusterHub=ku -workhorse=hgwdev -dbHost=hgwdev \
-        -stop=mask -buildDir=`pwd` hg38 > mask.log
-    # real    1897m33.517s
-    # running from Tue Jan  7 16:10:33 PST 2014 thru 08 Jan 23:48
-#  *** All done!  (through the 'mask' step) - Elapsed time: 1897m34s
-#  *** Steps were performed in /hive/data/genomes/hg38/bed/rmskCM
-    # running install manually to allow edit of the script to load
-    # a specific rmskCm table
-    time $HOME/kent/src/hg/utils/automation/doRepeatMasker.pl \
-      -bigClusterHub=ku -workhorse=hgwdev -dbHost=hgwdev \
-        -continue=install -stop=install -buildDir=`pwd` hg38 -debug
-
-#############################################################################
-## RepeatMasker with RM Blastn - DONE - 2013-12-24,25 - Hiram
-    mkdir /hive/data/genomes/hg38/bed/repeatMaskerBlastn
-    cd /hive/data/genomes/hg38/bed/repeatMaskerBlastn
-    # running this step wise so it can be loaded into its own table
-    time doRepeatMasker.pl -stop=mask -useRMBlastn -bigClusterHub=ku \
-       -workhorse=hgwdev -dbHost=hgwdev -buildDir=`pwd` hg38 > mask.log 2>&1
-    # real    354m55.842s
-
-    # take the install script from this -debug run and alter it to load
-    # the table into rmskBlastn
-    doRepeatMasker.pl -useRMBlastn -bigClusterHub=ku  -continue=install \
-     -stop=install -debug -workhorse=hgwdev -dbHost=hgwdev -buildDir=`pwd` hg38
-    # 1560264046 bases of 3209286105 (48.617%) in intersection
-    # profile of repeat elements:
-#   1824560 rmskClass/SINE.tab
-#   1552814 rmskClass/LINE.tab
-#    738435 rmskClass/LTR.tab
-#    715998 rmskClass/Simple_repeat.tab
-#    486591 rmskClass/DNA.tab
-#    105026 rmskClass/Low_complexity.tab
-#      7712 rmskClass/Satellite.tab
-#      5638 rmskClass/Retroposon.tab
-#      5276 rmskClass/Unknown.tab
-#      5100 rmskClass/LTR?.tab
-#      4548 rmskClass/snRNA.tab
-#      3033 rmskClass/DNA?.tab
-#      1987 rmskClass/tRNA.tab
-#      1809 rmskClass/rRNA.tab
-#      1710 rmskClass/RC.tab
-#      1633 rmskClass/srpRNA.tab
-#      1428 rmskClass/scRNA.tab
-#       614 rmskClass/RNA.tab
-#       376 rmskClass/RC?.tab
-#        38 rmskClass/SINE?.tab
-#         3 rmskClass/Unspecified.tab
-#   5464329 total
-
-#############################################################################
-## repeating RepeatMasker run with HMMER - DONE - 2014-01-08 - Hiram
-    mkdir /hive/data/genomes/hg38/bed/rmskHmmer
-    cd /hive/data/genomes/hg38/bed/rmskHmmer
-
-    # trying cpu=4 and ram=32g
-    time $HOME/kent/src/hg/utils/automation/doRepeatMasker.pl \
-      -stop=mask -useHMMER -bigClusterHub=ku \
-       -workhorse=hgwdev -dbHost=hgwdev -buildDir=`pwd` hg38 > mask.log 2>&1
-    # 6 jobs required more than 32 Gb of memory to complete, ran them on
-    # hgwdev to complete, then continuing:
-    time $HOME/kent/src/hg/utils/automation/doRepeatMasker.pl \
-      -continue=cat -stop=mask -useHMMER -bigClusterHub=ku \
-       -workhorse=hgwdev -dbHost=hgwdev -buildDir=`pwd` hg38 > cat.log 2>&1
-    #  real    24m5.274s
-# 3209286105 bases (159970322 N's 3049315783 real 1314916231 upper 1734399552 lower) in 455 sequences in 1 files
-# %54.04 masked total, %56.88 masked real
-
-    # running install manually to allow edit of the script to load
-    # a specific rmskHmmer table
-    time $HOME/kent/src/hg/utils/automation/doRepeatMasker.pl \
-      -continue=install -debug -useHMMER -bigClusterHub=ku \
-       -workhorse=hgwdev -dbHost=hgwdev -buildDir=`pwd` hg38
-
-    time ./doLoad_rmskHmmer.bash > load.log 2>&1
-    # real    4m47.432s
-
-    featureBits -countGaps hg38 rmskHmmer > fb.hg38.rmskHmmer.txt 2>&1
-    # 1734398971 bases of 3209286105 (54.043%) in intersection
-
-    grep rmskClass hg38.class.profile.txt \
-        | sed -e 's#rmskClass/##; s/.tab//;' | sort -rn
-    # profile of repeat elements:
-#  1884179 SINE
-#  1702529 LINE
-#   805427 LTR
-#   636906 Simple_repeat
-#   565171 DNA
-#    95480 Low_complexity
-#    11861 Retroposon
-#    10852 Satellite
-#     9181 LTR?
-#     6783 scRNA
-#     4582 DNA?
-#     3914 Unknown
-#     2059 RC
-#     1517 srpRNA
-#     1484 RNA
-#      970 SINE?
-#      806 RC?
-#      464 rRNA
-#  5744165 total
-
-    featureBits -countGaps hg38 rmskHmmer '!rmskCM' -bed=hmmerUnique.bed
-    # 172940594 bases of 3209286105 (5.389%) in intersection
-    hgLoadBed hg38 hmmerUnique hmmerUnique.bed
-    # Read 3099505 elements of size 4 from hmmerUnique.bed
-
-#############################################################################
-## RepeatMasker with HMMER - DONE - 2013-12-24,26 - Hiram
-    mkdir /hive/data/genomes/hg38/bed/repeatMaskerHMMER
-    cd /hive/data/genomes/hg38/bed/repeatMaskerHMMER
-
-    time doRepeatMasker.pl -stop=mask -useHMMER -bigClusterHub=ku \
-       -workhorse=hgwdev -dbHost=hgwdev -buildDir=`pwd` hg38 > mask.log 2>&1
-    # take the install script from this -debug run and alter it to load
-    # the table into rmskHmmer
-    doRepeatMasker.pl -continue=install -stop=install -useHMMER \
-      -bigClusterHub=ku -workhorse=hgwdev -dbHost=hgwdev \
-         -buildDir=`pwd` hg38 > mask.log 2>&1
-    # 1702017722 bases of 3209286105 (53.034%) in intersection
-    # profile of repeat elements:
-#   1879864 rmskClass/SINE.tab
-#   1678216 rmskClass/LINE.tab
-#    794231 rmskClass/LTR.tab
-#    651561 rmskClass/Simple_repeat.tab
-#    551965 rmskClass/DNA.tab
-#     97186 rmskClass/Low_complexity.tab
-#     10756 rmskClass/Retroposon.tab
-#     10448 rmskClass/Satellite.tab
-#      8393 rmskClass/LTR?.tab
-#      5849 rmskClass/scRNA.tab
-#      4282 rmskClass/Unknown.tab
-#      4276 rmskClass/DNA?.tab
-#      2000 rmskClass/RC.tab
-#      1573 rmskClass/srpRNA.tab
-#      1291 rmskClass/RNA.tab
-#       906 rmskClass/snRNA.tab
-#       747 rmskClass/SINE?.tab
-#       723 rmskClass/RC?.tab
-#       722 rmskClass/rRNA.tab
-#       468 rmskClass/tRNA.tab
-#   5705457 total
-
-#############################################################################
-# rmsk from genbank release (DONE - 2014-12-25 - Hiram)
-    mkdir /hive/data/genomes/hg38/bed/repeatMaskerGenbank
-    cd /hive/data/genomes/hg38/bed/repeatMaskerGenbank
-
-    head -3 ../repeatMaskerBlastn/hg38.fa.out > genbank.rm.out
-find ../../genbank -type f | grep rm.out | grep -v "/placed_scaffolds/" | while read F
-do
-  headRest 3 $F
-done | sort -k5,45 -k6,6n >> genbank.rm.out
-    grep -v "^#" ../../genbank/GCA_000001405.15_GRCh38_top-level.acc2name \
-       | awk '{printf "s/%s/%s/g;\n", $1, $3}' > accessionToUcsc.sed.txt
-
-    sed -e "`cat accessionToUcsc.sed.txt`" genbank.rm.out > ucscNames.rm.out
-
-    head -3 ucscNames.rm.out > hg38.sorted.fa.out
-    tail -n +4 ucscNames.rm.out  | sort -k5,5 -k6,6n >> hg38.sorted.fa.out
-
-    hgLoadOut -table=rmskGenbank -nosplit hg38 hg38.sorted.fa.out
-    hgLoadOut -verbose=2 -tabFile=hg38.rmskGenbank.tab -table=rmskGenbank \
-       -nosplit hg38 hg38.sorted.fa.out 2> bad.records.txt
-    # fixed up one of the masking scripts from the other runs to construct
-    # the bbi files
-
-    # 1581568556 bases of 3209286105 (49.281%) in intersection
-    # profile of repeat elements:
-#   1849444 rmskClass/SINE.tab
-#   1586141 rmskClass/LINE.tab
-#    759248 rmskClass/LTR.tab
-#    502186 rmskClass/DNA.tab
-#    433789 rmskClass/Simple_repeat.tab
-#    396378 rmskClass/Low_complexity.tab
-#     10198 rmskClass/Satellite.tab
-#      5884 rmskClass/LTR?.tab
-#      4595 rmskClass/snRNA.tab
-#      4163 rmskClass/Retroposon.tab
-#      2802 rmskClass/Unknown.tab
-#      2157 rmskClass/DNA?.tab
-#      2154 rmskClass/tRNA.tab
-#      1915 rmskClass/rRNA.tab
-#      1860 rmskClass/RC.tab
-#      1784 rmskClass/srpRNA.tab
-#      1397 rmskClass/scRNA.tab
-#       822 rmskClass/RNA.tab
-#       488 rmskClass/SINE?.tab
-#       445 rmskClass/RC?.tab
-#   5567850 total
-
-#############################################################################
-## running TRF simple repeats - DONE - 2013-12-24,29 - Hiram
-    # this procedure ran into much trouble on this release.  The new
-    # repeat sequences in the centromeres caused trf to run indefinitely.
-    # I tried different sizes of chunks, working down to 20 Mbase chunks.
-    # Even still, some jobs would not complete.  Those broke down even
-    # more, eventually to the smallest bit of 30 Kbase that needed to
-    # run all the way down to 3,000 based chunks with 1,000 base overlaps.
-
-    # this did not work:
-    screen # use screen to manage this day-long job
-    mkdir /hive/data/genomes/hg38/bed/simpleRepeat
-    cd /hive/data/genomes/hg38/bed/simpleRepeat
-    time doSimpleRepeat.pl -bigClusterHub=ku -workhorse=hgwdev \
-	-smallClusterHub=ku -buildDir=`pwd` hg38 > do.log 2>&1
-    cd /hive/data/genomes/hg38/bed
-    # move it aside:
-    mv simpleRepeat simpleRepeat.2013-12-24
-
-    # Instead, something like this:
-    mkdir /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/splitGap
-    cd /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/splitGap
-    mkdir -p noGap
-
-    twoBitToFa ../../../hg38.unmasked.2bit stdout \
-       | faSplit -lift=noGap.lift gap stdin 5000000 noGap/hg38_
-    # make sure nothing has gone missing:
-    faCount noGap/*.fa > faCount.txt
-    tail -1 faCount.txt
-# total 3068387174 898285419 623727342 626335137 900967885  19071391 30979734
-    # compared to the full sequence, same numbers for ACGT:
-    twoBitToFa ../../../hg38.unmasked.2bit stdout | faCount stdin
-# total 3209286105 898285419 623727342 626335137 900967885 159970322 30979743
-    faToTwoBit noGap/*.fa hg38.nogap.2bit
-    twoBitInfo hg38.nogap.2bit stdout | sort -k2,2nr > hg38.nogap.sizes
-
-
-    mkdir /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M
-    cd /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M
-    rm -rf /hive/data/genomes/hg38/TrfPart20M
-    /cluster/bin/scripts/simplePartition.pl \
-/hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/splitGap/hg38.nogap.2bit \
-   20000000 /hive/data/genomes/hg38/TrfPart20M
-   rm -f /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/TrfPart20M
-   ln -s /hive/data/genomes/hg38/TrfPart20M \
-      /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/TrfPart20M
-   ssh ku
-   cd /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M
-   gensub2 /hive/data/genomes/hg38/TrfPart20M/partitions.lst single gsub jobList
-   para create jobList
-   para push
-   # 20 jobs would not complete:
-# Completed: 143 of 163 jobs
-# Jobs currently running: 20
-# CPU time in finished jobs:      76994s    1283.24m    21.39h    0.89d  0.002 y
-# IO & Wait Time:                  1095s      18.24m     0.30h    0.01d  0.000 y
-# Time in running jobs:         1807279s   30121.32m   502.02h   20.92d  0.057 y
-# Average job time:                 546s       9.10m     0.15h    0.01d
-# Longest running job:            90422s    1507.03m    25.12h    1.05d
-# Longest finished job:           43348s     722.47m    12.04h    0.50d
-# Submission to last job:         43363s     722.72m    12.05h    0.50d
-   # determine which are the last jobs as individual bits:
-   para status | grep -v done | awk '{print $(NF-1),$NF}' | grep TrfRun \
-     > not.done.list
-   awk '{print $NF}' not.done.list | sed -e 's/.bed//' | while read F
-do
-   cat $F
-done > seq.specs.not.done
-
-   mkdir /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/lastJobs
-   cd /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/lastJobs
-   mkdir fasta
-   for seqSpec in `cat ../seq.specs.not.done`
-do
-  fName=`echo $seqSpec | sed -e 's/.*://'`
-  echo $fName
-  twoBitToFa $seqSpec fasta/$fName.fa
-done
-  ls -1S `pwd`/fasta > part.list
-  cat << '_EOF_' > template
-#LOOP
-./runTrf {check in line+ $(path1)}  {check out line bed/$(root1).bed}
-#ENDLOOP
-'_EOF_'
-  # << happy emacs
-
-  cat << '_EOF_' > runTrf
-#!/bin/bash
-set -beEu -o pipefail
-export path1=$1
-export inputFN=`basename $1`
-export outpath=$2
-export outputFN=`basename $2`
-mkdir -p /dev/shm/$outputFN
-cp -p $path1 /dev/shm/$outputFN
-cd /dev/shm/$outputFN
-/cluster/bin/x86_64/trfBig -trf=/cluster/bin/x86_64/trf \
-      $inputFN /dev/null -bedAt=$outputFN -tempDir=/dev/shm
-cd /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/lastJobs
-rm -f $outpath
-cp -p /dev/shm/$outputFN/$outputFN $outpath
-rm -fr /dev/shm/$outputFN/*
-rmdir --ignore-fail-on-non-empty /dev/shm/$outputFN
-'_EOF_'
-  # << happy emacs
-  chmod +x runTrf
-
-  gensub2 part.list single template jobList
-  para create jobList
-  para push
-  # not all of these jobs will finish either:
-# Completed: 85 of 106 jobs
-# Jobs currently running: 21
-# CPU time in finished jobs:      58076s     967.93m    16.13h    0.67d  0.002 y
-# IO & Wait Time:                   828s      13.81m     0.23h    0.01d  0.000 y
-# Time in running jobs:         1988997s   33149.95m   552.50h   23.02d  0.063 y
-# Average job time:                 693s      11.55m     0.19h    0.01d
-# Longest running job:            94730s    1578.83m    26.31h    1.10d
-# Longest finished job:           34216s     570.27m     9.50h    0.40d
-# Submission to last job:         34342s     572.37m     9.54h    0.40d
-
-  # can use what we have here:
-  liftUp result.bed ../../splitGap/noGap.lift error bed/*.bed
-  # find jobs not done
-  para status | grep -v done | awk '{print $(NF-1),$NF}' | grep TrfRun \
-     > not.done.list
-  # splitting up those last jobs:
-  mkdir /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/splitBits
-  cd /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/splitBits
-  mkdir noGap
-  awk '{print $2}' ../lastJobs/not.done.list | while read F
-do
-  cp -p $F ./noGap/
-done
-
-  # split into 1,000,000 chunks with 10,000 overlap:
-  mkdir -p 1M_10K
-
-for F in noGap/*.fa
-do
-  B=`basename $F | sed -e 's/.fa//'`
-  echo "faSplit -lift=$B.lift -extra=10000 size $F 1000000 1M_10K/$B_"
-  faSplit -lift=$B.lift -extra=10000 size $F 1000000 1M_10K/${B}_
-done
-
-  ls -1S `pwd`/1M_10K/*.fa > part.list
-  cat << '_EOF_' > runTrf
-#!/bin/bash
-set -beEu -o pipefail
-export path1=$1
-export inputFN=`basename $1`
-export outpath=$2
-export outputFN=`basename $2`
-mkdir -p /dev/shm/$outputFN
-cp -p $path1 /dev/shm/$outputFN
-cd /dev/shm/$outputFN
-/cluster/bin/x86_64/trfBig -trf=/cluster/bin/x86_64/trf \
-      $inputFN /dev/null -bedAt=$outputFN -tempDir=/dev/shm
-cd /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/splitBits
-rm -f $outpath
-cp -p /dev/shm/$outputFN/$outputFN $outpath
-rm -fr /dev/shm/$outputFN/*
-rmdir --ignore-fail-on-non-empty /dev/shm/$outputFN
-'_EOF_'
-  # << happy emacs
-
-  cat << '_EOF_' > template
-#LOOP
-./runTrf {check in line+ $(path1)}  {check out line bed/$(root1).bed}
-#ENDLOOP
-'_EOF_'
-  # << happy emacs
-
-  gensub2 part.list single template jobList
-  para create jobList
-  para push
-  # not all of these jobs will complete either:
-# Completed: 53 of 96 jobs
-# CPU time in finished jobs:     212403s    3540.05m    59.00h    2.46d  0.007 y
-# IO & Wait Time:                  1851s      30.85m     0.51h    0.02d  0.000 y
-# Average job time:                4043s      67.38m     1.12h    0.05d
-# Longest finished job:           68726s    1145.43m    19.09h    0.80d
-# Submission to last job:         68890s    1148.17m    19.14h    0.80d
-  # use what results we have here:
-  cat *.lift  | liftUp parts.bed stdin error bed/*.bed
-  liftUp -type=.bed stdout ../../splitGap/noGap.lift error parts.bed \
-    | sort -u | sort -k1,1 -k2,2n > hg38.result.bed
-
-  para status | grep -v -w done | awk '{print $(NF-1)}' > will.not.finish.txt
-
-  # split those last bits:
-  mkdir /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/splitSplitBits
-  cd /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/splitSplitBits
-  mkdir splitBits
-  cat ../splitBits/will.not.finish.txt | while read F
-do
-  cp -p $F splitBits
-done
-
-  #  100K chunks with 10K overlap
-  mkdir -p 100K_10K
-
-for F in splitBits/*.fa
-do
-  B=`basename $F | sed -e 's/.fa//'`
-  echo "faSplit -lift=$B.lift -extra=10000 size $F 1000000 1M_10K/$B_"
-  faSplit -lift=$B.lift -extra=10000 size $F 100000 100K_10K/${B}_
-done
-
-  cat << '_EOF_' > runTrf
-#!/bin/bash
-set -beEu -o pipefail
-export path1=$1
-export inputFN=`basename $1`
-export outpath=$2
-export outputFN=`basename $2`
-mkdir -p /dev/shm/$outputFN
-cp -p $path1 /dev/shm/$outputFN
-cd /dev/shm/$outputFN
-/cluster/bin/x86_64/trfBig -trf=/cluster/bin/x86_64/trf \
-      $inputFN /dev/null -bedAt=$outputFN -tempDir=/dev/shm
-cd /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/splitSplitBits
-rm -f $outpath
-cp -p /dev/shm/$outputFN/$outputFN $outpath
-rm -fr /dev/shm/$outputFN/*
-rmdir --ignore-fail-on-non-empty /dev/shm/$outputFN
-'_EOF_'
-  # << happy emacs
-  chmod +x runTrf
-
-  cat << '_EOF_' > template
-#LOOP
-./runTrf {check in line+ $(path1)}  {check out line bed/$(root1).bed}
-#ENDLOOP
-'_EOF_'
-  # << happy emacs
-
-  ls -1S `pwd`/100K_10K/*.fa > part.list
-  gensub2 part.list single template jobList
-  para create jobList
-  para push
-  # one last bit does not complete:
-# Completed: 420 of 421 jobs
-# CPU time in finished jobs:      19862s     331.04m     5.52h    0.23d  0.001 y
-# IO & Wait Time:                  2360s      39.33m     0.66h    0.03d  0.000 y
-# Average job time:                  53s       0.88m     0.01h    0.00d
-# Longest finished job:             368s       6.13m     0.10h    0.00d
-# Submission to last job:           448s       7.47m     0.12h    0.01d
-
-  # can use the results obtained here:
-  cat *.lift  | liftUp splitParts.bed stdin error bed/*.bed
-  cat ../splitBits/*.lift | liftUp parts.bed  stdin error splitParts.bed
-  liftUp -type=.bed stdout ../../splitGap/noGap.lift error parts.bed | sort -u \
-    | sort -k1,1 -k2,2n > hg38.result.bed
-
-  para status | grep -v -w done | awk '{print $(NF-1)}'
-  # last chunk: 100K_10K/hg38_89_2_00.fa
-
-  mkdir /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/last100K
-  cd /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/last100K
-  cp -p ../splitSplitBits/100K_10K/hg38_89_2_00.fa .
-
-  # 20K chunks with 10K overlap:
-  mkdir -p 20K_10K
-
-for F in hg38_89_2_00.fa
-do
-  B=`basename $F | sed -e 's/.fa//'`
-  echo "faSplit -lift=$B.lift -extra=10000 size $F 20000 20K_10K/$B_"
-  faSplit -lift=$B.lift -extra=10000 size $F 20000 20K_10K/${B}_
-done
-
-  ls -1S `pwd`/20K_10K/*.fa > part.list
-  cat << '_EOF_' > runTrf
-#!/bin/bash
-set -beEu -o pipefail
-export path1=$1
-export inputFN=`basename $1`
-export outpath=$2
-export outputFN=`basename $2`
-mkdir -p /dev/shm/$outputFN
-cp -p $path1 /dev/shm/$outputFN
-cd /dev/shm/$outputFN
-/cluster/bin/x86_64/trfBig -trf=/cluster/bin/x86_64/trf \
-      $inputFN /dev/null -bedAt=$outputFN -tempDir=/dev/shm
-cd /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/last100K
-rm -f $outpath
-cp -p /dev/shm/$outputFN/$outputFN $outpath
-rm -fr /dev/shm/$outputFN/*
-rmdir --ignore-fail-on-non-empty /dev/shm/$outputFN
-'_EOF_'
-  # << happy emacs
-  chmod +x runTrf
-  cat << '_EOF_' > template
-#LOOP
-./runTrf {check in line+ $(path1)}  {check out line bed/$(root1).bed}
-#ENDLOOP
-'_EOF_'
-  # << happy emacs
-
-  gensub2 part.list single template jobList
-  para create jobList
-  para push
-  # one of these jobs will not finish:
-# Completed: 4 of 5 jobs
-# CPU time in finished jobs:         10s       0.17m     0.00h    0.00d  0.000 y
-# IO & Wait Time:                    16s       0.26m     0.00h    0.00d  0.000 y
-# Average job time:                   7s       0.11m     0.00h    0.00d
-# Longest finished job:               8s       0.13m     0.00h    0.00d
-# Submission to last job:            16s       0.27m     0.00h    0.00d
-
-  # can use the results we have here:
-  cat *.lift  | liftUp 20Kparts.bed stdin error bed/*.bed
-  cat ../splitSplitBits/*.lift | liftUp 100Kpart.bed stdin error 20Kparts.bed
-  cat ../splitBits/*.lift | liftUp parts.bed  stdin error 100Kpart.bed
-  liftUp -type=.bed stdout ../../splitGap/noGap.lift error parts.bed | sort -u \
-    | sort -k1,1 -k2,2n > hg38.result.bed
-
-  # finally, what turns out to be the last batch:
-  mkdir /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/last30K
-  cd /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/last30K
-  cp -p ../last100K/20K_10K/hg38_89_2_00_3.fa .
-
-  # 2K chunks with 1K overlap
-  mkdir -p 2K_1K
-
-for F in hg38_89_2_00_3.fa
-do
-  B=`basename $F | sed -e 's/.fa//'`
-  echo "faSplit -lift=$B.lift -extra=1000 size $F 2000 2K_1K/$B_"
-  faSplit -lift=$B.lift -extra=1000 size $F 2000 2K_1K/${B}_
-done
-
-  ls -1S `pwd`/2K_1K/*.fa > part.list
-  cat << '_EOF_' > runTrf
-#!/bin/bash
-set -beEu -o pipefail
-export path1=$1
-export inputFN=`basename $1`
-export outpath=$2
-export outputFN=`basename $2`
-mkdir -p /dev/shm/$outputFN
-cp -p $path1 /dev/shm/$outputFN
-cd /dev/shm/$outputFN
-/cluster/bin/x86_64/trfBig -trf=/cluster/bin/x86_64/trf \
-      $inputFN /dev/null -bedAt=$outputFN -tempDir=/dev/shm
-cd /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M/last30K
-rm -f $outpath
-cp -p /dev/shm/$outputFN/$outputFN $outpath
-rm -fr /dev/shm/$outputFN/*
-rmdir --ignore-fail-on-non-empty /dev/shm/$outputFN
-'_EOF_'
-  # << happy emacs
-  chmod +x runTrf
-  cat << '_EOF_' > template
-#LOOP
-./runTrf {check in line+ $(path1)}  {check out line bed/$(root1).bed}
-#ENDLOOP
-'_EOF_'
-  # << happy emacs
-
-  gensub2 part.list single template jobList
-  para create jobList
-  para push
-# Completed: 15 of 15 jobs
-# CPU time in finished jobs:          1s       0.02m     0.00h    0.00d  0.000 y
-# IO & Wait Time:                    26s       0.43m     0.01h    0.00d  0.000 y
-# Average job time:                   2s       0.03m     0.00h    0.00d
-# Longest finished job:               4s       0.07m     0.00h    0.00d
-# Submission to last job:            14s       0.23m     0.00h    0.00d
-
-  cat *.lift  | liftUp 2Kparts.bed stdin error bed/*.bed
-  cat ../last100K/*.lift | liftUp 20Kpart.bed stdin error 2Kparts.bed
-  cat ../splitSplitBits/*.lift | liftUp 100Kpart.bed stdin error 20Kpart.bed
-  cat ../splitBits/*.lift | liftUp parts.bed  stdin error 100Kpart.bed
-  liftUp -type=.bed stdout ../../splitGap/noGap.lift error parts.bed | sort -u \
-    | sort -k1,1 -k2,2n > hg38.result.bed
-
-  ## To put it all together:
-  cd /hive/data/genomes/hg38/bed/simpleRepeat.2013-12-27/run20M
-  cat /hive/data/genomes/hg38/TrfPart20M/???/*.bed lastJobs/bed/*.bed \
-     splitBits/parts.bed splitSplitBits/parts.bed last100K/parts.bed \
-     last30K/parts.bed > beforeLift.simpleRepeat.bed
-  liftUp -type=.bed stdout ../splitGap/noGap.lift error \
-     beforeLift.simpleRepeat.bed | sort -u \
-       | sort -k1,1 -k2,2n > simpleRepeat.bed
-
-  awk '{if ($5 <= 12) print;}' simpleRepeat.bed > trfMask.bed
-
-  hgLoadBed hg38 simpleRepeat simpleRepeat.bed \
-        -sqlTable=$HOME/kent/src/hg/lib/simpleRepeat.sql
-  featureBits hg38 simpleRepeat > fb.simpleRepeat 2>&1
-  cat fb.simpleRepeat
-# 146785521 bases of 3049335806 (4.814%) in intersection
-
-  cd /hive/data/genomes/hg38/bed
-  ln -s simpleRepeat.2013-12-27/run20M simpleRepeat
-
-############################################################################
-
- # WINDOWMASKER - DONE - 2013-12-24 - Hiram
-    mkdir /hive/data/genomes/hg38/bed/windowMasker
-    cd /hive/data/genomes/hg38/bed/windowMasker
-    time nice -n +19 doWindowMasker.pl -buildDir=`pwd` -workhorse=hgwdev \
-	-dbHost=hgwdev hg38 > do.log 2>&1 &
-
-############################################################################
-# Verify all gaps are marked - DONE - 2013-12-24 - Hiram
-    mkdir /hive/data/genomes/hg38/bed/gap
-    cd /hive/data/genomes/hg38/bed/gap
-    time nice -n +19 findMotif -motif=gattaca -verbose=4 \
-	-strand=+ ../../hg38.unmasked.2bit > findMotif.txt 2>&1
-    #	real    0m28.634s
-    grep "^#GAP " findMotif.txt | sed -e "s/^#GAP //" > allGaps.bed
-    featureBits hg38 -not gap -bed=notGap.bed
-    #	3049335806 bases of 3049335806 (100.000%) in intersection
-    time featureBits hg38 allGaps.bed notGap.bed -bed=new.gaps.bed
-    #   20023 bases of 3049335806 (0.001%) in intersection
-    # real    0m20.427s
-    # this indicates that 20,023 bases are not marked as N's
-    # with this element size profile:
-    awk '{print $3-$2}' new.gaps.bed | ave stdin
-# Q1 1.000000
-# median 1.000000
-# Q3 100.000000
-# average 44.894619
-# min 1.000000
-# max 1000.000000
-# count 446
-# total 20023.000000
-# standard deviation 81.743447
-
-    # the four largest ones:
-# 1000 chr2         32916625        32917625        chr2.7
-# 1000 chr2         32867130        32868130        chr2.6
-#  348 chr20        36314371        36314719        chr20.36
-#  200 chr12       123443533       123443733        chr12.10
-
-#########################################################################
-## CYTOBAND - fixing the ideogram track (DONE - 2014-06-11 - Hiram)
-    ## the file we used before was broken
-    mkdir -p /hive/data/outside/ncbi/ideogram/2014-06
-    cd /hive/data/outside/ncbi/ideogram/2014-06
-    # fetch all the ideogram files:
-    rsync -a -P rsync://ftp.ncbi.nlm.nih.gov/pub/gdp/ ./
-    mkdir /hive/data/genomes/hg38/bed/cytoBandUpdate
-    cd /hive/data/genomes/hg38/bed/cytoBandUpdate
-
-    # Create bed file
-    $HOME/kent/src/utils/ncbi/createNcbiCytoBand.pl \
-/hive/data/outside/ncbi/ideogram/2014-06/ideogram_9606_GCF_000001305.14_850_V1
-
-    # add in the other genome data:
-    hgsql -N -e 'select * from cytoBand;' hg38 \
-        | egrep "chrU|chrM|_alt|_random" >> cytoBand.bed
-
-    $HOME/kent/src/utils/ncbi/cytoBandVerify.pl
-    #   everything checks out OK on 455 chroms
-
-    # Load the bed file
-    hgLoadBed -tab -noBin -sqlTable=$HOME/kent/src/hg/lib/cytoBand.sql \
-	hg38 cytoBand cytoBand.bed
-    cut -f1 cytoBand.bed | sort -u | awk '{print length($1)}' | sort -rn | head
-    #  23
-    sed -e 's/12/23/' $HOME/kent/src/hg/lib/cytoBand.sql > cytoBand.sql
-    sort -k1,1 -k2,2n cytoBand.bed \
-	| hgLoadSqlTab hg38 cytoBand cytoBand.sql stdin
-
-    # Make cytoBandIdeo track for ideogram gif on hgTracks page.
-    # cytoBandIdeo is just a replicate of the cytoBand track.
-    hgsql -e "drop table cytoBandIdeo;" hg38
-    hgsql hg38 -e "create table cytoBandIdeo (index(chrom(23),chromStart)) as select * from cytoBand;"
-
-#########################################################################
-##  CYTOBAND - ideogram track (DONE - 2014-03-04 - Hiram)
-    ssh hgwdev
-    mkdir -p /hive/data/outside/ncbi/ideogram/2014-03
-    cd /hive/data/outside/ncbi/ideogram/2014-03
-
-    # fetch all the ideogram files:
-    rsync -a -P rsync://ftp.ncbi.nlm.nih.gov/pub/gdp/ ./
-
-    mkdir /hive/data/genomes/hg38/bed/cytoBand
-    cd /hive/data/genomes/hg38/bed/cytoBand
-
-    # Create bed file
-    $HOME/kent/src/utils/ncbi/createNcbiCytoBand.pl \
-/hive/data/outside/ncbi/ideogram/2014-03/ideogram_9606_GCF_000001305.14_850_V1
-
-    # add in the other genome data:
-    hgsql -N -e 'select * from cytoBand;' hg38 > bobTable.bed
-
-    egrep "chrU|chrM|_alt|_random" bobTable.bed >> cytoBand.bed
-
-    ## can now verify before load:
-    $HOME/kent/src/utils/ncbi/cytoBandVerify.pl
-    #   everything checks out OK on 455 chroms
-
-    # Load the bed file
-    hgLoadBed -tab -noBin -sqlTable=$HOME/kent/src/hg/lib/cytoBand.sql \
-	hg38 cytoBand cytoBand.bed
-    cut -f1 cytoBand.bed | sort -u | awk '{print length($1)}' | sort -rn | head
-    #  23
-    sed -e 's/12/23/' $HOME/kent/src/hg/lib/cytoBand.sql > cytoBand.sql
-    sort -k1,1 -k2,2n cytoBand.bed \
-	| hgLoadSqlTab hg38 cytoBand cytoBand.sql stdin
-
-    # Make cytoBandIdeo track for ideogram gif on hgTracks page.
-    # cytoBandIdeo is just a replicate of the cytoBand track.
-    hgsql -e "drop table cytoBandIdeo;" hg38
-    hgsql hg38 -e "create table cytoBandIdeo (index(chrom(23),chromStart)) as select * from cytoBand;"
-
-##########################################################################
-# cytoBandIdeo - (DONE - 2013-12-26 - Hiram)
-    mkdir /hive/data/genomes/hg38/bed/cytoBand
-    cd /hive/data/genomes/hg38/bed/cytoBand
-    makeCytoBandIdeo.csh hg38
-
-#making temporary liftover of items from hg19
-liftOver /hive/data/genomes/hg19/bed/ncbiCytoBand/cytobands.bed \
-      /hive/data/gbdb/hg19/liftOver/hg19ToHg38.over.chain.gz \
-      cytobands.bed unMapped
-
-liftOver -minBlocks=0.5 /hive/data/genomes/hg19/bed/ncbiCytoBand/cytobands.bed \
-      /hive/data/gbdb/hg19/liftOver/hg19ToHg38.over.chain.gz \
-      cytobands.0.5.bed unMapped0.5
-
-###############################                    ######################
-# cytoBandIdeo - (reDONE - 2014-02-25 - kuhn)
-
-# adding centromeres to generic cytoBandIdeo table as it exists.
-# (lifted track is already gone)
-
-# get the cen values for hg38
-hgsql -Ne "SELECT DISTINCT chrom FROM centromeres" hg38 | sort > hg38.chroms
-rm -f hg38.cens
-foreach chrom (`cat hg38.chroms`)
-  set cenStart=""
-  set cenEnd=""
-  set cenStart=`hgsql -Ne 'SELECT MIN(chromStart) FROM centromeres WHERE chrom = "'$chrom'"' hg38`
-  set cenEnd=`hgsql -Ne 'SELECT MAX(chromEnd) FROM centromeres WHERE chrom = "'$chrom'"' hg38`
-  echo "$chrom $cenStart $cenEnd" >> hg38.cens
-end
-
-# Modified makeCytoBandIdeo.csh to use this file instead of looking
-#   for centromeres in a gap table.
-# Replaced existing cytoBandIdeo table, which was really only a copy
-#   of chromInfo.
-
-##########################################################################
-# hg19 <-> hg38 difference tracks (DONE - 2013-12-28 - Hiram)
-    mkdir /hive/data/genomes/hg19/bed/liftOverHg38
-    cd /hive/data/genomes/hg19/bed/liftOverHg38
-
-    #	not needed, but interesting, collect all the fragment
-    #	definitions from the gold tables:
-    hgsql -N -e "select frag,fragStart,fragEnd,strand from gold;" hg19 \
-        | sort > hg19.gold.frags.tab
-
-    hgsql -N -e "select frag,fragStart,fragEnd,strand from gold;" hg38 \
-        | sort > hg38.gold.frags.tab
-
-    # construct common and difference listings
-    comm -12 hg19.gold.frags.tab hg38.gold.frags.tab \
-	> identical.hg19.hg38.frags.tab
-    comm -23 hg19.gold.frags.tab hg38.gold.frags.tab \
-	> unique.hg19Only.frags.tab
-    comm -13 hg19.gold.frags.tab hg38.gold.frags.tab \
-	> unique.hg38Only.frags.tab
-
-    # better yet, get full information about each fragment
-    hgsql -N -e "select chrom,chromStart,chromEnd,ix,type,frag,fragStart,fragEnd,strand from gold;" hg19 \
-        | sort -k6 > hg19.gold.tab
-
-    hgsql -N -e "select chrom,chromStart,chromEnd,ix,type,frag,fragStart,fragEnd,strand from gold;" hg38 \
-        | sort -k6 > hg38.gold.tab
-
-    # construct a single key for each fragment for joining.
-    # the key is frag,fragStart,fragEnd,strand
-    awk '{printf "%s,%d,%d,%s\t%s\t%s\t%s\t%d\t%d\t%d\t%s\n",
-	$6,$7,$8,$9,$6,$9,$1,$2,$3,$4,$5}' hg19.gold.tab | sort \
-	> hg19.fragKey.tab
-    awk '{printf "%s,%d,%d,%s\t%s\t%s\t%s\t%d\t%d\t%d\t%s\n",
-	$6,$7,$8,$9,$6,$9,$1,$2,$3,$4,$5}' hg38.gold.tab | sort \
-	> hg38.fragKey.tab
-
-    # now, by joining those keys, we can get exact identicals, and
-    # the only-in listings as bed files to load as tracks:
-    join hg19.fragKey.tab hg38.fragKey.tab \
-	| awk '{printf "%s\t%d\t%d\t%s\t1000\t%s\t%d\t%d\t0,0,128\n", $4,$5,$6,$2,$3,$5,$6}' \
-        | sort -k1,1 -k2,2n > hg19.hg38.identical.bed
-
-    join hg19.fragKey.tab hg38.fragKey.tab \
-	| awk '{printf "%s\t%d\t%d\t%s\t1000\t%s\t%d\t%d\t0,0,128\n", $11,$12,$13,$9,$10,$12,$13}' \
-        | sort -k1,1 -k2,2n > hg38.hg19.identical.bed
-
-    join -v 1 hg19.fragKey.tab hg38.fragKey.tab \
-	| awk '{printf "%s\t%d\t%d\t%s\t0\t%s\n", $4,$5,$6,$2,$3}' \
-        | sort -k1,1 -k2,2n > hg19.only.bed
-
-    join -v 2 hg19.fragKey.tab hg38.fragKey.tab \
-	| awk '{printf "%s\t%d\t%d\t%s\t0\t%s\n", $4,$5,$6,$2,$3}' \
-        | sort -k1,1 -k2,2n > hg38.only.bed
-
-    hgLoadBed hg19 hg38ContigDiff hg19.only.bed
-    hgLoadBed hg38 hg19ContigDiff hg38.only.bed
-
-    wc -l hg??.only.bed
-    #  6097 hg19.only.bed
-    #  23632 hg38.only.bed
-
-    # this leaves the outstanding question of "why" they might be in
-    #	the only-in listings.  Some contigs may be different versions,
-    #   sometimes different sections of the same contig are used,
-    #	and contigs are dropped from hg19 to hg38, or new contigs added
-    #	to hg38 to fill in gaps from hg19
-    # Let's see if we can measure some of this:
-    awk '{print $4}' hg19.only.bed | sort -u > hg19.only.ids.list
-    awk '{print $4}' hg38.only.bed | sort -u > hg38.only.ids.list
-
-    # Looks like 5405 identical contigs with different parts used:
-    comm -12 hg19.only.ids.list hg38.only.ids.list > differentPortions.list
-    wc -l differentPortions.list
-    # 5405
-
-    # and perhaps 63 = 5468-5405 of different versions of same contig:
-    sed -e "s/\.[0-9]*$//" hg19.only.ids.list | sort -u \
-	> hg19.noVersions.ids.list
-    sed -e "s/\.[0-9]*$//" hg38.only.ids.list | sort -u \
-	> hg38.noVersions.ids.list
-    comm -12 hg19.noVersions.ids.list hg38.noVersions.ids.list | wc -l
-    #	5468
-    sed -e "s/\.[0-9]*$//" differentPortions.list | sort -u \
-	> differentPortions.noVersions.list
-    comm -12 hg19.noVersions.ids.list hg38.noVersions.ids.list | sort -u \
-	> noVersions.common.list
-    # indeed, 63 contigs of different versions:
-    comm -23 noVersions.common.list differentPortions.noVersions.list \
-	| sort -u > differentVersions.list
-    wc -l differentVersions.list
-    #	63
-
-    # dividing up these items:
-    cat << '_EOF_' > identifyPortions.pl
-#!/usr/bin/env perl
-# identifyPortions.pl - classify fragments that appear in only one of the
-# hg19/hg38 gold tables into three bed files per assembly:
-#   differentPortions - same contig version, a different region was used
-#   differentVersions - a different version of the same contig was used
-#   dropped (hg19) / newTo19 (hg38) - no counterpart in the other assembly
-# Reads differentVersions.list, differentPortions.list, hg19.only.bed,
-# hg38.only.bed from the current directory.
-
-use strict;
-use warnings;
-
-# versionless accession -> 1, for contigs present in both at different versions
-my %differentVersions;
-# full accession -> 1, for contigs where different portions were used
-my %differentPortions;
-
-open (FH, "<differentVersions.list" ) or
-	die "can not read differentVersions.list";
-while (my $line = <FH>) {
-    chomp $line;
-    $differentVersions{$line} = 1;
-}
-close (FH);
-
-open (FH, "differentPortions.list" ) or
-	die "can not read differentPortions.list";
-while (my $line = <FH>) {
-    chomp $line;
-    $differentPortions{$line} = 1;
-}
-close (FH);
-
-# first pass over hg19.only.bed: route each item to the differentPortions or
-# differentVersions bed; %hg19Done records whether the item was classified
-my %hg19Done;
-open (DP, ">hg19.differentPortions.bed") or die "can not write to hg19.differentPortions.bed";
-open (DV, ">hg19.differentVersions.bed") or die "can not write to hg19.differentVersions.bed";
-open (FH, "<hg19.only.bed" ) or die "can not read hg19.only.bed";
-while (my $line = <FH>) {
-    chomp $line;
-    my ($chr, $start, $end, $acc, $score, $strand) = split('\s+', $line);
-    # assume done while $acc is still complete
-    $hg19Done{$acc} = 1;
-    if (exists($differentPortions{$acc})) {
-	printf DP "%s\n", $line;
-    } else {
-	my $trimAcc = $acc;
-	$trimAcc =~ s/\.[0-9]+$//;
-	if (exists($differentVersions{$trimAcc})) {
-	    printf DV "%s\n", $line;
-	} else {
-            # this one does not match
-	    $hg19Done{$acc} = 0;
-	}
-    }
-}
-close (FH);
-close (DV);
-close (DP);
-# second pass: anything left unclassified was dropped going from hg19 to hg38
-open (DR, ">hg19.dropped.bed") or die "can not write to hg19.dropped.bed";
-open (FH, "<hg19.only.bed" ) or die "can not read hg19.only.bed";
-while (my $line = <FH>) {
-    chomp $line;
-    my ($chr, $start, $end, $acc, $score, $strand) = split('\s+', $line);
-    if (0 == $hg19Done{$acc}) {
-	printf DR "%s\n", $line;
-    }
-}
-close (FH);
-close (DR);
-
-# same two-pass classification for the hg38 side; items unmatched here are
-# sequence new to hg38 relative to hg19
-my %hg38Done;
-open (DP, ">hg38.differentPortions.bed") or die "can not write to hg38.differentPortions.bed";
-open (DV, ">hg38.differentVersions.bed") or die "can not write to hg38.differentVersions.bed";
-open (FH, "<hg38.only.bed" ) or die "can not read hg38.only.bed";
-while (my $line = <FH>) {
-    chomp $line;
-    my ($chr, $start, $end, $acc, $score, $strand) = split('\s+', $line);
-    # assume done while $acc is still complete
-    $hg38Done{$acc} = 1;
-    if (exists($differentPortions{$acc})) {
-	printf DP "%s\n", $line;
-    } else {
-	my $trimAcc = $acc;
-	$trimAcc =~ s/\.[0-9]+$//;
-	if (exists($differentVersions{$trimAcc})) {
-	    printf DV "%s\n", $line;
-	} else {
-            # this one does not match
-	    $hg38Done{$acc} = 0;
-	}
-    }
-}
-close (FH);
-close (DV);
-close (DP);
-open (DR, ">hg38.newTo19.bed") or die "can not write to hg38.newTo19.bed";
-open (FH, "<hg38.only.bed" ) or die "can not read hg38.only.bed";
-while (my $line = <FH>) {
-    chomp $line;
-    my ($chr, $start, $end, $acc, $score, $strand) = split('\s+', $line);
-    if (0 == $hg38Done{$acc}) {
-	printf DR "%s\n", $line;
-    }
-}
-close (FH);
-close (DR);
-'_EOF_'
-    # << happy emacs
-    chmod +x identifyPortions.pl
-    ./identifyPortions.pl
-    # make sure nothing was lost
-    sort hg19.differentVersions.bed hg19.differentPortions.bed \
-	hg19.dropped.bed  | sum
-    #	43711   233
-    sort hg19.only.bed | sum
-    #	43711   233
-    sort hg38.differentVersions.bed hg38.differentPortions.bed \
-	hg38.newTo19.bed | sum
-    #	00502   911
-    sort hg38.only.bed | sum
-    #	00502   911
-
-    sort -k1,1 -k2,2n hg38.differentVersions.bed hg38.differentPortions.bed \
-	hg38.newTo19.bed > hg38.itemRgb.bed
-    sort -k1,1 -k2,2n hg19.differentVersions.bed hg19.differentPortions.bed \
-	hg19.dropped.bed > hg19.itemRgb.bed
-
-    hgLoadBed hg19 hg38ContigDiff hg19.itemRgb.bed
-    # if you wanted to load the identicals in this track too:
-    sort -k1,1 -k2,2n hg38.hg19.identical.bed hg38.itemRgb.bed \
-       | hgLoadBed hg38 hg38ContigDiff stdin
-    # but we don't, we deliver only the differences
-    hgLoadBed hg38 hg38ContigDiff hg38.itemRgb.bed
-
-#########################################################################
-# construct ooc file to be used in blat operations
-#                      DONE - 2012-12-30 - Hiram
-# can be done on unmasked sequence the same result as masked:
-    cd /hive/data/genomes/hg38
-    time blat hg38.unmasked.2bit /dev/null /dev/null \
-       -tileSize=11 -makeOoc=jkStuff/hg38.11.ooc -repMatch=1024
-
-    # been confirmed, the 100-base non-bridged gaps are really non-bridged
-    gapToLift -minGap=100 -bedFile=jkStuff/nonBridgedGaps.bed hg38 \
-	jkStuff/hg38.nonBridged.lft
-
-##############################################################################
-# cpgIslands - (DONE - 2014-01-07 - Hiram)
-    # run on the Hmmer + trfMask sequence
-    mkdir /hive/data/genomes/hg38/bed/cpgIslands
-    cd /hive/data/genomes/hg38/bed/cpgIslands
-    time $HOME/kent/src/hg/utils/automation/doCpgIslands.pl \
-      -dbHost=hgwdev -bigClusterHub=ku -buildDir=`pwd` \
-        -workhorse=hgwdev -smallClusterHub=ku hg38 > do.log 2>&1
-    # real    3m31.684s
-    # wc -l cpgIsland.bed -> 30456 cpgIsland.bed
-    cat fb.hg38.cpgIslandExt.txt
-    #  23654068 bases of 3049335806 (0.776%) in intersection
-
-    # Previously in hg19:
-    featureBits -countGaps hg19 cpgIslandExt
-    # 21842742 bases of 3137161264 (0.696%) in intersection
-
-    # when run on Hmmer and Trf masked sequence:
-    # wc -l cpgIsland.bed -> 30416 cpgIsland.bed
-    #   23635946 bases of 3049335806 (0.775%) in intersection
-
-    # when run on unmasked sequence:
-    # wc -l cpgIsland.bed -> 55149 cpgIsland.bed
-    # 33637531 bases of 3049335806 (1.103%) in intersection
-##############################################################################
-# rerun cpgIslands on contig sequence (DONE - 2014-01-07 - Hiram)
-    # this is a test of the contig sequence file,
-    # should get a very similar answer to the above
-    mkdir /hive/data/genomes/hg38/bed/cpgIslandsContigs
-    cd /hive/data/genomes/hg38/bed/cpgIslandsContigs
-
-    # run stepwise so the lift can be done on the result before loading
-    time $HOME/kent/src/hg/utils/automation/doCpgIslands.pl \
-      -dbHost=hgwdev -bigClusterHub=ku -buildDir=`pwd` \
-       -stop=makeBed -maskedSeq=/hive/data/genomes/hg38/hg38.contigs.2bit \
-        -workhorse=hgwdev -smallClusterHub=ku hg38 > makeBed.log 2>&1
-    # real    9m31.502s
-    # fails on the bedToBigBed creation since this isn't the actual
-    # hg38 sequence.
-    mv cpgIsland.bed cpgIsland.beforeLift.bed
-    liftUp -type=.bed stdout ../../jkStuff/hg38.contigs.lift carry \
-      cpgIsland.beforeLift.bed | sort -k1,1 -k2,2n > cpgIsland.bed
-    bedToBigBed -tab -type=bed4+6 -as=$HOME/kent/src/hg/lib/cpgIslandExt.as \
-       cpgIsland.bed ../../chrom.sizes hg38.cpgIslandExt.bb
-    zcat ../cpgIslands/cpgIsland.bed.gz | sort -k1,1 -k2,2n > t.bed
-    # Surprisingly, a few more are detected, perhaps due to the different
-    # masking since this contig run is on the final corrected cross-match rmsk
-    # plus TRF, the above was on the corrupted HMMER+TRF mask:
-    wc -l cpgIsland.bed t.bed
-#   30477 cpgIsland.bed
-#   30456 t.bed
-    # 2,835 different items between the two:
-    sort t.bed cpgIsland.bed | uniq -c | awk '$1 < 2' | wc -l
-    # 2835
-    # 29,049 identical items
-    sort t.bed cpgIsland.bed | uniq -c | awk '$1 == 2' | wc -l
-    # 29049
-    cut -f1-3 cpgIsland.bed | sort > contigs.bed
-    cut -f1-3 t.bed | sort > fullSequence.bed
-    # 29,339 identical locations:
-    comm -12 contigs.bed fullSequence.bed | wc -l
-    # 29339
-
-    time $HOME/kent/src/hg/utils/automation/doCpgIslands.pl \
-      -dbHost=hgwdev -bigClusterHub=ku -buildDir=`pwd` \
-       -continue=load -maskedSeq=/hive/data/genomes/hg38/hg38.contigs.2bit \
-        -workhorse=hgwdev -smallClusterHub=ku hg38 > load.log 2>&1
-    # real    0m12.056s
-
-    cat fb.hg38.cpgIslandExt.txt
-    # 23610399 bases of 3049335806 (0.774%) in intersection
-
-##############################################################################
-# rerun cpgIslands on contig UNMASKED sequence (DONE - 2014-01-07 - Hiram)
-    mkdir /hive/data/genomes/hg38/bed/cpgIslandsContigsUnmasked
-    cd /hive/data/genomes/hg38/bed/cpgIslandsContigsUnmasked
-
-    twoBitToFa -noMask ../../hg38.contigs.2bit stdout \
-      | faToTwoBit stdin hg38.contigsUnmasked.2bit
-
-    # verify sequence is OK:
-    twoBitToFa hg38.contigsUnmasked.2bit stdout | faSize stdin
-# 3061688741 bases (12372958 N's 3049315783 real 3049315783 upper 0 lower)
-#    in 733 sequences in 1 files
-# %0.00 masked total, %0.00 masked real
-    twoBitToFa hg38.contigsUnmasked.2bit stdout | faCount stdin | tail -1
-# total 3061688741 898285419 623727342 626335137 900967885  12372958 30979743
-    # ACGT CpG same as original hg38.2bit except for the missing N's:
-# total 3209286105 898285419 623727342 626335137 900967885 159970322 30979743
-
-    # run stepwise so the lift can be done on the result before loading
-    time $HOME/kent/src/hg/utils/automation/doCpgIslands.pl \
-      -dbHost=hgwdev -bigClusterHub=ku -buildDir=`pwd` \
-       -stop=makeBed -maskedSeq=`pwd`/hg38.contigsUnmasked.2bit \
-        -workhorse=hgwdev -smallClusterHub=ku hg38 > makeBed.log 2>&1
-    # real    11m0.690s
-    # as above, failed on the bedToBigBed step since this isn't the full hg38
-    # sequence
-    mv cpgIsland.bed cpgIsland.beforeLift.bed
-    liftUp -type=.bed stdout ../../jkStuff/hg38.contigs.lift carry \
-      cpgIsland.beforeLift.bed | sort -k1,1 -k2,2n > cpgIsland.bed
-    bedToBigBed -tab -type=bed4+6 -as=$HOME/kent/src/hg/lib/cpgIslandExt.as \
-       cpgIsland.bed ../../chrom.sizes hg38.cpgIslandExt.bb
-    # a lot more here than for masked sequence:
-    wc -l cpgIsland.bed ../cpgIslandsContigs/cpgIsland.bed
-    # 55149 cpgIsland.bed
-    # 30477 ../cpgIslandsContigs/cpgIsland.bed
-    featureBits -countGaps hg38 cpgIsland.bed
-    # 33637531 bases of 3209286105 (1.048%) in intersection
-    featureBits -countGaps hg38 ../cpgIslandsContigs/cpgIsland.bed
-    # 23610399 bases of 3209286105 (0.736%) in intersection
-
-    # debug load step so it can be loaded into a separate table:
-    $HOME/kent/src/hg/utils/automation/doCpgIslands.pl \
-      -dbHost=hgwdev -bigClusterHub=ku -buildDir=`pwd` \
-       -debug -continue=load -maskedSeq=`pwd`/hg38.contigsUnmasked.2bit \
-        -workhorse=hgwdev -smallClusterHub=ku hg38
-
-    time ./doLoadCpg.csh > load.log 2>&1
-    # real    0m2.179s
-    # 33637531 bases of 3049335806 (1.103%) in intersection
-
-#########################################################################
-# construct liftOver to hg19 (DONE - 2013-12-31 - Hiram)
-    # it turns out it doesn't matter if the query or target 2bit files
-    # are masked.  This procedure can be done on completely unmasked sequences
-    # for both, same result masked or not masked
-    screen -S hg38	# manage this longish running job in a screen
-    mkdir /hive/data/genomes/hg38/bed/blat.hg19.2013-12-31
-    cd /hive/data/genomes/hg38/bed/blat.hg19.2013-12-31
-    # this was run in manual steps as experiments were done about the masking
-    # check it with -debug first to see if it is going to work:
-    doSameSpeciesLiftOver.pl -stop=net -buildDir=`pwd` -bigClusterHub=ku \
-      -dbHost=hgwdev -workhorse=hgwdev -debug \
-        -ooc=/hive/data/genomes/hg38/jkStuff/hg38.11.ooc hg38 hg19
-    # the debug step doesn't actually construct enough files to run the
-    # steps manually.  The chaining has an extra procedure that is performed
-    # while not in 'debug' mode
-    # the run.blat was operated manually, then chaining:
-    time doSameSpeciesLiftOver.pl -continue=chain -stop=net -buildDir=`pwd` \
-      -bigClusterHub=ku \
-        -dbHost=hgwdev -workhorse=hgwdev \
-           -ooc=/hive/data/genomes/hg38/jkStuff/hg38.11.ooc \
-             hg38 hg19 > chain.log 2>&1
-    # real    22m31.635s
-    # loading is only a few seconds:
-    doSameSpeciesLiftOver.pl -continue=load -buildDir=`pwd` \
-     -bigClusterHub=ku \
-       -dbHost=hgwdev -workhorse=hgwdev \
-          -ooc=/hive/data/genomes/hg38/jkStuff/hg38.11.ooc \
-             hg38 hg19 > load.log 2>&1
-
-    # verify this file exists:
-    #	/gbdb/hg38/liftOver/hg38ToHg19.over.chain.gz
-    # and try out the conversion on genome-test from hg38 to hg19
-    # same file should exist for downloads:
-    #  /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/liftOver/hg38ToHg19.over.chain.gz
-
-############################################################################
-# marking the PAR regions: (DONE - 2014-01-09 - Hiram)
-    # after much experimentation with the AGP files and the given NCBI
-    # files in hg38/genbank/Primary_Assembly/pseudoautosomal_region
-    # the PAR region definitions can be seen in the par_align.gff file:
-# CM000685.2  10001  2781479  ->  CM000686.2 10001 2781479
-# CM000685.2  155701383  156030895 -> CM000686.2 56887903 57217415
-    # equivalent to:
-# chrX  10001  2781479  ->  chrY 10001 2781479
-# chrX  155701383  156030895 -> chrY 56887903 57217415
-
-    # subtract one for the chromStart position:
-    cat << '_EOF_' > hg38Par.bed4
-chrX 10000      2781479   PAR1
-chrX 155701382  156030895 PAR2
-chrY 10000      2781479   PAR1
-chrY 56887902   57217415  PAR2
-'_EOF_'
-    # << happy emacs
-
-    hgLoadBed hg38 par hg38Par.bed4
-    checkTableCoords  hg38
-
-    # hg19 had:
-+-------+------------+-----------+------+
-| chrom | chromStart | chromEnd  | name |
-+-------+------------+-----------+------+
-| chrX  |      60000 |   2699520 | PAR1 |
-| chrX  |  154931043 | 155260560 | PAR2 |
-| chrY  |      10000 |   2649520 | PAR1 |
-| chrY  |   59034049 |  59363566 | PAR2 |
-+-------+------------+-----------+------+
-
-    # The AGP files come close to defining the location, but not
-    # precisely.  The first region uses different bits of AC006209.25:
-zcat ../../genbank/Primary_Assembly/assembled_chromosomes/AGP/chrX.comp.agp.gz\
-  | grep AC006209.25
-CM000685.2      2665048 2677319 56      F       AC006209.25     127483  139754 -
-CM000685.2      2677869 2804801 58      F       AC006209.25     1       126933 -
-zcat ../../genbank/Primary_Assembly/assembled_chromosomes/AGP/chrY.comp.agp.gz\
-  | grep AC006209.25
-CM000686.2      2665048 2677319 56      F       AC006209.25     127483  139754 -
-CM000686.2      2677869 2781479 58      F       AC006209.25     23323   126933 -
-
-    # and the second region uses different bits of AJ271735.1:
-zcat ../../genbank/Primary_Assembly/assembled_chromosomes/AGP/chrX.comp.agp.gz\
-  | grep AJ271735.1 | head -1
-CM000685.2 155676925 155719966 3096  O AJ271735.1     44687    87728   +
-zcat ../../genbank/Primary_Assembly/assembled_chromosomes/AGP/chrY.comp.agp.gz\
-  | grep AJ271735.1 | head -1
-CM000686.2  56887903  56906486  356  O AJ271735.1     69145    87728   +
-
-    # combining all the contig definitions from each will find all the
-    # exact identical contig bits:
-zcat ../../genbank/Primary_Assembly/assembled_chromosomes/AGP/chrY.comp.agp.gz\
-  | grep -v "^#" | awk '$5 != "N"' \
-    | awk '{printf "%s_%d_%d\t%s\t%d\t%d\n", $6,$7,$8,$1,$2,$3}' \
-    | sort > chrY.comp.agp.txt
-zcat ../../genbank/Primary_Assembly/assembled_chromosomes/AGP/chrX.comp.agp.gz\
-  | grep -v "^#" | awk '$5 != "N"' \
-    | awk '{printf "%s_%d_%d\t%s\t%d\t%d\n", $6,$7,$8,$1,$2,$3}' \
-    | sort > chrX.comp.agp.txt
-   join -t'^I' chrY.comp.agp.txt chrX.comp.agp.txt | head
-
-CM000685.2  10001   44821   CM000686.2      10001   44821
-...
-CM000685.2  2677320 2677868 CM000686.2      2677320 2677868
-
-CM000685.2 155719967  155720351       CM000686.2      56906487        56906871
-...
-CM000685.2 155964490  156030895       CM000686.2      57151010        57217415
-
-############################################################################
-## altLocations track (DONE - 2014-01-02 - Hiram)
-    # indicate corresponding locations between haplotypes and reference
-    mkdir /hive/data/genomes/hg38/bed/altLocations
-    cd /hive/data/genomes/hg38/bed/altLocations
-
-    find ../../genbank/ALT_* -type f | grep alt_scaffold_placement.txt \
-  | while read F
-do
-  grep -v "^#" ${F} | sed -e 's/\./v/;' | awk -F'\t' '{printf "chr%s\t%d\t%d\tchr%s_%s_alt\n", $6,$12-1,$13,$6, $4}'
-done | sort -k1,1 -k2,2n > chrToAlt.bed
-
-    # note silent hidden <tab> character in the join -t argument
-    # explicit as written here
-
-find ../../genbank/ALT_* -type f | grep alt_scaffold_placement.txt \
-  | while read F
-do
-  grep -v "^#" ${F} | sed -e 's/\./v/;' | awk -F'\t' '{printf "chr%s_%s_alt\tchr%s:%d-%d\n", $6,$4,$6,$12,$13}'
-done | sort > altToChr.tab
-sort ../../chrom.sizes | join -t'^I' - altToChr.tab \
-   | awk '{printf "%s\t0\t%d\t%s\n", $1,$2,$3}' > altToChr.bed
-
-
-   hgLoadBed hg38 altLocations chrToAlt.bed altToChr.bed
-   featureBits -countGaps hg38 altLocations
-   # 170113652 bases of 3209286105 (5.301%) in intersection
-
-############################################################################
-## genscan (DONE - 2014-01-07 - Hiram)
-   mkdir /hive/data/genomes/hg38/bed/genscan
-   cd /hive/data/genomes/hg38/bed/genscan
-
-   # using the contig sequence
-   # running stepwise to allow the lifting of the final result
-   time $HOME/kent/src/hg/utils/automation/doGenscan.pl hg38 -buildDir=`pwd` \
-     -maskedSeq=/hive/data/genomes/hg38/hg38.contigs.2bit \
-       -stop=makeBed -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
-        > do.log 2>&1
-   # three jobs did not finish due to almost all N's in the sequence,
-   # just a couple of bases in each piece.  Their empty result is good enough.
-   time $HOME/kent/src/hg/utils/automation/doGenscan.pl hg38 -buildDir=`pwd` \
-     -maskedSeq=/hive/data/genomes/hg38/hg38.contigs.2bit \
-       -continue=makeBed -stop=makeBed -bigClusterHub=ku -dbHost=hgwdev \
-         -workhorse=hgwdev > makeBed.log 2>&1
-   # real    0m48.161s
-
-   cd lifted
-   mkdir -p gtf subopt nameFixed/gtf nameFixed/pep newNames pep
-   for F in ../gtf/000/*.gtf
-do
-   B=`basename $F`
-   liftUp gtf/${B} ../../../jkStuff/hg38.contigs.lift carry $F
-   echo $B
-done
-   for F in ../subopt/000/*.bed
-do
-   B=`basename $F`
-   liftUp subopt/${B} ../../../jkStuff/hg38.contigs.lift carry $F
-   echo $B
-done
-
-   ls gtf/chr*_[0-9][0-9].gtf \
-     | sed -e 's/_[0-9][0-9]//; s#gtf/##; s/.gtf//;' | sort -u | while read C
-do
-   cat ../pep/000/${C}_[0-9][0-9].pep > pep/${C}.pep
-   cat gtf/${C}_[0-9][0-9].gtf | ./gtfFixId.pl ${C} > nameFixed/gtf/${C}.gtf
-   ./pepNameFix.pl ${C} > nameFixed/pep/${C}.pep
-done
-
-   cat nameFixed/gtf/*.gtf > ../hg38.genscan.gtf
-   ls gtf | egrep -v '^chr[0-9XY][0-9]*_[0-9][0-9].gtf' | while read C
-do
-   cat gtf/${C}
-done >> ../hg38.genscan.gtf
-
-   cat nameFixed/pep/*.pep > ../hg38.genscan.pep
-   ls gtf | egrep -v '^chr[0-9XY][0-9]*_[0-9][0-9].gtf' \
-     | sed -e 's/.gtf/.pep/' | while read C
-do
-   cat ../pep/000/${C}
-done >> ../hg38.genscan.pep
-
-   cd /hive/data/genomes/hg38/bed/genscan
-   cat lifted/subopt/*.bed | sort -k1,1 -k2,2n > hg38.genscanSubopt.bed
-
-   gtfToGenePred hg38.genscan.gtf hg38.genscan.gp
-   genePredCheck -db=hg38 hg38.genscan.gp
-   # checked: 44149 failed: 0
-   genePredToBed hg38.genscan.gp hg38.genscan.bed
-   bedToBigBed hg38.genscan.bed ../../chrom.sizes hg38.genscan.bb
-   bedToBigBed hg38.genscanSubopt.bed ../../chrom.sizes hg38.genscanSubopt.bb
-   ldHgGene -gtf hg38 genscan hg38.genscan.gtf
-# Read 44149 transcripts in 339212 lines in 1 files
-#  44149 groups 345 seqs 1 sources 1 feature types
-
-    cat fb.hg38.genscan.txt
-    # 58278346 bases of 3049335806 (1.911%) in intersection
-    cat fb.hg38.genscanSubopt.txt
-    # 55020514 bases of 3049335806 (1.804%) in intersection
-
-    # oddly, we are getting half of what hg19 had ?
-    featureBits hg19 genscan
-    # 106433874 bases of 2897316137 (3.674%) in intersection
-
-    # This is because hg19 was run on soft-masked sequence and not
-    # on hard masked sequence
-
-############################################################################
-## genscan on unmasked sequence experiment (DONE - 2013-12-03 - Hiram)
-   ## instead, working on unmasked sequence:
-   mkdir /hive/data/genomes/hg38/bed/genscan/unmaskedRun
-   cd /hive/data/genomes/hg38/bed/genscan/unmaskedRun
-
-   mkdir liftSpecs
-   split -a 3 -d -l 1 ../../../jkStuff/hg38.nonBridged.lift liftSpecs/hg38_
-
-   mkdir fasta
-for F in liftSpecs/hg38_*
-do
-   L=`cut -f2 $F`
-   echo $L
-   /cluster/home/hiram/kent/src/hg/utils/lft2BitToFa.pl \
-       ../../../hg38.unmasked.2bit $F > fasta/${L}.fa
-done
-
-
-   cat << '_EOF_' > template
-#LOOP
-./runGsBig.bash $(path1) {check out exists gtf/$(root1).gtf} {check out exists pep/$(root1).pep} {check out exists subopt/$(root1).bed}
-#ENDLOOP
-'_EOF_'
-  # << happy emacs
-   cat << '_EOF_' > runGsBig.bash
-#!/bin/bash
-
-set -beEu -o pipefail
-
-export seqFile=$1
-export resultGtf=$2
-export resultPep=$3
-export resultSubopt=$4
-/cluster/bin/x86_64/gsBig $seqFile $resultGtf -trans=$resultPep -subopt=$resultSubopt -exe=/scratch/data/genscan/genscan -par=/scratch/data/genscan/HumanIso.smat -tmp=/dev/shm -window=2400000
-'_EOF_'
-  # << happy emacs
-
-  ls -1S `pwd`/fasta/*.fa > part.list
-  gensub2 part.list single template jobList
-  para create jobList
-  para push
-  # several jobs crashed:
-# Completed: 726 of 733 jobs
-# Crashed: 7 jobs
-# CPU time in finished jobs:      62501s    1041.68m    17.36h    0.72d  0.002 y
-# IO & Wait Time:                  2563s      42.72m     0.71h    0.03d  0.000 y
-# Average job time:                  90s       1.49m     0.02h    0.00d
-# Longest finished job:            3288s      54.80m     0.91h    0.04d
-# Submission to last job:          3294s      54.90m     0.92h    0.04d
-
-  para status | grep -v -w done | awk '{print $(NF-3)}' > crashed.job.list
-
-  mkdir /hive/data/genomes/hg38/bed/genscan/unmaskedRun/crashedJobs
-  cd /hive/data/genomes/hg38/bed/genscan/unmaskedRun/crashedJobs
-  mkdir splitBits
-
-  for F in chr2.06 chr1.03 chr3.05 chr12.07 chr10.05 chr17.08 chr11.04
-do
-   faSplit -lift=${F}.lift gap ../fasta/${F}.fa 2000000 splitBits/${F}_
-done
-
-  ls -1S `pwd`/splitBits/*.fa > part.list
-  cat << '_EOF_' > runGsBig.bash
-#!/bin/bash
-
-set -beEu -o pipefail
-
-export seqFile=$1
-export resultGtf=$2
-export resultPep=$3
-export resultSubopt=$4
-/cluster/bin/x86_64/gsBig $seqFile $resultGtf -trans=$resultPep -subopt=$resultSubopt -exe=/scratch/data/genscan/genscan -par=/scratch/data/genscan/HumanIso.smat -tmp=/dev/shm -window=2400000
-'_EOF_'
-  # << happy emacs
-  chmod +x runGsBig.bash
-
-  cat << '_EOF_' > template
-#LOOP
-./runGsBig.bash $(path1) {check out exists gtf/$(root1).gtf} {check out exists pep/$(root1).pep} {check out exists subopt/$(root1).bed}
-#ENDLOOP
-'_EOF_'
-  # << happy emacs
-
-  gensub2 part.list single template jobList
-  para create jobList
-  para push
-# Completed: 331 of 334 jobs
-# Crashed: 3 jobs
-# CPU time in finished jobs:      18097s     301.62m     5.03h    0.21d  0.001 y
-# IO & Wait Time:                  1085s      18.08m     0.30h    0.01d  0.000 y
-# Average job time:                  58s       0.97m     0.02h    0.00d
-# Longest finished job:              79s       1.32m     0.02h    0.00d
-# Submission to last job:           249s       4.15m     0.07h    0.00d
-  # the last three completed with -window=1600000
-
-  # lifting results:
-  cat << '_EOF_' > fixIds.pl
-#!/usr/bin/env perl
-
-use strict;
-use warnings;
-
-my $argc = scalar(@ARGV);
-
-if ($argc != 1) {
-  printf STDERR "usage: cat chrN.M.lifted | ./fixIds.pl chrN.M\n";
-  exit 255;
-}
-
-my $F=shift;
-my $C = $F;
-$C =~ s/\.[0-9][0-9]//;
-
-my $id = 0;
-my $prevId = "";
-open (GT, ">${F}.gtf") or die "can not write to ${F}.gtf";
-while (my $line=<>) {
-   chomp $line;
-   my $geneId = $line;
-   $geneId =~ s/^${C}.*gene_id "${C}//;
-   $geneId =~ s/";.*//;
-   $id += 1 if ( $prevId ne $geneId);
-   $line =~ s/${C}[0-9]+.[0-9]+/${F}.$id/g;
-   printf GT "%s\n", $line;
-   $prevId = $geneId;
-}
-close (GT);
-'_EOF_'
-  # << happy emacs
-  chmod +x fixIds.pl
-  for F in chr1.03 chr10.05 chr11.04 chr12.07 chr17.08 chr2.06 chr3.05
-do
-  echo "${F}" 1>&2
-  cut -f2 ${F}.lift | while read P
-  do
-     liftUp -type=.gtf stdout ${F}.lift error gtf/${P}.gtf
-  done > ${F}.lifted.gtf
-  cat ${F}.lifted.gtf | ./fixIds.pl ${F}
-done
-  # copied these results to ../gtf/ to get into the final result
-# -rw-rw-r-- 1 3349959 Jan  2 15:33 chr1.03.gtf
-# -rw-rw-r-- 1 2439182 Jan  2 15:33 chr10.05.gtf
-# -rw-rw-r-- 1 1068097 Jan  2 15:33 chr11.04.gtf
-# -rw-rw-r-- 1 2392548 Jan  2 15:33 chr12.07.gtf
-# -rw-rw-r-- 1 1831336 Jan  2 15:33 chr17.08.gtf
-# -rw-rw-r-- 1 3539694 Jan  2 15:33 chr2.06.gtf
-# -rw-rw-r-- 1 2309903 Jan  2 15:33 chr3.05.gtf
-
-  for F in chr1.03 chr10.05 chr11.04 chr12.07 chr17.08 chr2.06 chr3.05
-do
-  echo "${F}" 1>&2
-  cut -f2 ${F}.lift | while read P
-  do
-     liftUp -type=.bed stdout ${F}.lift error subopt/${P}.bed
-  done > ${F}.lifted.subopt.bed
-done
-  # copied these results to ../subopt/ to get into the final result
-# -rw-rw-r-- 1 3349959 Jan  2 15:33 chr1.03.gtf
-# -rw-rw-r-- 1 2439182 Jan  2 15:33 chr10.05.gtf
-# -rw-rw-r-- 1 1068097 Jan  2 15:33 chr11.04.gtf
-# -rw-rw-r-- 1 2392548 Jan  2 15:33 chr12.07.gtf
-# -rw-rw-r-- 1 1831336 Jan  2 15:33 chr17.08.gtf
-# -rw-rw-r-- 1 3539694 Jan  2 15:33 chr2.06.gtf
-# -rw-rw-r-- 1 2309903 Jan  2 15:33 chr3.05.gtf
-
-
-  cat << '_EOF_' > pepNameFix.pl
-#!/usr/bin/env perl
-
-use strict;
-use warnings;
-
-# BIG ASSUMPTION ! ! ! - the peptides are in the same order as
-# they are in the GTF file ! ! !
-
-my $argc = scalar(@ARGV);
-
-if ($argc != 1) {
-  printf STDERR "usage: cat chrN.M.needNameFix.pep | ./pepNameFix.pl chrN.M > chrN.M.pep\n";
-  exit 255;
-}
-
-my $C=shift;
-
-my $id = 1;
-
-while (my $line = <>) {
-  if ($line =~ m/^>/) {
-    printf ">%s.%d\n", $C, $id++;
-  } else {
-    print $line;
-  }
-}
-'_EOF_'
-  # << happy emacs
-  chmod +x pepNameFix.pl
-
-for F in chr1.03 chr10.05 chr11.04 chr12.07 chr17.08 chr2.06 chr3.05
-do
-  echo "${F}" 1>&2
-  cut -f2 ${F}.lift | while read P
-  do
-     cat pep/${P}.pep
-  done > ${F}.needNameFix.pep
-  cat ${F}.needNameFix.pep | ./pepNameFix.pl ${F} > ${F}.pep
-done
-  # copied these results to ../pep/ to get into the final result:
-# -rw-rw-r-- 1 1592655 Jan  2 15:55 chr1.03.pep
-# -rw-rw-r-- 1 1169168 Jan  2 15:55 chr10.05.pep
-# -rw-rw-r-- 1  519106 Jan  2 15:55 chr11.04.pep
-# -rw-rw-r-- 1 1152111 Jan  2 15:55 chr12.07.pep
-# -rw-rw-r-- 1  775052 Jan  2 15:55 chr17.08.pep
-# -rw-rw-r-- 1 1799546 Jan  2 15:55 chr2.06.pep
-# -rw-rw-r-- 1 1248762 Jan  2 15:55 chr3.05.pep
-
-  # and then, adding in all the results together
-
-  cd /hive/data/genomes/hg38/bed/genscan/unmaskedRun
-  cat << '_EOF_' > gtfIdFix.pl
-#!/usr/bin/env perl
-
-use strict;
-use warnings;
-
-my $argc = scalar(@ARGV);
-
-if ($argc != 1) {
-  printf STDERR "usage: cat lifted/gtf/chrN.gtf | ./gtfIdFix.pl chrN\n";
-  exit 255;
-}
-
-my $C=shift;
-
-my $id = 0;
-my $prevId = "";
-open (NM, ">nameFixed/newNames/${C}.tab") or die "can not write to nameFixed/newNames/${C}.tab";
-open (GT, ">nameFixed/gtf/${C}.gtf") or die "can not write to nameFixed/gtf/${C}.gtf";
-while (my $line=<>) {
-   chomp $line;
-   my $geneId = $line;
-   $geneId =~ s/^${C}.*gene_id "//;
-   $geneId =~ s/";.*//;
-   if ( $prevId ne $geneId) {
-     $id += 1;
-     printf NM "%s\t%s.%d\n", $geneId, $C, $id;
-   }
-   $line =~ s/${C}.[0-9]+.[0-9]+/${C}.$id/g;
-   printf GT "%s\n", $line;
-   $prevId = $geneId;
-}
-close (GT);
-close (NM);
-'_EOF_'
-  # << happy emacs
-  chmod +x gtfIdFix.pl
-
-  rm -fr lifted
-  rm -fr nameFix
-  mkdir -p lifted
-  mkdir -p lifted/gtf
-  mkdir -p lifted/pep
-  mkdir -p lifted/subopt
-  mkdir -p nameFix
-  mkdir -p nameFix/gtf
-  mkdir -p nameFix/newNames
-
-  for F in liftSpecs/hg38_*
-do
-   L=`cut -f2 $F`
-   C=`cut -f4 $F`
-   liftUp -type=.gtf stdout ${F} error gtf/${L}.gtf >> lifted/gtf/${C}.gtf
-   cat pep/${L}.pep >> lifted/pep/${C}.pep
-   liftUp -type=.bed stdout ${F} error subopt/${L}.bed >> lifted/subopt/${C}.bed
-done
-
-  for F in lifted/gtf/*.gtf
-do
-  C=`basename $F | sed -e 's/.gtf//'`
-  cat $F | ./gtfIdFix.pl $C
-done
-
-mkdir -p nameFixed/pep
-
-  cat << '_EOF_' > pepNameFix.pl
-#!/usr/bin/env perl
-
-use strict;
-use warnings;
-
-my $argc = scalar(@ARGV);
-if ($argc != 1) {
-  printf STDERR "usage: ./pepNameFix.pl chrN > chrN.pep\n";
-  exit 255
-}
-
-my $C = shift;
-my %newName;
-
-open (FH, "<lifted/pep/$C.pep") or die "can not read <lifted/pep/$C.pep";
-open (NM, "<nameFixed/newNames/$C.tab") or die "can not read nameFixed/newNames/$C.tab";
-while (my $line = <NM>) {
-  chomp $line;
-  my ($needFix, $fixedName) = split('\t', $line);
-  $newName{$needFix} = $fixedName;
-}
-close (NM);
-
-while (my $line = <FH>) {
-  if ($line =~m /^>/) {
-    chomp $line;
-    $line =~ s/^>//;
-    die "can not find name to fix $line" if (!exists($newName{$line}));
-    printf ">%s\n", $newName{$line};
-  } else {
-    print $line;
-  }
-}
-close (FH);
-'_EOF_'
-  # << happy emacs
-  chmod +x pepNameFix.pl
-
-  for F in lifted/pep/*.pep
-do
-  C=`basename $F | sed -e 's/.pep//'`
-  echo $C
-  ./pepNameFix.pl $C > nameFixed/pep/$C.pep
-done
-
-#############################################################################
-# Mark the new centromere regions (DONE - 2014-01-09 - Hiram)
-    mkdir /hive/data/genomes/hg38/bed/centromere
-    cd /hive/data/genomes/hg38/bed/centromere
-    grep GJ ../../hg38.agp > hg38.centContigs.agp
-
-    awk '{printf "%s\t%d\t%d\t%s\n", $1, $2-1, $3, $6}' hg38.centContigs.agp \
-      > hg38.centContigs.bed4
-
-    hgLoadBed hg38 centromeres hg38.centContigs.bed4
-    checkTableCoords hg38 centromeres
-
-#############################################################################
-## alternate sequence/haplotype alignments (DONE - 2014-01-23 - Hiram)
-    mkdir /hive/data/genomes/hg38/bed/lastzAltSequences
-    cd /hive/data/genomes/hg38/bed/lastzAltSequences
-
-rm -fr hg38.haplotypes.lift temp.lift targetFa queryFa
-mkdir targetFa
-mkdir queryFa
-touch temp.lift
-
-cat ../altLocations/chrToAlt.bed | while read L
-do
-  chrName=`echo $L | awk '{print $1}'`
-  chromSize=`egrep "^$chrName   " ../../chrom.sizes | cut -f2`
-  chrStart=`echo $L | awk '{if (($2-10000)>=0) {printf "%d", $2-10000} else {printf "0"}}'`
-  chrEnd=`echo $L | awk -v chromSize=$chromSize '{if (($3+10000)<=chromSize) {printf "%d", $3+10000} else {printf "%d", chromSize}}'`
-  chrSize=`echo $chrEnd $chrStart | awk '{print $1-$3}'`
-  queryName=`echo $L | awk '{print $4}'`
-  partName="${chrName}_${chrStart}_${chrEnd}"
-  echo $chrName $chrStart $chrEnd $queryName $partName $chromSize
-  echo -e "$chrStart\t${partName}\t$chrSize\t$chrName\t$chromSize" >> temp.lift
-  twoBitToFa ../../hg38.unmasked.2bit:$chrName:$chrStart-$chrEnd stdout | sed -e "s/^>.*/>$partName/;" > targetFa/$queryName.fa
-  twoBitToFa ../../hg38.unmasked.2bit:$queryName queryFa/$queryName.fa
-done
-
-sort -u temp.lift | sort -k4,4 -k1,1n > hg38.haplotypes.lift
-
-    # these were run serially on hgwdev, they could be a cluster run:
-    ssh ku
-    mkdir /hive/data/genomes/hg38/bed/lastzAltSequences/run.blastz
-    cd /hive/data/genomes/hg38/bed/lastzAltSequences/run.blastz
-    mkdir ../lav ../psl
-
-    # construct the jobList
-    ls ../targetFa | sed -e 's/.fa//;' | while read partName
-do
-   echo "./runJob.sh ${partName}"
-done > jobList
-
-    cat << '_EOF_' > runJob
-#!/bin/sh
-
-export partName=$1
-export target="../targetFa/$partName.fa"
-export query="../queryFa/$partName.fa"
-export lav="../lav/$partName.lav"
-export psl="../psl/$partName.psl"
-
-/cluster/bin/penn/lastz-distrib-1.03.46/bin/lastz \
-  $target $query \
-  Y=15000 T=2 M=254 O=600 H=2000 O=600 E=150 K=10000 L=10000 \
-  Q=/scratch/data/blastz/human_chimp.v2.q > $lav
-lavToPsl $lav stdout | liftUp $psl ../hg38.haplotypes.lift error stdin
-'_EOF_'
-    # << happy emacs
-
-    # these were run serially on hgwdev, they could be a cluster run:
-    time ./jobList > do.log
-    # real    61m35.898s
-
-    # chaining lastz results:
-    mkdir -p /hive/data/genomes/hg38/bed/lastzAltSequences/axtChain/run/chain
-    cd /hive/data/genomes/hg38/bed/lastzAltSequences/axtChain/run
-
-    ls ../../psl/*.psl | while read P
-do
-  B=`basename $P | sed -e 's/.psl//'`
-  echo $B $P
-  ls -og $P ../../targetFa/${B}.fa ../../queryFa/${B}.fa
-  /cluster/home/hiram/kent/src/hg/mouseStuff/axtChain/axtChain \
-    -psl -scoreScheme=/scratch/data/blastz/human_chimp.v2.q \
-    -minScore=1000 -linearGap=medium $P \
-    ../../../../hg38.unmasked.2bit \
-    ../../../../hg38.unmasked.2bit stdout \
-  | chainAntiRepeat ../../../../hg38.unmasked.2bit \
-    ../../../../hg38.unmasked.2bit stdin chain/${B}.chain
-done
-
-   # real    7m54.677s
-
-   cd /hive/data/genomes/hg38/bed/lastzAltSequences/axtChain
-   find ./run/chain -name "*.chain" | chainMergeSort -inputList=stdin \
-       | nice gzip -c > hg38.haplotypes.all.chain.gz
-   chainPreNet  hg38.haplotypes.all.chain.gz ../../../chrom.sizes \
-     /hive/data/genomes/hg38/chrom.sizes stdout \
-       | chainNet  stdin -minSpace=1 ../../../chrom.sizes \
-          ../../../chrom.sizes stdout /dev/null \
-             | netSyntenic stdin noClass.net
-
-    # Make liftOver chains from chroms to alternates:
-    netChainSubset -verbose=0 noClass.net hg38.haplotypes.all.chain.gz stdout \
-      | chainStitchId stdin stdout | gzip -c > hg38.haplotypes.over.chain.gz
-    # swap the alignments to get the alternates to chrom mappings:
-    chainSwap hg38.haplotypes.over.chain.gz stdout \
-       | gzip -c > hg38.reference.over.chain.gz
-    # and put them all together so mappings go both directions
-    chainMergeSort hg38.haplotypes.over.chain.gz hg38.reference.over.chain.gz \
-        | gzip -c > hg38.haploReference.over.chain.gz
-
-    hgLoadChain -tIndex hg38 chainAltSequence hg38.haploReference.over.chain.gz
-    netClass -verbose=0 -noAr noClass.net hg38 hg38 hg38.hg38AltSequence.net
-    netFilter -minGap=10 hg38.hg38AltSequence.net \
-      | hgLoadNet -verbose=0 hg38 netAltSequence stdin
-
-    chainToPsl hg38.haploReference.over.chain.gz ../../../chrom.sizes \
-      ../../../chrom.sizes \
-        /hive/data/genomes/hg38/hg38.unmasked.2bit  \
-          /hive/data/genomes/hg38/hg38.unmasked.2bit  \
-             hg38.beforeRecalc.haploReference.over.psl
-
-    pslCheck -targetSizes=../../../chrom.sizes \
-        -querySizes=../../../chrom.sizes \
-    hg38.beforeRecalc.haploReference.over.psl 2>&1 | tail -1
-    # checked: 3092 failed: 57 errors: 57
-
-    pslRecalcMatch hg38.beforeRecalc.haploReference.over.psl \
-    ../../../hg38.unmasked.2bit ../../../hg38.unmasked.2bit  \
-        hg38.haploReference.over.psl
-
-    pslCheck -targetSizes=../../../chrom.sizes \
-      -querySizes=../../../chrom.sizes \
-         hg38.haploReference.over.psl 2>&1 | tail -1
-    # checked: 3092 failed: 0 errors: 0
-
-    hgLoadPsl hg38 -table=altSequenceLiftOver hg38.haploReference.over.psl
-
-#############################################################################
-## construct non-bridged contig sequence (DONE - 2014-01-10 - Hiram)
-    mkdir /hive/data/genomes/hg38/bed/nonBridgedContigs
-    cd /hive/data/genomes/hg38/bed/nonBridgedContigs
-
-    # only need the actual split chroms in this lift, and the
-    # _nn name is a bit more convenient than the .nn:
-    gapToLift -minGap=100 hg38 stdout | sed -e 's/\./_/;' \
-        | awk '$1 != 0' > hg38.contigs.lift
-    # the warnings gapToLift issues are about gaps defined in the table
-    # that are abutting to each other.  teleomere gaps are next to contig gaps
-    # those lifts in the format of a bed file:
-    awk '{printf "%s\t%d\t%d\t%s\n", $4, $1, $1+$3, $2}' hg38.contigs.lift \
-        > hg38.contigs.bed
-    # the negation of that is the gaps between the contigs
-    #  fixup the .N to _nn with the awk:
-    featureBits -not -countGaps hg38 hg38.contigs.bed -bed=stdout \
-| awk '{split($4,a,"."); printf "%s\t%d\t%d\t%s_%02d\n", $1,$2,$3,a[1],a[2]}' \
-             > hg38.gaps.bed
-    # 268613637 bases of 3209286105 (8.370%) in intersection
-
-    # together, those two should be %100 of the genome exactly:
-    featureBits -countGaps -or hg38 hg38.contigs.bed hg38.gaps.bed
-    #  3209286105 bases of 3209286105 (100.000%) in intersection
-
-    # the list of all those other bits not in the split chroms:
-    egrep "_alt|chrUn|chrM|_random" hg38.gaps.bed | cut -f1 \
-       | sort > other.bits.list
-
-    # extract those chrom pieces and the other bits from the masked sequence:
-    (twoBitToFa -bed=hg38.contigs.bed ../../hg38.2bit stdout; \
-      twoBitToFa -seqList=other.bits.list ../../hg38.2bit stdout) \
-        | faToTwoBit stdin hg38.contigs.2bit
-    twoBitInfo hg38.contigs.2bit stdout | sort -k2nr > hg38.contigs.chrom.sizes
-    # verify nothing has been lost:
-    twoBitToFa ../../hg38.2bit stdout | faCount stdin | tail -1
-# total 3209286105 898285419 623727342 626335137 900967885 159970322 30979743
-    twoBitToFa hg38.contigs.2bit stdout | faCount stdin | tail -1
-# total 3061688741 898285419 623727342 626335137 900967885  12372958 30979743
-    # the ACGT and CPG counts remain the same, only N's have been lost
-
-    # make a copy of this at the top:
-    cp -p hg38.contigs.2bit ../..
-    cp -p hg38.contigs.lift ../../jkStuff
-
-    # load as a track to be able to see where they are:
-    egrep "chrUn|chrM|_alt|_random" hg38.contigs.chrom.sizes \
-	| awk '{printf "%s\t0\t%d\t%s\n", $1, $2, $1}' \
-	> fullCoverage.hg38Contigs.bed
-    cat hg38.contigs.bed >>  fullCoverage.hg38Contigs.bed
-    featureBits -or -countGaps hg38 fullCoverage.hg38Contigs.bed gap
-    # 3209286105 bases of 3209286105 (100.000%) in intersection
-
-    hgLoadBed hg38 contigAlignmentSegments fullCoverage.hg38Contigs.bed
-
-#############################################################################
-## analysis of repeat elements from each RM run
-## (DONE - 2014-01-10 - Hiram)
-    mkdir /hive/data/genomes/hg38/bed/repeatElementCount
-    cd /hive/data/genomes/hg38/bed/repeatElementCount
-    for F in ../rmsk*/hg38.class.profile.txt \
-          ../repeatMaskerGenbank/hg38.class.profile.txt
-do
-   D=`dirname $F`
-   B=`basename $D | sed -e 's/repeatMaskerGenbank/NCBI/; s/rmsk//;'`
-   echo "==== $B ===="
-   grep rmskClass $F | sed -e 's#rmskClass/##; s/.tab//;' \
-     | awk '{printf "%s\t%d\n", $2, $1}' | sort > ${B}.tab
-done
-
-   # Hmmer does not have snRNA and tRNA ?
-   echo -e "snRNA\t0" >> Hmmer.tab
-   echo -e "tRNA\t0" >> Hmmer.tab
-   sort Hmmer.tab > t.tab
-   mv t.tab Hmmer.tab
-
-   echo "#  Repeat Masker item counts" > table.result.txt
-   echo "#  class         NCBI cross-match rmblastn HMMER" >> table.result.txt
-   join NCBI.tab CM.tab  | join - Blastn.tab  | join - Hmmer.tab \
-     | awk '{printf "%-15s\t%7d\t%7d\t%7d\t%7d\n", $1,$2,$3,$4,$5}' \
-       | sort -k2,2nr >> table.result.txt
-
-   cat table.result.txt
-#  Repeat Masker item counts
-#  class         NCBI cross-match rmblastn HMMER
-SINE            1849444 1852545 1822406 1884179
-LINE            1586141 1570523 1551012 1702529
-LTR              759248  748597  737799  805427
-DNA              502186  499108  485558  565171
-Simple_repeat    433789  703682  716968  636906
-Low_complexity   396378  102856  105181   95480
-Satellite         10198    7962    7703   10852
-LTR?               5884    5667    5068    9181
-snRNA              4595    4516    4548       0
-Retroposon         4163    5750    5630   11861
-Unknown            2802    5622    5263    3914
-DNA?               2157    3294    3018    4582
-tRNA               2154    2026    1983       0
-rRNA               1915    1840    1810     464
-RC                 1860    1784    1706    2059
-srpRNA             1784    1672    1633    1517
-scRNA              1397    1420    1426    6783
-RNA                 822     704     611    1484
-SINE?               488      38      38     970
-RC?                 445     411     374     806
-
-total           5567850 5520017 5459735 5744165
-
-#############################################################################
-## blat server turned on (DONE - 2014-01-13 - Hiram)
-#	After getting a blat server assigned by the Blat Server Gods,
-    ssh hgwdev
-
-    hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
-	VALUES ("hg38", "blat4c", "17780", "1", "0"); \
-	INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
-	VALUES ("hg38", "blat4c", "17781", "0", "1");' \
-	    hgcentraltest
-    #	test it with some sequence
-
-############################################################################
-## reset default position to ABO gene (DONE - 2014-01-13 - Hiram)
-    ssh hgwdev
-    hgsql -e 'update dbDb set defaultPos="chr9:133252000-133280861"
-	where name="hg38";' hgcentraltest
-
-#########################################################################
-## update grp table with new set of standard rows (DONE - 2014-01-29 - Hiram)
-    hgsql -e 'alter table grp rename grpOriginal;' hg38
-    hgsql -e 'drop table grp;' hg38
-    hgsql -e "create table grp (PRIMARY KEY(NAME)) select * from hg19.grp" hg38
-    hgsql -e 'delete from grp where name="denisova";' hg38
-    hgsql -e 'delete from grp where name="pub";' hg38
-    hgsql -e 'delete from grp where name="neandertal";' hg38
-    hgsql -e 'update grp set defaultIsClosed=0 where name="map";' hg38
-
-    hgsql -e 'drop table grpOriginal;' hg38
-
-############################################################################
-# PREPARE LINEAGE SPECIFIC REPEAT FILES FOR LASTZ (DONE - 2014-01-21 - Hiram)
-    ssh ku
-    mkdir /hive/data/genomes/hg38/bed/linSpecRep
-    cd /hive/data/genomes/hg38/bed/linSpecRep
-    #	create individual .out files from the master record in ../repeatMasker
-    mkdir splitOut
-    cat << '_EOF_' > split.csh
-#!/bin/csh -fe
-set C = $1
-head -3 ../repeatMasker/hg38.sorted.fa.out > splitOut/${C}.out
-grep "${C} " ../repeatMasker/hg38.sorted.fa.out >> splitOut/${C}.out
-'_EOF_'
-    # << happy emacs
-    chmod +x split.csh
-
-    cat << '_EOF_' > template
-#LOOP
-split.csh $(root1) {check out line+ splitOut/$(root1).out}
-#ENDLOOP
-'_EOF_'
-    # << happy emacs
-
-    # small ones first:
-    cut -f1 ../../chrom.sizes | tac > chrom.list
-    gensub2 chrom.list single template jobList
-    para create jobList
-    para try ... check ... push ... etc...
-# Completed: 93 of 93 jobs
-# CPU time in finished jobs:        127s       2.12m     0.04h    0.00d  0.000 y
-# IO & Wait Time:                 17154s     285.90m     4.76h    0.20d  0.001 y
-# Average job time:                 186s       3.10m     0.05h    0.00d
-# Longest finished job:             224s       3.73m     0.06h    0.00d
-# Submission to last job:           280s       4.67m     0.08h    0.00d
-
-    #	now, we can date and process each of those .out files
-    #	constructing the humanSpecific set of repeats
-    #   this means repeats found in human, and not in others
-    #   using mouse here for 'others' is good enough, a variety
-    #   of other species could be used (rat dog cow) where they all
-    #   produce the same result
-    mkdir dateRepeats
-    cd dateRepeats
-    cat << '_EOF_' > mkLSR
-#!/bin/bash
-set -beEu -o pipefail
-rm -f $1.out_mus-musculus
-ln -s ../splitOut/$1.out .
-/scratch/data/RepeatMasker/DateRepeats $1.out -query human -comp mouse
-rm $1.out
-mkdir -p ../humanSpecific
-/cluster/bin/scripts/extractRepeats 1 $1.out_mus-musculus \
-	> ../humanSpecific/$1.out.spec
-'_EOF_'
-    #	<< happy emacs
-    chmod +x mkLSR
-
-    cat << '_EOF_' > template
-#LOOP
-./mkLSR $(path1) {check out line+ ../humanSpecific/$(path1).out.spec}
-#ENDLOOP
-'_EOF_'
-    #	<< happy emacs
-
-    gensub2 ../chrom.list single template jobList
-    para try ... check ... push ... etc...
-    para time
-# Completed: 455 of 455 jobs
-# CPU time in finished jobs:      13985s     233.08m     3.88h    0.16d  0.000 y
-# IO & Wait Time:                  1470s      24.50m     0.41h    0.02d  0.000 y
-# Average job time:                  34s       0.57m     0.01h    0.00d
-# Longest finished job:             111s       1.85m     0.03h    0.00d
-# Submission to last job:          1427s      23.78m     0.40h    0.02d
-
-
-    # We also need the nibs for blastz runs with lineage specific repeats
-    mkdir /hive/data/genomes/hg38/bed/nibs
-    cd /hive/data/genomes/hg38/bed/nibs
-    cut -f1 ../../chrom.sizes | while read C
-do
-    twoBitToFa -seq=${C} ../../hg38.2bit stdout \
-	| faToNib -softMask stdin ${C}.nib
-    echo "${C} done"
-done
-
-    # verify nothing lost
-    cat ../../chrom.sizes \
-     | awk '{printf "nibFrag -masked %s.nib 0 %d + stdout\n", $1, $2}' \
-        | sh | faSize stdin
-# 3209286105 bases (159970322 N's 3049315783 real 1460684798 upper
-#  1588630985 lower) in 455 sequences in 1 files
-# Total size: mean 7053376.1 sd 31548372.6
-#  min 970 (chrUn_KI270394v1.nib:0-970)
-#  max 248956422 (chr1.nib:0-248956422) median 161218
-# %49.50 masked total, %52.10 masked real
-
-    mkdir /hive/data/staging/data/hg38/nib
-    rsync -a --progress ./ /hive/data/staging/data/hg38/nib
-
-#############################################################################
-## GRC Contigs/ctgPos2 track (DONE - 2014-12-25 - Hiram)
-    # provide mapping of UCSC chrom names to GRC names
-    mkdir /hive/data/genomes/hg38/bed/ctgPos2
-    cd /hive/data/genomes/hg38/bed/ctgPos2
-    grep -v "^#" ../../genbank/GCA_000001405.15_GRCh38_top-level.acc2name \
-	| awk '{printf "s/^%s/%s/g;\n", $1, $3}' > accessionToUcsc.sed.txt
-
-    find ../../genbank -type f | grep "/assembled_chromosomes/AGP/" | sed -e 's/.comp//' | while read F
-do
-   if [ -s $F ]; then
-      zcat $F | grep -v "^#"
-   fi
-done | sed -e "`cat accessionToUcsc.sed.txt`" > ucsc.grch38.agp
-
-    awk '$5 != "N"' ucsc.grch38.agp \
-| awk '{printf "%s\t%d\t%s\t%d\t%d\t%s\n", $6, $3-$2+1, $1, $2-1, $3, $5}' \
-	| sort -u | sort -k3,3 -k4,4n > ctgPos2.tab
-
-
-    export ctgSize=`awk '{print length($1)}' ctgPos2.tab | sort -n | tail -1`
-    export chrSize=`awk '{print length($3)}' ctgPos2.tab | sort -n | tail -1`
-
-    sed -e "s/20/$ctgSize/; s/16/$chrSize/;" \
-	/cluster/home/hiram/kent/src/hg/lib/ctgPos2.sql > hg38.ctgPos2.sql
-
-    hgLoadSqlTab hg38 ctgPos2 hg38.ctgPos2.sql ctgPos2.tab
-
-############################################################################
-# constructing download files (WORKING - 2014-01-15 - Hiram)
-    # add hg38 to all.joiner and verify it is clean:
-    joinerCheck -database=hg38 -keys all.joiner
-# Checking keys on database hg38
-#  hg38.ucscToINSDC.chrom - hits 455 of 455 (100.000%) ok
-    # and all table coordinates are OK:
-    checkTableCoords hg38
-
-    cd /hive/data/genomes/hg38
-    time $HOME/kent/src/hg/utils/automation/makeDownloads.pl \
-      -workhorse=hgwdev hg38
-    # makeDownloads.pl has made a preliminary set of files
-
-    # need to fixup these names and add chromFa.tar.gz files
-    cd /hive/data/genomes/hg38/goldenPath/bigZips
-
-    mkdir chroms
-    mkdir maskedChroms
-
-    faSplit byname hg38.fa.gz chroms/
-    faSplit byname hg38.fa.masked.gz maskedChroms/
-
-    tar cvzf ./hg38.chromFa.tar.gz ./chroms/
-    tar cvzf ./hg38.chromFaMasked.tar.gz ./maskedChroms/
-
-    cd /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/bigZips
-    ln -s /hive/data/genomes/hg38/goldenPath/bigZips/hg38.chromFa.tar.gz hg38.chromFa.tar.gz
-    ln -s /hive/data/genomes/hg38/goldenPath/bigZips/hg38.chromFaMasked.tar.gz hg38.chromFaMasked.tar.gz
-
-    #also added entries for above to md5sum.txt and README.txt
-
-############################################################################
-# LASTZ MOUSE Mm10 (DONE - 2014-01-23,31 - Hiram)
-    # can no longer use the lineage specific repeats with the new lastz
-    # use a screen to manage this longish job:
-    screen -S hg38Mm10
-
-    mkdir /hive/data/genomes/hg38/bed/lastzMm10.2014-01-23
-    cd /hive/data/genomes/hg38/bed/lastzMm10.2014-01-23
-
-    # best to always specify an exact path to lastz so we know which one is used
-    # lastz default parameters are human-mouse parameters
-
-    cat << '_EOF_' > DEF
-# human vs mouse
-BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
-
-# TARGET: Human Hg38
-SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit
-SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes
-SEQ1_CTGDIR=/hive/data/genomes/hg38/hg38.contigs.2bit
-SEQ1_CTGLEN=/hive/data/genomes/hg38/hg38.contigs.chrom.sizes
-SEQ1_LIFT=/hive/data/genomes/hg38/jkStuff/hg38.contigs.lift
-SEQ1_CHUNK=40000000
-SEQ1_LAP=10000
-
-# QUERY: Mouse Mm10
-SEQ2_DIR=/scratch/data/mm10/mm10.2bit
-SEQ2_LEN=/scratch/data/mm10/chrom.sizes
-SEQ2_CHUNK=20000000
-SEQ2_LAP=0
-
-BASE=/hive/data/genomes/hg38/bed/lastzMm10.2014-01-23
-TMPDIR=/dev/shm
-'_EOF_'
-    # << happy emacs
-
-    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl \
-	-verbose=2 \
-        -stop=net `pwd`/DEF \
-        -syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
-	-fileServer=hgwdev \
-        -chainMinScore=3000 -chainLinearGap=medium > net.log 2>&1
-    #	real    1494m26.135s ---- busy cluster
-    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl \
-	-verbose=2 \
-        -continue=load `pwd`/DEF \
-        -syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
-	-fileServer=hgwdev \
-        -chainMinScore=3000 -chainLinearGap=medium > load.log 2>&1
-    #	Elapsed time: 43m11s
-    cat fb.hg38.chainMm10Link.txt
-    # 964465044 bases of 3049335806 (31.629%) in intersection
-
-    #	and the swap
-    mkdir /hive/data/genomes/mm10/bed/blastz.hg38.swap
-    cd /hive/data/genomes/mm10/bed/blastz.hg38.swap
-    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
-	/hive/data/genomes/hg38/bed/lastzMm10.2014-01-23/DEF \
-	-swap -syntenicNet \
-	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
-	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1
-    #   real    83m28.397s
-
-    cat fb.mm10.chainHg38Link.txt
-    #	937030766 bases of 2652783500 (35.323%) in intersection
-
-#########################################################################
-# LASTZ Dog CanFam3 (DONE - 2014-01-26 - Hiram)
-    mkdir /hive/data/genomes/hg38/bed/lastzCanFam3.2014-01-26
-    cd /hive/data/genomes/hg38/bed/lastzCanFam3.2014-01-26
-
-    cat << '_EOF_' > DEF
-# human vs dog
-BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
-
-# TARGET: Human Hg38
-SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit
-SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes
-SEQ1_CTGDIR=/hive/data/genomes/hg38/hg38.contigs.2bit
-SEQ1_CTGLEN=/hive/data/genomes/hg38/hg38.contigs.chrom.sizes
-SEQ1_LIFT=/hive/data/genomes/hg38/jkStuff/hg38.contigs.lift
-SEQ1_CHUNK=20000000
-SEQ1_LAP=10000
-
-# QUERY: Dog CanFam3
-SEQ2_DIR=/hive/data/genomes/canFam3/canFam3.2bit
-SEQ2_LEN=/hive/data/genomes/canFam3/chrom.sizes
-SEQ2_CHUNK=20000000
-SEQ2_LAP=0
-
-BASE=/hive/data/genomes/hg38/bed/lastzCanFam3.2014-01-26
-TMPDIR=/dev/shm
-'_EOF_'
-    # << happy emacs
-
-    #	establish a screen to control this job
-    screen -S hg38CanFam3
-    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl -verbose=2 \
-	`pwd`/DEF \
-	-syntenicNet \
-	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
-	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1
-    # Elapsed time: 1396m22s - busy cluster
-    cat fb.hg38.chainCanFam3Link.txt
-    #  1523987456 bases of 3049335806 (49.978%) in intersection
-
-    #	running the swap
-    mkdir /hive/data/genomes/canFam3/bed/blastz.hg38.swap
-    cd /hive/data/genomes/canFam3/bed/blastz.hg38.swap
-    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
-	/hive/data/genomes/hg38/bed/lastzCanFam3.2014-01-26/DEF \
-	-syntenicNet -swap \
-	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
-	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1
-    #	real    107m57.787s
-
-    cat fb.canFam3.chainHg38Link.txt
-    #	1437624815 bases of 2392715236 (60.083%) in intersection
-
-#########################################################################
-# LASTZ Macaca Mulatta RheMac3 (DONE - 2014-01-27,02-10 - Hiram)
-    mkdir /hive/data/genomes/hg38/bed/lastzRheMac3.2014-01-27
-    cd /hive/data/genomes/hg38/bed/lastzRheMac3.2014-01-27
-
-    # best to always specify an exact path to lastz so we know which one is used
-    # lastz default parameters are human-mouse parameters
-
-    cat << '_EOF_' > DEF
-# human vs macaca mulatta
-BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
-# maximum M allowed with lastz is only 254
-BLASTZ_M=254
-BLASTZ_Q=/scratch/data/blastz/human_chimp.v2.q
-BLASTZ_O=600
-BLASTZ_E=150
-# other parameters from panTro2 vs hg18 lastz on advice from Webb
-BLASTZ_K=4500
-BLASTZ_Y=15000
-BLASTZ_T=2
-
-# TARGET: Human Hg38
-SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit
-SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes
-SEQ1_CTGDIR=/hive/data/genomes/hg38/hg38.contigs.2bit
-SEQ1_CTGLEN=/hive/data/genomes/hg38/hg38.contigs.chrom.sizes
-SEQ1_LIFT=/hive/data/genomes/hg38/jkStuff/hg38.contigs.lift
-SEQ1_CHUNK=20000000
-SEQ1_LAP=10000
-
-# QUERY: Macaca Mulatta RheMac3
-SEQ2_DIR=/scratch/data/rheMac3/rheMac3.2bit
-SEQ2_LEN=/scratch/data/rheMac3/chrom.sizes
-SEQ2_CHUNK=20000000
-SEQ2_LAP=0
-SEQ2_IN_CONTIGS=0
-
-BASE=/hive/data/genomes/hg38/bed/lastzRheMac3.2014-01-27
-TMPDIR=/dev/shm
-'_EOF_'
-    # << happy emacs
-    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl \
-        `pwd`/DEF \
-        -syntenicNet -fileServer=hgwdev \
-	-chainMinScore=5000 -chainLinearGap=medium \
-        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
-    #   Elapsed time: 1426m43s - busy cluster
-    cat fb.hg38.chainRheMac3Link.txt
-    #   2431208700 bases of 3049335806 (79.729%) in intersection
-
-    #   running the swap
-    mkdir /hive/data/genomes/rheMac3/bed/blastz.hg38.swap
-    cd /hive/data/genomes/rheMac3/bed/blastz.hg38.swap
-    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
-        /hive/data/genomes/hg38/bed/lastzRheMac3.2014-01-27/DEF \
-        -swap -syntenicNet -chainMinScore=5000 -chainLinearGap=medium \
-        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
-    #    82m32.329s
-    cat fb.rheMac3.chainHg38Link.txt
-    #   2288533769 bases of 2639145830 (86.715%) in intersection
-
-#########################################################################
-## construct analysis set (DONE - 2014-01-27 - Hiram)
-    mkdir /hive/data/genomes/hg38/bed/analysisSet
-    cd /hive/data/genomes/hg38/bed/analysisSet
-    mkdir -p splitFa
-
-    faToTwoBit \
-../../genbank/seqs_for_alignment_pipelines/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.gz \
-	hg38.unmasked.analysisSet.2bit
-
-    faCount splitFa/c*.fa > splitFa.faCount.txt
-
-    egrep -v "chr[0-9_BGHIJKLXv]*_alt" ../rmskCM/hg38.sorted.fa.out \
-	> hg38.analysisSet.out
-
-    twoBitMask hg38.unmasked.analysisSet.2bit hg38.analysisSet.out \
-	hg38.rmsk.analysisSet.2bit
-
-    egrep -v "chr[0-9_BGHIJKLXv]*_alt" ../simpleRepeat/trfMask.bed \
-	> trfMask.analysisSet.bed
-
-    twoBitMask hg38.rmsk.analysisSet.2bit -add trfMask.analysisSet.bed \
-	hg38.analysisSet.2bit
-
-    twoBitToFa hg38.unmasked.analysisSet.2bit stdout | faSize stdin
-# 3099922541 bases (165046090 N's 2934876451 real 2934876451 upper 0 lower)
-#	in 195 sequences in 1 files
-# Total size: mean 15897038.7 sd 46804464.6 min 970 (chrUn_KI270394v1)
-#	max 248956422 (chr1) median 32032
-# %0.00 masked total, %0.00 masked real
-
-    twoBitToFa hg38.analysisSet.2bit stdout | faSize stdin
-# 3099922541 bases (165046090 N's 2934876451 real 1409378896 upper 1525497555
-#	lower) in 195 sequences in 1 files
-# Total size: mean 15897038.7 sd 46804464.6 min 970 (chrUn_KI270394v1)
-#	max 248956422 (chr1) median 32032
-# %49.21 masked total, %51.98 masked real
-
-    mkdir hg38.analysisSet.chroms
-    twoBitToFa hg38.analysisSet.2bit stdout \
-	| faSplit byname stdin hg38.analysisSet.chroms/
-
-    tar cvzf ./hg38.analysisSet.chroms.tar.gz ./hg38.analysisSet.chroms
-
-    ln -s `pwd`/hg38.analysisSet.2bit \
-        /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/bigZips
-    ln -s `pwd`/hg38.analysisSet.chroms.tar.gz \
-        /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/bigZips
-    # add these md5 sums to md5sum.txt
-    md5sum hg38.analysisSet.2bit hg38.analysisSet.chroms.tar.gz >> \
-        /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/bigZips/md5sum.txt
-
-    cp ../../genbank/README_ANALYSIS_SETS README.analysisSet.txt
-    # add note at the top of README:
-    ######################################################################
-    UCSC copy of the file from:
-
-    ftp://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Homo_sapiens/GRCh38/seqs_for_alignment_pipelines/README_ANALYSIS_SETS
-
-    ln -s `pwd`/README.analysisSet.txt \
-        /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/bigZips
-
-#########################################################################
-# the FULL analysis set (DONE - 2014-03-18 - Hiram)
-    mkdir /hive/data/genomes/hg38/bed/fullAnalysisSet
-    cd /hive/data/genomes/hg38/bed/fullAnalysisSet
-
-    mkdir hg38.fullAnalysisSet.chroms
-    twoBitToFa ../analysisSet/hg38.analysisSet.2bit stdout \
-       | faSplit byname stdin hg38.fullAnalysisSet.chroms/
-
-    grep _alt ../../chrom.sizes | cut -f 1 > alt.list
-
-    twoBitToFa -seqList=alt.list ../../hg38.2bit stdout \
-       | faSplit byname stdin hg38.fullAnalysisSet.chroms/
-
-    faCount hg38.fullAnalysisSet.chroms/chr*.fa > faCount.fullAnalysisSet.txt
-
-    faToTwoBit hg38.fullAnalysisSet.chroms/chr*.fa hg38.fullAnalysisSet.2bit
-    twoBitInfo hg38.fullAnalysisSet.2bit stdout | sort -k2nr > chrom.sizes
-
-    tar cvzf ./hg38.fullAnalysisSet.chroms.tar.gz ./hg38.fullAnalysisSet.chroms
-
-#########################################################################
-# LASTZ Self/hg38 (DONE - 2014-01-25,02-10 - Hiram)
-    # can no longer use the lineage specific repeats with the new lastz
-    # use a screen to manage this longish job:
-    screen -S hg38Self
-
-    mkdir /hive/data/genomes/hg38/bed/lastzSelf.2014-01-25
-    cd /hive/data/genomes/hg38/bed/lastzSelf.2014-01-25
-    # construct the non-bridged contigs sequence to use:
-    (twoBitToFa ../nonBridgedContigs/hg38.chroms.contigs.2bit stdout;
-      twoBitToFa ../../hg38.2bit:chrM stdout) | faToTwoBit stdin hg38.self.2bit
-    twoBitInfo hg38.self.2bit stdout | sort -k2nr > hg38.self.chrom.sizes
-
-    # best to always specify an exact path to lastz so we know which one is used
-    # lastz default parameters are human-mouse parameters
-
-    cat << '_EOF_' > DEF
-# human vs human with mouse defaults
-BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
-
-# TARGET: Human Hg38
-SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit
-SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes
-SEQ1_CTGDIR=/hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/hg38.self.2bit
-SEQ1_CTGLEN=/hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/hg38.self.chrom.sizes
-SEQ1_LIFT=/hive/data/genomes/hg38/jkStuff/hg38.contigs.lift
-SEQ1_CHUNK=20000000
-SEQ1_LAP=10000
-
-# QUERY: Human Hg38
-SEQ2_DIR=/hive/data/genomes/hg38/hg38.2bit
-SEQ2_LEN=/hive/data/genomes/hg38/chrom.sizes
-SEQ2_CTGDIR=/hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/hg38.self.2bit
-SEQ2_CTGLEN=/hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/hg38.self.chrom.sizes
-SEQ2_LIFT=/hive/data/genomes/hg38/jkStuff/hg38.contigs.lift
-SEQ2_CHUNK=20000000
-SEQ2_LAP=0
-
-BASE=/hive/data/genomes/hg38/bed/lastzSelf.2014-01-25
-TMPDIR=/dev/shm
-'_EOF_'
-    # << happy emacs
-
-    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl \
-	-verbose=2 \
-        -stop=net `pwd`/DEF \
-        -syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
-	-fileServer=hgwdev \
-        -chainMinScore=3000 -chainLinearGap=medium > net.log 2>&1
-    #  real    1518m15.817s -- problems
-    # there was a problem in the 'part014' batch.  running that manually:
-    mkdir /hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/run.blastz/lastJob
-    cd /hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/run.blastz/lastJob
-    # make 100 jobs out of the 10 parts:
-    mkdir -p psl
-    cp ../tParts/part014.lst ./xpart014.lst
-    split -l 1 xpart014.lst -d -a 3 part
-    for F in part0*
-do
-   mv $F $F.lst
-done
-
-for T in part0*.lst
-do
-  for Q in part0*.lst
-  do
-    mkdir -p psl/${T}
-    echo /cluster/home/hiram/kent/src/hg/utils/automation/blastz-run-ucsc -outFormat psl -dropSelf ${T} ${Q} ../../DEF \{check out exists psl/${T}/${T}.${Q}.psl\}
-  done
-done > jobList
-    para -ram=32g create jobList
-    para push
-    # one last failing job:
-# Completed: 99 of 100 jobs
-# CPU time in finished jobs:       2836s      47.27m     0.79h    0.03d  0.000 y
-# IO & Wait Time:                   279s       4.65m     0.08h    0.00d  0.000 y
-# Average job time:                  31s       0.52m     0.01h    0.00d
-# Longest finished job:             586s       9.77m     0.16h    0.01d
-# Submission to last job:           620s      10.33m     0.17h    0.01d
-
-    mkdir /hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/run.blastz/lastJob/split010
-    cd /hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/run.blastz/lastJob/split010
-    mkdir psl
-
-    twoBitToFa /hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/hg38.self.2bit:chr16_03:0-1689648 part010.fa
-
-    faSplit -lift=split010.lift size part010.fa 169000 split010_
-TOP="/hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/run.blastz/lastJob/split010"
-
-for T in split*.fa
-do
-  mkdir -p psl/${T}
-  echo "${TOP}/${T}" > ${T}.lst
-  faToTwoBit  ${T} ${T}.2bit
-  for Q in split*.fa
-  do
-     echo "/cluster/home/hiram/kent/src/hg/utils/automation/blastz-run-ucsc -outFormat psl -dropSelf ${T}.lst ${Q}.lst DEF {check out exists psl/${T}/${T}.${Q}.psl}"
-  done
-done > jobList
-     para -ram=32g create jobList
-
-# Completed: 100 of 100 jobs
-# CPU time in finished jobs:     176579s    2942.99m    49.05h    2.04d  0.006 y
-# IO & Wait Time:                  1239s      20.64m     0.34h    0.01d  0.000 y
-# Average job time:                1778s      29.64m     0.49h    0.02d
-# Longest finished job:           29343s     489.05m     8.15h    0.34d
-# Submission to last job:         29348s     489.13m     8.15h    0.34d
-
-    catDir psl/* | grep -v "^#" > raw.psl
-
-    liftUp -type=.psl stdout split010.lift error raw.psl \
-        | liftUp -pslQ -type=.psl chr16_03.psl split010.lift error stdin
-
-    # this combination allowed psl headers to sneak in the middle,
-    # had to be cleaned:
-    catDir psl/* | grep -v "^#" > part014.psl
-    cat split010/chr16_03.psl >> part014.psl
-    cp -p part014.psl ../../psl/part014.lst/part014.lst_part014.lst.psl
-
-    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl \
-	-verbose=2 \
-        -continue=cat -stop=net `pwd`/DEF \
-        -syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
-	-fileServer=hgwdev \
-        -chainMinScore=3000 -chainLinearGap=medium > cat.log 2>&1
-    # real    43m11.340s
-    # failed in chaining, running manually on hgwdev
-    time ./bigJobs.sh > bigJobs.log 2>&1
-    #  real    468m59.648s
-
-    time ./part014.sh > part014.log 2>&1
-
-    # real    1319m57.911s
-    # -rw-rw-r-- 1 3581498246 Feb  8 14:37 part014.lst.chain
-    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl \
-	-verbose=2 \
-        -continue=chainMerge -stop=net `pwd`/DEF \
-        -syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
-	-fileServer=hgwdev \
-        -chainMinScore=3000 -chainLinearGap=medium > chainMerge.log 2>&1
-
-    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl \
-	-verbose=2 \
-        -continue=load -stop=load `pwd`/DEF \
-        -syntenicNet -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
-	-fileServer=hgwdev \
-        -chainMinScore=3000 -chainLinearGap=medium > load.log 2>&1
-
-    hgLoadChain -normScore -tIndex hg38 chainSelf hg38.hg38.all.chain.gz
-    #  Loading 104815249 chains into hg38.chainSelf
-
-    cat fb.hg38.chainSelfLink.txt
-    #   392419010 bases of 3049335806 (12.869%) in intersection
-    cd /hive/data/genomes/hg38/bed
-    ln -s lastzSelf.2014-01-25 lastz.self
-    ln -s lastzSelf.2014-01-25 lastz.hg38
-
-#########################################################################
-## 4-Way Multiz for UCSC Genes construction (DONE - 2014-02-11 - Hiram)
-    ssh hgwdev
-    mkdir /hive/data/genomes/hg38/bed/multiz4way
-    cd /hive/data/genomes/hg38/bed/multiz4way
-
-    #	extract our 4 organisms from the 44-way on hg18:
-    ln -s /hive/data/genomes/hg18/bed/multiz44way/44way.4d.nh ./44way.nh
-
-    /cluster/bin/phast/tree_doctor \
-	--prune-all-but hg19,mm10,canFam3,rheMac3 $HOME/kent/src/hg/utils/phyloTrees/120way.nh \
-	| sed -e "s/hg19/hg38/" > 4way.nh
-
-    #	this looks like:
-    cat 4way.nh
-(((hg38:0.033974,rheMac3:0.037601):0.109934,mm10:0.356483):0.020593,canFam3:0.165928);
-
-
-    #	Use this specification in the phyloGif tool:
-    #	http://genome.ucsc.edu/cgi-bin/phyloGif
-    #	to obtain a gif image for htdocs/images/phylo/hg38_4way.gif
-
-    /cluster/bin/phast/all_dists 4way.nh > 4way.distances.txt
-    #	Use this output to create the table below
-    grep -y hg38 4way.distances.txt | sort -k3,3n
-#
-#	If you can fill in all the numbers in this table, you are ready for
-#	the multiple alignment procedure
-#
-#                         featureBits chainLink measures
-#                                        chainHg38Link   chain    linearGap
-#    distance                      on hg38    on other   minScore
-#  1  0.071575 - rhesus rheMac3 (% 79.729) (% 86.715)       5000     medium
-#  2  0.330429 - dog canFam3    (% 49.978) (% 60.083)       3000     medium
-#  3  0.500391 - mouse mm10     (% 31.629) (% 35.323)       3000     medium
-
-    #	using the syntenic nets
-    cd /cluster/data/hg38/bed/multiz4way
-    mkdir mafLinks
-    cd mafLinks
-    mkdir rheMac3 canFam3 mm10
-
-    for D in mm10 canFam3 rheMac3
-do
-    ln -s ../../../lastz.${D}/axtChain/hg38.${D}.synNet.maf.gz ./${D}/
-done
-
-    mkdir /hive/data/genomes/hg38/bed/multiz4way/mafSplit
-    cd /hive/data/genomes/hg38/bed/multiz4way/mafSplit
-    for D in mm10 canFam3 rheMac3
-do
-    echo "working: ${D}"
-    zcat ../mafLinks/${D}/hg38.${D}.synNet.maf.gz > ${D}.maf
-    mkdir -p ${D}
-    mafSplit -byTarget -useFullSequenceName /dev/null ${D}/${D}_  ${D}.maf
-    rm -f ${D}.maf
-done
-
-    #	determine what is the newest version of multiz and use that
-    cd /hive/data/genomes/hg38/bed/multiz4way
-    mkdir penn
-    cp -p /cluster/bin/penn/multiz.2009-01-21_patched/multiz penn
-    cp -p /cluster/bin/penn/multiz.2009-01-21_patched/maf_project penn
-    cp -p /cluster/bin/penn/multiz.2009-01-21_patched/autoMZ penn
-
-    # the autoMultiz cluster run
-    ssh ku
-    cd /hive/data/genomes/hg38/bed/multiz4way
-
-    # create species list and stripped down tree for autoMZ
-    sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
-	4way.nh > tmp.nh
-    echo `cat tmp.nh` | sed 's/ //g; s/,/ /g' > tree.nh
-    sed 's/[()]//g; s/,/ /g' tree.nh > species.lst
-
-    mkdir run maf
-    cd run
-
-    #	NOTE: you need to set the db and multiz dirname properly in this script
-    cat > autoMultiz << '_EOF_'
-#!/bin/csh -ef
-set db = hg38
-set c = $1
-set maf = $2
-set binDir = /hive/data/genomes/hg38/bed/multiz4way/penn
-set tmp = /dev/shm/$db/multiz.$c
-set pairs = /hive/data/genomes/hg38/bed/multiz4way/mafSplit
-rm -fr $tmp
-mkdir -p $tmp
-cp ../{tree.nh,species.lst} $tmp
-pushd $tmp
-foreach s (`cat species.lst`)
-    set in = $pairs/$s/${s}_$c.maf
-    set out = $db.$s.sing.maf
-    if ($s == $db) then
-	continue
-    endif
-    if (-e $in.gz) then
-	zcat $in.gz > $out
-    else if (-e $in) then
-	cp $in $out
-    else
-	echo "##maf version=1 scoring=autoMZ" > $out
-    endif
-end
-set path = ($binDir $path); rehash
-$binDir/autoMZ + T=$tmp E=$db "`cat tree.nh`" $db.*.sing.maf $c.maf
-popd
-cp $tmp/$c.maf $maf
-rm -fr $tmp
-'_EOF_'
-    # << happy emacs
-    chmod +x autoMultiz
-
-cat  << '_EOF_' > template
-#LOOP
-./autoMultiz $(root1) {check out line+ /hive/data/genomes/hg38/bed/multiz4way/maf/$(root1).maf}
-#ENDLOOP
-'_EOF_'
-    # << happy emacs
-
-    cut -f1 /cluster/data/hg38/chrom.sizes > chrom.lst
-    gensub2 chrom.lst single template jobList
-    para create jobList
-    # 455 jobs
-    para try ... check ... push ... etc ...
-# Completed: 455 of 455 jobs
-# CPU time in finished jobs:      50111s     835.18m    13.92h    0.58d  0.002 y
-# IO & Wait Time:                  5574s      92.91m     1.55h    0.06d  0.000 y
-# Average job time:                 122s       2.04m     0.03h    0.00d
-# Longest finished job:            4717s      78.62m     1.31h    0.05d
-# Submission to last job:          4722s      78.70m     1.31h    0.05d
-
-    #	combine results into a single file for loading and gbdb reference
-    cd /hive/data/genomes/hg38/bed/multiz4way
-    grep "^#" maf/chr19_GL949749v2_alt.maf | grep -v "eof maf" > multiz4way.maf
-    grep -h -v "^#" maf/*.maf >> multiz4way.maf
-    grep "^#" maf/chr19_GL949749v2_alt.maf | grep "eof maf" >> multiz4way.maf
-    #	real    3m27.561s
-
-    #	makes a 8.5 Gb file:
-    #   -rw-rw-r-- 1 9044143788 Feb 11 12:51 multiz4way.maf
-
-    # Load into database
-    ssh hgwdev
-    cd /hive/data/genomes/hg38/bed/multiz4way
-    mkdir /gbdb/hg38/multiz4way
-    ln -s /hive/data/genomes/hg38/bed/multiz4way/multiz4way.maf \
-	/gbdb/hg38/multiz4way
-    #	the hgLoadMaf generates huge tmp files, locate them in /dev/shm
-    cd /dev/shm
-    time nice -n +19 hgLoadMaf hg38 multiz4way
-    #   Loaded 6141667 mafs in 1 files from /gbdb/hg38/multiz4way
-    #   real    2m2.812s
-
-    cd /hive/data/genomes/hg38/bed/multiz4way
-    time (cat /gbdb/hg38/multiz4way/*.maf \
-        | hgLoadMafSummary -verbose=2 -minSize=10000 \
-	-mergeGap=500 -maxSize=50000 hg38 multiz4waySummary stdin)
-    # Created 1266559 summary blocks from 11780291 components and 6141667 mafs
-    # real    3m0.791s
-# -rw-rw-r-- 1  311246327 Feb 11 12:54 multiz4way.tab
-# -rw-rw-r-- 1   58730176 Feb 11 12:58 multiz4waySummary.tab
-    wc -l multiz4way*
-    # 6141667 multiz4way.tab
-    # 1266559 multiz4waySummary.tab
-    # 7408226 total
-
-#########################################################################
-## RE-load alternate sequence for PSL display (DONE - 2016-01-15 - Hiram)
-## The procedure below
-##    "load alternate sequence for PSL display (DONE - 2014-02-24 - Hiram)
-## produced an illegal psl Table altSeqLiftOverPsl:
-    pslCheck -db=hg38 altSeqLiftOverPsl
-    checked: 266 failed: 264 errors: 1046
-
-## Since then, the gff3ToPsl command has been updated to be a bit more
-##  robust, so, the following sequence produces the new alignment file:
-    mkdir -p /hive/data/genomes/hg38/bed/altAlignments/redo2016
-    cd /hive/data/genomes/hg38/bed/altAlignments/redo2016
-
-mkdir -p ucscPsl
-
-awk -F'/' '{printf "s/^%s\t/%s\t/g;\n", $3,$2}' ../accessionToUcsc.sed.txt \
-    > ucscToNcbi.sed.txt
-
-sed -f ucscToNcbi.sed.txt ../../../chrom.sizes > ncbi.chrom.sizes
-
-paste ncbi.chrom.sizes ../../../chrom.sizes \
-  | awk -F'\t' '{printf "0\t%s\t%d\t%s\t%d\n", $1,$2,$3,$4}' \
-    > ncbiToUcsc.lift
-
-find ../../../genbank -type f | grep alt_scaffolds | grep "\.gff$" \
-  | while read gff
-do
-  name=`basename $gff | sed -e 's/_.*//;'`
-  fasta=`dirname $gff | sed -e 's#alignments#FASTA/alt.scaf.fa.gz#;'`
-  size=`faCount $fasta | grep -w total | cut -f2`
-  printf "%s\t%d\n" "$name" "$size" > target.sizes
-  gff3ToPsl ncbi.chrom.sizes target.sizes $gff $name.psl
-  pslCheck ${name}.psl
-  liftUp -type=.psl stdout ncbiToUcsc.lift error ${name}.psl \
-    | liftUp -type=.psl -pslQ ucscPsl/${name}.psl ncbiToUcsc.lift error stdin
-  pslCheck ucscPsl/${name}.psl
-done
-
-  pslSort dirs altSeqLiftOverPsl.psl ./tmp ucscPsl
-  pslCheck -db=hg38 altSeqLiftOverPsl.psl
-
-  hgLoadPsl hg38 altSeqLiftOverPsl.psl
-  pslCheck -db=hg38 altSeqLiftOverPsl
-  #  checked: 266 failed: 0 errors: 0
-
-#########################################################################
-## load alternate sequence for PSL display (DONE - 2014-02-24 - Hiram)
-    mkdir /hive/data/genomes/hg38/bed/altAlignments/sequence
-    cd /hive/data/genomes/hg38/bed/altAlignments/sequence
-
-    rm -fr hg38.haplotypes.lift temp.lift targetFa queryFa
-    mkdir targetFa
-    mkdir queryFa
-    touch temp.lift
-
-    cat ../../altLocations/chrToAlt.bed | while read L
-do
-  chrName=`echo $L | awk '{print $1}'`
-  chromSize=`egrep "^$chrName   " ../../../chrom.sizes | cut -f2`
-  chrStart=`echo $L | awk '{printf "%d", $2}'`
-  chrEnd=`echo $L | awk  '{printf "%d", $3}'`
-  chrSize=`echo $chrEnd $chrStart | awk '{print $1-$2}'`
-  queryName=`echo $L | awk '{print $4}'`
-  partName="${chrName}_${chrStart}_${chrEnd}"
-  echo $chrName $chrStart $chrEnd $queryName $partName $chromSize
-  echo -e "$chrStart\t${partName}\t$chrSize\t$chrName\t$chromSize" >> temp.lift
-  twoBitToFa ../../../hg38.2bit:$chrName:$chrStart-$chrEnd stdout | sed -e "s/^>.*/>$partName/;" > targetFa/$queryName.fa
-  twoBitToFa ../../../hg38.2bit:$queryName queryFa/$queryName.fa
-done
-
-sort -u temp.lift | sort -k4,4 -k1,1n > hg38.haplotypes.lift
-
-    mkdir /gbdb/hg38/ncbiAltMappings
-    cd /hive/data/genomes/hg38/bed/altAlignments/sequence/queryFa
-    ln -s `pwd`/*.fa /gbdb/hg38/ncbiAltMappings
-    cd /hive/data/genomes/hg38/bed/altAlignments/sequence
-    hgLoadSeq -drop -seqTbl=seqNcbiAltSequence -extFileTbl=extNcbiAltSequence \
-        hg38 /gbdb/hg38/ncbiAltMappings/*.fa
-
-    pslSwap ../altAlignments.psl stdout \
-      | pslRecalcMatch stdin ../../../hg38.2bit ../../../hg38.2bit \
-        hg38.referenceTarget.psl
-
-    # the table name altSeqLiftOverPsl is recognized in hgc to allow display
-    # of the details of the alignments
-    hgLoadPsl hg38 -table=altSeqLiftOverPsl hg38.referenceTarget.psl
-
-#########################################################################
-## alternate sequence alignments EXPERIMENT (DONE - 2014-01-17 - Hiram)
-    # the lastzAltSequences.2014-01-23 alignment was used for this instead
-    # of this procedure
-    mkdir /hive/data/genomes/hg38/bed/altAlignments
-    cd /hive/data/genomes/hg38/bed/altAlignments
-
-    grep -v "^#" ../../genbank/GCA_000001405.15_GRCh38_top-level.acc2name \
-	| awk '{printf "s/%s/%s/g;\n", $1, $3}' > accessionToUcsc.sed.txt
-
-    find ../../genbank -type f | grep alt_scaffolds | grep "\.gff$" \
-	| while read F
-do
-   cat $F | sed -f accessionToUcsc.sed.txt \
-	| gff3ToPsl ../../chrom.sizes stdin stdout
-done > altAlignments.psl
-#	(leftover fragment from an earlier xargs-based attempt, kept for reference:)
-#	| xargs cat | sed -f accessionToUcsc.sed.txt \
-#	| gff3ToPsl ../../chrom.sizes stdin altAlignments.psl
-
-    time pslRecalcMatch altAlignments.psl ../../hg38.2bit ../../hg38.2bit \
-        altRecalcMatch.psl
-    # real    0m51.122s
-
-    # just to see what they look like in different formats:
-    pslToChain altRecalcMatch.psl altAlignments.chain
-    chainToAxt altAlignments.chain ../../hg38.2bit ../../hg38.2bit \
-	altAlignments.axt
-    axtToMaf -score altAlignments.axt ../../chrom.sizes ../../chrom.sizes \
-        altAlignments.maf
-
-    mkdir mafSplits
-    mafSplit /dev/null mafSplits/ altAlignments.maf
-    # doesn't work:
-# Can't find chrom in MAF component src: chr6_GL000250v2_alt
-
-    mkdir splits psl
-    find ../../genbank -type f | grep alt_scaffolds | grep "\.gff$" \
-        | while read F
-do
-   chrAlt=`basename $F | sed -e 's/_.*//' | sed -f accessionToUcsc.sed.txt`
-   echo $chrAlt
-   cat $F | sed -f accessionToUcsc.sed.txt \
-        | gff3ToPsl ../../chrom.sizes stdin splits/${chrAlt}.psl
-   pslRecalcMatch splits/${chrAlt}.psl ../../hg38.2bit ../../hg38.2bit \
-	psl/${chrAlt}.psl
-done
-
-   mkdir swap
-   mkdir swap/psl swap/chain swap/axt swap/maf swap/anno
-   for F in psl/*.psl
-do
-  B=`basename $F | sed -e 's/.psl//'`
-  echo $B
-  pslSwap $F stdout | pslRecalcMatch stdin ../../hg38.2bit ../../hg38.2bit \
-      swap/psl/${B}.psl
-  pslToChain swap/psl/${B}.psl swap/chain/${B}.chain
-  chainToAxt swap/chain/${B}.chain ../../hg38.2bit ../../hg38.2bit \
-	swap/axt/${B}.axt
-  axtToMaf -score swap/axt/${B}.axt ../../chrom.sizes ../../chrom.sizes stdout \
-      | sed -e 's/^s chr\([0-9XYM][0-9]* \)/s ref38.chr\1/; s/^s chr\([0-9XYM][0-9]*_\)/s alt38.chr\1/;' > swap/maf/${B}.maf
-  mafAddIRows -nBeds=nBeds swap/maf/${B}.maf ../../hg38.2bit swap/anno/${B}.maf
-done
-# axtToMaf -score swap/axt/${B}.axt ../../chrom.sizes ../../chrom.sizes stdout \
-#      | sed -e 's/^s chr/s hg38.chr/' > swap/maf/${B}.maf
-
-   twoBitInfo -nBed ../../hg38.2bit ../../hg38.N.bed
-   ln -s  ../../hg38.N.bed hg38.bed
-   ln -s ../../hg38.N.bed ref38.bed
-   ln -s ../../hg38.N.bed alt38.bed
-   echo hg38.bed > nBeds
-   echo ref38.bed >> nBeds
-   echo alt38.bed >> nBeds
-   ln -s  ../../chrom.sizes hg38.len
-   ln -s  ../../chrom.sizes ref38.len
-   ln -s  ../../chrom.sizes alt38.len
-   echo hg38.len > sizes
-   echo ref38.len >> sizes
-   echo alt38.len >> sizes
-
-   mkdir chain axt maf anno
-   for F in psl/*.psl
-do
-   B=`basename $F | sed -e 's/.psl//'`
-   echo $B
-   pslToChain $F chain/${B}.chain
-   chainToAxt chain/${B}.chain ../../hg38.2bit ../../hg38.2bit axt/${B}.axt
-  axtToMaf -score axt/${B}.axt ../../chrom.sizes ../../chrom.sizes stdout \
-      | sed -e 's/^s chr\([0-9XYM][0-9]* \)/s ref38.chr\1/; s/^s chr\([0-9XYM][0-9]*_\)/s alt38.chr\1/;' > maf/${B}.maf
-   mafAddIRows -nBeds=nBeds maf/${B}.maf ../../hg38.2bit anno/${B}.maf
-done
-
-#   axtToMaf -score axt/${B}.axt ../../chrom.sizes ../../chrom.sizes stdout \
-#      | sed -e 's/^s chr/s hg38.chr/' > maf/${B}.maf
-
-############################################################################
-# Liftover Gencode V19 from hg19  (DONE braney 2014-02-14)
-
-mkdir /cluster/data/hg38/bed/liftOverGencodeV19
-cd /cluster/data/hg38/bed/liftOverGencodeV19
-
-echo "show tables like 'wgEncodeGencode%19'" | hgsql hg19 | tail -n +2 > all.gencode.tables
-echo " select tableName from trackDb where tableName like 'wgEncodeGencode_%V19';" | hgsql hg19 --skip-column-names > genePred.gencode.tables
-
-# load the non-genepred table as is.   This isn't quite the right thing to do
-# with exon support, but it's good enough for our purposes at the moment
-join -v 1 *.gencode.tables | while read t; do echo "create table $t select * from hg19.$t" | hgsql hg38; echo $t; done
-
-for i in `cat genePredExt.gencode.tables`;
-do
-    echo "select name,score,name2 from $i" | hgsql hg19 | sort > $i.name2Score.txt;
-    genePredToFakePsl hg19 $i $i.psl $i.cds;
-    pslMap -chainMapFile -swapMap $i.psl /gbdb/hg19/liftOver/hg19ToHg38.over.chain.gz stdout | pslCDnaFilter -uniqueMapped stdin stdout |  sort -k 14,14 -k 16,16n | pslToBed -cds=$i.cds stdin stdout | bedToGenePred stdin stdout | sort |  join /dev/stdin $i.name2Score.txt| tr ' ' '\t' | hgLoadGenePred -genePredExt hg38 $i stdin;
-    echo $i;
-done
-
-for i in `cat genePred.gencode.tables`;
-do
-    genePredToFakePsl hg19 $i $i.psl $i.cds;
-    pslMap -chainMapFile -swapMap $i.psl /gbdb/hg19/liftOver/hg19ToHg38.over.chain.gz stdout | pslCDnaFilter -uniqueMapped stdin stdout |  sort -k 14,14 -k 16,16n | pslToBed -cds=$i.cds stdin stdout | bedToGenePred stdin stdout |  tr ' ' '\t' | hgLoadGenePred hg38 $i stdin;
-    echo $i;
-done
-
-#####################################################################
-## tRNAs track ( 2014-02-18 braney DONE)
-## this is a preliminary version for UCSC build.  NOT FOR RELEASE!
-ssh hgwdev
-cd /hive/data/genomes/hg38/bed
-mkdir tRNAs
-cd tRNAs
-
-cp  /hive/users/pchan/tRNAs/Eukaryota/hg38/hg38-tRNAs.bed .
-
-hgLoadBed -tab hg38 tRNAs hg38-tRNAs.bed -sqlTable=$HOME/kent/src/hg/lib/tRNAs.sql
-
-## tRNAs track (2015-10-04, Chris FINISHING BUILD FOR RELEASE)
-    cd /hive/data/genomes/hg38/bed/tRNAs
-    cat /hive/users/pchan/gtrnadb2/Eukaryota/hg38/hg38-tRNAs.bed | sed 's^</BLOCKQUOTE>^^g' > hg38-tRNAs2.bed
-    hgsql hg38 -e 'drop table if exists tRNAs'
-    hgLoadBed -tab hg38 tRNAs hg38-tRNAs2.bed -sqlTable=$HOME/kent/src/hg/lib/tRNAs.sql
-    mkdir gif
-    cp -p /hive/users/pchan/gtrnadb2/Eukaryota/hg38/images/* gif
-    cd /hive/data/gbdb/hg38
-    ln -s /hive/data/genomes/hg38/bed/tRNAs/gif RNA-img
-    cd /usr/local/apache/htdocs-ceisenhart/RNA-img
-    ln -s /gbdb/hg38/RNA-img hg38
-
-############################################################################
-# EXONIPHY , lifted from hg19 (DONE - braney 2014-02-19)
-#	needed for ucscGenes building
-    # exoniphyHg19.gp is prepared as follows
-    mkdir /cluster/data/hg38/bed/exoniphy
-    cd /cluster/data/hg38/bed/exoniphy
-    hgsql hg19 -e "select * from exoniphy" -N | cut  -f 2-16 > exoniphyHg19.gp
-    time nice -n +19 liftOver -genePred exoniphyHg19.gp \
-	/cluster/data/hg19/bed/liftOver/hg19ToHg38.over.chain.gz \
-	    exoniphyHg38.gp unmapped
-    # real    0m2.015s
-    # user    0m1.894s
-    # sys     0m0.076s
-
-    wc -l *
-    #   186601 exoniphyHg19.gp
-    #   186533 exoniphyHg38.gp
-    #      136 unmapped
-    #   373270 total
-
-    cd /cluster/data/hg38/bed/exoniphy
-    nice -n +19 hgLoadGenePred -genePredExt hg38 exoniphy exoniphyHg38.gp
-    nice -n +19 featureBits hg38 exoniphy
-    # 28807039 bases of 3049335806 (0.945%) in intersection
-    nice -n +19 featureBits hg19 exoniphy
-    # 28661160 bases of 2897316137 (0.989%) in intersection
-
-#########################################################################
-# LASTZ Rat Rn5 (DONE - 2014-02-27 - Hiram)
-    #	establish a screen to control this job
-    screen -S hg38Rn5
-    mkdir /hive/data/genomes/hg38/bed/lastzRn5.2014-02-27
-    cd /hive/data/genomes/hg38/bed/lastzRn5.2014-02-27
-
-    # XXX don't forget to specify the BLASTZ binary:
-    cat << '_EOF_' > DEF
-# human vs rat
-BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
-
-# TARGET: Human Hg38
-SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit
-SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes
-SEQ1_CTGDIR=/hive/data/genomes/hg38/hg38.contigs.2bit
-SEQ1_CTGLEN=/hive/data/genomes/hg38/hg38.contigs.chrom.sizes
-SEQ1_LIFT=/hive/data/genomes/hg38/jkStuff/hg38.contigs.lift
-SEQ1_CHUNK=20000000
-SEQ1_LAP=10000
-
-# QUERY: Rat Rn5
-SEQ2_DIR=/hive/data/genomes/rn5/rn5.2bit
-SEQ2_LEN=/hive/data/genomes/rn5/chrom.sizes
-SEQ2_CHUNK=10000000
-SEQ2_LIMIT=100
-SEQ2_LAP=0
-
-BASE=/hive/data/genomes/hg38/bed/lastzRn5.2014-02-27
-TMPDIR=/scratch/tmp
-'_EOF_'
-    # << happy emacs
-
-    time doBlastzChainNet.pl -verbose=2 \
-	`pwd`/DEF \
-	-syntenicNet \
-	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
-	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1
-
-    #   real    658m53.984s
-    cat fb.hg38.chainRn5Link.txt
-    # 938823407 bases of 3049335806 (30.788%) in intersection
-
-    #	running the swap
-    mkdir /hive/data/genomes/rn5/bed/blastz.hg38.swap
-    cd /hive/data/genomes/rn5/bed/blastz.hg38.swap
-    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
-	/hive/data/genomes/hg38/bed/lastzRn5.2014-02-27/DEF \
-	-swap \
-	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
-	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1
-    #   real    66m53.095s
-    cat fb.rn5.chainHg38Link.txt
-    #   934256475 bases of 2572853723 (36.312%) in intersection
-
-    # syntenic net for 14-way use 2014-04-02 - Hiram
-    cd /hive/data/genomes/rn5/bed/blastz.hg38.swap
-    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
-	/hive/data/genomes/hg38/bed/lastzRn5.2014-02-27/DEF \
-	-continue=syntenicNet -syntenicNet -swap \
-	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
-	-chainMinScore=3000 -chainLinearGap=medium > synNet.log 2>&1
-    #  real    16m54.489s
-
-##############################################################################
-# LASTZ Rat Rn4 (DONE - 2014-02-27 - Hiram)
-    #	establish a screen to control this job
-    screen -S hg38Rn4
-    mkdir /hive/data/genomes/hg38/bed/lastzRn4.2014-02-27
-    cd /hive/data/genomes/hg38/bed/lastzRn4.2014-02-27
-
-    # XXX don't forget to specify the BLASTZ binary:
-    cat << '_EOF_' > DEF
-# human vs rat
-BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
-
-# TARGET: Human Hg38
-SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit
-SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes
-SEQ1_CTGDIR=/hive/data/genomes/hg38/hg38.contigs.2bit
-SEQ1_CTGLEN=/hive/data/genomes/hg38/hg38.contigs.chrom.sizes
-SEQ1_LIFT=/hive/data/genomes/hg38/jkStuff/hg38.contigs.lift
-SEQ1_CHUNK=20000000
-SEQ1_LAP=10000
-
-# QUERY: Rat Rn4
-SEQ2_DIR=/hive/data/genomes/rn4/rn4.2bit
-SEQ2_LEN=/hive/data/genomes/rn4/chrom.sizes
-SEQ2_CHUNK=10000000
-SEQ2_LIMIT=100
-SEQ2_LAP=0
-
-BASE=/hive/data/genomes/hg38/bed/lastzRn4.2014-02-27
-TMPDIR=/scratch/tmp
-'_EOF_'
-    # << happy emacs
-
-    doBlastzChainNet.pl -verbose=2 \
-	`pwd`/DEF \
-	-syntenicNet \
-	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
-	-chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1
-    #   real    658m53.984s
-
-    cat fb.hg38.chainRn4Link.txt
-    #   913992768 bases of 3049335806 (29.974%) in intersection
-
-    #	running the swap
-    mkdir /hive/data/genomes/rn4/bed/blastz.hg38.swap
-    cd /hive/data/genomes/rn4/bed/blastz.hg38.swap
-    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
-	/hive/data/genomes/hg38/bed/lastzRn4.2014-02-27/DEF \
-	-swap \
-	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
-	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
-    #   real    73m5.666s
-
-    cat fb.rn4.chainHg38Link.txt
-    #	889613774 bases of 2571531505 (34.595%) in intersection
-
-##############################################################################
-# GENEID GENE PREDICTIONS (DONE - 2014-03-07 - Hiram)
-    ssh hgwdev
-    mkdir /hive/data/genomes/hg38/bed/geneid
-    cd /hive/data/genomes/hg38/bed/geneid
-    mkdir download
-    cd download
-    for C in `cut -f1 ../../../chrom.sizes`
-    do
-	echo $C
- wget --timestamping \
-http://genome.crg.es/genepredictions/H.sapiens/hg38/geneid_v1.4/${C}.gtf3
-    wget --timestamping \
-http://genome.crg.es/genepredictions/H.sapiens/hg38/geneid_v1.4/${C}.prot
-    done
-
-    cd ..
-    cat download/*.gtf3 | ldHgGene -gtf -genePredExt hg38 geneid stdin
-    #	Read 33428 transcripts in 277332 lines in 1 files
-    #	33428 groups 92 seqs 1 sources 3 feature types
-    #	33428 gene predictions
-
-############################################################################
-# GENEREVIEWS TRACK (DONE 2014-05-17 - Chin)
-# This track depends on some tasks completed for hg19, specifically:
-#
-# $HOME/kent/src/hg/lib/geneReviewsGrshortNBKid.sql
-# $HOME/kent/src/hg/lib/geneReviewsGrshortTitleNBKid.sql
-# $HOME/kent/src/hg/lib/geneReviewsDetail.sql
-# $HOME/kent/src/hg/makeDb/trackDb/human/geneReviews.html
-#
-# Unlike hg19, this hg38 track is generated by the automatic geneReviews
-# scripts in
-# /hive/data/outside/otto/geneReviews, specifically buildGeneReviews.sh.
-# Current data are fetched weekly from NCBI
-# ftp://ftp.ncbi.nlm.nih.gov/pub/GeneReviews/
-# to /hive/data/outside/otto/geneReviews/${DATE}.
-
-###########################################################################
-# Chimp Lastz run (DONE - 2014-05-27 - Hiram)
-    screen -S hg38PanTro4      # use a screen to manage this longish running job
-    mkdir /hive/data/genomes/hg38/bed/lastzPanTro4.2014-05-27
-    cd /hive/data/genomes/hg38/bed/lastzPanTro4.2014-05-27
-
-    # always set the BLASTZ program so we know what version was used
-    cat << '_EOF_' > DEF
-# human vs chimp
-BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
-BLASTZ_O=600
-BLASTZ_E=150
-# maximum M allowed with lastz is only 254
-BLASTZ_M=254
-
-BLASTZ_T=2
-BLASTZ_Y=15000
-BLASTZ_K=4500
-BLASTZ_Q=/scratch/data/blastz/human_chimp.v2.q
-#    A    C    G    T
-#    90 -330 -236 -356
-#  -330  100 -318 -236
-#  -236 -318  100 -330
-#  -356 -236 -330   90
-
-# TARGET: Human Hg38
-SEQ1_DIR=/scratch/data/hg38/hg38.2bit
-SEQ1_LEN=/scratch/data/hg38/chrom.sizes
-SEQ1_CHUNK=10000000
-SEQ1_LAP=10000
-SEQ1_IN_CONTIGS=0
-
-# QUERY: Chimp PanTro4
-SEQ2_DIR=/hive/data/genomes/panTro4/panTro4.2bit
-SEQ2_LEN=/hive/data/genomes/panTro4/chrom.sizes
-SEQ2_CHUNK=10000000
-SEQ2_LAP=0
-SEQ2_LIMIT=200
-SEQ2_IN_CONTIGS=0
-
-BASE=/hive/data/genomes/hg38/bed/lastzPanTro4.2014-05-27
-TMPDIR=/dev/shm
-'_EOF_'
-    # << emacs
-
-    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
-        -chainMinScore=5000 -chainLinearGap=medium \
-        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
-        -syntenicNet) > do.log 2>&1
-    # real    154m12.215s
-    cat fb.hg38.chainPanTro4Link.txt
-    # 2839294579 bases of 3049335806 (93.112%) in intersection
-
-    # filter with doRecipBest.pl
-    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` \
-        hg38 panTro4) > rbest.log 2>&1
-    # real    57m55.320s
-
-    # running the swap
-    mkdir /hive/data/genomes/panTro4/bed/blastz.hg38.swap
-    cd /hive/data/genomes/panTro4/bed/blastz.hg38.swap
-    time (doBlastzChainNet.pl -verbose=2 \
-        -swap /hive/data/genomes/hg38/bed/lastzPanTro4.2014-05-27/DEF \
-        -chainMinScore=5000 -chainLinearGap=medium \
-        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
-        -syntenicNet) > swap.log 2>&1
-    cat fb.panTro4.chainHg38Link.txt
-    # 2776497530 bases of 2902338967 (95.664%) in intersection
-    # real    98m23.729s
-
-    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` \
-        panTro4 hg38) > rbest.log 2>&1
-    # real    64m33.812s
-
-#############################################################################
-# Opossum Lastz run (DONE - 2014-05-27 - Hiram)
-    screen -S hg38MonDom5      # use a screen to manage this longish running job
-    mkdir /hive/data/genomes/hg38/bed/lastzMonDom5.2014-05-27
-    cd /hive/data/genomes/hg38/bed/lastzMonDom5.2014-05-27
-
-    # always set the BLASTZ program so we know what version was used
-    cat << '_EOF_' > DEF
-# human vs opossum
-BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
-BLASTZ_M=50
-
-BLASTZ_Y=3400
-BLASTZ_L=6000
-BLASTZ_K=2200
-BLASTZ_Q=/scratch/data/blastz/HoxD55.q
-#     A    C    G    T
-#    91  -90  -25 -100
-#   -90  100 -100  -25
-#   -25 -100  100  -90
-#  -100  -25  -90  91
-
-# TARGET: Human Hg38
-SEQ1_DIR=/scratch/data/hg38/hg38.2bit
-SEQ1_LEN=/scratch/data/hg38/chrom.sizes
-SEQ1_CHUNK=10000000
-SEQ1_LAP=10000
-SEQ1_LIMIT=5
-
-# QUERY: Opossum MonDom5
-SEQ2_DIR=/scratch/data/monDom5/monDom5.2bit
-SEQ2_LEN=/hive/data/genomes/monDom5/chrom.sizes
-SEQ2_CHUNK=10000000
-SEQ2_LAP=0
-
-BASE=/hive/data/genomes/hg38/bed/lastzMonDom5.2014-05-27
-TMPDIR=/dev/shm
-'_EOF_'
-    # << emacs
-
-    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
-        -chainMinScore=5000 -chainLinearGap=loose \
-        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
-        -syntenicNet) > do.log 2>&1
-    # real    670m13.280s
-    # one failed chain run for hg19, finished manually on hgwdev, then:
-    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
-        -continue=chainMerge -chainMinScore=5000 -chainLinearGap=loose \
-        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
-        -syntenicNet) > chainMerge.log 2>&1
-    # real    164m28.822s
-
-    cat fb.hg38.chainMonDom5Link.txt
-    # 438195373 bases of 3049335806 (14.370%) in intersection
-
-    # filter with doRecipBest.pl
-    time (/cluster/bin/scripts/doRecipBest.pl -buildDir=`pwd` \
-        -dbHost=hgwdev -workhorse=hgwdev hg38 monDom5) > rbest.log 2>&1
-    # real    130m22.825s
-
-    # running the swap
-    mkdir /hive/data/genomes/monDom5/bed/blastz.hg38.swap
-    cd /hive/data/genomes/monDom5/bed/blastz.hg38.swap
-    time (doBlastzChainNet.pl -verbose=2 \
-        /hive/data/genomes/hg38/bed/lastzMonDom5.2014-05-27/DEF \
-        -swap -chainMinScore=5000 -chainLinearGap=loose \
-        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
-        -syntenicNet) > swap.log 2>&1
-    # real    102m41.443s
-
-    cat fb.monDom5.chainHg38Link.txt
-    # 420069915 bases of 3501660299 (11.996%) in intersection
-    time (/cluster/bin/scripts/doRecipBest.pl -buildDir=`pwd` \
-        -dbHost=hgwdev -workhorse=hgwdev monDom5 hg38) > rbest.log 2>&1
-    #  real    90m56.189s
-
-_EOF_
-#############################################################################
-# LOCUS REFERENCE GENOMIC (LRG) REGIONS AND TRANSCRIPTS (DONE 10/25/19 angie)
-# Redmine #13359, #24285 -- otto-mate To Do #17877
-# previously done 7/7/14, 9/9/16, 5/30/18
-# THIS IS NOW AN OTTO JOB !!
-    set today = `date +%Y_%m_%d`
-    mkdir -p /hive/data/genomes/hg38/bed/lrg/$today
-    cd /hive/data/genomes/hg38/bed/lrg/$today
-    wget ftp://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_public_xml_files.zip
-    unzip LRG_public_xml_files.zip
-
-    # Run script to convert LRG*.xml files to BED+ for regions and genePredExt+fa for transcripts:
-    # parseLrgXml.pl updated 2020-09-16 to add four new fields to the gp output
-    # the four extra fields are identifiers for:
-    # NCBI transcript, Ensembl transcript, NCBI protein, Ensembl protein
-
-    ~/kent/src/hg/utils/automation/parseLrgXml.pl GRCh38
-    genePredCheck lrgTranscriptsUnmapped.gp
-#Error: lrgTranscriptsUnmapped.gp:765: LRG_7t1 no exonFrame on CDS exon 46
-#checked: 1029 failed: 1
-    # If there are complaints e.g. about exonFrame, look for inconsistencies in the
-    # affected transcript's coding_region/coordinates vs. exon/intron info in xml.
-    # Contact Variation team leader Fiona Cunningham @EBI to resolve in the background
-    # (missing exonFrame info doesn't affect our track representation because we end up using
-    # psl).  We agreed to disagree about exon 46 of LRG_7t1 because that last coding exon
-    # portion is only the stop codon.
-
-    # No longer necessary to filter out alt and fix patches since they have been added to hg38.
-
-    # and we need the transcript plus gene name later:
-    cut -f1,12 lrgTranscriptsUnmapped.gp | sort > transcript.gene.name.txt
-
-    # five extra columns have been added to the genePred (2020-10-05 - Hiram)
-    # extract them so they can be added to the psl:
-    awk -F$'\t' '{printf "%s\t%s\t%s\t%s\t%s\t%s %s %s %s\n", $1,$16,$17,$18,$19, $16,$18,$17,$19}' lrgTranscriptsUnmapped.gp | sort \
-       | join -t$'\t' - transcript.gene.name.txt \
-         | awk -F$'\t' '{printf "%s\t%s\t%s\t%s\t%s\t%s\t%s %s\n", $1,$2,$3,$4,$5,$7,$6,$7}' > lrgTransExtraFields.tsv
-
-    # the five extra fields are identifiers for:
-    # NCBI transcript, Ensembl transcript, NCBI protein, Ensembl protein,
-    #	Gene name
-
-    # Load LRG regions:
-    #bedToBigBed lrg.bed /hive/data/genomes/hg38/chrom.sizes lrg.bb \
-    #-tab -type=bed12+ -as=$HOME/kent/src/hg/lib/lrg.as -extraIndex=name
-    # after ML #29689, added ncbiAcc field, Max, July 1, 2022
-    # changed to:
-    bedToBigBed lrg.bed /hive/data/genomes/hg38/chrom.sizes lrg.bb \
-    -tab -type=bed12+ -as=$HOME/kent/src/hg/lib/lrg.as -extraIndex=name,ncbiAcc
-    ln -sf `pwd`/lrg.bb /gbdb/hg38/bbi/lrg.bb
-    hgBbiDbLink hg38 lrg /gbdb/hg38/bbi/lrg.bb
-
-    # Map LRG fixed_annotation transcripts from LRG coords to hg38 coords (HT MarkD):
-    lrgToPsl lrg.bed /hive/data/genomes/hg38/chrom.sizes lrg.psl
-    pslCheck lrg.psl
-#checked: 919 failed: 0 errors: 0
-    awk '{print $10 "\t" $11;}' lrg.psl > lrg.sizes
-    genePredToFakePsl -chromSize=lrg.sizes placeholder \
-      lrgTranscriptsUnmapped.gp lrgTranscriptsFakePsl.psl lrgTranscripts.cds
-    pslMap lrgTranscriptsFakePsl.psl lrg.psl lrgTranscriptsHg38.psl
-    mrnaToGene -genePredExt -cdsFile=lrgTranscripts.cds -keepInvalid \
-      lrgTranscriptsHg38.psl lrgTranscriptsHg38NoName2.gp
-#Warning: no CDS for LRG_163t1
-#Warning: no CDS for LRG_347t1
-    # It's OK if mrnaToGene complains about "no CDS" for a non-coding tx (RefSeq accession NR_*).
-    grep -l NR_ LRG_163.xml LRG_347.xml
-#LRG_163.xml
-#LRG_347.xml
-
-    cat lrgCdna.tab | sed -e 's/^/>/;' | tr '\t' '\n' > lrgCdna.fa
-    # construct bigPsl with five extra fields
-    pslToBigPsl -fa=lrgCdna.fa -cds=lrgTranscripts.cds \
-	lrgTranscriptsHg38.psl bigPsl.txt
-
-    # add the five extra identifiers to the bigPsl file:
-    join -t$'\t' -1 4 \
-       -o 1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9,1.10,1.11,1.12,1.13,1.14,1.15\
-,1.16,1.17,1.18,1.19,1.20,1.21,1.22,1.23,1.24,1.25,2.2,2.3,2.4,2.5,2.6,2.7 \
-       <(sort -k4 bigPsl.txt) lrgTransExtraFields.tsv \
-         | sort -k1,1 -k2,2n > lrgExtraTranscriptsHg38.bigPsl.bed
-
-    bedToBigBed -as=bigPsl+6.as -type=bed12+19 -tab \
-       lrgExtraTranscriptsHg38.bigPsl.bed ../../../chrom.sizes lrgBigPsl.bb
-    bigBedInfo lrgBigPsl.bb
-    rm -f /gbdb/hg38/bbi/lrgBigPsl.bb
-    ln -sf `pwd`/lrgBigPsl.bb /gbdb/hg38/bbi
-    hgBbiDbLink hg38 lrgBigPsl /gbdb/hg38/bbi/lrgBigPsl.bb
-
-
-    # Load PSL, CDS and sequences.
-    hgLoadPsl hg38 -table=lrgTranscriptAli lrgTranscriptsHg38.psl
-    hgLoadSqlTab hg38 lrgCds ~/kent/src/hg/lib/cdsSpec.sql lrgTranscripts.cds
-    hgPepPred hg38 tab lrgCdna lrgCdna.tab
-    hgPepPred hg38 tab lrgPep lrgPep.tab
-
-
-#############################################################################
-## 7-Way Multiz (DONE - 2014-06-02 - Hiram)
-    ssh hgwdev
-    mkdir /hive/data/genomes/hg38/bed/multiz7way
-    cd /hive/data/genomes/hg38/bed/multiz7way
-
-    # from the 63-way in the source tree, select out the 7 used here:
-    /cluster/bin/phast/tree_doctor \
-        --prune-all-but hg19,panTro4,rheMac3,mm10,rn5,canFam3,monDom5 \
-        /cluster/home/hiram/kent/src/hg/utils/phyloTrees/130way.nh \
-          | sed -e 's/hg19/hg38/' > hg38.7way.nh
-
-    #	what that looks like:
-    ~/kent/src/hg/utils/phyloTrees/asciiTree.pl hg38.7way.nh
-# (((((hg38:0.006550,
-#     panTro4:0.006840):0.027424,
-#    rheMac3:0.037601):0.109934,
-#   (mm10:0.084509,
-#   rn5:0.091589):0.271974):0.020593,
-#  canFam3:0.165928):0.258392,
-# monDom5:0.340786);
-
-    # extract species list from that .nh file
-    sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
-        hg38.7way.nh | xargs echo | sed 's/ //g; s/,/ /g' \
-        | sed 's/[()]//g; s/,/ /g' | tr '[ ]' '[\n]' > species.list.txt
-
-    # construct db to name translation list:
-    cat species.list.txt | while read DB
-do
-hgsql -N -e "select name,organism from dbDb where name=\"${DB}\";" hgcentraltest
-done | sed -e "s/\t/->/; s/ /_/g;" | sed -e 's/$/;/' | sed -e 's/\./_/g' \
-        > db.to.name.txt
-
-    # construct a common name .nh file:
-    /cluster/bin/phast/tree_doctor --rename \
-    "`cat db.to.name.txt`" hg38.7way.nh | sed -e 's/00*)/)/g; s/00*,/,/g' \
-       | $HOME/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \
-         > hg38.7way.commonNames.nh
-
-    $HOME/kent/src/hg/utils/phyloTrees/asciiTree.pl hg38.7way.nh > t.nh
-    $HOME/kent/src/hg/utils/phyloTrees/scientificNames.sh t.nh \
-       | $HOME/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \
-          > hg38.7way.scientificNames.nh
-    rm -f t.nh
-    cat hg38.7way.scientificNames.nh
-# (((((Homo_sapiens:0.00655,
-#     Pan_troglodytes:0.00684):0.027424,
-#    Macaca_mulatta:0.037601):0.109934,
-#   (Mus_musculus:0.084509,
-#   Rattus_norvegicus:0.091589):0.271974):0.020593,
-#  Canis_lupus_familiaris:0.165928):0.258392,
-# Monodelphis_domestica:0.340786);
-
-    ~/kent/src/hg/utils/phyloTrees/asciiTree.pl hg38.7way.commonNames.nh
-# (((((Human:0.00655,
-#     Chimp:0.00684):0.027424,
-#    Rhesus:0.037601):0.109934,
-#   (Mouse:0.084509,
-#   Rat:0.091589):0.271974):0.020593,
-#  Dog:0.165928):0.258392,
-# Opossum:0.340786);
-
-    #	Use this specification in the phyloGif tool:
-    #	http://genome.ucsc.edu/cgi-bin/phyloGif
-    #	to obtain a png image for src/hg/htdocs/images/phylo/hg38_7way.png
-
-    /cluster/bin/phast/all_dists hg38.7way.nh | grep hg38 \
-        | sed -e "s/hg38.//" | sort -k2n > 7way.distances.txt
-    #	Use this output to create the table below
-    head 7way.distances.txt
-# NOTE(review): the example output below appears to be stale copy-paste from
-# another assembly's doc (it lists bird assemblies, not the hg38 7-way
-# species); the actual hg38 values are in the distance table further down.
-# taeGut1 0.075718
-# melUnd1 0.220312
-# galGal4 0.507021
-# melGal1 0.509140
-# hg19    1.175433
-# mm10    1.383071
-
-    cat << '_EOF_' > sizeStats.pl
-#!/usr/bin/env perl
-
-use strict;
-use warnings;
-
-open (FH, "<7way.distances.txt") or
-        die "can not read 7way.distances.txt";
-
-my $count = 0;
-while (my $line = <FH>) {
-    chomp $line;
-    my ($D, $dist) = split('\s+', $line);
-    my $chain = "chain" . ucfirst($D);
-    my $B="/hive/data/genomes/hg38/bed/lastz.$D/fb.hg38." .
-        $chain . "Link.txt";
-    my $chainLinkMeasure =
-        `awk '{print \$5}' ${B} 2> /dev/null | sed -e "s/(//; s/)//"`;
-    chomp $chainLinkMeasure;
-    $chainLinkMeasure = 0.0 if (length($chainLinkMeasure) < 1);
-    $chainLinkMeasure =~ s/\%//;
-    my $swapFile="/hive/data/genomes/${D}/bed/lastz.hg38/fb.${D}.chainHg38Link.txt";
-    my $swapMeasure = "N/A";
-    if ( -s $swapFile ) {
-	$swapMeasure =
-	    `awk '{print \$5}' ${swapFile} 2> /dev/null | sed -e "s/(//; s/)//"`;
-	chomp $swapMeasure;
-	$swapMeasure = 0.0 if (length($swapMeasure) < 1);
-	$swapMeasure =~ s/\%//;
-    }
-    my $orgName=
-    `hgsql -N -e "select organism from dbDb where name='$D';" hgcentraltest`;
-    chomp $orgName;
-    if (length($orgName) < 1) {
-        $orgName="N/A";
-    }
-    ++$count;
-    printf "# %02d  %.4f (%% %06.3f) (%% %06.3f) - %s %s\n", $count, $dist,
-        $chainLinkMeasure, $swapMeasure, $orgName, $D;
-}
-close (FH);
-'_EOF_'
-    # << happy emacs
-    chmod +x ./sizeStats.pl
-    ./sizeStats.pl
-#
-
-#	If you can fill in all the numbers in this table, you are ready for
-#	the multiple alignment procedure
-
-#       featureBits chainLink measures
-#               chainLink
-#  N distance  on hg38  on other     other species
-# 01  0.0134 (% 93.112) (% 95.664) - Chimp panTro4
-# 02  0.0716 (% 79.729) (% 86.715) - Rhesus rheMac3
-# 03  0.3304 (% 49.978) (% 60.083) - Dog canFam3
-# 04  0.5004 (% 31.629) (% 35.323) - Mouse mm10
-# 05  0.5075 (% 30.788) (% 36.312) - Rat rn5
-# 06  0.7637 (% 14.370) (% 11.996) - Opossum monDom5
-
-# None of this concern for distances matters in building the first step, the
-# maf files.
-
-    # create species list and stripped down tree for autoMZ
-    sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
-	hg38.7way.nh | xargs echo | sed 's/ //g; s/,/ /g' > tree.nh
-
-    sed 's/[()]//g; s/,/ /g' tree.nh > species.list
-    #   hg38 panTro4 rheMac3 mm10 rn5 canFam3 monDom5
-
-    #	bash shell syntax here ...
-    cd /hive/data/genomes/hg38/bed/multiz7way
-    export H=/hive/data/genomes/hg38/bed
-    mkdir mafLinks
-    # want syntenic net for: panTro4 rheMac3 mm10 rn5 canFam3
-    # and unfiltered maf net for: monDom5
-    for G in panTro4 rheMac3 mm10 rn5 canFam3
-    do
-      mkdir mafLinks/$G
-      echo ln -s ${H}/lastz.$G/axtChain/hg38.${G}.synNet.maf.gz ./mafLinks/$G
-      ln -s ${H}/lastz.$G/axtChain/hg38.${G}.synNet.maf.gz ./mafLinks/$G
-    done
-
-    mkdir mafLinks/monDom5
-    echo ln -s ${H}/lastz.monDom5/mafNet/*.maf.gz ./mafLinks/monDom5
-    ln -s ${H}/lastz.monDom5/mafNet/*.maf.gz ./mafLinks/monDom5
-    # verify the symLinks are good:
-    ls -ogrtL mafLinks/*/*
-#-rw-rw-r-- 1  709500062 Jan 25 12:15 mafLinks/mm10/hg38.mm10.synNet.maf.gz
-#-rw-rw-r-- 1 1089643630 Jan 27 19:15 mafLinks/canFam3/hg38.canFam3.synNet.maf.gz
-#-rw-rw-r-- 1 1277455681 Jan 28 21:52 mafLinks/rheMac3/hg38.rheMac3.synNet.maf.gz
-#-rw-rw-r-- 1  687500679 Mar  1 12:27 mafLinks/rn5/hg38.rn5.synNet.maf.gz
-#-rw-rw-r-- 1 1463969868 May 27 11:41 mafLinks/panTro4/hg38.panTro4.synNet.maf.gz
-#-rw-rw-r-- 1  323347908 May 29 12:38 mafLinks/monDom5/hg38.monDom5.net.maf.gz
-
-    # split the maf files into a set of hashed named files
-    # this hash named split keeps the same chr/contig names in the same
-    # named hash file.
-    mkdir /hive/data/genomes/hg38/bed/multiz7way/mafSplit
-    cd /hive/data/genomes/hg38/bed/multiz7way/mafSplit
-    for D in `sed -e "s/hg38 //" ../species.list`
-do
-    echo "${D}"
-    mkdir $D
-    cd $D
-    echo "mafSplit -byTarget -useHashedName=8 /dev/null . ../../mafLinks/${D}/*.maf.gz"
-    mafSplit -byTarget -useHashedName=8 /dev/null . \
-	../../mafLinks/${D}/*.maf.gz
-    cd ..
-done
-
-    # construct a list of all possible maf file names.
-    # they do not all exist in each of the species directories
-    find . -type f | wc -l
-    # 641
-    find . -type f | grep ".maf$" | xargs -L 1 basename | sort -u > maf.list
-    wc -l maf.list
-    # 118 maf.list
-
-    mkdir /hive/data/genomes/hg38/bed/multiz7way/splitRun
-    cd /hive/data/genomes/hg38/bed/multiz7way/splitRun
-    mkdir maf run
-    cd run
-    mkdir penn
-    cp -p /cluster/bin/penn/multiz.2009-01-21_patched/multiz penn
-    cp -p /cluster/bin/penn/multiz.2009-01-21_patched/maf_project penn
-    cp -p /cluster/bin/penn/multiz.2009-01-21_patched/autoMZ penn
-
-    #	set the db and pairs directories here
-    cat > autoMultiz.csh << '_EOF_'
-#!/bin/csh -ef
-set db = hg38
-set c = $1
-set result = $2
-set run = `/bin/pwd`
-set tmp = /dev/shm/$db/multiz.$c
-set pairs = /hive/data/genomes/hg38/bed/multiz7way/mafSplit
-/bin/rm -fr $tmp
-/bin/mkdir -p $tmp
-/bin/cp -p ../../tree.nh ../../species.list $tmp
-pushd $tmp > /dev/null
-foreach s (`/bin/sed -e "s/$db //" species.list`)
-    set in = $pairs/$s/$c
-    set out = $db.$s.sing.maf
-    if (-e $in.gz) then
-        /bin/zcat $in.gz > $out
-        if (! -s $out) then
-            echo "##maf version=1 scoring=autoMZ" > $out
-        endif
-    else if (-e $in) then
-        /bin/ln -s $in $out
-    else
-        echo "##maf version=1 scoring=autoMZ" > $out
-    endif
-end
-set path = ($run/penn $path); rehash
-$run/penn/autoMZ + T=$tmp E=$db "`cat tree.nh`" $db.*.sing.maf $c \
-        > /dev/null
-popd > /dev/null
-/bin/rm -f $result
-/bin/cp -p $tmp/$c $result
-/bin/rm -fr $tmp
-'_EOF_'
-# << happy emacs
-    chmod +x autoMultiz.csh
-
-    cat  << '_EOF_' > template
-#LOOP
-./autoMultiz.csh $(file1) {check out line+ /hive/data/genomes/hg38/bed/multiz7way/splitRun/maf/$(root1).maf}
-#ENDLOOP
-'_EOF_'
-# << happy emacs
-
-    ln -s ../../mafSplit/maf.list maf.list
-    ssh ku
-    cd /hive/data/genomes/hg38/bed/multiz7way/splitRun/run
-    gensub2 maf.list single template stdout > jobList
-    para -ram=8g create jobList
-# Completed: 118 of 118 jobs
-# CPU time in finished jobs:     118241s    1970.69m    32.84h    1.37d  0.004 y
-# IO & Wait Time:                   682s      11.36m     0.19h    0.01d  0.000 y
-# Average job time:                1008s      16.80m     0.28h    0.01d
-# Longest finished job:           10068s     167.80m     2.80h    0.12d
-# Submission to last job:         10076s     167.93m     2.80h    0.12d
-
-    # combine into one file  (the 1>&2 redirect sends the echo to stderr)
-    cd /hive/data/genomes/hg38/bed/multiz7way
-    head -1 splitRun/maf/017.maf > multiz7way.maf
-    for F in splitRun/maf/*.maf
-do
-    echo "${F}" 1>&2
-    egrep -v "^#" ${F}
-done >> multiz7way.maf
-    tail -1 splitRun/maf/017.maf >> multiz7way.maf
-# -rw-rw-r-- 1 15635828403 Jun  3 11:49 multiz7way.maf
-
-    # Load into database
-    ssh hgwdev
-    cd /hive/data/genomes/hg38/bed/multiz7way
-    mkdir /gbdb/hg38/multiz7way
-    ln -s `pwd`/multiz7way.maf /gbdb/hg38/multiz7way
-    cd /dev/shm
-    time nice -n +17 hgLoadMaf hg38 multiz7way
-    # Loaded 10270624 mafs in 1 files from /gbdb/hg38/multiz7way
-    # real    3m51.265s
-
-    time nice -n +17 hgLoadMafSummary -verbose=2 -minSize=30000 \
-	-mergeGap=1500 -maxSize=200000 hg38 multiz7waySummary \
-	/gbdb/hg38/multiz7way/multiz7way.maf
-    # Created 1260918 summary blocks from 35384988 components
-    # and 10270624 mafs from /gbdb/hg38/multiz7way/multiz7way.maf
-    # real    5m39.388s
-
-
-    wc -l multiz7way*.tab
-    # 10270624 multiz7way.tab
-    # 1260918 multiz7waySummary.tab
-    # 11531542 total
-
-    rm multiz7way*.tab
-
-##############################################################################
-# GAP ANNOTATE MULTIZ7WAY MAF AND LOAD TABLES (DONE - 2014-06-03 - Hiram)
-    # mafAddIRows has to be run on single chromosome maf files, it does not
-    #	function correctly when more than one reference sequence
-    #	are in a single file.  Need to split of the maf file into individual
-    #   maf files
-    mkdir -p /hive/data/genomes/hg38/bed/multiz7way/anno/mafSplit
-    cd /hive/data/genomes/hg38/bed/multiz7way/anno/mafSplit
-
-    time mafSplit -outDirDepth=1 -byTarget -useFullSequenceName \
-        /dev/null . ../../multiz7way.maf
-    #   real    4m8.617s
-
-    find . -type f | wc -l
-    #   353
-
-    # check for N.bed files everywhere:
-    cd /hive/data/genomes/hg38/bed/multiz7way/anno
-    for DB in `cat ../species.list`
-do
-    if [ ! -s /hive/data/genomes/${DB}/${DB}.N.bed ]; then
-        echo "MISS: ${DB}"
-#        cd /hive/data/genomes/${DB}
-#        twoBitInfo -nBed ${DB}.2bit ${DB}.N.bed
-    else
-        echo "  OK: ${DB}"
-    fi
-done
-
-    cd /hive/data/genomes/hg38/bed/multiz7way/anno
-    for DB in `cat ../species.list`
-do
-    echo "${DB} "
-    ln -s  /hive/data/genomes/${DB}/${DB}.N.bed ${DB}.bed
-    echo ${DB}.bed  >> nBeds
-    ln -s  /hive/data/genomes/${DB}/chrom.sizes ${DB}.len
-    echo ${DB}.len  >> sizes
-done
-    # make sure they all are successful symLinks:
-    ls -ogrtL
-
-    screen -S hg38      # use a screen to control this longish job
-    ssh ku
-    cd /hive/data/genomes/hg38/bed/multiz7way/anno
-    mkdir result
-    for D in `ls mafSplit`
-do
-    echo mkdir result/${D}
-    mkdir result/${D}
-done
-    cat << '_EOF_' > template
-#LOOP
-mafAddIRows -nBeds=nBeds mafSplit/$(path1) /hive/data/genomes/hg38/hg38.2bit {check out exists+ result/$(path1)}
-#ENDLOOP
-'_EOF_'
-    # << happy emacs
-
-    find ./mafSplit -type f | sed -e 's#^./mafSplit/##' > maf.list
-    gensub2 maf.list single template jobList
-    # limit jobs on a node with the ram=32g requirement because they go fast
-    para -ram=32g create jobList
-    para try ... check ... push ...
-# Completed: 353 of 353 jobs
-# CPU time in finished jobs:        530s       8.83m     0.15h    0.01d  0.000 y
-# IO & Wait Time:                  1057s      17.62m     0.29h    0.01d  0.000 y
-# Average job time:                   4s       0.07m     0.00h    0.00d
-# Longest finished job:              63s       1.05m     0.02h    0.00d
-# Submission to last job:           220s       3.67m     0.06h    0.00d
-
-    # verify all result files have some content, look for 0 size files:
-    find ./result -type f -size 0
-    # should see none
-    # or in this manner:
-    find ./result -type f | xargs ls -og | sort -k3nr | tail
-
-    # combine into one file  (the 1>&2 redirect sends the echo to stderr)
-    head -q -n 1 result/0/chr8.maf > hg38.7way.maf
-    find ./result -type f | while read F
-do
-    echo "${F}" 1>&2
-    grep -h -v "^#" ${F}
-done >> hg38.7way.maf
-
-    #	these maf files do not have the end marker, this does nothing:
-    #	tail -q -n 1 result/0/chr8.maf >> hg38.7way.maf
-    # How about an official end marker:
-    echo "##eof maf" >> hg38.7way.maf
-    ls -og
-# -rw-rw-r--  1 17795297196 Jun  3 14:01 hg38.7way.maf
-
-    du -hsc hg38.7way.maf
-    # 17G     hg38.7way.maf
-
-    # construct symlinks to get the individual maf files into gbdb:
-    rm /gbdb/hg38/multiz7way/multiz7way.maf   # remove previous results
-    ln -s `pwd`/hg38.7way.maf /gbdb/hg38/multiz7way/multiz7way.maf
-
-    # Load into database
-    cd /dev/shm
-    time nice -n +19 hgLoadMaf -pathPrefix=/gbdb/hg38/multiz7way \
-        hg38 multiz7way
-    # Loaded 10359242 mafs in 1 files from /gbdb/hg38/multiz7way
-    # real    4m21.862s
-
-    time hgLoadMafSummary -verbose=2 -minSize=30000 \
-	-mergeGap=1500 -maxSize=200000 hg38 multiz7waySummary \
-        /gbdb/hg38/multiz7way/multiz7way.maf
-#  Created 1260918 summary blocks from 35384988 components
-#  and 10359242 mafs from /gbdb/hg38/multiz7way/multiz7way.maf
-#  real    6m6.583s
-
-# -rw-rw-r-- 1 530538267 Jun  3 14:05 multiz7way.tab
-# -rw-rw-r-- 1  60616616 Jun  3 14:15 multiz7waySummary.tab
-
-    rm multiz7way*.tab
-
-######################################################################
-# MULTIZ7WAY MAF FRAMES (DONE - 2014-06-03 - Hiram)
-    ssh hgwdev
-    mkdir /hive/data/genomes/hg38/bed/multiz7way/frames
-    cd /hive/data/genomes/hg38/bed/multiz7way/frames
-#   survey all the genomes to find out what kinds of gene tracks they have
-    cat << '_EOF_' > showGenes.csh
-#!/bin/csh -fe
-foreach db (`cat ../species.list`)
-    echo -n "${db}: "
-    set tables = `hgsql $db -N -e "show tables like '%Gene%'"`
-    foreach table ($tables)
-        if ($table == "ensGene" || $table == "refGene" || \
-           $table == "mgcGenes" || $table == "knownGene" || \
-           $table == "xenoRefGene" ) then
-           set count = `hgsql $db -N -e "select count(*) from $table"`
-            echo -n "${table}: ${count}, "
-        endif
-    end
-    set orgName = `hgsql hgcentraltest -N -e \
-            "select scientificName from dbDb where name='$db'"`
-    set orgId = `hgsql hg19 -N -e \
-            "select id from organism where name='$orgName'"`
-    if ($orgId == "") then
-        echo "Mrnas: 0"
-    else
-        set count = `hgsql hg19 -N -e "select count(*) from gbCdnaInfo where organism=$orgId"`
-        echo "Mrnas: ${count}"
-    endif
-end
-'_EOF_'
-    # << happy emacs
-    chmod +x ./showGenes.csh
-    time ./showGenes.csh
-# hg38: knownGene: 104178, mgcGenes: 34081, refGene: 54852, xenoRefGene: 172740, Mrnas: 10723716
-# panTro4: ensGene: 29160, refGene: 2622, xenoRefGene: 280516, Mrnas: 11163
-# rheMac3: refGene: 6369, xenoRefGene: 275096, Mrnas: 443642
-# mm10: ensGene: 94647, knownGene: 61642, mgcGenes: 26768, refGene: 33765, xenoRefGene: 161178, Mrnas: 5224613
-# rn5: ensGene: 29188, mgcGenes: 6924, refGene: 18567, xenoRefGene: 175416, Mrnas: 1247500
-# canFam3: ensGene: 29884, refGene: 1582, xenoRefGene: 253196, Mrnas: 387195
-# monDom5: ensGene: 24882, refGene: 492, xenoRefGene: 248251,  Mrnas: 2461
-
-    # from that summary, use these gene sets:
-    # refGene - rheMac3
-    # ensGene - panTro4 rn5 canFam3 monDom5
-    # knownGene - hg38 mm10
-
-    mkdir genes
-    #   1. knownGene: hg38 mm10
-    for DB in hg38 mm10
-do
-    hgsql -N -e "select name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds from knownGene" ${DB} \
-      | genePredSingleCover stdin stdout | gzip -2c \
-        > genes/${DB}.gp.gz
-done
-    #   2. ensGene:
-    for DB in panTro4 rn5 canFam3 monDom5
-do
-hgsql -N -e "select name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds from ensGene" ${DB} \
-      | genePredSingleCover stdin stdout | gzip -2c \
-        > /scratch/tmp/${DB}.tmp.gz
-    mv /scratch/tmp/${DB}.tmp.gz genes/$DB.gp.gz
-    echo "${DB} done"
-done
-    #   3. refGene
-    for DB in rheMac3
-do
-hgsql -N -e "select * from refGene" ${DB} | cut -f2- \
-      | genePredSingleCover stdin stdout | gzip -2c \
-        > /scratch/tmp/${DB}.tmp.gz
-    mv /scratch/tmp/${DB}.tmp.gz genes/$DB.gp.gz
-    echo "${DB} done"
-done
-
-    # verify counts for genes are reasonable:
-    for T in genes/*.gz
-do
-    echo -n "# $T: "
-    zcat $T | cut -f1 | sort | uniq -c | wc -l
-done
-# genes/canFam3.gp.gz: 19507
-# genes/hg38.gp.gz: 21887
-# genes/mm10.gp.gz: 21013
-# genes/monDom5.gp.gz: 21033
-# genes/panTro4.gp.gz: 18657
-# genes/rheMac3.gp.gz: 5614
-# genes/rn5.gp.gz: 22863
-
-    time (cat ../anno/hg38.7way.maf \
-	| nice -n +19 genePredToMafFrames hg38 stdin stdout \
-	    `sed -e "s#\([a-zA-Z0-9]*\)#\1 genes/\1.gp.gz#g" ../species.list` \
-		| gzip > multiz7wayFrames.bed.gz)
-    #   real    3m44.591s
-
-    # verify there are frames on everything, should be 7 species:
-    zcat multiz7wayFrames.bed.gz | awk '{print $4}' | sort | uniq -c
-# 265160 canFam3
-# 208941 hg38
-# 253323 mm10
-# 574521 monDom5
-# 200156 panTro4
-#  49802 rheMac3
-# 244731 rn5
-
-    #   load the resulting file
-    ssh hgwdev
-    cd /hive/data/genomes/hg38/bed/multiz7way/frames
-    time hgLoadMafFrames hg38 multiz7wayFrames multiz7wayFrames.bed.gz
-    #   real    0m19.959s
-
-    time featureBits -countGaps hg38 multiz7wayFrames
-    #   52686177 bases of 3209286105 (1.642%) in intersection
-    #   real    0m12.593s
-
-    #   enable the trackDb entries:
-# frames multiz7wayFrames
-# irows on
-    #   appears to work OK
-
-#########################################################################
-# Phylogenetic tree from 7-way (DONE - 2014-06-04 - Hiram)
-    mkdir /hive/data/genomes/hg38/bed/multiz7way/4d
-    cd /hive/data/genomes/hg38/bed/multiz7way/4d
-
-    # the annotated maf is:
-    ../anno/hg38.7way.maf
-
-    # using knownGene for hg38
-    hgsql -N -e "select name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds from knownGene" hg38 > hg38.knownGene.gp
-
-    genePredSingleCover hg38.knownGene.gp stdout | sort > hg38.knownGeneNR.gp
-    wc -l hg38.knownGeneNR.gp
-    #	21887 hg38.knownGeneNR.gp
-
-    mkdir annoSplit
-    cd annoSplit
-    time mafSplit -verbose=2 -outDirDepth=1 -byTarget -useFullSequenceName \
-        /dev/null . ../../anno/hg38.7way.maf
-    # real    5m14.770s
-
-    find . -type f | wc -l
-    #   353
-    ssh ku
-    mkdir /hive/data/genomes/hg38/bed/multiz7way/4d/run
-    cd /hive/data/genomes/hg38/bed/multiz7way/4d/run
-    mkdir ../mfa
-
-    # newer versions of msa_view have a slightly different operation
-    # the sed of the gp file inserts the reference species in the chr name
-    cat << '_EOF_' > 4d.csh
-#!/bin/csh -fe
-set PHASTBIN = /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin
-set r = "/hive/data/genomes/hg38/bed/multiz7way"
-set c = $1:r
-set infile = $r/4d/annoSplit/$2
-set outDir = $r/4d/mfa/$3:h
-set outfile = $r/4d/mfa/$3
-/bin/mkdir -p $outDir
-cd /scratch/tmp
-/bin/awk -v C=$c '$2 == C {print}' $r/4d/hg38.knownGeneNR.gp | sed -e "s/\t$c\t/\thg38.$c\t/" > $c.gp
-set NL=`wc -l $c.gp| gawk '{print $1}'`
-echo $NL
-if ("$NL" != "0") then
-    $PHASTBIN/msa_view --4d --features $c.gp -i MAF $infile -o SS > $c.ss
-    $PHASTBIN/msa_view -i SS --tuple-size 1 $c.ss > $outfile
-else
-    echo "" > $outfile
-endif
-/bin/rm -f $c.gp $c.ss
-'_EOF_'
-    # << happy emacs
-    chmod +x 4d.csh
-
-    find ../annoSplit -type f | sed -e "s#../annoSplit/##" > maf.list
-
-    cat << '_EOF_' > template
-#LOOP
-4d.csh $(file1) $(path1) {check out line+ ../mfa/$(dir1)/$(root1).mfa}
-#ENDLOOP
-'_EOF_'
-    # << happy emacs
-
-    gensub2 maf.list single template jobList
-    para create jobList
-    para try ... check
-    para time
-# Completed: 353 of 353 jobs
-# CPU time in finished jobs:        836s      13.93m     0.23h    0.01d  0.000 y
-# IO & Wait Time:                  1172s      19.54m     0.33h    0.01d  0.000 y
-# Average job time:                   6s       0.09m     0.00h    0.00d
-# Longest finished job:              72s       1.20m     0.02h    0.00d
-# Submission to last job:            89s       1.48m     0.02h    0.00d
-
-    # Not all results have contents, that is OK
-
-    # combine mfa files
-    ssh hgwdev
-    cd /hive/data/genomes/hg38/bed/multiz7way/4d
-    # remove the broken empty files, size 0 and size 1:
-    find ./mfa -type f -size 0 | xargs rm -f
-    # most interesting, this did not identify files of size 1:
-#    find ./mfa -type f -size 1
-    find ./mfa -type f | xargs ls -og | awk '$3 == 1' | awk '{print $NF}' \
-        > empty.list
-    cat empty.list | xargs rm -f
-    #want comma-less species.list
-    /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/msa_view \
-	--aggregate "`cat ../species.list`" mfa/*/*.mfa | sed s/"> "/">"/ \
-	    > 4d.all.mfa
-    # check they are all in there:
-    grep "^>" 4d.all.mfa
-    #    >hg38
-    #    >panTro4
-    #    >rheMac3
-    #    >mm10
-    #    >rn5
-    #    >canFam3
-    #    >monDom5
-
-    sed 's/[a-z][a-z]*_//g; s/:[0-9\.][0-9\.]*//g; s/;//; /^ *$/d' \
-	../hg38.7way.nh | xargs echo | sed -e 's/ //g' > tree_commas.nh
-    # tree_commas.nh looks like:
-    #   (((((hg38,panTro4),rheMac3),(mm10,rn5)),canFam3),monDom5)
-    # use phyloFit to create tree model (output is phyloFit.mod)
-    time nice -n +19 \
-	/cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/phyloFit \
-	    --EM --precision MED --msa-format FASTA --subst-mod REV \
-		--tree tree_commas.nh 4d.all.mfa
-    #   real    0m6.583s
-
-
-    mv phyloFit.mod all.mod
-
-    grep TREE all.mod
-# TREE: (((((hg38:0.00673596,panTro4:0.00686169):0.0248146,rheMac3:0.0357598):0.0970072,(mm10:0.081661,rn5:0.0874126):0.246527):0.0264964,canFam3:0.156769):0.303241,monDom5:0.303241);
-
-    # compare these calculated lengths to the tree extracted from 130way:
-    grep TREE all.mod | sed -e 's/TREE: //' \
-      | /cluster/bin/phast/all_dists /dev/stdin | grep hg38 | sort -k3n \
-        | sed -e "s/hg38.//; s/^/    #  /"
-    #  panTro4  0.013598
-    #  rheMac3  0.067310
-    #  canFam3  0.311823
-    #  mm10     0.456746
-    #  rn5      0.462497
-    #  monDom5  0.761536
-
-    # yes, somewhat similar
-    /cluster/bin/phast/all_dists ../hg38.7way.nh | grep hg38 \
-        | sort -k3n | sed -e "s/hg38.//; s/^/    #  /"
-    #  panTro4   0.013390
-    #  rheMac3   0.071575
-    #  canFam3   0.330429
-    #  mm10      0.500391
-    #  rn5       0.507471
-    #  monDom5   0.763679
-
-#########################################################################
-# phastCons 7-way (DONE - 2014-06-04 - Hiram)
-    # split 7way mafs into 10M chunks and generate sufficient statistics
-    # files for phastCons
-    ssh ku
-    mkdir -p /hive/data/genomes/hg38/bed/multiz7way/cons/SS
-    cd /hive/data/genomes/hg38/bed/multiz7way/cons/SS
-    mkdir result done
-
-    cat << '_EOF_' > mkSS.csh
-#!/bin/csh -ef
-set d = $1
-set c = $2
-set doneDir = done/$d
-set MAF = /hive/data/genomes/hg38/bed/multiz7way/anno/result/$d/$c.maf
-set WINDOWS = /hive/data/genomes/hg38/bed/multiz7way/cons/SS/result/$d/$c
-set WC = `cat $MAF | wc -l`
-set NL = `grep "^#" $MAF | wc -l`
-if ( -s $3 ) then
-    exit 0
-endif
-if ( -s $3.running ) then
-    exit 0
-endif
-
-/bin/mkdir -p $doneDir
-/bin/date >> $3.running
-
-/bin/rm -fr $WINDOWS
-/bin/mkdir -p $WINDOWS
-pushd $WINDOWS > /dev/null
-if ( $WC != $NL ) then
-/cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/msa_split \
-    $MAF -i MAF -o SS -r $WINDOWS/$c -w 10000000,0 -I 1000 -B 5000
-endif
-popd > /dev/null
-/bin/date >> $3
-/bin/rm -f $3.running
-'_EOF_'
-    # << happy emacs
-    chmod +x mkSS.csh
-
-    cat << '_EOF_' > template
-#LOOP
-mkSS.csh $(dir1) $(root1) {check out line+ done/$(dir1)/$(root1)}
-#ENDLOOP
-'_EOF_'
-    # << happy emacs
-
-    #	do the easy ones first to see some immediate results
-    find ../../anno/result -type f | sed -e "s#../../anno/result/##" > maf.list
-
-    gensub2 maf.list single template jobList
-    para -ram=32g create jobList
-    para try ... check ... etc
-# Completed: 353 of 353 jobs
-# CPU time in finished jobs:       1216s      20.27m     0.34h    0.01d  0.000 y
-# IO & Wait Time:                  1385s      23.08m     0.38h    0.02d  0.000 y
-# Average job time:                   7s       0.12m     0.00h    0.00d
-# Longest finished job:             111s       1.85m     0.03h    0.00d
-# Submission to last job:           189s       3.15m     0.05h    0.00d
-
-    find ./result -type f | wc -l
-    #	 641
-
-    # Run phastCons
-    #	This job is I/O intensive in its output files, beware where this
-    #	takes place or do not run too many at once.
-    ssh ku
-    mkdir -p /hive/data/genomes/hg38/bed/multiz7way/cons/run.cons
-    cd /hive/data/genomes/hg38/bed/multiz7way/cons/run.cons
-
-    #	This is setup for multiple runs based on subsets, but only running
-    #   the 'all' subset here.
-    #   It triggers off of the current working directory
-    #	$cwd:t which is the "grp" in this script.  Running:
-    #	all and vertebrates
-
-    cat << '_EOF_' > doPhast.csh
-#!/bin/csh -fe
-set PHASTBIN = /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin
-set c = $1
-set d = $2
-set f = $3
-set len = $4
-set cov = $5
-set rho = $6
-set grp = $cwd:t
-set cons = /hive/data/genomes/hg38/bed/multiz7way/cons
-set tmp = $cons/tmp/${d}_${c}
-mkdir -p $tmp
-set ssSrc = $cons/SS/result
-set useGrp = "$grp.mod"
-if (-s $cons/$grp/$grp.non-inf) then
-  ln -s $cons/$grp/$grp.mod $tmp
-  ln -s $cons/$grp/$grp.non-inf $tmp
-  ln -s $ssSrc/$d/$f $tmp
-else
-  ln -s $ssSrc/$d/$f $tmp
-  ln -s $cons/$grp/$grp.mod $tmp
-endif
-pushd $tmp > /dev/null
-if (-s $grp.non-inf) then
-  $PHASTBIN/phastCons $f $useGrp \
-    --rho $rho --expected-length $len --target-coverage $cov --quiet \
-    --not-informative `cat $grp.non-inf` \
-    --seqname $c --idpref $c --most-conserved $c.bed --score > $c.pp
-else
-  $PHASTBIN/phastCons $f $useGrp \
-    --rho $rho --expected-length $len --target-coverage $cov --quiet \
-    --seqname $c --idpref $c --most-conserved $c.bed --score > $c.pp
-endif
-popd > /dev/null
-mkdir -p pp/$d bed/$d
-sleep 4
-touch pp/$d bed/$d
-rm -f pp/$d/$c.pp
-rm -f bed/$d/$c.bed
-mv $tmp/$c.pp pp/$d
-mv $tmp/$c.bed bed/$d
-rm -fr $tmp
-rmdir --ignore-fail-on-non-empty $cons/tmp/$d:h
-'_EOF_'
-    # << happy emacs
-    chmod +x doPhast.csh
-
-    #	this template will serve for all runs
-    #	root1 == chrom name, file1 == ss file name without .ss suffix
-    cat << '_EOF_' > template
-#LOOP
-../run.cons/doPhast.csh $(root1) $(dir1) $(file1) 45 0.3 0.3 {check out line+ pp/$(dir1)/$(root1).pp}
-#ENDLOOP
-'_EOF_'
-    # << happy emacs
-
-    find ../SS/result -type f | sed -e "s#../SS/result/##" > ss.list
-    wc -l ss.list
-    #	641 ss.list
-
-    # Create parasol batch and run it
-    # run for all species
-    cd /hive/data/genomes/hg38/bed/multiz7way/cons
-    mkdir -p all
-    cd all
-    #	Using the .mod tree
-    cp -p ../../4d/all.mod ./all.mod
-
-    gensub2 ../run.cons/ss.list single ../run.cons/template jobList
-    para -ram=32g create jobList
-    para try ... check ...
-    para push
-# Completed: 641 of 641 jobs
-# CPU time in finished jobs:       6557s     109.29m     1.82h    0.08d  0.000 y
-# IO & Wait Time:                  4497s      74.94m     1.25h    0.05d  0.000 y
-# Average job time:                  17s       0.29m     0.00h    0.00d
-# Longest finished job:              33s       0.55m     0.01h    0.00d
-# Submission to last job:           120s       2.00m     0.03h    0.00d
-
-    # create Most Conserved track
-    cd /hive/data/genomes/hg38/bed/multiz7way/cons/all
-    cut -f1 ../../../../chrom.sizes | while read C
-do
-    ls -d bed/?/${C} 2> /dev/null | while read D
-    do
-        echo ${D}/${C}*.bed 1>&2
-        cat ${D}/${C}*.bed
-    done | sort -k1,1 -k2,2n \
-    | awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", "'${C}'", $2, $3, $5, $5;}'
-done > tmpMostConserved.bed
-
-    /cluster/bin/scripts/lodToBedScore tmpMostConserved.bed > mostConserved.bed
-    #    -rw-rw-r--  1 42636652 Jun  4 10:45 tmpMostConserved.bed
-    #    -rw-rw-r--  1 43721828 Jun  4 10:45 mostConserved.bed
-
-    # load into database
-    ssh hgwdev
-    cd /hive/data/genomes/hg38/bed/multiz7way/cons/all
-    time nice -n +19 hgLoadBed hg38 phastConsElements7way mostConserved.bed
-    #  Read 1234990 elements of size 5 from mostConserved.bed
-    #  real    0m11.390s
-
-    # on human we often try for 5% overall cov, and 70% CDS cov
-    # most bets are off here for that goal, these alignments are too few
-    #	and too far between
-    #	--rho 0.3 --expected-length 45 --target-coverage 0.3
-    featureBits hg38 -enrichment knownGene:cds phastConsElements7way
-    # knownGene:cds 1.266%, phastConsElements7way 4.551%,
-    #    both 0.888%, cover 70.16%, enrich 15.42x
-
-    # Create merged posterior probability file and wiggle track data files
-    cd /hive/data/genomes/hg38/bed/multiz7way/cons/all
-    mkdir downloads
-
-    # the third sed fixes the chrom names, removing the partition extensions
-    time (find ./pp -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
-	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
-	| sed -e 's/\.[0-9][0-9]*-[0-9][0-9]* start/ start/' \
-        | gzip -c > downloads/phastCons7way.wigFix.gz)
-    #   real    37m47.242s
-
-    # check integrity of data with wigToBigWig
-    time (zcat downloads/phastCons7way.wigFix.gz \
-	| wigToBigWig -verbose=2 stdin /hive/data/genomes/hg38/chrom.sizes \
-	    phastCons7way.bw) > bigWig.log 2>&1 &
-    tail bigWig.log
-    # pid=34733: VmPeak:    33106324 kB
-    #   real    40m53.287s
-
-    bigWigInfo phastCons7way.bw
-# version: 4
-# isCompressed: yes
-# isSwapped: 0
-# primaryDataSize: 5,675,802,079
-# primaryIndexSize: 92,579,900
-# zoomLevels: 10
-# chromCount: 353
-# basesCovered: 2,898,191,577
-# mean: 0.168088
-# min: 0.000000
-# max: 1.000000
-# std: 0.233827
-
-    #	encode those files into wiggle data
-    time (zcat downloads/phastCons7way.wigFix.gz \
-	| wigEncode stdin phastCons7way.wig phastCons7way.wib)
-    #   Converted stdin, upper limit 1.00, lower limit 0.00
-    #   real    15m28.525s
-
-    du -hsc *.wi?
-    #  2.7G    phastCons7way.wib
-    #  282M    phastCons7way.wig
-    #  3.0G    total
-
-    # Load gbdb and database with wiggle.
-    ln -s `pwd`/phastCons7way.wib /gbdb/hg38/multiz7way/phastCons7way.wib
-    time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/hg38/multiz7way \
-	hg38 phastCons7way phastCons7way.wig
-    #   real    0m33.502s
-
-    # use to set trackDb.ra entries for wiggle min and max
-    # and verify table is loaded correctly
-
-    wigTableStats.sh hg38 phastCons7way
-# db.table          min max mean       count sumData      stdDev  viewLimits
-hg38.phastCons7way 0 1 0.168088 2898191577 4.87152e+08 0.233827 viewLimits=0:1
-
-    #  Create histogram to get an overview of all the data
-    time nice -n +19 hgWiggle -doHistogram -db=hg38 \
-	-hBinSize=0.001 -hBinCount=1000 -hMinVal=0.0 -verbose=2 \
-	    phastCons7way > histogram.data 2>&1
-    #	real    2m40.179s
-
-    #	create plot of histogram:
-
-    cat << '_EOF_' | gnuplot > histo.png
-set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
-set size 1.4, 0.8
-set key left box
-set grid noxtics
-set grid ytics
-set title " Human hg38 Histogram phastCons7way track"
-set xlabel " phastCons7way score"
-set ylabel " Relative Frequency"
-set y2label " Cumulative Relative Frequency (CRF)"
-set y2range [0:1]
-set y2tics
-set yrange [0:0.02]
-
-plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
-        "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
-'_EOF_'
-    #	<< happy emacs
-
-    display histo.png &
-
-#########################################################################
-# phyloP for 7-way (DONE - 2014-06-04 - Hiram)
-    # run phyloP with score=LRT
-    ssh ku
-    mkdir /cluster/data/hg38/bed/multiz7way/consPhyloP
-    cd /cluster/data/hg38/bed/multiz7way/consPhyloP
-
-    mkdir run.phyloP
-    cd run.phyloP
-    # Adjust model file base composition background and rate matrix to be
-    # representative of the chromosomes in play
-    grep BACKGROUND ../../cons/all/all.mod | awk '{printf "%0.3f\n", $3 + $4}'
-    #	0.556
-    /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin/modFreqs \
-	../../cons/all/all.mod 0.556 > all.mod
-    # verify, the BACKGROUND should now be paired up:
-    grep BACK all.mod
-    #   BACKGROUND: 0.222000 0.278000 0.278000 0.222000
-
-    cat << '_EOF_' > doPhyloP.csh
-#!/bin/csh -fe
-set PHASTBIN = /cluster/bin/phast.build/cornellCVS/phast.2010-12-30/bin
-set f = $1
-set d = $f:h
-set file1 = $f:t
-set out = $2
-set cName = $f:t:r
-set grp = $cwd:t
-set cons = /hive/data/genomes/hg38/bed/multiz7way/consPhyloP
-set tmp = $cons/tmp/$grp/$f
-/bin/rm -fr $tmp
-/bin/mkdir -p $tmp
-set ssSrc = "/hive/data/genomes/hg38/bed/multiz7way/cons/SS/result/$f"
-set useGrp = "$grp.mod"
-/bin/ln -s $cons/run.phyloP/$grp.mod $tmp
-pushd $tmp > /dev/null
-$PHASTBIN/phyloP --method LRT --mode CONACC --wig-scores --chrom $cName \
-    -i SS $useGrp $ssSrc.ss > $file1.wigFix
-popd > /dev/null
-/bin/mkdir -p $out:h
-sleep 4
-/bin/touch $out:h
-/bin/mv $tmp/$file1.wigFix $out
-/bin/rm -fr $tmp
-/bin/rmdir --ignore-fail-on-non-empty $cons/tmp/$grp/$d
-/bin/rmdir --ignore-fail-on-non-empty $cons/tmp/$grp/$d:h
-/bin/rmdir --ignore-fail-on-non-empty $cons/tmp/$grp
-/bin/rmdir --ignore-fail-on-non-empty $cons/tmp
-'_EOF_'
-    # << happy emacs
-
-    # Create list of chunks
-    find ../../cons/SS/result -type f | grep ".ss$" \
-	| sed -e "s/.ss$//; s#^../../cons/SS/result/##" > ss.list
-    # make sure the list looks good
-    wc -l ss.list
-    #	641 ss.list
-
-    # Create template file
-    #	file1 == $chr/$chunk/file name without .ss suffix
-    cat << '_EOF_' > template
-#LOOP
-../run.phyloP/doPhyloP.csh $(path1) {check out line+ wigFix/$(dir1)/$(file1).wigFix}
-#ENDLOOP
-'_EOF_'
-    # << happy emacs
-
-    ######################   Running all species  #######################
-    # setup run for all species
-    mkdir /hive/data/genomes/hg38/bed/multiz7way/consPhyloP/all
-    cd /hive/data/genomes/hg38/bed/multiz7way/consPhyloP/all
-    rm -fr wigFix
-    mkdir wigFix
-
-    gensub2 ../run.phyloP/ss.list single ../run.phyloP/template jobList
-    # the -ram=8g will allow only one job per node to slow this down since
-    #	it would run too fast otherwise.  Either run on one of the small
-    #	klusters or use the -ram=8g on the para create
-    para -ram=32g create jobList
-    para try ... check ... push ... etc ...
-    para time > run.time
-# Completed: 641 of 641 jobs
-# CPU time in finished jobs:       4755s      79.24m     1.32h    0.06d  0.000 y
-# IO & Wait Time:                  4343s      72.39m     1.21h    0.05d  0.000 y
-# Average job time:                  14s       0.24m     0.00h    0.00d
-# Longest finished job:              27s       0.45m     0.01h    0.00d
-# Submission to last job:          1152s      19.20m     0.32h    0.01d
-
-    # make downloads
-    mkdir downloads
-
-    time (find ./wigFix -type f | sed -e "s#^./##; s#\.# d #g; s#-# m #;" \
-	| sort -k1,1 -k3,3n | sed -e "s# d #.#g; s# m #-#g;" | xargs cat \
-	| gzip -c > downloads/phyloP7way.wigFix.gz) &
-    #   real    29m51.665s
-
-    # check integrity of data with wigToBigWig
-    time (zcat downloads/phyloP7way.wigFix.gz \
-	| wigToBigWig -verbose=2 stdin /hive/data/genomes/hg38/chrom.sizes \
-	phyloP7way.bw) > bigWig.log 2>&1 &
-    egrep "real|VmPeak" bigWig.log
-    # pid=76577: VmPeak:    33106320 kB
-    #  real    42m53.038s
-
-
-    bigWigInfo phyloP7way.bw
-# version: 4
-# isCompressed: yes
-# isSwapped: 0
-# primaryDataSize: 3,759,451,708
-# primaryIndexSize: 92,579,900
-# zoomLevels: 10
-# chromCount: 353
-# basesCovered: 2,898,191,577
-# mean: 0.074472
-# min: -5.220000
-# max: 1.062000
-# std: 0.545945
-
-    #	encode those files into wiggle data
-    time (zcat downloads/phyloP7way.wigFix.gz \
-	| wigEncode stdin phyloP7way.wig phyloP7way.wib) &
-    #   Converted stdin, upper limit 1.06, lower limit -5.22
-    #   real    16m11.861s
-
-
-    du -hsc *.wi?
-    #   47M     phyloP7way.wib
-    #   12M     phyloP7way.wig
-    #   58M     total
-
-    # Load gbdb and database with wiggle.
-    ln -s `pwd`/phyloP7way.wib /gbdb/hg38/multiz7way/phyloP7way.wib
-    nice hgLoadWiggle -pathPrefix=/gbdb/hg38/multiz7way hg38 \
-	phyloP7way phyloP7way.wig
-
-    # use to set trackDb.ra entries for wiggle min and max
-    # and verify table is loaded correctly
-
-    wigTableStats.sh hg38 phyloP7way
-# db.table      min max mean count sumData
-# hg38.phyloP7way -5.22 1.062 0.0744721 2898191577 2.15834e+08
-#       stdDev viewLimits
-#     0.545945 viewLimits=-2.65525:1.062
-
-    #	that range is: 5.22+1.062 = 6.282 for hBinSize=0.006282
-
-    #  Create histogram to get an overview of all the data
-    time nice -n +19 hgWiggle -doHistogram \
-	-hBinSize=0.006282 -hBinCount=1000 -hMinVal=-5.22 -verbose=2 \
-	    -db=hg38 phyloP7way > histogram.data 2>&1
-    #   real    2m55.843s
-
-
-    # find out the range for the 2:5 graph
-    grep -v chrom histogram.data | grep "^[0-9]" | ave -col=5 stdin
-# Q1 0.000001
-# median 0.000060
-# Q3 0.000656
-# average 0.001022
-# min 0.000000
-# max 0.065461
-# count 978
-# total 0.999982
-# standard deviation 0.004157
-
-    #	create plot of histogram:
-    cat << '_EOF_' | gnuplot > histo.png
-set terminal png small x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
-set size 1.4, 0.8
-set key left box
-set grid noxtics
-set grid ytics
-set title " Human hg38 Histogram phyloP7way track"
-set xlabel " phyloP7way score"
-set ylabel " Relative Frequency"
-set y2label " Cumulative Relative Frequency (CRF)"
-set y2range [0:1]
-set y2tics
-set yrange [0:0.02]
-
-plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
-        "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
-'_EOF_'
-    #	<< happy emacs
-
-    display histo.png &
-
-#############################################################################
-# construct download files for 7-way (DONE - 2014-06-05 - Hiram)
-    mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/multiz7way
-    mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/phastCons7way
-    mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/phyloP7way
-    mkdir /hive/data/genomes/hg38/bed/multiz7way/downloads
-    cd /hive/data/genomes/hg38/bed/multiz7way/downloads
-    mkdir multiz7way phastCons7way phyloP7way
-    cd multiz7way
-    time cp -p ../../anno/hg38.7way.maf .
-    #   real    0m55.984s
-    time gzip *.maf
-    #   real    46m53.149s
-
-    ln -s ../../hg38.7way.nh .
-    ln -s ../../hg38.7way.commonNames.nh .
-    time md5sum *.nh *.maf.gz > md5sum.txt
-    #   real    1m55.317s
-    ln -s `pwd`/* \
-        /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/multiz7way
-
-    du -hsc *.maf.gz ../../anno/hg38.7way.maf
-    #  3.5G    hg38.7way.maf.gz
-    #   17G     ../../anno/hg38.7way.maf
-
-    #####################################################################
-    cd /hive/data/genomes/hg38/bed/multiz7way/downloads/phastCons7way
-
-    ln -s ../../cons/all/downloads/phastCons7way.wigFix.gz \
-        ./hg38.phastCons7way.wigFix.gz
-    ln -s ../../cons/all/phastCons7way.bw ./hg38.phastCons7way.bw
-    ln -s ../../cons/all/all.mod ./hg38.phastCons7way.mod
-    time md5sum *.gz *.mod *.bw > md5sum.txt
-    #   real    0m37.384s
-    # obtain the README.txt from petMar2/phastCons7way and update for this
-    #   situation
-    ln -s `pwd`/*.gz `pwd`/*.mod `pwd`/*.bw `pwd`/*.txt \
-      /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/phastCons7way
-
-    #####################################################################
-    cd /hive/data/genomes/hg38/bed/multiz7way/downloads/phyloP7way
-
-    ln -s ../../consPhyloP/all/downloads/phyloP7way.wigFix.gz \
-        ./hg38.phyloP7way.wigFix.gz
-    ln -s ../../consPhyloP/run.phyloP/all.mod hg38.phyloP7way.mod
-    ln -s ../../consPhyloP/all/phyloP7way.bw hg38.phyloP7way.bw
-
-    time md5sum *.mod *.bw *.gz > md5sum.txt
-    #   real    0m29.431s
-
-    # obtain the README.txt from geoFor1/phyloP7way and update for this
-    #   situation
-    ln -s `pwd`/* \
-      /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/phyloP7way
-
-    ###########################################################################
-    ## create upstream refGene maf files
-    cd /hive/data/genomes/hg38/bed/multiz7way/downloads/multiz7way
-    # bash script
-#!/bin/sh
-export geneTbl="knownGene"
-for S in 1000 2000 5000
-do
-    echo "making upstream${S}.maf"
-    featureBits hg38 ${geneTbl}:upstream:${S} -fa=/dev/null -bed=stdout \
-        | perl -wpe 's/_up[^\t]+/\t0/' | sort -k1,1 -k2,2n \
-        | /cluster/bin/$MACHTYPE/mafFrags hg38 multiz7way \
-                stdin stdout \
-                -orgs=/hive/data/genomes/hg38/bed/multiz7way/species.list \
-        | gzip -c > upstream${S}.${geneTbl}.maf.gz
-    echo "done upstream${S}.${geneTbl}.maf.gz"
-done
-    #   real    60m16.631s
-
-    md5sum upstream*.gz >> md5sum.txt
-
-    # some other symlinks were already made above
-    # obtain the README.txt from geoFor1/multiz7way and update for this
-    #   situation
-    ln -s `pwd`/upstream*.gz README.txt \
-        /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/multiz7way
-
-#############################################################################
-# hgPal downloads (DONE - 2014-06-06 - Hiram)
-#   FASTA from 7-way for knownGene, refGene and knownCanonical
-
-    ssh hgwdev
-    screen -S hg38HgPal
-    mkdir /hive/data/genomes/hg38/bed/multiz7way/pal
-    cd /hive/data/genomes/hg38/bed/multiz7way/pal
-    cat ../species.list | tr '[ ]' '[\n]' > order.list
-
-    export mz=multiz7way
-    export gp=knownGene
-    export db=hg38
-    export I=0
-    mkdir exonAA exonNuc
-    for C in `sort -nk2 ../../../chrom.sizes | cut -f1`
-    do
-        I=`echo $I | awk '{print $1+1}'`
-	echo "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.list stdout | gzip -c > exonNuc/$C.exonNuc.fa.gz &"
-	echo "mafGene -chrom=$C -exons $db $mz $gp order.list stdout | gzip -c > exonAA/$C.exonAA.fa.gz &"
-        if [ $I -gt 6 ]; then
-            echo "date"
-            echo "wait"
-            I=0
-        fi
-    done > $gp.jobs
-    echo "date" >> $gp.jobs
-    echo "wait" >> $gp.jobs
-
-    time ./$gp.jobs > $gp.jobs.log 2>&1 &
-    #   real    28m46.919s
-
-    time zcat exonAA/*.gz | gzip -c > $gp.$mz.exonAA.fa.gz
-    #   real    0m23.798s
-    time zcat exonNuc/*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz
-    #   real    1m28.197s
-
-    export mz=multiz7way
-    export gp=knownGene
-    export db=hg38
-    export pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
-    mkdir -p $pd
-    md5sum *.fa.gz > md5sum.txt
-    ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
-    ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
-    ln -s `pwd`/md5sum.txt $pd/
-
-    rm -rf exonAA exonNuc
-
-    ### need other gene track alignments also
-    # running up refGene
-    cd /hive/data/genomes/hg38/bed/multiz7way/pal
-    export mz=multiz7way
-    export gp=refGene
-    export db=hg38
-    export I=0
-    mkdir exonAA exonNuc
-    for C in `sort -nk2 ../../../chrom.sizes | cut -f1`
-    do
-        I=`echo $I | awk '{print $1+1}'`
-	echo "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.list stdout | gzip -c > exonNuc/$C.exonNuc.fa.gz &"
-	echo "mafGene -chrom=$C -exons $db $mz $gp order.list stdout | gzip -c > exonAA/$C.exonAA.fa.gz &"
-        if [ $I -gt 6 ]; then
-            echo "date"
-            echo "wait"
-            I=0
-        fi
-    done > $gp.jobs
-    echo "date" >> $gp.jobs
-    echo "wait" >> $gp.jobs
-
-    time sh -x $gp.jobs > $gp.jobs.log 2>&1
-    #   real    15m15.424s
-
-    export mz=multiz7way
-    export gp=refGene
-    export db=hg38
-    time zcat exonAA/*.gz | gzip -c > $gp.$mz.exonAA.fa.gz
-    #   real    0m23.119s
-    time zcat exonNuc/*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz
-    #   real    1m15.547s
-
-    du -hsc exonAA exonNuc refGene*.fa.gz
-    #  59M     exonAA
-    #  101M    exonNuc
-    #  59M     refGene.multiz7way.exonAA.fa.gz
-    #  101M    refGene.multiz7way.exonNuc.fa.gz
-    #  317M    total
-
-    rm -rf exonAA exonNuc
-
-    # we're only distributing exons at the moment
-    export mz=multiz7way
-    export gp=refGene
-    export db=hg38
-    export pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
-    mkdir -p $pd
-    md5sum *.fa.gz > md5sum.txt
-    ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
-    ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
-    ln -s `pwd`/md5sum.txt $pd/
-
-    ### And knownCanonical
-    cd /hive/data/genomes/hg38/bed/multiz7way/pal
-    export mz=multiz7way
-    export gp=knownCanonical
-    export db=hg38
-    mkdir exonAA exonNuc ppredAA ppredNuc knownCanonical
-
-    cut -f1 ../../../chrom.sizes | while read C
-    do
-        echo $C
-	hgsql hg38 -N -e "select chrom, chromStart, chromEnd, transcript from knownCanonical where chrom='$C'" > knownCanonical/$C.known.bed
-    done
-
-    ls knownCanonical/*.known.bed | while read F
-    do
-      if [ -s $F ]; then
-         echo $F | sed -e 's#knownCanonical/##; s/.known.bed//'
-      fi
-    done | while read C
-    do
-	echo "date"
-	echo "mafGene -geneBeds=knownCanonical/$C.known.bed  $db $mz knownGene order.list stdout | \
-	    gzip -c > ppredAA/$C.ppredAA.fa.gz"
-	echo "mafGene -geneBeds=knownCanonical/$C.known.bed -noTrans $db $mz knownGene order.list stdout | \
-	    gzip -c > ppredNuc/$C.ppredNuc.fa.gz"
-	echo "mafGene -geneBeds=knownCanonical/$C.known.bed -exons -noTrans $db $mz knownGene order.list stdout | \
-	    gzip -c > exonNuc/$C.exonNuc.fa.gz"
-	echo "mafGene -geneBeds=knownCanonical/$C.known.bed -exons $db $mz knownGene order.list stdout | \
-	    gzip -c > exonAA/$C.exonAA.fa.gz"
-    done > $gp.$mz.jobs
-
-    time sh -x $gp.$mz.jobs > $gp.$mz.job.log 2>&1
-    # real    72m58.133s
-
-    rm *.known.bed
-    mz=multiz7way
-    gp=knownCanonical
-    db=hg38
-    zcat exonAA/c*.gz | gzip -c > $gp.$mz.exonAA.fa.gz &
-    zcat exonNuc/c*.gz | gzip -c > $gp.$mz.exonNuc.fa.gz &
-    zcat ppredAA/c*.gz | gzip -c > $gp.$mz.ppredAA.fa.gz &
-    zcat ppredNuc/c*.gz | gzip -c > $gp.$mz.ppredNuc.fa.gz
-
-    rm -rf exonAA exonNuc ppredAA ppredNuc
-
-    mz=multiz7way
-    gp=knownCanonical
-    db=hg38
-    pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
-    mkdir -p $pd
-    ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
-    ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
-    cd  $pd
-    md5sum *.exon*.fa.gz > md5sum.txt
-
-#############################################################################
-# wiki page for 7-way (DONE - 2014-06-04 - Hiram)
-    mkdir /hive/users/hiram/bigWays/hg38.7way
-    cd /hive/users/hiram/bigWays
-    echo "hg38" > hg38.7way/ordered.list
-    awk '{print $1}' /hive/data/genomes/hg38/bed/multiz7way/7way.distances.txt \
-       >> hg38.7way/ordered.list
-
-    # sizeStats.sh catches up the cached measurements required for data
-    # in the tables.  They may already be done.
-    ./sizeStats.sh hg38.7way/ordered.list
-    # dbDb.sh constructs hg38.7way/Hg38_7-way_conservation_alignment.html
-    ./dbDb.sh hg38 7way
-    # sizeStats.pl constructs hg38.7way/Hg38_7-way_Genome_size_statistics.html
-    ./sizeStats.pl hg38 7way
-
-    # defCheck.pl constructs Hg38_7-way_conservation_lastz_parameters.html
-    ./defCheck.pl hg38 7way
-
-    # this constructs the html pages in hg38.7way/:
-# -rw-rw-r-- 1 4153 Jun  5 11:03 Hg38_7-way_conservation_alignment.html
-# -rw-rw-r-- 1 5833 Jun  5 11:04 Hg38_7-way_Genome_size_statistics.html
-# -rw-rw-r-- 1 3854 Jun  5 11:04 Hg38_7-way_conservation_lastz_parameters.html
-
-    # add those pages to the genomewiki.  Their page names are the
-    # names of the .html files without the .html:
-#  Hg38_7-way_conservation_alignment
-#  Hg38_7-way_Genome_size_statistics
-#  Hg38_7-way_conservation_lastz_parameters
-
-    # when you view the first one you enter, it will have links to the
-    # missing two.
-
-#############################################################################
-# GRC Incident database (DONE - 2014-06-14 - Hiram)
-    # this procedure is run as a cron job in Hiram's account:
-
-    #	33 09 * * * /hive/data/outside/grc/incidentDb/runUpdate.sh makeItSo
-
-    # data comes from: ftp://ftp.ncbi.nlm.nih.gov/pub/grc/
-    # processed by /hive/data/outside/grc/incidentDb/grcUpdate.sh
-
-    # the table in the dataBase is: grcIncidentDb
-    # which is the URL to the bb file, a single row:
-    # http://genomewiki.ucsc.edu/images/7/7f/Hg38.grcIncidentDb.bb
-
-#############################################################################
-# RepeatMasker Visualization track (DONE - 2014-07-25 - Hiram)
-    mkdir /hive/data/genomes/hg38/bed/rmskJoined
-    cd /hive/data/genomes/hg38/bed/rmskJoined
-
-    ln -s ../repeatMasker/hg38.sorted.fa.out .
-    ln -s ../repeatMasker/hg38.fa.align.gz .
-
-    # working on fixing this script for the next release of RM
-    /scratch/data/RepeatMasker140131/util/nextVerRmToUCSCTables.pl \
-            -out hg38.sorted.fa.out -align hg38.fa.align.gz
-
-    hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/rmskJoined.sql \
-        -renameSqlTable -verbose=4 -tab \
-            -type=bed4+9 -as=$HOME/kent/src/hg/lib/rmskJoined.as hg38 \
-                rmskJoinedBaseline hg38.sorted.fa.join.bed \
-                    > loadJoined.log 2>&1
-
-    hgLoadSqlTab hg38 rmskAlignBaseline \
-        /cluster/home/hiram/kent/src/hg/lib/rmskAlign.sql \
-            hg38.fa.align.tsv > loadAlign.log 2>&1
-
-    hgLoadOutJoined -verbose=2 hg38 hg38.sorted.fa.out > loadOut.log 2>&1
-
-    featureBits -countGaps hg38 rmskJoinedBaseline
-    #    2716777279 bases of 3209286105 (84.654%) in intersection
-
-##############################################################################
-# LASTZ Macaca Mulatta RheMac2 (DONE - 2014-07-13 - braney)
-    mkdir /hive/data/genomes/hg38/bed/lastzRheMac2.2014-07-11
-    cd /hive/data/genomes/hg38/bed/lastzRheMac2.2014-07-11
-
-    # best to always specify an exact path to lastz so we know which one is used
-    # lastz default parameters are human-mouse parameters
-
-    cat << '_EOF_' > DEF
-# human vs macaca mulatta
-BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
-# maximum M allowed with lastz is only 254
-BLASTZ_M=254
-BLASTZ_Q=/scratch/data/blastz/human_chimp.v2.q
-BLASTZ_O=600
-BLASTZ_E=150
-# other parameters from panTro2 vs hg18 lastz on advice from Webb
-BLASTZ_K=4500
-BLASTZ_Y=15000
-BLASTZ_T=2
-
-# TARGET: Human Hg38
-SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit
-SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes
-SEQ1_CTGDIR=/hive/data/genomes/hg38/hg38.contigs.2bit
-SEQ1_CTGLEN=/hive/data/genomes/hg38/hg38.contigs.chrom.sizes
-SEQ1_LIFT=/hive/data/genomes/hg38/jkStuff/hg38.contigs.lift
-SEQ1_CHUNK=20000000
-SEQ1_LAP=10000
-
-# QUERY: Macaca Mulatta RheMac2
-SEQ2_DIR=/scratch/data/rheMac2/rheMac2.2bit
-SEQ2_LEN=/scratch/data/rheMac2/chrom.sizes
-SEQ2_CHUNK=20000000
-SEQ2_LAP=0
-SEQ2_IN_CONTIGS=0
-
-BASE=/hive/data/genomes/hg38/bed/lastzRheMac2.2014-07-11
-TMPDIR=/dev/shm
-'_EOF_'
-    # << happy emacs
-    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl \
-        `pwd`/DEF \
-        -syntenicNet -fileServer=hgwdev \
-	-chainMinScore=5000 -chainLinearGap=medium \
-        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
-    #  Elapsed time: 141m36s
-    cat fb.hg38.chainRheMac2Link.txt
-    # 2455106923 bases of 3049335806 (80.513%) in intersection
-
-    #   running the swap
-    mkdir /hive/data/genomes/rheMac2/bed/blastz.hg38.swap
-    cd /hive/data/genomes/rheMac2/bed/blastz.hg38.swap
-    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
-        /hive/data/genomes/hg38/bed/lastzRheMac2.2014-07-11/DEF \
-        -swap -syntenicNet -chainMinScore=5000 -chainLinearGap=medium \
-        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
-    # 83m26.095s
-    cat fb.rheMac2.chainHg38Link.txt
-    # 2313950599 bases of 2646704109 (87.428%) in intersection
-#
-
-#########################################################################
-# LASTZ Chlorocebus sabaeus  (DONE - 2014-07-13 - braney)
-    mkdir /hive/data/genomes/hg38/bed/lastzChlSab2.2014-07-11
-    cd /hive/data/genomes/hg38/bed/lastzChlSab2.2014-07-11
-
-    # best to always specify an exact path to lastz so we know which one is used
-    # lastz default parameters are human-mouse parameters
-
-    cat << '_EOF_' > DEF
-# human vs Chlorocebus sabaeus
-BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
-# maximum M allowed with lastz is only 254
-BLASTZ_M=254
-BLASTZ_Q=/scratch/data/blastz/human_chimp.v2.q
-BLASTZ_O=600
-BLASTZ_E=150
-# other parameters from panTro2 vs hg18 lastz on advice from Webb
-BLASTZ_K=4500
-BLASTZ_Y=15000
-BLASTZ_T=2
-
-
-# TARGET: Human Hg38
-SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit
-SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes
-SEQ1_CTGDIR=/hive/data/genomes/hg38/hg38.contigs.2bit
-SEQ1_CTGLEN=/hive/data/genomes/hg38/hg38.contigs.chrom.sizes
-SEQ1_LIFT=/hive/data/genomes/hg38/jkStuff/hg38.contigs.lift
-SEQ1_CHUNK=20000000
-SEQ1_LAP=10000
-
-# QUERY Chlorocebus sabaeus chlSab2
-SEQ2_DIR=/scratch/data/chlSab2/chlSab2.2bit
-SEQ2_LEN=/scratch/data/chlSab2/chrom.sizes
-SEQ2_CHUNK=20000000
-SEQ2_LAP=0
-SEQ2_IN_CONTIGS=0
-
-BASE=/hive/data/genomes/hg38/bed/lastzChlSab2.2014-07-11
-TMPDIR=/dev/shm
-'_EOF_'
-    # << happy emacs
-    time $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl \
-        `pwd`/DEF \
-        -syntenicNet -fileServer=hgwdev \
-	-chainMinScore=5000 -chainLinearGap=medium \
-        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > do.log 2>&1
-    # Elapsed time: 142m4s
-    cat fb.hg38.chainChlSab2Link.txt
-    # 2573435303 bases of 3049335806 (84.393%) in intersection
-
-    #   running the swap
-    mkdir /hive/data/genomes/chlSab2/bed/blastz.hg38.swap
-    cd /hive/data/genomes/chlSab2/bed/blastz.hg38.swap
-    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
-        /hive/data/genomes/hg38/bed/lastzChlSab2.2014-07-11/DEF \
-        -swap -syntenicNet -chainMinScore=5000 -chainLinearGap=medium \
-        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku > swap.log 2>&1
-    # 88m48.411s
-    cat fb.chlSab2.chainHg38Link.txt
-    # 2429053010 bases of 2752019208 (88.264%) in intersection
-
-#########################################################################
-# SEGMENTAL DUPLICATIONS (DONE - 2014-08-13 - Hiram)
-    # redmine issue: refs #13580
-
-    # file received in email from Archana Natarajan Raja (araja at uw.edu)
-    mkdir /hive/data/genomes/hg38/bed/genomicSuperDups
-    cd /hive/data/genomes/hg38/bed/genomicSuperDups
-# -rw-r--r-- 1 16478617 Aug 11 16:18 GenomicSuperDup.tab
-
-    # no longer filtering items smaller than 1,000 bases, see note
-    # in redmine issue refs #13580
-# While the size of the 24 alignments are less than 1000 bases , the size of
-# their pairs to which they align are always >1000, you can confirm this by
-# looking at the value in column 22 in your table (alignB -ucsc format), will
-# always be >1000 bp . We are seeing this only now because there are lots of
-# new and resolved duplications added to hg38. Hence , I would recommend not
-# filtering these items and uploading the current set as is.
-
-    # there is no chrEBV in the browser:
-    grep -v chrEBV GenomicSuperDup.tab | sed -e 's/\t_\t/\t-\t/;' \
-      | hgLoadBed hg38 genomicSuperDups stdin \
-	-sqlTable=$HOME/kent/src/hg/lib/genomicSuperDups.sql
-    #  Read 69894 elements of size 29 from stdin
-
-    checkTableCoords  hg38 genomicSuperDups
-    # <silence>  (the chrEBV was found with this check)
-
-    featureBits -countGaps hg38 genomicSuperDups
-    # 175429664 bases of 3209286105 (5.466%) in intersection
-
-    featureBits -countGaps hg19 genomicSuperDups
-    #  166092393 bases of 3137161264 (5.294%) in intersection
-    featureBits -countGaps hg18 genomicSuperDups
-    #  159204446 bases of 3107677273 (5.123%) in intersection
-
-    featureBits -countGaps mm10 genomicSuperDups
-    # 214917441 bases of 2730871774 (7.870%) in intersection
-    featureBits -countGaps mm9 genomicSuperDups
-    # 208214567 bases of 2725765481 (7.639%) in intersection
-
-##############################################################################
-# cloneEnds (DONE - 2014-08-14 - Hiram)
-
-    mkdir /hive/data/genomes/hg38/bed/cloneEnds
-    cd /hive/data/genomes/hg38/bed/cloneEnds
-
-    # fetch the NCBI INSDC name correspondence file:
-    rsync -a -P rsync://ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/All/GCF_000001405.26.assembly.txt ./
-
-    # fetch the clone reports
-    mkdir reports
-    rsync -a -P \
-rsync://ftp.ncbi.nih.gov/repository/clone/reports/Homo_sapiens/*.GCF_000001405.26.106.*.gff \
-       ./reports/
-
-    # script to establish refSeq to UCSC chrom names:
-
-    cat << '_EOF_' > refSeqNames.pl
-#!/usr/bin/env perl
-
-use strict;
-use warnings;
-
-open (FH, "<GCF_000001405.26.assembly.txt") or die "can not read GCF_000001405.26.assembly.txt";
-while (my $line = <FH>) {
-  chomp $line;
-  next if ($line =~ m/^#/);
-  my @a = split('\t', $line);
-  my $chrN = $a[2];
-  my $refSeq = $a[6];
-  my $contig = $a[4];
-  my $type = $a[1];
-  next if (!defined $type);
-  next if (!defined $refSeq);
-  next if (!defined $contig);
-  my $suffix = "";
-  if ($type eq "alt-scaffold") {
-     $suffix = "_alt";
-  } elsif ($type eq "unlocalized-scaffold") {
-     $suffix = "_random";
-  } elsif ($type eq "unplaced-scaffold") {
-     $chrN = "Un";
-  }
-  $chrN = "M" if ($chrN eq "MT");
-  if ($a[0] =~ m/_/) {
-    $contig =~ s/\./v/;
-    printf "%s\tchr%s_%s%s\n", $refSeq, $chrN, $contig, $suffix;
-  } else {
-    printf "%s\tchr%s\n", $refSeq, $chrN;
-  }
-}
-close (FH);
-'_EOF_'
-    # << happy emacs
-
-    chmod +x refSeqNames.pl
-
-    ./refSeqNames.pl > refSeq.ucscName.tab
-
-    # establish full library list:
-    ls reports/*.GCF_000001405.26.106.*.gff | sed -e 's#reports/##' \
-       | cut -d"." -f1 | sort -u > library.list.txt
-
-    # a script to scan the GFF files, with the refSeq.ucscName.tab
-    # name correspondence to construct bed files
-
-    cat << '_EOF_' > hg38.pl
-#!/usr/bin/env perl
-
-use strict;
-use warnings;
-
-my $argc = scalar(@ARGV);
-
-if ($argc < 1) {
-  printf STDERR "usage: ./hg38.pl <report.gff> [moreReports.gff]\n";
-  exit 255;
-}
-
-my %refSeqToUcsc;   # key is refSeq name, value is UCSC chrom name
-open (FH, "<refSeq.ucscName.tab") or die "can not read refSeq.ucscName.tab";
-while (my $line = <FH>) {
-  chomp $line;
-  my ($refSeq, $ucsc) = split('\t', $line);
-  $refSeqToUcsc{$refSeq} = $ucsc;
-}
-close (FH);
-
-my %chromSizes;    # key is UCSC chrom name, key is chrom size
-open (FH, "</hive/data/genomes/hg38/chrom.sizes") or die "can not read hg38/chrom.sizes";
-while (my $line = <FH>) {
-  chomp $line;
-  my ($chr, $size) = split('\t', $line);
-  $chromSizes{$chr} = $size;
-}
-close (FH);
-
-while (my $file = shift) {
-my %starts;   # key is parent ID, value is start end coordinates start,end
-my %ends;	# key is parent ID, value is end end coordinates start,end
-my %parents;	# key is parent ID, value is 1 to signify exists
-my %endNames;   # key is parent ID, value is the Name of the parent clone_insert
-
-printf STDERR "# processing $file\n";
-
-open (FH, "<$file") or die "can not read $file";
-while (my $line = <FH>) {
-  chomp $line;
-  next if ($line=~ m/^#/);
-  my @a = split('\t', $line);
-  next if (scalar(@a) < 1);
-  my $contig = $a[0];
-  $contig =~ s/ref.//;
-  $contig =~ s/\|//;
-  my $ucscChr = $refSeqToUcsc{$contig};
-  if (!defined($ucscChr)) {
-    printf STDERR "# ERR: contig not in refSeqToUcsc: '$contig'\n";
-    next;
-  }
-  next if (! exists($chromSizes{$ucscChr}));
-  my $chromSize = $chromSizes{$ucscChr};
-  my $chromStart = $a[3] - 1;
-  my $chromEnd = $a[4];
-  if ($chromStart > $chromSize) {
-    printf STDERR "# warning chromStart over size $ucscChr $chromStart $chromEnd\n";
-    $chromStart = $chromSize-1;
-  }
-  if ($chromEnd > $chromSize) {
-    my $overRun = $chromEnd - $chromSize;
-    printf STDERR "# warning chromEnd over size by $overRun -> $ucscChr $chromStart $chromEnd\n";
-    $chromEnd = $chromSize;
-  }
-  my $id="notFound";
-  my $name="notFound";
-  my $parent="notFound";
-  my @b = split(';', $a[8]);
-  for (my $i = 0; $i < scalar(@b); ++$i) {
-     my ($tag, $value) = split('=', $b[$i]);
-     if ($tag eq "ID") {
-        $id = $value;
-        if ($id !~ m/-/) {
-          if (exists($parents{$id})) {
-            printf STDERR "# WARN: duplicate parent: $id";
-          } else {
-            $parents{$id} = $ucscChr;
-          }
-        }
-     } elsif ($tag eq "Parent") {
-        $parent = $value;
-     } elsif ($tag eq "Name") {
-        $name = $value;
-     }
-  }
-  my $type="notFound";
-  my $insertType = $a[2];
-  if ($insertType =~ m/clone_insert_start/) {
-     $type = "start";
-     if ($parent eq "notFound") {
-       printf STDERR "# ERR: can not find parent for start $name Ttype $id\n";
-     } else {
-       if (!exists($parents{$parent})) {
-         printf STDERR "# ERR: start found $name  with no parent $parent declared\n";
-       } elsif (exists($starts{$parent})) {
-         printf STDERR "# ERR: duplicate start for $parent\n";
-       } elsif ($ucscChr eq $parents{$parent}) {
-         $starts{$parent} = sprintf("%s\t%s", $chromStart, $chromEnd);
-       } else {
-         printf STDERR "# ERR: start on different chrom $ucscChr than parent $parent $parents{$parent}\n";
-       }
-     }
-  } elsif ($insertType =~ m/clone_insert_end/) {
-     $type = "end";
-     if ($parent eq "notFound") {
-       printf STDERR "# ERR: can not find parent for end $name Ttype $id\n";
-     } else {
-       if (!exists($parents{$parent})) {
-         printf STDERR "# ERR: end found $name  with no parent $parent declared\n";
-       } elsif (exists($ends{$parent})) {
-         printf STDERR "# ERR: duplicate end for $parent\n";
-       } elsif ($ucscChr eq $parents{$parent}) {
-         $ends{$parent} = sprintf("%s\t%s", $chromStart, $chromEnd);
-       } else {
-         printf STDERR "# ERR: end on different chrom $ucscChr than parent $parent $parents{$parent}\n";
-       }
-     }
-  } elsif ($insertType =~ m/clone_insert/) {
-     $type = "insert";
-     $endNames{$id} = $name;
-  }
-  $name =~ s/gi\|//g;
-  $id =~ s/gi\|//g;
-  printf STDERR "%s\t%d\t%d\t%s_%s_%s\t0\t%s\n", $ucscChr, $chromStart, $chromEnd, $name, $type, $id, $a[6];
-}	# while (my $line = <FH>)
-
-close (FH);
-
-foreach my $parent (keys %parents) {
-  if (! exists($starts{$parent}) ) {
-    printf STDERR "# ERR: no start for $parent\n";
-  } elsif (! exists($ends{$parent}) ) {
-    printf STDERR "# ERR: no end for $parent\n";
-  } else {
-    my $strand = "+";
-    my $chrStart = 0;
-    my $chrEnd = 0;
-    my $blockStart = 0;
-    my ($sStart, $sEnd) = split('\t', $starts{$parent});
-    my ($eStart, $eEnd) = split('\t', $ends{$parent});
-    my $startSize = $sEnd - $sStart;
-    my $endSize = $eEnd - $eStart;
-    if ($eStart < $sStart) {
-      $chrStart = $eStart;
-      $chrEnd = $sEnd;
-      $blockStart = $sStart - $chrStart;
-      $strand = "-";
-      $startSize = $eEnd - $eStart;
-      $endSize = $sEnd - $sStart;
-    } else {
-      $chrStart = $sStart;
-      $chrEnd = $eEnd;
-      $blockStart = $eStart - $chrStart;
-    }
-    if ($startSize > $blockStart) {
-      printf STDERR "# startSize > blockStart $endNames{$parent}\n";
-    } else {
-      printf "%s\t%d\t%d\t%s\t0\t%s\t%d\t%d\t0\t2\t%d,%d\t0,%d\n", $parents{$parent}, $chrStart, $chrEnd, $endNames{$parent}, $strand, $chrStart, $chrEnd, $startSize, $endSize, $blockStart;
-    }
-  }
-}
-}
-'_EOF_'
-    # << happy emacs
-
-    chmod +x hg38.pl
-
-    # process GFF files into bed files into separateLibs/ directory
-for L in `cat library.list.txt`
-do
-   export destDir="separateLibs/${L}"
-   echo "working: ${L}" 1>&1
-   mkdir -p "${destDir}"
-   ./hg38.pl reports/${L}.GCF_000001405.26.106.*.gff \
-       2> ${destDir}/tmp.bed6 | sort -k1,1 -k2,2n > ${destDir}/hg38.${L}.bed
-   sort -k1,1 -k2,2n ${destDir}/tmp.bed6 > ${destDir}/hg38.${L}.items.bed6
-done
-
-    # use only those libraries with more than 20,000 clone ends
-    wc -l separateLibs/*/*.bed | sort -n | grep -v total | awk '$1 > 20000' \
-        | sed -e 's#.*separateLibs/##; s#/.*##' > libs.over20K.list
-
-    # note those libraries with less than 20,000 clone ends
-    wc -l separateLibs/*/*.bed | grep -v total | awk '$1 < 20000' | sed -e 's#.*separateLibs/##; s#/.*##' > libs.under20K.list
-
-    # filter out bad ends, length must be <= median size times three
-    cat libs.over20K.list | while read D
-do
-   if [ ! -s separateLibs/${D}/lengths.txt ]; then
-      awk '{print $3-$2}' separateLibs/${D}/hg38.${D}.bed \
-        > separateLibs/${D}/lengths.txt
-   fi
-   median3X=`ave separateLibs/${D}/lengths.txt | grep median | awk '{printf "%d", $2*3}'`
-   awk '($3-$2) < '$median3X'' separateLibs/${D}/hg38.${D}.bed > separateLibs/${D}/hg38.median3X.bed
-   awk '($3-$2) >= '$median3X'' separateLibs/${D}/hg38.${D}.bed > separateLibs/${D}/hg38.badMap.bed
-   before=`cat separateLibs/${D}/hg38.${D}.bed | wc -l`
-   after=`cat separateLibs/${D}/hg38.median3X.bed | wc -l`
-   dropped=`echo $before $after | awk '{print $1-$2}'`
-   perCent=`echo $dropped $before | awk '{printf "%.2f", 100*'$dropped/$before'}'`
-   echo "$D $before - $after = $dropped -> % $perCent dropped"
-done
-
-#  ABC20 24692 - 24474 = 218 -> % 0.88 dropped
-#  RP11 86660 - 85903 = 757 -> % 0.87 dropped
-#  CTD 95853 - 94941 = 912 -> % 0.95 dropped
-#  CH17 105618 - 105060 = 558 -> % 0.53 dropped
-#  ABC21 182154 - 180973 = 1181 -> % 0.65 dropped
-#  ABC22 189939 - 188743 = 1196 -> % 0.63 dropped
-#  COR02 208263 - 206782 = 1481 -> % 0.71 dropped
-#  ABC18 325080 - 322904 = 2176 -> % 0.67 dropped
-#  ABC27 334178 - 331822 = 2356 -> % 0.71 dropped
-#  ABC24 398944 - 395776 = 3168 -> % 0.79 dropped
-#  ABC23 436965 - 433896 = 3069 -> % 0.70 dropped
-#  ABC16 452220 - 449101 = 3119 -> % 0.69 dropped
-#  COR2A 583008 - 578578 = 4430 -> % 0.76 dropped
-#  WI2 587165 - 582843 = 4322 -> % 0.74 dropped
-#  ABC7 649297 - 644071 = 5226 -> % 0.80 dropped
-#  ABC11 729962 - 724864 = 5098 -> % 0.70 dropped
-#  ABC9 755994 - 750648 = 5346 -> % 0.71 dropped
-#  ABC12 777816 - 771827 = 5989 -> % 0.77 dropped
-#  ABC10 787969 - 781331 = 6638 -> % 0.84 dropped
-#  ABC13 810822 - 803589 = 7233 -> % 0.89 dropped
-#  ABC14 845573 - 839126 = 6447 -> % 0.76 dropped
-#  ABC8 1204275 - 1192784 = 11491 -> % 0.95 dropped
-
-   # loading the median3X files
-for L in `cat libs.over20K.list`
-do
-    echo $L 1>&2
-    hgLoadBed -type=bed12 hg38 cloneEnd${L} \
-       separateLibs/${L}/hg38.median3X.bed \
-        > separateLibs/loadBed.${L}.log 2>&1
-done
-
-   # loading the dropped ends:
-   mkdir /hive/data/genomes/hg38/bed/cloneEnds/droppedTooBig
-   # link them to here
-   cat ../libs.over20K.list | while read L
-do
-  ln -s ../separateLibs/${L}/hg38.badMap.bed ${L}.badMap.bed
-done
-  # then load
-  hgLoadBed -type=bed12 hg38 cloneEndbadEnds *.badMap.bed
-
-    # construct multiple mapped ends:
-for L in `cat libs.over20K.list`
-do
-    cat separateLibs/${L}/hg38.median3X.bed
-done | sort -k4 > allEnds.bed
-
-    cut -f4 allEnds.bed | sort | uniq -c | sort -rn > allEnds.names.count.txt
-
-    awk '$1 > 1' allEnds.names.count.txt | awk '{print $2}' \
-       | sort > multiples.names.txt
-
-    join -t'	' -o "2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8,2.9,2.10,2.11,2.12" \
-       -2 4 multiples.names.txt allEnds.bed | sort -k1,1 -k2,2n \
-           > allEnds.multiple.locations.bed
-
-    hgLoadBed -type=bed12 hg38 cloneEndmultipleMaps \
-        allEnds.multiple.locations.bed > load.multipleMaps.log 2>&1
-
-    awk '$6 == "+"' allEnds.bed | sort -k1,1 -k2,2n \
-      | bedItemOverlapCount hg38 stdin > allEnds.forward.bedGraph
-
-    awk '$6 == "-"' allEnds.bed | sort -k1,1 -k2,2n \
-      | bedItemOverlapCount hg38 stdin > allEnds.reverse.bedGraph
-
-    bedGraphToBigWig allEnds.forward.bedGraph \
-       /hive/data/genomes/hg38/chrom.sizes \
-         cloneEndcoverageForward.bw
-
-    bedGraphToBigWig allEnds.reverse.bedGraph \
-       /hive/data/genomes/hg38/chrom.sizes \
-          cloneEndcoverageReverse.bw
-
-    mkdir /gbdb/hg38/bbi/cloneEnd
-    ln -s `pwd`/cloneEndcoverageForward.bw /gbdb/hg38/bbi/cloneEnd
-    ln -s `pwd`/cloneEndcoverageReverse.bw /gbdb/hg38/bbi/cloneEnd
-
-    hgBbiDbLink hg38 cloneEndcoverageForward \
-        /gbdb/hg38/bbi/cloneEnd/cloneEndcoverageForward.bw
-    hgBbiDbLink hg38 cloneEndcoverageReverse \
-        /gbdb/hg38/bbi/cloneEnd/cloneEndcoverageReverse.bw
-
-    ### Fixup the scores to indicate how many multiple mappings as mentioned
-    ### in the hg19 bacEnds description page: one mapping: score = 1000
-    ### multiple mappings: score = 1500/count
-    ### the sort | uniq -c | awk does this score calculation with the name
-    ###   in column 1
-    ### The join puts the existing table together with those scores
-    ### DONE - 2015-06-18 - Hiram
-
-    mkdir /hive/data/genomes/hg38/bed/cloneEnds/addCounts
-    cd /hive/data/genomes/hg38/bed/cloneEnds/addCounts
-    mkdir score withScore noScore withScore
-    for table in cloneEndABC10 cloneEndABC11 cloneEndABC12 cloneEndABC13 \
-cloneEndABC14 cloneEndABC16 cloneEndABC18 cloneEndABC20 cloneEndABC21 \
-cloneEndABC22 cloneEndABC23 cloneEndABC24 cloneEndABC27 cloneEndABC7 \
-cloneEndABC8 cloneEndABC9 cloneEndCH17 cloneEndCOR02 cloneEndCOR2A \
-cloneEndCTD cloneEndRP11 cloneEndWI2 cloneEndbadEnds cloneEndmultipleMaps
-do
-  hgsql -N -e "select name from $table;" hg38 | sort | uniq -c |
-      awk '{ if (1 == $1) {printf "%s\t1000\n", $2} else {printf "%s\t%d\n", $2, 1500/$1} }' \
-         | sort > score/hg38.$table.score.tab
-  hgsql -N -e "select * from $table order by name;" hg38 \
-      | sort -k5 > noScore/hg38.$table.tab
-  join -t'^I' -1 5 noScore/hg38.$table.tab score/hg38.$table.score.tab \
-  | awk '{printf "%d\t%s\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t%d\t%d\t%s\t%s\n", $2,$3,$4,$5,$1,$14,$7,$8,$9,$10,$11,$12,$13}' \
-    | sort -k2,2 -k3,3n > withScore/hg38.$table.withScore.tab
-  hgsql -e "delete from $table;" hg38
-  hgsql -e "load data local infile \"withScore/hg38.$table.withScore.tab\" into table $table;" hg38
-done
-
-##############################################################################
-# SIB Transcriptome (DONE 2014-08-27 Steve)
-
-    # Create working directory and download data from where Christian
-    # Iseli (christian.iseli@unil.ch) put it, and unpack.
-    mkdir -p /hive/data/genomes/hg38/bed/sibTranscriptome
-    cd /hive/data/genomes/hg38/bed/sibTranscriptome
-    wget --timestamping http://ludwig-sun1.unil.ch/~chris/HTr.gtf.gz
-    wget --timestamping http://ludwig-sun1.unil.ch/~chris/txg.tar.gz
-
-    tar -zxvf txg.tar.gz
-
-    zcat HTr.gtf.gz | ldHgGene hg38 sibGene stdin
-    # Reading stdin
-    # Read 208508 transcripts in 2824960 lines in 1 files
-    # 208508 groups 25 seqs 1 sources 2 feature types
-    # 208508 gene predictions
-
-    # Do a little data cleanup and transformation and load splice graphs
-    # into database.
-    sed 's/altGraphX/sibTxGraph/' ~/kent/src/hg/lib/altGraphX.sql > sibTxGraph.sql
-    cat txg/*.txg | txgToAgx stdin stdout | hgLoadBed -notItemRgb \
-      -sqlTable=sibTxGraph.sql hg38 sibTxGraph stdin
-    # Reading stdin
-    # Read 47817 elements of size 18 from stdin
-    # Sorted
-    # Creating table definition for sibTxGraph from sql: sibTxGraph.sql
-    # Saving bed.tab
-    # Loading hg38
-
-    # Create sibAltEvents track for analyzed alt-splices.
-    # Not on RR for hg18 and hg19, so do not push it out
-    cat txg/*.txg | txgAnalyze stdin /cluster/data/hg38/hg38.2bit sibAltEvents.bed
-    awk '$2 >= 0' sibAltEvents.bed | sort | uniq > foo.bed
-    hgLoadBed hg38 sibAltEvents foo.bed
-    # Reading foo.bed
-    # Read 452436 elements of size 6 from foo.bed
-    # Sorted
-    # Creating table definition for sibAltEvents, bedSize: 6
-    # Saving bed.tab
-    # Loading hg38
-
-    # push sibGene and sibTxGraph for hg38
-
-############################################################################
-# Orangutan Lastz run (DONE - 2014-05-27 - Hiram)
-    screen -S hg38PonAbe2      # use a screen to manage this longish running job
-    mkdir /hive/data/genomes/hg38/bed/lastzPonAbe2.2014-09-02
-    cd /hive/data/genomes/hg38/bed/lastzPonAbe2.2014-09-02
-
-    # always set the BLASTZ program so we know what version was used
-    cat << '_EOF_' > DEF
-# human vs chimp
-BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
-BLASTZ_O=600
-BLASTZ_E=150
-# maximum M allowed with lastz is only 254
-BLASTZ_M=254
-
-BLASTZ_T=2
-BLASTZ_Y=15000
-BLASTZ_K=4500
-BLASTZ_Q=/scratch/data/blastz/human_chimp.v2.q
-#    A    C    G    T
-#    90 -330 -236 -356
-#  -330  100 -318 -236
-#  -236 -318  100 -330
-#  -356 -236 -330   90
-
-# TARGET: Human Hg38
-SEQ1_DIR=/scratch/data/hg38/hg38.2bit
-SEQ1_LEN=/scratch/data/hg38/chrom.sizes
-SEQ1_CHUNK=20000000
-SEQ1_LAP=10000
-SEQ1_IN_CONTIGS=0
-
-# QUERY: Orangutan PonAbe2
-SEQ2_DIR=/hive/data/genomes/ponAbe2/ponAbe2.2bit
-SEQ2_LEN=/hive/data/genomes/ponAbe2/chrom.sizes
-SEQ2_CHUNK=10000000
-SEQ2_LAP=0
-SEQ2_LIMIT=100
-SEQ2_IN_CONTIGS=0
-
-BASE=/hive/data/genomes/hg38/bed/lastzPonAbe2.2014-09-02
-TMPDIR=/dev/shm
-'_EOF_'
-
-    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
-        -chainMinScore=5000 -chainLinearGap=medium \
-        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
-        -syntenicNet) > do.log 2>&1
-    # real    144m46.575s
-    cat fb.hg38.chainPonAbe2Link.txt
-    # 2719618310 bases of 3049335806 (89.187%) in intersection
-
-    # filter with doRecipBest.pl
-    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` \
-        hg38 ponAbe2) > rbest.log 2>&1
-    # real    60m1.060s
-    time (doRecipBest.pl -load -continue=load -workhorse=hgwdev \
-	-buildDir=`pwd` hg38 ponAbe2) > loadRBest.log 2>&1 &
-    # real    3m35.834s
-
-    cat fb.hg38.chainRBestPonAbe2Link.txt
-    # 2538296592 bases of 3049335806 (83.241%) in intersection
-
-    # running the swap
-    mkdir /hive/data/genomes/ponAbe2/bed/blastz.hg38.swap
-    cd /hive/data/genomes/ponAbe2/bed/blastz.hg38.swap
-    time (doBlastzChainNet.pl -verbose=2 \
-        -swap /hive/data/genomes/hg38/bed/lastzPonAbe2.2014-09-02/DEF \
-        -chainMinScore=5000 -chainLinearGap=medium \
-        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
-        -syntenicNet) > swap.log 2>&1
-    # real    102m27.866s
-    cat fb.ponAbe2.chainHg38Link.txt
-    #  2773568958 bases of 3093572278 (89.656%) in intersection
-
-    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` \
-        ponAbe2 hg38) > rbest.log 2>&1
-    # real    78m47.312s
-
-
-
-
-#############################################################################
-# Add chrX alts to par (DONE 2014-10-14 angie)
-# Thanks to Hiram for pointing out that intersecting chrX positions in
-# altLocations and par shows whether a chrX alt overlaps a PAR.
-    cd /hive/data/genomes/hg38/bed/par
-    hgsql hg38 -e 'select * from altLocations where chrom = "chrX"'
-#+-----+-------+------------+----------+---------------------+
-#| bin | chrom | chromStart | chromEnd | name                |
-#+-----+-------+------------+----------+---------------------+
-#|  73 | chrX  |     319337 |   601516 | chrX_KI270880v1_alt |
-#|  73 | chrX  |     326487 |   601516 | chrX_KI270913v1_alt |
-#| 149 | chrX  |   79965153 | 80097082 | chrX_KI270881v1_alt |
-#+-----+-------+------------+----------+---------------------+
-    hgsql hg38 -e 'select * from par where chrom = "chrX"'
-#+-----+-------+------------+-----------+------+
-#| bin | chrom | chromStart | chromEnd  | name |
-#+-----+-------+------------+-----------+------+
-#|   9 | chrX  |      10000 |   2781479 | PAR1 |
-#| 221 | chrX  |  155701382 | 156030895 | PAR2 |
-#+-----+-------+------------+-----------+------+
-    # chrX_KI270880v1_alt and chrX_KI270913v1_alt are entirely contained in PAR1;
-    # chrX_KI270881v1_alt is not in either PAR.
-    hgsql hg38 -e 'select chrom,size from chromInfo \
-                     where chrom in ("chrX_KI270880v1_alt", "chrX_KI270913v1_alt");'
-#+---------------------+--------+
-#| chrom               | size   |
-#+---------------------+--------+
-#| chrX_KI270880v1_alt | 284869 |
-#| chrX_KI270913v1_alt | 274009 |
-#+---------------------+--------+
-    # Process that into bed4 with name=PAR1:
-    hgsql hg38 -NBe 'select chrom, 0, size, "PAR1" from chromInfo \
-                       where chrom in ("chrX_KI270880v1_alt", "chrX_KI270913v1_alt");' \
-      >> hg38Par.bed4
-    hgLoadBed hg38 par hg38Par.bed4
-    checkTableCoords hg38 par
-
-
-#############################################################################
-# LASTZ Cow bosTau8 (DONE - 2014-10-15 - Steve)
-    mkdir /hive/data/genomes/hg38/bed/lastzBosTau8.2014-10-15
-    cd /hive/data/genomes/hg38/bed/lastzBosTau8.2014-10-15
-
-    cat << '_EOF_' > DEF
-# human vs cow
-# maximum M allowed with lastz is only 254
-BLASTZ_M=254
-
-# TARGET: Human hg38
-SEQ1_DIR=/scratch/data/hg38/hg38.2bit
-SEQ1_LEN=/scratch/data/hg38/chrom.sizes
-SEQ1_CHUNK=10000000
-SEQ1_LAP=10000
-
-# QUERY: Cow bosTau8
-SEQ2_DIR=/hive/data/genomes/bosTau8/bosTau8.2bit
-SEQ2_LEN=/hive/data/genomes/bosTau8/chrom.sizes
-SEQ2_CHUNK=10000000
-SEQ2_LAP=0
-
-
-BASE=/hive/data/genomes/hg38/bed/lastzBosTau8.2014-10-15
-TMPDIR=/scratch/tmp
-'_EOF_'
-    # << happy emacs
-    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
-        `pwd`/DEF \
-        -syntenicNet \
-        -noLoadChainSplit \
-        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
-        -chainMinScore=3000 -chainLinearGap=medium > do.log 2>&1
-    # real    602m37.523s
-    cat fb.hg38.chainBosTau8Link.txt
-    # 1401921010 bases of 3049335806 (45.975%) in intersection
-    # Create link
-    cd /hive/data/genomes/hg38/bed
-    ln -s  lastzBosTau8.2014-10-15 lastz.bosTau8
-
-    #   running the swap
-    mkdir /hive/data/genomes/bosTau8/bed/blastz.hg38.swap
-    cd /hive/data/genomes/bosTau8/bed/blastz.hg38.swap
-    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
-        /hive/data/genomes/hg38/bed/lastzBosTau8.2014-10-15/DEF \
-        -swap  -syntenicNet \
-        -noLoadChainSplit \
-        -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
-        -chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1
-    #   real     116m32.121s
-    cat fb.bosTau8.chainHg38Link.txt
-    #   1336307377 bases of 2649307237 (50.440%) in intersection
-    cd /hive/data/genomes/bosTau8/bed
-    ln -s blastz.hg38.swap lastz.hg38
-
-############################################################################
-# NCBI ClinVar (new version -DONE - 2014-11-08 - Max)
-# see hg19.txt
-#########################################################################
-
-########################################################################
-# CNV Developmental Delay track (2014-11-21 Steve)
-
-    mkdir /hive/data/genomes/hg38/bed/cnvDevDelay
-    cd /hive/data/genomes/hg38/bed/cnvDevDelay
-
-wget --timestamping 'ftp://ftp.ncbi.nlm.nih.gov/pub/dbVar/data/Homo_sapiens/by_study/nstd100_Coe_et_al_2014/gvf/nstd100_Coe_et_al_2014.GRCh38.remap.all.germline.ucsc.gvf.gz'
-wget --timestamping 'ftp://ftp.ncbi.nlm.nih.gov/pub/dbVar/data/Homo_sapiens/by_study/nstd54_Cooper_et_al_2011/gvf/nstd54_Cooper_et_al_2011.GRCh38.remap.all.germline.ucsc.gvf.gz'
-
-cp /kent/src/hg/utils/automation/gvfToBed8Attrs.pl .
-mv gvfToBed8Attrs.pl gvfToBed8AttrsCase.pl
-cp gvfToBed8AttrsCase.pl gvfToBed8AttrsControl100.pl
-cp gvfToBed8AttrsCase.pl gvfToBed8AttrsControl54.pl
-
-# made three local copies of Angie's gvf conversion script - one to include
-# only case individuals from nstd100, one to include only control individuals
-# from nstd100 and one to include only control individuals from nstd54
-
-# had to add an additional elsif statement to the nstd100 scripts to filter
-# based on sample_name field:
-
-#  } elsif ($tag eq "sample_name") {
-#    $sample_name = $val;
-#  }
-
-# added line 33/35 to each file:
-
-# next if ($sample_name eq "Unknown"); # keep only "case" individuals from nstd100
-# next if ($sample_name ne "Unknown"); # keep only "control" individuals from nstd100
-# next if ($phenotype ne "not_reported"); # keep only "control" individuals from nstd54
-
-zcat nstd100_Coe_et_al_2014.GRCh38.remap.all.germline.ucsc.gvf.gz | gvfToBed8AttrsCase.pl > cnvDevDelayAllCase.bed
-zcat nstd100_Coe_et_al_2014.GRCh38.remap.all.germline.ucsc.gvf.gz | gvfToBed8AttrsControl100.pl > cnvDevDelayAllControl.bed
-zcat nstd54_Cooper_et_al_2011.GRCh38.remap.all.germline.ucsc.gvf.gz | gvfToBed8AttrsControl54.pl >> cnvDevDelayAllControl.bed
-
-# GRCh38 data from dbVar had different naming scheme for alternate chromosomes
-# (e.g., chr1|NT_187515.1 instead of chr1_KI270762v1_alt), so needed to write
-# a script to substitute the correct UCSC names
-
-    cat << '_EOF_' > chromXref.pl
-#!/usr/bin/env perl
-
-use strict;
-use warnings;
-
-sub usage() {
-  printf STDERR "usage: ./chromXref.pl <infile> <outfile>\n"
-}
-
-my $argc = scalar(@ARGV);
-
-if ($argc != 2) {
-  usage;
-  exit 255;
-}
-
-open (file1, "<hg38.xref") or die "cannot read hg38.xref";
-
-my @accArray = ();
-my $i = 0;
-while (my $line = <file1>) {
-  chomp($line);
-  my ($type, $chr, $acc1, $acc2) = split('\t', $line);
-  ($type, undef) = split('-', $type);
-  ($acc1, my $version) = split('\.', $acc1);
-  if ($type eq "unlocalized") {
-    $type = "random";
-  }
-  my $ucscAcc = "_" . $acc1 . "v" . $version . "_" . $type;
-  $accArray[$i][0] = $ucscAcc;
-  $accArray[$i][1] = $acc2;
-  $i++;
-}
-
-close (file1);
-
-open (file2, "<$ARGV[0]") or die "cannot read $ARGV[0]";
-open (file3, ">$ARGV[1]") or die "cannot read $ARGV[1]";
-local $/;
-my $fileContents = <file2>;
-for ($i = 0; $i < scalar(@accArray); $i++) {
-  my $temp1 = $accArray[$i][1];
-  my $temp2 = $accArray[$i][0];
-  if ($fileContents =~ m/\|$temp1/) {
-    $fileContents =~ s/\|$temp1/$temp2/g;
-  }
-}
-
-print file3 $fileContents;
-close (file2);
-close (file3);
-'_EOF_'
-    # << happy emacs
-
-cp /hive/data/genomes/hg38/genbank/GCF_000001405.26.assembly.txt .
-
-cat GCF_000001405.26.assembly.txt | grep -v '^#\|assembled\|unplaced' | awk '{print $2 "\t" $3 "\t" $5 "\t" $7}' > hg38.xref
-
-chromXref.pl cnvDevDelayAllCase.bed cnvDevDelayAllCaseUcsc.bed
-chromXref.pl cnvDevDelayAllControl.bed cnvDevDelayAllControlUcsc.bed
-
-hgLoadBed -tab -renameSqlTable -sqlTable=$HOME/kent/src/hg/lib/bed8Attrs.sql \
-        -allowStartEqualEnd hg38 cnvDevDelayCase cnvDevDelayAllCaseUcsc.bed
-
-hgLoadBed -tab -renameSqlTable -sqlTable=$HOME/kent/src/hg/lib/bed8Attrs.sql \
-        -allowStartEqualEnd hg38 cnvDevDelayControl cnvDevDelayAllControlUcsc.bed
-
-    checkTableCoords hg38 cnvDevDelayCase
-    checkTableCoords hg38 cnvDevDelayControl
-
-
-#########################################################################
-# RETROFINDER RETROPOSED GENES ucscRetro track VERSION 9
-# (2015-01-12 - 2015-01-20, hartera, DONE)
-ssh hgwdev
-mkdir -p /hive/groups/gencode/pseudogenes/retroFinder/hg38.20150112
-cd /hive/groups/gencode/pseudogenes/retroFinder/hg38.20150112
-
-cat << '_EOF_' > DEF
-
-RETRO_OPTIONS="-verbose=4 -minAli=0.98 -nearTop=0.005 "
-VERSION=9
-RUNDATE="2015-01-12"
-DB=hg38
-SCORETHRESH=550
-GENOMENAME='Homo sapiens'
-GBDB=hg
-DATE=20150112
-RUNDIR=/hive/groups/gencode/pseudogenes/retroFinder/$DB.$DATE
-BINDIR=/hive/users/hartera/GencodeWG/retroFinder/branches/version2/bin
-KENTDIR=/cluster/home/hartera/kent
-KENTBINDIR=/cluster/home/hartera/bin/x86_64
-MRNABASE=/hive/data/genomes/$DB/bed/mrnaBlastz.$VERSION
-TMPMRNA=$RUNDIR/mrnaBlastz/$DB
-TMPEST=$RUNDIR/est/$DB
-USEALTSEQS=0
-EST=all_est
-SPLICED_EST=intronEst
-SPLIT_EST=0
-SPLIT_SPLICED_EST=0
-LASTZPROG=/cluster/bin/penn/x86_64/lastz
-SCRIPT=/hive/users/hartera/GencodeWG/retroFinder/branches/version2/src/pipeline
-GENOME=/hive/data/genomes
-RETRODIR=$GENOME/$DB/bed/retro
-BASE=$RUNDIR/retro
-OUTDIR=${BASE}/version${VERSION}/${DB}
-RESULT=$OUTDIR/result
-RESULTSPLIT=$OUTDIR/resultSplit
-LOG=$OUTDIR/log
-OUT=$OUTDIR/out
-OVERLAPDIR=$OUTDIR/run.o
-TABLE=ucscRetroInfo$VERSION
-ORTHOTABLE=ucscRetroOrtho$VERSION
-ALIGN=ucscRetroAli$VERSION
-LOCAL=/scratch/data/$DB
-TWOBIT=$GENOME/$DB/$DB.2bit
-RMSK=rmsk
-NET1=netMm10
-NET2=netCanFam3
-NET3=netRheMac3
-# these two nets determine which retros are classified as ancient,
-# use two farthest nets
-ANCIENT1=netMm10
-ANCIENT2=netCanFam3
-GENE1=knownGene
-GENE2=refGene
-GENE3=wgEncodeGencodeCompV19
-CLUSTER=ku
-SPECIES="hg38 mm10"
-ROOTDIR="/cluster/home/hartera/public_html/retro/hg38Jun14"
-WEBROOT=$ROOTDIR/retro.$VERSION
-WEBSERVER=http://hgwdev-hartera.soe.ucsc.edu
-SHUFFLEDIR=shuffle
-SHUFFLEROOT=$WEBROOT/$SHUFFLEDIR
-DUPDIR=dups
-DUPROOT=$WEBROOT/$DUPDIR
-AGEDIR=age
-AGEROOT=$WEBROOT/$AGEDIR
-EXPDIR=exp
-GENEPFAM=knownGene
-PFAM=knownToPfam
-PFAMIDFIELD=name
-PFAMDOMAIN=value
-ALTSPICE=
-#ALTSPLICE=sibTxGraph
-SPLITBYAGE=$SCRIPT/splitRetrosByAge
-PDB=proteins140122
-#ARRAY=gnfAtlas2
-#AFFYPROBE="affyU133A,affyGnf1h"
-#ARRAYMEDIAN=hgFixed.gnfHumanAtlas2Median
-#ARRAYRATIO=hgFixed.gnfHumanAtlas2AllRatio
-#ARRAYABS=hgFixed.gnfHumanAtlas2All
-#ARRAYEXP=hgFixed.gnfHumanAtlas2MedianExps
-#ARRAYEXPALL=hgFixed.gnfHumanAtlas2AllExps
-#ARRAYLOOKUP=knownToGnfAtlas2
-#ARRAYPSLS="/hive/data/genomes/hg19/bed/geneAtlas2/affyU133A.psl /hive/data/genomes/hg19/bed/geneAtlas2/affyGnf1h.psl"
-'_EOF_'
-    # << happy emacs
-chmod +x DEF
-
-mkdir -p /hive/data/genomes/hg38/bed/retro
-mkdir -p /hive/data/genomes/hg38/bed/mrnaBlastz.9
-mkdir -p /hive/groups/gencode/pseudogenes/retroFinder/hg38.20150112/mrnaBlastz
-cd /hive/groups/gencode/pseudogenes/retroFinder/hg38.20150112/mrnaBlastz
-cp ../DEF .
-
-# Create S1.len file
-# created from chrom.sizes without random chroms or chrM, there are many alt loci also
-# in hg38 that were not in hg19 so 285 chroms total.
-cat /hive/data/genomes/hg38/chrom.sizes | grep -v random \
-   | grep -v chrUn | grep -v chrM > S1.len
-cp S1.len /hive/data/genomes/hg38/bed/mrnaBlastz.9
-
-screen
-# Run steps 1 to 5 of RetroFinder pipeline from scripts in CCDS SVN source tree:
-retroFinder/branches/version2/src/pipeline/ucscStep1.sh DEF
-# check cluster jobs on ku
-retroFinder/branches/version2/src/pipeline/ucscStep2.sh DEF
-retroFinder/branches/version2/src/pipeline/ucscStep3.sh DEF
-#check cluster jobs on ku
-retroFinder/branches/version2/src/pipeline/ucscStep4.sh DEF
-#check cluster jobs on ku
-    # Load the track
-retroFinder/branches/version2/src/pipeline/ucscStep5.sh DEF
-cd /hive/groups/gencode/pseudogenes/retroFinder/hg38.20150112/retro/version9/hg38
-retroFinder/branches/version2/src/pipeline/filterMrna.sh
-retroFinder/branches/version2/src/pipeline/filterEst.sh
-# Check cluster jobs on ku
-retroFinder/branches/version2/src/pipeline/analyseExpress.sh
-# Check cluster jobs on ku
-#added ucscRetroAli9 to kent/src/hg/makeDb/human/hg38/trackDb.ra
-# copied
-# /hive/groups/gencode/pseudogenes/retroFinder/hg38.20150112/retro/version9/hg38/trackDb.retro
-# entry to kent/src/hg/makeDb/trackDb/human/hg38/trackDb.ra and edited it to
-# remove the full date and add:
-# dataVersion Jan. 2015
-# Scripts copied ucscRetroAli9.psl, ucscRetroInfo9.bed and ucscRetroCds9.tab
-# to /hive/data/genomes/hg38/bed/retro/
-
-##########
-# Make dbVar chrom to UCSC chrom lift file
-#  DONE braney 2/12/15
-cd /cluster/data/hg38/jkStuff
-sort /cluster/data/hg38/chrom.sizes > tmpChrom
-grep -v '^#\|assembled' /hive/data/genomes/hg38/genbank/GCF_000001405.26.assembly.txt | awk 'BEGIN {OFS="\t"} {print "chr" $3 "_" $5 "_" $2, "chr" $3 "|"$7}' | sed 's/-scaffold//' | sed 's/unlocalized/random/' | sed 's/_unplaced//' | sed 's/chrna/chrUn/g' | sed 's/\./v/'  | sort | join /dev/stdin tmpChrom | awk 'BEGIN {OFS="\t"} {print 0, $2, $3, $1, $3}'  > dbVar.lift
-awk 'BEGIN {OFS="\t"} {print 0, $1, $2, $1, $2}' /cluster/data/hg38/chrom.sizes >> dbVar.lift
-rm tmpChrom
-
-#########################################################################
-# UCSC to RefSeq name correspondence (DONE - 2015-04-13 - Hiram)
-
-    mkdir /hive/data/genomes/hg38/bed/ucscToRefSeq
-    cd /hive/data/genomes/hg38/bed/ucscToRefSeq
-
-    # columns 5 and 7 are the INSDC and RefSeq names
-
-    grep -v "^#" ../../genbank/GCF_000001405.26.assembly.txt \
-      | awk -F'\t' '{printf "%s\t%s\n", $5,$7}'  | sort > insdc.refSeq.tab
-
-    hgsql -N -e 'select name,chrom,chromStart,chromEnd from ucscToINSDC;' hg38 \
-      | sort > insdc.ucsc.tab
-
-    join insdc.ucsc.tab insdc.refSeq.tab | tr '[ ]' '[\t]' \
-       | cut -f2- > ucsc.refSeq.tab
-
-
-    export chrSize=`cut -f1 ucsc.refSeq.tab | awk '{print length($0)}' | sort -n | tail -1`
-    sed -e "s/21/$chrSize/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \
-       | sed -e 's/INSDC/RefSeq/g;' > ucscToRefSeq.sql
-    hgLoadSqlTab hg38 ucscToRefSeq ./ucscToRefSeq.sql ucsc.refSeq.tab
-
-    checkTableCoords  hg38 -table=ucscToRefSeq
-
-#########################################################################
-#CREATE MICROSAT TRACK (DONE - 2015-05-22 - Hiram)
-    ssh hgwdev
-    mkdir /cluster/data/hg38/bed/microsat
-    cd /cluster/data/hg38/bed/microsat
-
-    awk '($5==2 || $5==3) && $6 >= 15 && $8 == 100 && $9 == 0 {printf("%s\t%s\t%s\t%dx%s\n", $1, $2, $3, $6, $16);}' \
-       ../simpleRepeat/simpleRepeat.bed > microsat.bed
-
-    hgLoadBed hg38 microsat microsat.bed
-
-#############################################################################
-# ENCODE Regulatory tracks  (Kate & Chris)
-
-# see reg.txt
-#########################################################################
-# GWIPS-viz Ribo-seq - (DONE - 2016-02-05 - Steve)
-# contact Audrey Michel (audreymannion@gmail.com)
-# redmine #16765
-
-obtained bigWig file from shared Google drive
-https://drive.google.com/a/soe.ucsc.edu/folderview?id=0B_xvV_5tXzOGQ1h5NEh4bnhNTDg&usp=sharing_eid
-
-mkdir /hive/data/genomes/hg38/bed/gwipsvizRiboseq
-cp Global_RiboProElong.10_02_2016.bw /hive/data/genomes/hg38/bed/gwipsvizRiboseq/gwipsvizRiboseq.bw
-
-mkdir /gbdb/hg38/bbi/gwipsvizRiboseq
-cd /gbdb/hg38/bbi/gwipsvizRiboseq
-ln -s /hive/data/genomes/hg38/bed/gwipsvizRiboseq/gwipsvizRiboseq.bw gwipsvizRiboseq.bw
-
-hgsql hg38
-create table gwipsvizRiboseq select * from gc5BaseBw;
-update gwipsvizRiboseq set fileName="/gbdb/hg38/bbi/gwipsvizRiboseq/gwipsvizRiboseq.bw" where fileName="/gbdb/hg38/bbi/gc5BaseBw/gc5Base.bw";
-
-#########################################################################
-# COSMIC v81 DONE Chris Eisenhart 2017-05-11
-# Make a new COSMIC track for hg38
-mkdir /hive/data/outside/cosmic/hg38/v81
-cd /hive/data/outside/cosmic/hg38/v81
-
-# Get the new data
-sftp ceisenha@ucsc.edu@sftp-cancer.sanger.ac.uk
-# Login to SFTP server then run these commands
-get /files/grch38/cosmic/v81/CosmicMutantExport.tsv.gz
-
-# Remove the 'NS' fields, search for the \t after to exclude the E'NS'ST transcripts.
-zcat CosmicMutantExport.tsv.gz | sed 's/NS\t/\t/g' > cosMut.tsv
-
-# Use a script to convert to bed format.
-cosmicToBed cosMut.tsv cosMut.bed
-# This many lines were skipped, 131597 for not having genomic coordinate
-
-# Sort and convert to big bed using the .as file.
-sort -k1,1 -k2,2n cosMut.bed > sCosMut.bed
-bedToBigBed -type=bed4+31 -as=cosmicNew.as sCosMut.bed /hive/data/genomes/hg38/chrom.sizes cosMutHg38V81.bb -tab -extraIndex=name,cosmLabel
-
-# Link it up so the outside world can see it.
-cd /gbdb/hg38/cosmic/
-ln -s /hive/data/outside/cosmic/hg38/v81/cosMutHg38V81.bb .
-#########################################################################
-# hoffmanMappability hub import (2 super tracks) DONE Chris Eisenhart 2017-05-16
-mkdir /hive/data/outside/hoffmanMappability/hg38
-cd /hive/data/outside/hoffmanMappability/hg38
-wget https://www.pmgenomics.ca/hoffmanlab/proj/bismap/trackhub/hg38/trackDb.txt
-# Get the trackDb file
-importTrackHub trackDb.txt hofMap.ra /gbdb/hg38/hoffmanMappability/ --trackDbPath=$HOME/kent/src/hg/makeDb/trackDb/human/hg38/ --test
-# Check that the commands are what we want, then run for real
-importTrackHub trackDb.txt hofMap.ra /gbdb/hg38/hoffmanMappability/ --trackDbPath=$HOME/kent/src/hg/makeDb/trackDb/human/hg38/
-# View the .ra file to make sure things are ok, here changed the groups to map,
-# added the alpha tags, and removed the 'show' from 'superTrack on show'
-cp hofMap.ra ~/kent/src/hg/makeDb/trackDb/human/hg38
-# Include hofMap.ra in the trackDb.ra file
-
-# the importTrackHub failed on redirection, fetch all the files manually:
-# 2017-09-15 - Hiram
-
-cd /hive/data/outside/hoffmanMappability/hg38
-
-grep bigDataUrl trackDb.txt | awk '{print $NF}' | sed -e 's#https://www.pmgenomics.ca/hoffmanlab/proj/bismap/trackhub/hg38/##;' | while read F
-do
-  echo $F
-  rm -f $F
-  wget --timestamping "https://www.pmgenomics.ca/hoffmanlab/proj/bismap/trackhub/hg38/${F}"
-done
-    # real    29m40.429s
-
-#########################################################################
-# tcgaExpr super track Chris Eisenhart, DONE, 2017-05-17
-# tcgaTranscExpr
-# TCGA transcript level expression barChart track, from TOIL pipeline recompute (John Vivian)
-# biorxiv.org/content/biorxiv/early/2016/07/07/062497.full.pdf
-mkdir /hive/data/outside/tcgaBarcharts/
-mkdir /hive/data/outside/tcgaBarcharts/transcripts
-cd /hive/data/outside/tcgaBarcharts/transcripts
-
-# Get all the meta data
-cp ~max/projects/cirm/datasetPages/tcgaGtex/tcgaMeta.tab .
-# Cut out the meta data the script wants, sample name and group.
-cut -f 1,5 tcgaMeta.tab | sed 's/ /_/g' > tcgaLargeSamples.tsv
-
-# Get and clean the matrix
-cp ~max/projects/cirm/datasetPages/tcgaGtex/tcga.tpm.tab .
-# Clean up the transcript names (remove the .#)
-cut -f 1 tcga.tpm.tab | cut -f 1 -d "." > tcgaTranscripts.txt
-cut -f 2- tcga.tpm.tab > tcgaTpmValues.tsv
-paste tcgaTranscripts.txt tcgaTpmValues.tsv > tcgaMatrix.tsv
-
-# Build a coordinate map
-hgsql hg38 -e "select * from ensGene" | cut -f 2- | sort > ensGene
-hgsql hg38 -e "select * from ensemblToGeneName" | sort >  ensemblToGeneName
-join ensGene ensemblToGeneName | awk '{print $2"\t"$4"\t"$5"\t"$1"\t0\t"$3"\t"$16}' > coord.bed
-
-# Use the meta data, matrix, and coordinate map to generate a barchart bed
-time expMatrixToBarchartBed tcgaLargeSamples.tsv tcgaMatrix.tsv coord.bed tcgaTransExp.bed --groupOrder tcgaGroupOrder.txt
-
-# NOTE: Use the header line of the bed file to populate the barChartBars field in the trackDb.
-# The order of the labels in the barChartBars field should match the order of the labels in the
-# expScores column in the bed file header.
-
-# Sort and convert into a bigBed file.
-sort -k1,1 -k2,2n tcgaTransExp.bed > sortedTcgaTransExp.bed
-bedToBigBed -type=bed6+5 -as=$HOME/kent/src/hg/lib/barChartTranscExp.as sortedTcgaTransExp.bed /hive/data/genomes/hg38/chrom.sizes tcgaTransExp.bb
-
-# Link the files into gbdb
-cd /gbdb/hgFixed/human/expMatrix
-ln -s /hive/data/outside/tcgaBarcharts/transcripts/tcgaLargeSamples.tsv tcgaLargeSamples.tab
-ln -s /hive/data/outside/tcgaBarcharts/transcripts/tcgaMatrix.tsv tcgaMatrix.tab
-ln -s /hive/data/outside/tcgaBarcharts/transcripts/tcgaTransExp.bb .
-
-###########3
-# Reload bigBed with a schema that will be shared with genes track, to support
-# configuration as subtracks in a composite
-# (2007-08-30 kate)
-cd /hive/data/outside/tcgaBarcharts/transcripts
-bedToBigBed -type=bed6+5 -as=$HOME/kent/src/hg/lib/barChartGeneExpr.as sortedTcgaTransExp.bed /hive/data/genomes/hg38/chrom.sizes tcgaTranscExpr.hg38.bb
-mkdir /gbdb/hg38/tcga
-ln -s `pwd`/tcgaTranscExpr.hg38.bb /gbdb/hg38/tcga/tcgaTranscExpr.bb
-
-# TCGA gene level expression barChart track, from TOIL pipeline recompute (John Vivian)
-# tcgaGeneExpr
-mkdir ../genes
-cd ../genes
-
-# Get the gene matrix.
-cp ~max/projects/cirm/datasetPages/tcgaGtex/tcga.geneTpm.tab .
-
-# Make a coordinate file, the genes in gtexGeneModelV6 have .# versions which are
-# removed with the temp fils.
-hgsql hg38 -e "select * from hg38.gtexGeneModelV6" | awk '{print $3"\t"$5"\t"$6"\t"$2"\t0\t"$4"\t"$2}' > coord6+1.bed.temp
-cut -f 4 coord6+1.bed.temp | cut -f 1 -d "." > foo
-cut -f 1-3 coord6+1.bed.temp > foo2
-paste foo2 foo > foo3
-cut -f 5- coord6+1.bed.temp > foo4
-paste foo3 foo4 > coord6+1.bed
-# This bed file didn't have the right gene names (ENS rather than Hugo), fix it.
-hgsql hg38 -e "select * From knownCanonical" > foo
-wc foo
-cut -f 6 foo | cut -f 1 -d "."
-cut -f 6 foo | cut -f 1 -d "." > foo2
-head foo
-cut -f 1-3 foo > foo3
-paste foo2 foo3 > foo4
-cut -f 4- coord6+1.bed > foo5
-join <(sort foo5) <(sort foo4) | awk '{print $5"\t"$6"\t"$7"\t"$1"\t0\t"$3"\t"$4}' > coord6+1.3.bed
-
-# Generate the bed file, can use the same transcript file
-time expMatrixToBarchartBed ../transcripts/tcgaLargeSamples.tsv tcga.geneTpm.tab coord6+1.3.bed tcgaGeneExp.bed --groupOrder=../transcripts/tcgaGroupOrder.txt
-
-# Convert to big bed
-sort -k1,1 -k2,2n tcgaGeneExp.bed > sortedTcgaGeneExp.bed
-bedToBigBed -type=bed6+5 -as=$HOME/kent/src/hg/lib/barChartGeneExp.as sortedTcgaGeneExp.bed /hive/data/genomes/hg38/chrom.sizes tcgaGeneExp.bb
-
-# Link to gbdb
-cd /gbdb/hgFixed/human/expMatrix
-ln -s /hive/data/outside/tcgaBarcharts/genes/tcgaGeneExp.bb .
-ln -s /hive/data/outside/tcgaBarcharts/genes/tcga.geneTpm.tab tcgaGeneMatrix.tab
-
-###########3
-# Reload bigBed with a schema that will be shared with transcript track, to support
-# configuration as subtracks in a composite
-# Apparently Chris actually loaded the #3 file (added gene names, adjusted end coord apparently)
-# (2007-08-30 kate)
-cd /hive/data/outside/tcgaBarcharts/genes
-bedToBigBed -type=bed6+5 -as=$HOME/kent/src/hg/lib/barChartGeneExpr.as sortedTcgaGeneExp3.bed /hive/data/genomes/hg38/chrom.sizes tcgaGeneExpr.hg38.bb
-mkdir /gbdb/hg38/tcga
-ln -s `pwd`/tcgaGeneExpr.hg38.bb /gbdb/hg38/tcga/tcgaGeneExpr.bb
-
-#########################################################################
-# gtexTransExp Chris Eisenhart, done, 2017-05-23
-# TCGA transcript level RNA-seq, from TOIL pipeline recompute (John Vivian)
-# biorxiv.org/content/biorxiv/early/2016/07/07/062497.full.pdf
-mkdir /hive/data/outside/gtex/barChartTrack
-cd /hive/data/outside/gtex/barChartTrack
-
-# Seems John included some TCGA data (CML) in the GTEx matrix and samples, the cleaning steps remove this.
-# Make a clean sample file
-cat ../johnVivianRecompute/sraToSample.txt | sed 's/ male /\tmale\t/g' | sed 's/ female /\tfemale\t/g' | cut -f 3 | sed 's/ - /-/g' | sed 's/ /_/g' > gtexSampleGroups.txt
-cat ../johnVivianRecompute/sraToSample.txt | cut -f 1 -d " " > gtexSampleNames.txt
-paste gtexSampleNames.txt gtexSampleGroups.txt > gtexSamples.txt
-grep -v '(CML)' gtexSamples.tsv > cleanGtexSamples.tsv
-
-# Make a clean matrix
-cut -f 1 ../johnVivianRecompute/gtex.tpm.tab | cut -f 1 -d "." > gtexTranscripts.txt
-cut -f 2- ../johnVivianRecompute/gtex.tpm.tab > gtexTpmValues.tsv
-paste gtexTranscripts.txt gtexTpmValues.tsv > gtexMatrix.tsv
-rowsToCols gtexMatrix.tsv tspsdGtexMatrix.tsv
-sort tspsdGtexMatrix.tsv > sortedTspsdGtexMatrix.tsv
-grep -v '(CML)' gtexSamples.tsv | cut -f 1 | sed 's/Run_s/#transcript/g' | sort > sortedCleanGtexSamples.tsv
-join sortedCleanGtexSamples.tsv sortedTspsdGtexMatrix.tsv > cleanTspsdGtexMatrix.tsv
-rowsToCols cleanTspsdGtexMatrix.tsv cleanGtexMatrix.tsv
-
-# Build a coordinate map
-hgsql hg38 -e "select * from ensGene" | cut -f 2- | sort > ensGene
-hgsql hg38 -e "select * from ensemblToGeneName" | sort >  ensemblToGeneName
-join ensGene ensemblToGeneName | awk '{print $2"\t"$4"\t"$5"\t"$1"\t0\t"$3"\t"$16}' > coord.bed
-# NOTE: CHRISL10-05-2021 - the above ensGene steps weren't actually done or the files were removed,
-# there was a coord.tsv which I used instead so the below re-run could work
-tawk '{print $1,$2,$3,$4,0,$5,$6}' coord.tsv > coord.bed
-# END CHRISL10-05-2021 NOTE)
-
-# Get the gtex ordering
-hgsql hgFixed -e "select * from gtexTissue" | cut -f 3 | sed 's/ - /-/g' | sed 's/ /_/g' | sed '1D' > gtexGroupOrder.txt
-
-# Use the meta data, matrix, and coordinate map to generate a barchart bed
-# NOTE: CHRISL10-05-2021 - re-ran this step to fix float parsing bug:
-time expMatrixToBarchartBed cleanGtexSamples.tsv cleanGtexMatrix.tsv coord.bed gtexTransExp.bed --groupOrderFile gtexGroupOrder.txt
-
-# NOTE: Use the header line of the bed file to populate the barChartBars field in the trackDb.
-# The order of the labels in the barChartBars field should match the order of the labels in the
-# expScores column in the bed file header.
-
-# Sort and convert into a bigBed file.
-sort -k1,1 -k2,2n gtexTransExp.bed > sortedGtexTransExp.bed
-# NOTE: CHRISL10-05-2021 - re-ran bedToBigBed step with correct file names
-bedToBigBed -as=$HOME/kent/src/hg/lib/barChartBed.as -type=bed6+5 sortedGtexTransExp.bed /hive/data/genomes/hg38/chrom.sizes gtexTranscExpr.bb
-
-# Link the files into gbdb
-cd /gbdb/hgFixed/human/expMatrix
-ln -s /hive/data/outside/gtex/barChartTrack/cleanGtexSamples.tsv cleanGtexSamples.tab
-ln -s /hive/data/outside/gtex/barChartTrack/cleanGtexMatrix.tsv cleanGtexMatris.tab
-
-# (2007-08-30 kate)
-cd /gbdb/hg38/gtex
-ln -s /hive/data/outside/gtex/barChartTrack/gtexTranscExpr.bb .
-
-#########################################################################
-# LASTZ human/hg38 vs. Zebrafish /danRer11
-#	(DONE - 2017-06-12 - Chris)
-
-    mkdir /hive/data/genomes/hg38/bed/lastzDanRer11.2017-06-12
-    cd /hive/data/genomes/hg38/bed/lastzDanRer11.2017-06-12
-
-    printf '# human vs zebrafish danRer11
-BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.66/bin/lastz
-BLASTZ_M=254
-
-# TARGET: human hg38
-SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit
-SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes
-SEQ1_CTGDIR=/hive/data/genomes/hg38/hg38.contigs.2bit
-SEQ1_CTGLEN=/hive/data/genomes/hg38/hg38.contigs.chrom.sizes
-SEQ1_LIFT=/hive/data/genomes/hg38/jkStuff/hg38.contigs.lift
-SEQ1_CHUNK=40000000
-SEQ1_LIMIT=20
-SEQ1_LAP=10000
-
-# QUERY: zebrafish danRer11
-SEQ2_DIR=/hive/data/genomes/danRer11/danRer11.2bit
-SEQ2_LEN=/hive/data/genomes/danRer11/chrom.sizes
-SEQ2_CHUNK=20000000
-SEQ2_LIMIT=200
-SEQ2_LAP=0
-
-BASE=/hive/data/genomes/hg38/bed/lastzDanRer11.2017-06-12
-TMPDIR=/dev/shm
-' > DEF
-
-    time (doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
-        -chainMinScore=3000 -chainLinearGap=medium \
-          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
-            -noDbNameCheck -syntenicNet) > do.log 2>&1
-    # real    3327m39.074s
-
-	cat fb.hg38.chainDanRer11Link.txt
-    # 41036733 bases of 3049335806 (1.346%) in intersection
-
-	973293331 bases of 3049335806 (31.918%) in intersection
-
-    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` hg38 danRer11) \
-       > rbest.log 2>&1 &
-
-    # and for the swap:
-    mkdir /hive/data/genomes/danRer11/bed/blastz.hg38.swap
-    cd /hive/data/genomes/danRer11/bed/blastz.hg38.swap
-
-    time (doBlastzChainNet.pl -verbose=2 \
-      /hive/data/genomes/hg38/bed/lastzDanRer11.2017-06-12/DEF \
-        -swap -chainMinScore=3000 -chainLinearGap=medium \
-          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
-            -noDbNameCheck -syntenicNet) > swap.log 2>&1
-	#  real	39m24.916s
-
-    cat fb.danRer11.chainHg38Link.txt
-    # 47869194 bases of 1674677181 (2.858%) in intersection
-
-    time (doRecipBest.pl -workhorse=hgwdev -buildDir=`pwd` danRer11 hg38) \
-       > rbest.log 2>&1 &
-    # real	638m45.337s
-_EOF_
-#########################################################################
-# refSeqFuncElems NCBI refSeq functional elements, REDONE 2017-11-29 Angie
-# previously done 2017-08-01 by Chris E
-
-mkdir /hive/data/genomes/hg38/bed/refSeqFuncElems.2017-11-29
-cd /hive/data/genomes/hg38/bed/refSeqFuncElems.2017-11-29
-
-# NOTE FOR NEXT TIME: instead of using interim GFF, in the future these annotations might be
-# folded into the same main release GFF3 from which the ncbiRefSeq* tables are extracted by
-# doNcbiRefSeq.pl.
-wget ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/GFF_interim/interim_GRCh38.p11_top_level_2017-06-27.gff3.gz
-
-# Get mapping of RefSeq NC_* chromosome accs (and NT_*, NW_*) to hg38 chrom names
-hgsql hg38 -NBe 'select alias, chrom from chromAlias where source = "refseq" order by alias' \
-> refSeqToChrom.tab
-cut -f 2 refSeqToChrom.tab | sed -e 's/^/^/' > chrom.tab
-
-# Use Terence Murphy's list of feature types (and the multi-type attribute regulatory_class)
-# to identify Functional Elements and swap in hg38 chrom names.
-# Use subColumn -miss so it doesn't quit when it sees a patch contig that doesn't map to an
-# hg38 chrom.  Use grep -f chrom.tab to filter out patch contig annotations.
-zcat interim_GRCh38.p11_top_level_2017-06-27.gff3.gz \
-| grep -P "(\t(CAAT_signal|GC_rich_promoter_region|TATA_box|enhancer|insulator|locus_control_region|mobile_genetic_element|origin_of_replication|promoter|protein_binding_site|recombination_feature|regulatory_region|repeat_region|sequence_feature|sequence_secondary_structure|silencer|stem_loop)\t|regulatory_class=)" \
-| subColumn -miss=/dev/null 1 stdin refSeqToChrom.tab stdout \
-| grep -f chrom.tab > funcElems.gff
-wc -l funcElems.gff
-#5756 funcElems.gff
-
-# Transform GFF to BED+
-~/kent/src/hg/utils/automation/parseRefSeqFuncElems funcElems.gff /dev/stdout \
-| sort -k1,1 -k2n,2n > refSeqFuncElems.bed
-wc -l refSeqFuncElems.bed
-#5756 refSeqFuncElems.bed
-
-# Make bigBed and link from /gbdb
-bedToBigBed -tab -type=bed9+7 -as=$HOME/kent/src/hg/lib/refSeqFuncElems.as \
-  refSeqFuncElems.bed /hive/data/genomes/hg38/chrom.sizes refSeqFuncElems.bb
-rm -f /gbdb/hg38/ncbiRefSeq/refSeqFuncElems.bb
-ln -s `pwd`/refSeqFuncElems.bb /gbdb/hg38/ncbiRefSeq/
-
-###################################################################
-# cosmicRegions (DONE 2017-08-03 Chris)
-# Make a new COSMIC track for hg38 v82
-mkdir /hive/data/outside/cosmic/hg38/v82
-cd /hive/data/outside/cosmic/hg38/v82
-
-# Get the new data
-sftp ceisenha@ucsc.edu@sftp-cancer.sanger.ac.uk
-# Login to SFTP server then run these commands
-get /files/grch38/cosmic/v82/CosmicMutantExport.tsv.gz
-
-# Remove the 'NS' fields, search for the \t after to exclude the E'NS'ST transcripts.
-zcat CosmicMutantExport.tsv.gz | sed 's/NS\t/\t/g' > cosMut.tsv
-
-# Use a script to convert to bed format.
-cosmicToBed cosMut.tsv cosMut.bed
-# This many lines were skipped, 134601 for not having genomic coordinate
-
-# Sort and convert to big bed using the .as file.
-sort -k1,1 -k2,2n cosMut.bed > sCosMut.bed
-bedToBigBed -type=bed8+31 -as=cosmicNew.as sCosMut.bed /hive/data/genomes/hg38/chrom.sizes cosMutHg38V82.bb -tab -extraIndex=name,cosmLabel
-
-
-# Link it up so the outside world can see it.
-cd /gbdb/hg38/cosmic/
-ln -s /hive/data/outside/cosmic/hg38/v82/cosMutHg38V82.bb .
-
-#########################################################################
-# RepeatMasker Visualization track update (DONE - 2018-05-04 - ChrisL)
-    screen -S rmskJoined.2018-05-04
-    mkdir /hive/data/genomes/hg38/bed/rmskJoined.2018-05-04
-    cd /hive/data/genomes/hg38/bed/rmskJoined.2018-05-04
-
-    ln -s ../repeatMasker/hg38.sorted.fa.out .
-    ln -s ../repeatMasker/hg38.fa.align.gz .
-
-    # this script points to the most recent RepeatMasker version:
-    time (/scratch/data/RepeatMasker/util/rmToUCSCTables.pl \
-        -out hg38.sorted.fa.out -align hg38.fa.align.gz) > do.log 2>&1 &
-
-    # no differences, forgot to remake rmsk files
-    # so instead remake the rmsk track and try again
-    mkdir /hive/data/genomes/hg38/bed/repeatMasker.2018-05-04
-    cd /hive/data/genomes/hg38/bed/repeatMasker.2018-05-04
-
-    # remake the sorted.fa.out and fa.align.gz, stop after masking
-    # so rmsk table isn't overwritten
-    time (doRepeatMasker.pl -stop=mask -useHMMER -bigClusterHub=ku \
-       -workhorse=hgwdev -dbHost=hgwdev -buildDir=`pwd` hg38) > mask.log 2>&1 &
-    # RepeatMasker bug?: Undefined id, line 1440295 of input:
-    #    10  26.1  0.0  0.0  chr13     114292339 114292382   (71946) C  L1P4           LINE/L1               (17) 6149   6106
-    # RepeatMasker bug?: Undefined id, line 3529762 of input:
-    #   992   2.3  0.5  0.0  chr3      180461254 180462048 (17833511) C  L1PA3          LINE/L1                (3) 6152   5354
-    # RepeatMasker bug?: Undefined id, line 3529763 of input:
-    #  1153   3.2  0.2  0.0  chr3      180462043 180463006 (17832553) +  L1PA3          LINE/L1               4392 5357  (789)
-    # RepeatMasker bug?: Undefined id, line 5303571 of input:
-    #   220  22.5  0.0 17.7  chr9      105798076 105799127 (32595590) C  SATR2          Satellite              (4)  866      1
-    # real    643m17.617s
-
-    # get rid of the missing id items:
-    grep -v "114292339 114292382\|180461254 180462048\|180462043 180463006\|105798076 105799127" \
-        hg38.fa.out > clean.hg38.fa.out
-    mv clean.hg38.fa.out hg38.fa.out
-
-    # finish the last step of doCat.csh:
-    /cluster/bin/scripts/extractNestedRepeats.pl hg38.fa.out | sort -k1,1 -k2,2n > hg38.nestedRepeats.bed
-
-    cd /hive/data/genomes/hg38/bed/rmskJoined.2018-05-04
-
-    rm hg38.sorted.fa.out
-    rm hg38.fa.align.gz
-    rm *.tsv
-    ln -s ../repeatMasker.2018-05-04/hg38.sorted.fa.out .
-    ln -s ../repeatMasker.2018-05-04/hg38.fa.align.gz .
-
-    # and then re-run
-    time (/scratch/data/RepeatMasker/util/rmToUCSCTables.pl \
-        -out hg38.sorted.fa.out -align hg38.fa.align.gz) > rerun.log 2>&1 &
-    # real    141m7.268s
-
-    # confirm the counts are different from the previous version:
-    # wc -l ../rmskJoined/hg38.fa.align.tsv ../rmskJoined/hg38.sorted.fa.join.bed ../rmskJoined/hg38.sorted.fa.out.tsv
-   7203858 ../rmskJoined/hg38.fa.align.tsv
-   4607727 ../rmskJoined/hg38.sorted.fa.join.bed
-   5520118 ../rmskJoined/hg38.sorted.fa.out.tsv
-  17331703 total
-    # wc -l *.tsv
-   7227245 hg38.fa.align.tsv
-   4828114 hg38.sorted.fa.join.tsv
-   5916189 hg38.sorted.fa.out.tsv
-  17971548 total
-
-    hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/rmskJoined.sql \
-        -renameSqlTable -verbose=4 -tab \
-            -type=bed4+9 -as=$HOME/kent/src/hg/lib/rmskJoined.as hg38 \
-                rmskJoinedCurrent hg38.sorted.fa.join.tsv \
-                    > loadJoined.log 2>&1
-
-    hgLoadSqlTab hg38 rmskAlignCurrent \
-        /cluster/home/chmalee/kent/src/hg/lib/rmskAlign.sql \
-            hg38.fa.align.tsv > loadAlign.log 2>&1
-
-    hgLoadOutJoined -verbose=2 -table=rmskOutCurrent hg38 hg38.sorted.fa.out > loadOut.log 2>&1
-
-    featureBits -countGaps hg38 rmskJoinedCurrent
-    # 2796899855 bases of 3209286105 (87.150%) in intersection
-#########################################################################
-# Hi-C Visualization based on Krietenstein 2019 (DONE - 2019-10-07 - Jonathan)
-mkdir -p /hive/data/genomes/hg38/bed/hic
-cd /hive/data/genomes/hg38/bed/hic
-
-# Files are located on 4D Nucleome (data.4dnucleome.org).  The URL for the paper on that
-# site is https://data.4dnucleome.org/publications/b13590b2-a341-4e5e-ad5e-72e233b32e9d/.
-# The four file IDs downloaded below are for contact matrix .hic files created for
-# different cell-line/protocol combinations
-wget 'https://data.4dnucleome.org/files-processed/4DNFI2TK7L2F/@@download/4DNFI2TK7L2F.hic' # H1-hESC Micro-C XL
-wget 'https://data.4dnucleome.org/files-processed/4DNFIQYQWPF5/@@download/4DNFIQYQWPF5.hic' # H1-hESC in situ
-wget 'https://data.4dnucleome.org/files-processed/4DNFI18Q799K/@@download/4DNFI18Q799K.hic' # HFFc6 Micro-C XL
-wget 'https://data.4dnucleome.org/files-processed/4DNFIFLJLIS5/@@download/4DNFIFLJLIS5.hic' # HFFc6 in situ
-
-printf "All files were downloaded from the 4D Nucleome Data Portal at data.4dnucleome.org.
-These are processed contact matrices from Krietenstein et al. (2019) Ultrastructural details
-of mammalian chromosome architecture. (https://www.biorxiv.org/content/10.1101/639922v1).
-
-4DNFI2TK7L2F.hic - Micro-C XL data set on H1-hESC
-4DNFIQYQWPF5.hic - in situ Hi-C data set on H1-hESC
-4DNFI18Q799K.hic - Micro-C  XL data set on HFFc6
-4DNFIFLJLIS5.hic - in situ Hi-C data set on HFFc6" > README.txt
-
-mkdir -p /gbdb/hg38/bbi/hic
-cd /gbdb/hg38/bbi/hic
-ln -s /hive/data/genomes/hg38/bed/hic/* .
-
-
-#########################################################################
-# LASTZ Self/hg38 (DONE 2020-02-11 - Angie)
-    # RM #24695
-    # Re-run with updated process to include pslDropOverlap .
-    # Use "contigs" from previous run lastzSelf.2014-01-25/hg38.self.2bit
-
-    screen -S hg38Self -t hg38Self
-    mkdir /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27
-    cd /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27
-    cat << _EOF_ > DEF
-# human vs human with mouse defaults
-BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.00/bin/lastz
-
-# TARGET: Human hg38
-SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit
-SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes
-SEQ1_CTGDIR=/hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/hg38.self.2bit
-SEQ1_CTGLEN=/hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/hg38.self.chrom.sizes
-SEQ1_LIFT=/hive/data/genomes/hg38/jkStuff/hg38.contigs.lift
-SEQ1_CHUNK=20000000
-SEQ1_LAP=10000
-
-# QUERY: Human hg38
-SEQ2_DIR=/hive/data/genomes/hg38/hg38.2bit
-SEQ2_LEN=/hive/data/genomes/hg38/chrom.sizes
-SEQ2_CTGDIR=/hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/hg38.self.2bit
-SEQ2_CTGLEN=/hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/hg38.self.chrom.sizes
-SEQ2_LIFT=/hive/data/genomes/hg38/jkStuff/hg38.contigs.lift
-SEQ2_CHUNK=20000000
-SEQ2_LAP=0
-
-BASE=/hive/data/genomes/hg38/bed/lastzSelf.2020-01-27
-TMPDIR=/dev/shm
-_EOF_
-
-    # NOTE FOR NEXT TIME: use -chainMinScore=10000 (at least), not 3000
-
-    ~/kent/src/hg/utils/automation/doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
-        -chainMinScore=3000 -chainLinearGap=medium -syntenicNet \
-        -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
-        -stop=net >& do.log &
-    tail -f do.log
-
-
-    # After two days, 4 jobs are running, one of which (part014.lst vs itself) crashed with
-    # out-of-mem error.  After 3 days, 3 jobs completed but part014.lst runs lastz out of mem.
-    # Split part014.lst up into components, run on hgwdev (more mem).
-    mkdir /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/run.blastz/part014
-    cd /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/run.blastz/part014
-    mkdir psl
-    cp /dev/null jobList
-    for t in $(cat ../tParts/part014.lst); do
-      tBase=$(basename $t)
-      for q in $(cat ../tParts/part014.lst); do
-        qBase=$(basename $q)
-        echo "$HOME/kent/src/hg/utils/automation/blastz-run-ucsc -outFormat psl -dropSelf $t $q ../../DEF {check out exists psl/${tBase}_${qBase}.psl }" >> jobList
-      done
-    done
-    para create jobList
-    para try, check, push, etc,
-    # 94 of the jobs ran for 12s or less.  The other 6 are chr{X_Y}_00 vs. self & each other,
-    # chr13_16 vs self and chr16_03 vs self.  All but chr16_03 vs self completed in < 6 minutes.
-#Completed: 99 of 100 jobs
-#Crashed: 1 jobs
-#CPU time in finished jobs:       1559s      25.98m     0.43h    0.02d  0.000 y
-#IO & Wait Time:                   248s       4.14m     0.07h    0.00d  0.000 y
-#Average job time:                  18s       0.30m     0.01h    0.00d
-#Longest finished job:             321s       5.35m     0.09h    0.00d
-#Submission to last job:         94681s    1578.02m    26.30h    1.10d
-
-    # Dang, chr16_03 vs. self still runs out of mem even on hgwdev.
-    mkdir /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/run.blastz/part014/chr16_03
-    cd /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/run.blastz/part014/chr16_03
-    twoBitToFa /hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/hg38.self.2bit:chr16_03:0-1689648 \
-      chr16_03.fa
-    faSplit -lift=chr16_03.lift size chr16_03.fa 169000 chr16_03_split_
-    faToTwoBit chr16_03_split_*.fa chr16_03_split.2bit
-    twoBitInfo chr16_03_split.2bit stdout | sort -k2nr > chr16_03_split.sizes
-    sed -re 's@CTGDIR.*@CTGDIR=/hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/run.blastz/part014/chr16_03/chr16_03_split.2bit@;
-             s@CTGLEN.*@CTGLEN=/hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/run.blastz/part014/chr16_03/chr16_03_split.sizes@;' \
-      ../../../DEF > DEF.split
-    mkdir psl
-    cwd=$(pwd)
-    while read tBase tSize; do
-      while read qBase qSize; do
-        echo "$HOME/kent/src/hg/utils/automation/blastz-run-ucsc -outFormat psl -dropSelf $cwd/chr16_03_split.2bit:$tBase:0-$tSize $cwd/chr16_03_split.2bit:$qBase:0-$qSize DEF.split {check out exists psl/${tBase}_${qBase}.psl}"
-      done < chr16_03_split.sizes
-    done < chr16_03_split.sizes > jobList
-    para create jobList
-    para try, check, push, etc,
-#Completed: 100 of 100 jobs
-#CPU time in finished jobs:     142614s    2376.89m    39.61h    1.65d  0.005 y
-#IO & Wait Time:                   167s       2.79m     0.05h    0.00d  0.000 y
-#Average job time:                1428s      23.80m     0.40h    0.02d
-#Longest finished job:           22861s     381.02m     6.35h    0.26d
-#Submission to last job:         22874s     381.23m     6.35h    0.26d
-    # 6 hours for chr16_03_split_00 vs. itself.  ~4.5h for _09 vs _00.
-    cat psl/*.psl \
-    | liftUp -nohead -type=.psl stdout \
-        chr16_03.lift error stdin \
-    | liftUp -nohead -type=.psl -pslQ \
-        ../psl/hg38.self.2bit:chr16_03:0-1689648_hg38.self.2bit:chr16_03:0-1689648.psl \
-        chr16_03.lift error stdin
-
-    cd ..
-    cat psl/* > ../../psl/part014.lst/part014.lst_part014.lst.psl
-
-    # Make run.time file or doBlastzChainNet.pl won't continue:
-    cd /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/run.blastz
-    para time >& run.time
-
-    # Resume doBlastzChainNet.pl:
-    cd /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27
-    ~/kent/src/hg/utils/automation/doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
-        -chainMinScore=3000 -chainLinearGap=medium -syntenicNet \
-        -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
-        -continue=cat -stop=net >& do2.log &
-    tail -f do2.log
-#Batch failed after 4 tries on chain.csh part016.lst chain/part016.lst.chain
-#Command failed:
-#ssh -x -o 'StrictHostKeyChecking = no' -o 'BatchMode = yes' hgwdev nice /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/axtChain/run/doChainRun.csh
-
-    cd /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/axtChain/run
-    para problems
-    # mostly these:
-#errAbort re-entered due to out-of-memory condition. Exiting.
-    # one job made it through errAbort:
-#needLargeMem: Out of memory - request size 564838920 bytes, errno: 12
-    para time
-#Completed: 59 of 68 jobs
-#Crashed: 9 jobs
-#CPU time in finished jobs:      24727s     412.12m     6.87h    0.29d  0.001 y
-#IO & Wait Time:                     0s       0.00m     0.00h    0.00d  0.000 y
-#Average job time:                 409s       6.82m     0.11h    0.00d
-#Longest finished job:            2350s      39.17m     0.65h    0.03d
-#Submission to last job:          2462s      41.03m     0.68h    0.03d
-    para crashed
-#chain.csh part012.lst {check out line+ chain/part012.lst.chain}
-#chain.csh part017.lst {check out line+ chain/part017.lst.chain}
-#chain.csh part016.lst {check out line+ chain/part016.lst.chain}
-#chain.csh part015.lst {check out line+ chain/part015.lst.chain}
-#chain.csh part014.lst {check out line+ chain/part014.lst.chain}
-#chain.csh hg38.self.2bit:chr1_10: {check out line+ chain/hg38.self.2bit:chr1_10:.chain}
-#chain.csh hg38.self.2bit:chr10_05: {check out line+ chain/hg38.self.2bit:chr10_05:.chain}
-#chain.csh hg38.self.2bit:chr7_00: {check out line+ chain/hg38.self.2bit:chr7_00:.chain}
-
-    # Run the jobs outside of parasol (~11h):
-    csh -efx chain.csh part012.lst chain/part012.lst.chain &
-    csh -efx chain.csh part017.lst chain/part017.lst.chain &
-    csh -efx chain.csh part016.lst chain/part016.lst.chain &
-    csh -efx chain.csh part015.lst chain/part015.lst.chain &
-    csh -efx chain.csh part014.lst chain/part014.lst.chain &
-    csh -efx chain.csh hg38.self.2bit:chr1_10: chain/hg38.self.2bit:chr1_10:.chain &
-    csh -efx chain.csh hg38.self.2bit:chr10_05: chain/hg38.self.2bit:chr10_05:.chain &
-    csh -efx chain.csh hg38.self.2bit:chr7_00: chain/hg38.self.2bit:chr7_00:.chain &
-    csh -efx chain.csh hg38.self.2bit:chr16_08: chain/hg38.self.2bit:chr16_08:.chain &
-
-    # Resume doBlastzChainNet.pl again:
-    cd /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27
-    ~/kent/src/hg/utils/automation/doBlastzChainNet.pl `pwd`/DEF -verbose=2 \
-        -chainMinScore=3000 -chainLinearGap=medium -syntenicNet \
-        -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
-        -continue=chainMerge -stop=net >& do3.log &
-    tail -f do3.log
-# *** All done !  Elapsed time: 19m11s
-
-    # Load track w/new name chainSelfRedo to compare to existing chainSelf:
-    hgLoadChain -normScore -tIndex hg38 chainSelfRedo axtChain/hg38.hg38.all.chain.gz
-
-    # No idea why but somehow the liftUp seems not to have worked for part012 and part017,
-    # so the all.chain had chr22_31, chr8_01 etc.  :b  run again again.
-    cd /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/axtChain/run
-    mv chain/part012.lst.chain{,.bak}
-    mv chain/part017.lst.chain{,.bak}
-    csh -efx chain.csh part012.lst chain/part012.lst.chain >& part012.log &
-    csh -efx chain.csh part017.lst chain/part017.lst.chain >& part017.log &
-    # Those completed successfully.  Dunno why the earlier ones didn't get lifted.
-    cd ..
-    mv hg38.hg38.all{,.oopsPartUnlifted}.chain.gz
-    # Reconstruct hg38.hg38.all.chain.gz (the chainMerge step is just this command):
-    find /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/axtChain/run/chain -name "*.chain" \
-    | chainMergeSort -inputList=stdin \
-    | nice gzip -c \
-      > /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/axtChain/hg38.hg38.all.chain.gz
-
-    # NOTE FOR NEXT TIME: this filtering step will be unnecessary when -minScore=10000 is used
-    # from the beginning.
-    # Filter to minScore of 10000 (too much fluff with -minScore=3000) per Jim (see #24695)
-    cd /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/axtChain
-    mv hg38.hg38.all.chain.gz hg38.hg38.all.unfiltered.chain.gz
-    chainFilter hg38.hg38.all.unfiltered.chain.gz -minScore=10000 \
-    | gzip -c > hg38.hg38.all.chain.gz
-    hgLoadChain -normScore -tIndex hg38 chainSelfRedo hg38.hg38.all.chain.gz
-    checkTableCoords hg38 chainSelfRedo
-
-    # Rename to chainSelf and update lastz symlinks and downloads
-    hgsql hg38 -e 'drop table chainSelf; drop table chainSelfLink;
-                   rename table chainSelfRedo to chainSelf;
-                   rename table chainSelfRedoLink to chainSelfLink;'
-    cd /hive/data/genomes/hg38/bed
-    rm lastz.self lastz.hg38
-    ln -s lastzSelf.2020-01-27 lastz.self
-    ln -s lastzSelf.2020-01-27 lastz.hg38
-    cd /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/axtChain
-    cp /hive/data/genomes/hg38/bed/lastzSelf.2014-01-25/axtChain/README.txt .
-    $EDITOR README.txt
-    md5sum hg38.hg38.all.chain.gz > md5sum.txt
-    # Make sure that the old download dir has only symlinks, no real files, then remove and rebuild.
-    ls -lR /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/vsSelf/
-    rm -r /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/vsSelf/
-    mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/vsSelf/
-    cd /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/vsSelf/
-    ln -s /hive/data/genomes/hg38/bed/lastzSelf.2020-01-27/axtChain/{README.txt,hg38.hg38.all.chain.gz,md5sum.txt} .
-
-
-#########################################################################
-# NCBI ReMap alignments (DONE 2020-02-11 Angie)
-# RM 24449
-    mkdir /hive/data/genomes/hg38/bed/chainHg19ReMap
-    cd /hive/data/genomes/hg38/bed/chainHg19ReMap
-    wget ftp://ftp.ncbi.nlm.nih.gov/pub/remap/Homo_sapiens/current/GCF_000001405.39_GRCh38.p13/GCF_000001405.25_GRCh37.p13/GCF_000001405.39-GCF_000001405.25.gff
-    # We will need to substitute all the RefSeq chrom and contig IDs with our own names.
-    # The same alt contig can appear in both assemblies with the same name, so replace
-    # hg19 names at the beginning of the line and hg38 names after "Target=".
-    hgsql hg19 -NBe 'select alias, chrom from chromAlias where find_in_set("refseq", source)' \
-    | sed -re 's/\./\\./;' \
-    | awk '{print "s/^" $1 "\\b/" $2 "/;";}' \
-      > hg38.hg19.chromAlias.sed
-    hgsql hg38 -NBe 'select alias, chrom from chromAlias where find_in_set("refseq", source)' \
-    | sed -re 's/\./\\./;' \
-    | awk '{print "s/Target=" $1 "\\b/Target=" $2 "/;";}' \
-      >> hg38.hg19.chromAlias.sed
-
-    # There are some GRCh38.p13 sequences that we have not yet imported into hg38 -- use -dropT.
-    sed -f hg38.hg19.chromAlias.sed GCF_000001405.39-GCF_000001405.25.gff \
-    | gff3ToPsl -dropT /hive/data/genomes/{hg19,hg38}/chrom.sizes stdin stdout \
-    | pslPosTarget stdin stdout \
-    | sort -k14,14 -k16n,16n > remap.hg38.hg19.psl
-
-    # Convert to chain for browser display.  Some of the remap chains have minScore < 1000 and
-    # by default would be dropped by chainScore... use -minScore=0 to prevent that.
-    time pslToChain remap.hg38.hg19.psl stdout \
-    | chainScore -minScore=0 stdin /hive/data/genomes/{hg38/hg38.2bit,hg19/hg19.2bit} \
-        remap.hg38.hg19.chain
-#real    9m31.900s
-#user    9m1.624s
-#sys     0m20.863s
-    hgLoadChain hg38 -tIndex chainHg19ReMap remap.hg38.hg19.chain
-#Loading 5315 chains into hg38.chainHg19ReMap
-    time axtChain -psl -linearGap=medium -verbose=0 remap.hg38.hg19.psl \
-      /hive/data/genomes/hg38/hg38.2bit /hive/data/genomes/hg19/hg19.2bit \
-      remap.axtChain.hg38.hg19.chain
-#real    2m26.333s
-#user    2m4.237s
-#sys     0m22.071s
-    hgLoadChain hg38 -tIndex chainHg19ReMapAxtChain remap.axtChain.hg38.hg19.chain
-#Loading 2115 chains into hg38.chainHg19ReMapAxtChain
-
-###################################################
-#Agilent SNP/CNV arrays 3/11/21
-#Downloaded by web browser
-cd /hive/data/genomes/hg38/bed/agilentProbes/genetiSureCyto
-fetchChromSizes hg38 > hg38.chrom.sizes
-bedSort hg38.GenetiSure_Cyto_CGH+SNP_Microarray_4x180K_085591_D_BED_20200302.bed hg38.GenetiSure_Cyto_CGH+SNP_Microarray_4x180K_085591_D_BED_20200302.bed
-uniq hg38.GenetiSure_Cyto_CGH+SNP_Microarray_4x180K_085591_D_BED_20200302.bed >hg38.GenetiSure_Cyto_CGH+SNP_Microarray_4x180K_085591_D_BED_20200302.uniq.bed
-bedToBigBed hg38.GenetiSure_Cyto_CGH+SNP_Microarray_4x180K_085591_D_BED_20200302.uniq.bed hg38.chrom.sizes hg38.GenetiSure_Cyto_CGH+SNP_Microarray_4x180K_085591_D_BED_20200302.bb
-bedSort hg38.GenetiSure_Cyto_CGH_Microarray_8x60K_085590_D_BED_20200302.bed hg38.GenetiSure_Cyto_CGH_Microarray_8x60K_085590_D_BED_20200302.bed
-uniq hg38.GenetiSure_Cyto_CGH_Microarray_8x60K_085590_D_BED_20200302.bed > hg38.GenetiSure_Cyto_CGH_Microarray_8x60K_085590_D_BED_20200302.uniq.bed
-bedToBigBed hg38.GenetiSure_Cyto_CGH_Microarray_8x60K_085590_D_BED_20200302.uniq.bed hg38.chrom.sizes hg38.GenetiSure_Cyto_CGH_Microarray_8x60K_085590_D_BED_20200302.bb
-bedSort hg38.GenetiSure_Cyto_CGH_Microarray_4x180K_085589_D_BED_20200302.bed hg38.GenetiSure_Cyto_CGH_Microarray_4x180K_085589_D_BED_20200302.bed
-uniq hg38.GenetiSure_Cyto_CGH_Microarray_4x180K_085589_D_BED_20200302.bed > hg38.GenetiSure_Cyto_CGH_Microarray_4x180K_085589_D_BED_20200302.uniq.bed
-bedToBigBed hg38.GenetiSure_Cyto_CGH_Microarray_4x180K_085589_D_BED_20200302.uniq.bed hg38.chrom.sizes hg38.GenetiSure_Cyto_CGH_Microarray_4x180K_085589_D_BED_20200302.bb
-mkdir -p /gbdb/hg38/snpCnvArrays/agilent
-cd /gbdb/hg38/snpCnvArrays/agilent
-ln -s /hive/data/genomes/hg38/bed/agilentProbes/genetiSureCyto/hg38.GenetiSure_Cyto_CGH+SNP_Microarray_4x180K_085591_D_BED_20200302.bb
-ln -s /hive/data/genomes/hg38/bed/agilentProbes/genetiSureCyto/hg38.GenetiSure_Cyto_CGH_Microarray_8x60K_085590_D_BED_20200302.bb
-ln -s /hive/data/genomes/hg38/bed/agilentProbes/genetiSureCyto/hg38.GenetiSure_Cyto_CGH_Microarray_4x180K_085589_D_BED_20200302.bb
-vi ~/kent/src/hg/makeDb/trackDb/human/hg38/trackDb.ra
-
-#########################################################################
-# DECIPHER CNV & SNV - initial build (DONE 2022-04-08 Jonathan)
-# RM 29130
-
-cd /hive/data/genomes/outside/otto/decipher
-mkdir 2022-04-05
-cd 2022-04-05
-
-# manually fetch decipher-variants-grch38-2022-04-03.bed from DECIPHER
-../buildDecipher decipher-variants-grch38-2022-04-03.bed
-
-for i in `cat ../decipher.tables`
-        do
-        n=$i"New"
-        o=$i"Old"
-        hgsqlSwapTables hg38 $n $i $o -dropTable3
-        done
-
-mkdir -p /gbdb/hg38/decipher
-cd /gbdb/hg38/decipher
-ln -s /hive/data/outside/otto/decipher/2022-04-05/decipherCnv.bb .
-
-#########################################################################
-# COSMIC (DONE 07-11-2023)
-# RM 29625
-
-#Fetch file
-cd /hive/data/outside/cosmic/hg38/v98/
-wget 'https://cog.sanger.ac.uk/cosmic/GRCh38/ucsc/v98/ucsc_export.bed.gz?AWSAccessKeyId=KRV7P7QR9DL41J9EWGA2&Expires=1686847188&Signature=4YV3CuFKudxIhqVdWAaCe0CMAiY%3D' -O ucsc_export.bed.gz
-wget 'https://cog.sanger.ac.uk/cosmic/GRCh38/ucsc/v98/ucsc_export.bed.gz?AWSAccessKeyId=KRV7P7QR9DL41J9EWGA2&Expires=1687525456&Signature=jBdJOlOOaqmMWNnOtJUyNRptVj4%3D'
-mv ucsc_export.bed.gz\?AWSAccessKeyId\=KRV7P7QR9DL41J9EWGA2\&Expires\=1687525456\&Signature\=jBdJOlOOaqmMWNnOtJUyNRptVj4\= ucsc_export.bed.gz
-
-#Reorder the columns to conform to bed 6+3
-zcat ucsc_export.bed.gz | awk -F'\t' -v OFS="\t" '{ print $1, $2, $3, $7, 0, $6, $4, $5, $8 }' | sort -k1,1 -k2,2n > cosmic.bed
-
-#Tiny bit of python to identify the broken lines in the file where chromStart > chromEnd
-
-#for line in myFile:
-#    newLine = line.split("\t")
-#    if int(newLine[1]) > int(newLine[2]):
-#        print(line)
-#        n+=1
-#print(n)
-
-#remove those broken records from the file
-cat cosmic.bed | grep -vf badRecords.bed > cosmic.fixed.bed
-
-#subtract 1 from chromStart to conform to bed format for all the items that have the same start and end position
-
-cat cosmic.fixed.bed | awk 'BEGIN {OFS="\t"} {
-if ($2 == $3)
-        print $1,$2-1,$3,$4,$5,$6,$7,$8,$9;
-else
-        print $0;
-}' > cosmic.fixedPos.bed
-
-bedToBigBed -type=bed6+3 -as=/hive/data/outside/cosmic/hg38/v98/cosmic.as /hive/data/outside/cosmic/hg38/v98/cosmic.fixedPos.bed /hive/data/genomes/hg38/chrom.sizes /hive/data/outside/cosmic/hg38/v98/cosmic.bb -tab
-
-#make symlink
-ln -s /hive/data/outside/cosmic/hg38/v98/cosmic.bb /gbdb/hg38/cosmic/cosmic.bb
-
-#This data has since been updated, see new makedoc doc/hg38/cosmicV98.txt and rm #32430
-
-##############################################################################
-# LIFTOVER TO GCA_018873775.2_hg01243.v3.0 (DONE - 2023-08-13 - Hiram)
-    ssh hgwdev
-    # going to need an ooc for hg38.p14.2bit
-    cd /hive/data/genomes/hg38
-    time blat hg38.p14.2bit /dev/null /dev/null -tileSize=11 \
-      -makeOoc=hg38.p14.ooc -repMatch=1024
-    # Wrote 36808 overused 11-mers to hg38.p14.ooc
-    # real    0m50.753s
-
-    # and ooc for this GenArk hub
-    cd /hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0
-  time blat GCA_018873775.2_hg01243.v3.0.2bit /dev/null /dev/null -tileSize=11 \
-      -makeOoc=GCA_018873775.2_hg01243.v3.0.ooc -repMatch=1024
-# Wrote 39087 overused 11-mers to GCA_018873775.2_hg01243.v3.0.ooc
-# real    0m49.426s
-
-  mkdir /hive/data/genomes/hg38/bed/blat.GCA_018873775.2_hg01243.v3.0.2023-08-13
-    cd /hive/data/genomes/hg38/bed/blat.GCA_018873775.2_hg01243.v3.0.2023-08-13
-
-    doSameSpeciesLiftOver.pl -verbose=2 -buildDir=`pwd` \
-        -debug -bigClusterHub=hgwdev -dbHost=hgwdev -workhorse=hgwdev \
-        -target2Bit=/hive/data/genomes/hg38/hg38.2bit \
-        -targetSizes=/hive/data/genomes/hg38/chrom.sizes \
- -query2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.2bit \
- -querySizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.chrom.sizes \
-        -ooc=/hive/data/genomes/hg38/hg38.p14.ooc \
-         hg38 GCA_018873775.2
-
-    # trying -ram=6g to get full use of hgwdev kluster nodes
-    time (~/kent/src/hg/utils/automation/doSameSpeciesLiftOver.pl \
-        -verbose=2 -buildDir=`pwd` \
-        -bigClusterHub=hgwdev -dbHost=hgwdev -workhorse=hgwdev \
-        -target2Bit=/hive/data/genomes/hg38/hg38.2bit \
-        -targetSizes=/hive/data/genomes/hg38/chrom.sizes \
- -query2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.2bit \
- -querySizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.chrom.sizes \
-        -ooc=/hive/data/genomes/hg38/hg38.p14.ooc \
-         hg38 GCA_018873775.2) > doLiftOverToGCA_018873775.2.log 2>&1
-    # real    12654m58.134s
-
-    # broken after the alignment was done, with the parasol endless loop
-    # error message in the log file:
-    #  select failure in rudp: Invalid argument
-    # killed that, cleaned the 4Tb log file, and gave up on this alignment
-    # since the lastz/chain/net is much better
-
-    # see if the liftOver menus function in the browser from hg38
-    #    to GCA_018873775.2
-
-##############################################################################
-# LIFTOVER GCA_018873775.2_hg01243.v3.0 to hg38 (DONE - 2023-08-13 - Hiram)
-    ssh hgwdev
-
-    mkdir /hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/trackData/blat.hg38.2023-08-13
-    cd /hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/trackData/blat.hg38.2023-08-13
-
-    doSameSpeciesLiftOver.pl -verbose=2 -buildDir=`pwd` \
-        -debug -bigClusterHub=hgwdev -dbHost=hgwdev -workhorse=hgwdev \
- -target2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.2bit \
- -targetSizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.chrom.sizes \
-        -query2Bit=/hive/data/genomes/hg38/hg38.2bit \
-        -querySizes=/hive/data/genomes/hg38/chrom.sizes \
-        -ooc=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.ooc \
-         GCA_018873775.2 hg38
-
-    # trying -ram=6g to get full use of hgwdev kluster nodes
-    time (~/kent/src/hg/utils/automation/doSameSpeciesLiftOver.pl \
-        -verbose=2 -buildDir=`pwd` \
-        -bigClusterHub=hgwdev -dbHost=hgwdev -workhorse=hgwdev \
- -target2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.2bit \
- -targetSizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.chrom.sizes \
-        -query2Bit=/hive/data/genomes/hg38/hg38.2bit \
-        -querySizes=/hive/data/genomes/hg38/chrom.sizes \
-        -ooc=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0/GCA_018873775.2_hg01243.v3.0.ooc \
-         GCA_018873775.2 hg38) > doLiftOverToHg38.log 2>&1
-
-    # broken after the alignment was done, with the parasol endless loop
-    # error message in the log file:
-    #  select failure in rudp: Invalid argument
-    # killed that, cleaned the 4Tb log file, and gave up on this alignment
-    # since the lastz/chain/net is much better
-    # real    193m24.137s
-
-    # see if the liftOver menus function in the browser from GCA_018873775.2
-    #    to hg38
-
-##############################################################################
-# LIFTOVER TO GCA_018503275.1_NA19240.pri.mat.f1_v2 (TBD - 2023-08-14 - Hiram)
-    ssh hgwdev
-
-    # ooc for this GenArk hub
-    cd /hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2
-  time blat GCA_018503275.1_NA19240.pri.mat.f1_v2.2bit /dev/null /dev/null \
-      -tileSize=11 -repMatch=1024 \
-      -makeOoc=GCA_018503275.1_NA19240.pri.mat.f1_v2.11.ooc
-  # Wrote 35866 overused 11-mers to GCA_018503275.1_NA19240.pri.mat.f1_v2.11.ooc
-    # real    0m32.298s
-
-  mkdir /hive/data/genomes/hg38/bed/blat.GCA_018503275.1_NA19240.pri.mat.f1_v2.2023-08-14
-  cd /hive/data/genomes/hg38/bed/blat.GCA_018503275.1_NA19240.pri.mat.f1_v2.2023-08-14
-
-    ~/kent/src/hg/utils/automation/doSameSpeciesLiftOver.pl -verbose=2 \
-        -buildDir=`pwd` -ram=4g -chainRam=16g \
-        -debug -bigClusterHub=hgwdev -dbHost=hgwdev -workhorse=hgwdev \
-        -target2Bit=/hive/data/genomes/hg38/hg38.2bit \
-        -targetSizes=/hive/data/genomes/hg38/chrom.sizes \
- -query2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.2bit \
- -querySizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.chrom.sizes \
-        -ooc=/hive/data/genomes/hg38/hg38.p14.ooc \
-         hg38 GCA_018503275.1
-
-    # trying -ram=4g to get full use of hgwdev kluster nodes
-    time (~/kent/src/hg/utils/automation/doSameSpeciesLiftOver.pl \
-        -verbose=2 -buildDir=`pwd` -ram=4g -chainRam=16g \
-        -bigClusterHub=hgwdev -dbHost=hgwdev -workhorse=hgwdev \
-        -target2Bit=/hive/data/genomes/hg38/hg38.2bit \
-        -targetSizes=/hive/data/genomes/hg38/chrom.sizes \
- -query2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.2bit \
- -querySizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.chrom.sizes \
-        -ooc=/hive/data/genomes/hg38/hg38.p14.ooc \
-         hg38 GCA_018503275.1) > doLiftOverToGCA_018503275.1.log 2>&1
-    # real    11370m18.026s
-
-    # broken after the alignment was done, with the parasol endless loop
-    # error message in the log file:
-    #  select failure in rudp: Invalid argument
-    # killed that, cleaned the 4Tb log file, and gave up on this alignment
-    # since the lastz/chain/net is much better
-    # -rw-rw-r-- 1 4363949695640 Aug 22 09:16 doLiftOverToGCA_018503275.1.log
-
-    # see if the liftOver menus function in the browser from hg38
-    #    to GCA_018503275.1
-
-##############################################################################
-# LIFTOVER GCA_018503275.1_NA19240.pri.mat.f1_v2 to hg38 (DONE - 2023-08-14 - Hiram)
-    ssh hgwdev
-
-    mkdir /hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/trackData/blat.hg38.2023-08-14
-    cd /hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/trackData/blat.hg38.2023-08-14
-
-    ~/kent/src/hg/utils/automation/doSameSpeciesLiftOver.pl -verbose=2 \
-        -buildDir=`pwd` -ram=4g -chainRam=16g \
-        -debug -bigClusterHub=hgwdev -dbHost=hgwdev -workhorse=hgwdev \
- -target2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.2bit \
- -targetSizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.chrom.sizes \
-        -query2Bit=/hive/data/genomes/hg38/hg38.2bit \
-        -querySizes=/hive/data/genomes/hg38/chrom.sizes \
-        -ooc=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.11.ooc \
-         GCA_018503275.1 hg38
-
-    time (~/kent/src/hg/utils/automation/doSameSpeciesLiftOver.pl -verbose=2 \
-        -buildDir=`pwd` -ram=4g -chainRam=16g \
-        -bigClusterHub=hgwdev -dbHost=hgwdev -workhorse=hgwdev \
- -target2Bit=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.2bit \
- -targetSizes=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.chrom.sizes \
-        -query2Bit=/hive/data/genomes/hg38/hg38.2bit \
-        -querySizes=/hive/data/genomes/hg38/chrom.sizes \
-        -ooc=/hive/data/genomes/asmHubs/genbankBuild/GCA/018/503/275/GCA_018503275.1_NA19240.pri.mat.f1_v2/GCA_018503275.1_NA19240.pri.mat.f1_v2.11.ooc \
-         GCA_018503275.1 hg38) > liftOverToHg38.log 2>&1
-    # real    5082m17.500s
-
-    # this is interesting, this alignment completed and actually has good
-    # coverage:
-    cat fb.GCA_018503275.1.chain.Hg38Link.txt
-    # 2928654519 bases of 3032066086 (96.589%) in intersection
-
-    # see if the liftOver menus function in the browser from GCA_018503275.1
-    #    to hg38
-
-##############################################################################
-## update grp table add new row for HPRC (DONE - 2023-08-29 - Hiram)
-## existing structure:
-
-    hgsql -e 'desc grp;' hg38
-
-+-----------------+-----------+------+-----+---------+-------+
-| Field           | Type      | Null | Key | Default | Extra |
-+-----------------+-----------+------+-----+---------+-------+
-| name            | char(255) | NO   | PRI |         |       |
-| label           | char(255) | NO   |     |         |       |
-| priority        | float     | NO   |     | 0       |       |
-| defaultIsClosed | int(11)   | YES  |     | NULL    |       |
-+-----------------+-----------+------+-----+---------+-------+
-
-    #  add one new row:
-    hgsql hg38 \
-      -e "INSERT INTO grp VALUES ('hprc', 'Human Pangenome - HPRC', 3.6, 0);"
-
-    # resulting table:
-
-    hgsql -e 'select * from grp order by priority;' hg38
-+------------+------------------------------------+----------+-----------------+
-| name       | label                              | priority | defaultIsClosed |
-+------------+------------------------------------+----------+-----------------+
-| user       | Custom Tracks                      |        1 |               0 |
-| remc       | Reference Epigenome Mapping Center |      1.2 |               1 |
-| map        | Mapping and Sequencing             |        2 |               0 |
-| genes      | Genes and Gene Predictions         |        3 |               0 |
-| phenDis    | Phenotype and Literature           |      3.4 |               0 |
-| pub        | Literature                         |      3.5 |               0 |
-| hprc       | Human Pangenome - HPRC             |      3.6 |               0 |
-| covid      | COVID-19                           |      3.6 |               0 |
-| singleCell | Single Cell RNA-seq                |      3.7 |               0 |
-| rna        | mRNA and EST                       |        4 |               0 |
-| expression | Expression                         |      4.5 |               0 |
-| regulation | Regulation                         |        5 |               0 |
-| compGeno   | Comparative Genomics               |        6 |               0 |
-| varRep     | Variation                          |        7 |               0 |
-| rep        | Repeats                            |        8 |               0 |
-| x          | Experimental                       |       10 |               1 |
-+------------+------------------------------------+----------+-----------------+
-
-##############################################################################
-# Affy CytoScan HD track, refs #32856  (2024-01-23 Gerardo)
-cd /hive/data/genomes/hg38/bed/
-mkdir genotypeArrays; cd genotypeArrays
-#The user sent Gerardo a direct email with a shared folder link. Gerardo downloaded the bed files and made them available on dev.
-#The user provided two bed files (https://hgwdev-gperez2.gi.ucsc.edu/~gperez2/mlq/mlq_32791/). Gerardo used the version 2 bed file for the track.
-wget https://hgwdev-gperez2.gi.ucsc.edu/~gperez2/mlq/mlq_32791/CytoScanHD_Accel_Array.na36.bed.zip
-unzip CytoScanHD_Accel_Array.na36.bed.zip
-# Removed header and sorted the file
-grep -v 'track' CytoScanHD_Accel_Array.na36.bed | bedSort stdin stdout > affyCytoScanHD.bed
-bedToBigBed -tab -type=bed12 affyCytoScanHD.bed https://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.chrom.sizes affyCytoScanHD.bb 
-cd /gbdb/hg38
-mkdir genotypeArrays; cd genotypeArrays
-# Making symlink for big file and raw bed file
-ln -s /hive/data/genomes/hg38/bed/genotypeArrays/affyCytoScanHD.bb
-ln -s /hive/data/genomes/hg38/bed/genotypeArrays/CytoScanHD_Accel_Array.na36.bed.zip
-cd ~/kent/src/hg/makeDb/trackDb/human/hg38
-vi trackDb.ra
-
-##############################################################################
-# LASTZ Human Hg38 vs. California sea lion GCF_009762305.2
-#    (DONE - 2024-03-06 - jairo)
-
-    mkdir /hive/data/genomes/hg38/bed/lastzGCF_009762305.2.2024-03-06
-    cd /hive/data/genomes/hg38/bed/lastzGCF_009762305.2.2024-03-06
-
-    printf '# California sea lion GCF_009762305.2 vs. Human Hg38
-BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz
-
-# TARGET: Human  hg38
-SEQ1_DIR=/hive/data/genomes/hg38/hg38.2bit
-SEQ1_LEN=/hive/data/genomes/hg38/chrom.sizes
-SEQ1_CHUNK=20000000
-SEQ1_LAP=10000
-SEQ1_LIMIT=40
-
-# QUERY: California sea lion 2020-07-14 GCF_009762305.2_mZalCal1.pri.v2
-SEQ2_DIR=/hive/data/genomes/asmHubs/GCF/009/762/305/GCF_009762305.2/GCF_009762305.2.2bit
-SEQ2_LEN=/hive/data/genomes/asmHubs/GCF/009/762/305/GCF_009762305.2/GCF_009762305.2.chrom.sizes.txt
-SEQ2_CHUNK=20000000
-SEQ2_LAP=0
-SEQ2_LIMIT=100
-
-BASE=/hive/data/genomes/hg38/bed/lastzGCF_009762305.2.2024-03-06
-TMPDIR=/dev/shm
-
-' > DEF
-
-    time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \
-       -qAsmId GCF_009762305.2_mZalCal1.pri.v2 -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
-        -chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
-    grep -w real do.log | sed -e 's/^/    # /;'
-    # real      1018m28.119s
-
-    sed -e 's/^/    # /;' fb.hg38.chainGCF_009762305.2Link.txt
-    # 1633315994 bases of 3299210039 (49.506%) in intersection
-    sed -e 's/^/    # /;' fb.hg38.chainSynGCF_009762305.2Link.txt
-    # 1564193911 bases of 3299210039 (47.411%) in intersection
-
-    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
-       \
-      -query2Bit="/hive/data/genomes/asmHubs/GCF/009/762/305/GCF_009762305.2/GCF_009762305.2.2bit" \
--querySizes="/hive/data/genomes/asmHubs/GCF/009/762/305/GCF_009762305.2/GCF_009762305.2.chrom.sizes.txt" \
-        hg38 GCF_009762305.2) > rbest.log 2>&1
-
-    grep -w real rbest.log | sed -e 's/^/    # /;'
-    # real      303m36.739s
-
-    sed -e 's/^/    # /;' fb.hg38.chainRBest.GCF_009762305.2.txt
-    # 1461974620 bases of 3299210039 (44.313%) in intersection
-
-    ### and for the swap
-
-    cd /hive/data/genomes/asmHubs/allBuild/GCF/009/762/305/GCF_009762305.2_mZalCal1.pri.v2/trackData/blastz.hg38.swap
-
-   time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -swap -verbose=2 \
-   -qAsmId GCF_009762305.2_mZalCal1.pri.v2 /hive/data/genomes/hg38/bed/lastzGCF_009762305.2.2024-03-06/DEF -swapDir=`pwd` \
-  -syntenicNet -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
-    -chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
-
-    grep -w real swap.log | sed -e 's/^/    # /;'
-    # real      103m25.220s
-
-    sed -e 's/^/    # /;' fb.GCF_009762305.2.chainHg38Link.txt
-    # 1493183463 bases of 2409685272 (61.966%) in intersection
-    sed -e 's/^/    # /;' fb.GCF_009762305.2.chainSynHg38Link.txt
-    # 1457122207 bases of 2409685272 (60.469%) in intersection
-\    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
-    \
-   -target2bit="/hive/data/genomes/asmHubs/GCF/009/762/305/GCF_009762305.2/GCF_009762305.2.2bit" \
--targetSizes="/hive/data/genomes/asmHubs/GCF/009/762/305/GCF_009762305.2/GCF_009762305.2.chrom.sizes.txt" \
-   GCF_009762305.2 hg38) > rbest.log 2>&1
-
-    grep -w real rbest.log | sed -e 's/^/    # /;'
-    # real      286m31.189s
-
-    sed -e 's/^/    # /;' fb.GCF_009762305.2.chainRBest.Hg38.txt
-    # 1461710350 bases of 2409685272 (60.660%) in intersection
-
-##############################################################################
-# hg38.chromAlias.bb was incorrectly built without indexes so it will not
-# work with bedToBigBed 2024-04-08 markd
-
-cd /hive/data/genomes/hg38/goldenPath/bigZips/initial
-mv hg38.chromAlias.bb  hg38.chromAlias.noindexes.bb
-bigBedInfo -asOut hg38.chromAlias.noindexes.bb >hg38.chromAlias.as
-bigBedToBed hg38.chromAlias.noindexes.bb  hg38.chromAlias.bed
-bedToBigBed -tab -type=bed3+ -as=hg38.chromAlias.as hg38.chromAlias.bed -sizesIs2Bit  -extraIndex=ucsc,assembly,ensembl,genbank,refseq hg38.2bit hg38.chromAlias.bb
-
-##############################################################################
-
-# ENCODE 4 TF rPeak Clusters - RM #34930 - Lou 12/19/24
-
-mkdir /hive/data/genomes/hg38/bed/ENCODEv4TFrPeaks
-cd /hive/data/genomes/hg38/bed/ENCODEv4TFrPeaks
-hubClone -download https://users.wenglab.org/gaomingshi/TF.rpeak.test.txt
-ln -s /hive/data/genomes/hg38/bed/ENCODEv4TFrPeaks/no_trim.TF_name.rPeaks.bb /gbdb/hg38/bbi/ENCODE4/TFrPeakClusters.bb
-ln -s /hive/data/genomes/hg38/bed/ENCODEv4TFrPeaks/no_trim.TF_name.decorator.bb /gbdb/hg38/bbi/ENCODE4/TFrPeakClustersDecorator.bb
-# Then just moved the files to the ENCODEv4TFrPeaks dir, moved/tweaked HTML and trackDb
 
+# alphaMissense ticket #32269 (Jeltje, Jan 2025)
+mkdir -p /hive/data/genomes/hg38/bed/alphaMissense/
+cd /hive/data/genomes/hg38/bed/alphaMissense
+wget https://storage.googleapis.com/dm_alphamissense/AlphaMissense_hg38.tsv.gz
+time python ~/kent/src/hg/makeDb/outside/alphaMissense/alphaMissenseToWig.py AlphaMissense_hg38.tsv.gz
+wigToBigWig a.wig ../../chrom.sizes a.bw &
+wigToBigWig c.wig ../../chrom.sizes c.bw &
+wigToBigWig g.wig ../../chrom.sizes g.bw &
+wigToBigWig t.wig ../../chrom.sizes t.bw &
+wait
+
+## Colors were added using the script
+## kent/src/hg/makeDb/scripts/wigColorByColors/makeWigColorByRevelCadd.py