src/hg/makeDb/doc/hg18.txt 1.353
1.353 2009/03/07 01:13:23 angie
Added HapMap Recomb Rate (rel22) and updated HapMap SNPs (rel27). Moved a shared inline script out (to hg/snp/snpLoad/getOrthoSeq.pl).
Index: src/hg/makeDb/doc/hg18.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/hg18.txt,v
retrieving revision 1.352
retrieving revision 1.353
diff -b -B -U 4 -r1.352 -r1.353
--- src/hg/makeDb/doc/hg18.txt 25 Feb 2009 23:27:22 -0000 1.352
+++ src/hg/makeDb/doc/hg18.txt 7 Mar 2009 01:13:23 -0000 1.353
@@ -16513,8 +16513,9 @@
#########################################################################
# HapMap SNPs (DONE 2007-05-23 Andy)
# rel22
+# OBSOLETED by Phase II+III SNPs 3/09 angie (see HAPMAP REL27 GENOTYPES)
ssh hgwdev
bash
cd /cluster/data/hg18/bed
mkdir -p hapmap/zips
@@ -22267,87 +22268,24 @@
#Submission to last job: 221s 3.68m 0.06h 0.00d
ssh kolossus
cd /cluster/data/hg18/bed/snp129Ortho
- # Here is a script that looks up the base value in the ortho species
- # and swizzles columns to prepare for the joining and re-swizzling
- # of both ortho species' columns into the final product. If it is
- # used more than once, should be checked in, perhaps in hg/snp/snpLoad.
- cat > getOrthoSeq.pl <<'_EOF_'
-#!/usr/bin/env perl
-# Dig up orthologous alleles and swizzle columns so the glommed name that
-# includes human position info etc. is first. It will be used as a key for
-# joining up multiple other-species' ortho data. Also swizzle columns so
-# that the remaining columns are in order of appearance in the final result,
-# snp129OrthoPanTro2RheMac2. Upcase ortho alleles for consistency w/dbSNP.
-use warnings;
-use strict;
-
-my $twoBitFName = shift @ARGV
- || die "usage: getOrthoSeq.pl orthoDb.2bit [file(s)]\n";
-
-sub getOChrSeq($$) {
- # Slurp in fasta sequence using twoBitToFa.
- my ($twoBitFName, $oChr) = @_;
- open(P, "twoBitToFa -noMask $twoBitFName -seq=$oChr stdout |")
- || die "Can't open pipe from twoBitToFa $twoBitFName -seq=$oChr: $!\n";
- <P> =~ /^>\w+/
- || die "Doesn't look like we got fasta -- first line is this:\n$_";
- # From man perlfaq5: trick to slurp entire contents:
- my $c = 0;
- my $seq = do { local $/; my $data = <P>; $c = ($data =~ s/\n//g); $data; };
- close(P);
- return $seq;
-}
-
-my %rc = ( "a" => "t", "c" => "g", "g" => "c", "t" => "a",
- "A" => "T", "C" => "G", "G" => "C", "T" => "A", );
-sub revComp($) {
- # Reverse-complement fasta input. (Pass through non-agtc chars.)
- my ($seq) = @_;
- my $rcSeq = reverse $seq;
- for (my $i = 0; $i < length($rcSeq); $i++) {
- my $base = substr($rcSeq, $i, 1);
- my $cBase = $rc{$base} || $base;
- substr($rcSeq, $i, 1, $cBase);
- }
- return $rcSeq;
-}
-
-my $prevOChr;
-my ($oChrSeq, $oChrSize);
-while (<>) {
- chomp;
- my ($oChr, $oStart, $oEnd, $nameGlom, undef, $oStrand) = split;
- if (! defined $prevOChr || $oChr ne $prevOChr) {
- $oChrSeq = &getOChrSeq($twoBitFName, $oChr);
- $oChrSize = length($oChrSeq);
- }
- die "Coords out of range, input line $.: $oEnd > $oChr size $oChrSize\n\t"
- if ($oEnd > $oChrSize);
- my $oAllele = substr($oChrSeq, $oStart, $oEnd - $oStart);
- $oAllele = &revComp($oAllele) if ($oStrand eq "-");
- print join("\t", $nameGlom, $oChr, $oStart, $oEnd, $oAllele, $oStrand) .
- "\n";
- $prevOChr = $oChr;
-}
-'_EOF_'
- # << emacs
- chmod a+x getOrthoSeq.pl
+ # Note: the formerly inlined script getOrthoSeq.pl has been checked in
+ # as kent/src/hg/snp/snpLoad/getOrthoSeq.pl.
# Concatenate the chimp results, sorting by chimp pos in order to
- # efficiently access 2bit sequence in ./getOrthoSeq. The output of
+ # efficiently access 2bit sequence in getOrthoSeq. The output of
# that is then sorted by the glommed human info field, so that we
# can use join to combine chimp and macaque results in the next step.
# Ditto for macaque and orangutan.
sort -k1,1 -k2n,2n run.liftOChimp/out/panTro2.chunk*.bed \
- | ./getOrthoSeq.pl /cluster/data/panTro2/panTro2.2bit \
+ | ~/kent/src/hg/snp/snpLoad/getOrthoSeq.pl /cluster/data/panTro2/panTro2.2bit \
| sort > panTro2.orthoGlom.txt
sort -k1,1 -k2n,2n run.liftOPon/out/ponAbe2.chunk*.bed \
- | ./getOrthoSeq.pl /cluster/data/ponAbe2/ponAbe2.2bit \
+ | ~/kent/src/hg/snp/snpLoad/getOrthoSeq.pl /cluster/data/ponAbe2/ponAbe2.2bit \
| sort > ponAbe2.orthoGlom.txt
sort -k1,1 -k2n,2n run.liftOMac/out/rheMac2.chunk*.bed \
- | ./getOrthoSeq.pl /cluster/data/rheMac2/rheMac2.2bit \
+ | ~/kent/src/hg/snp/snpLoad/getOrthoSeq.pl /cluster/data/rheMac2/rheMac2.2bit \
| sort > rheMac2.orthoGlom.txt
# The whole pipeline takes ~5-7 minutes each.
wc -l panTro2.orthoGlom.txt ponAbe2.orthoGlom.txt rheMac2.orthoGlom.txt
# 9909458 panTro2.orthoGlom.txt
@@ -26789,17 +26727,19 @@
#Loaded 660280 elements of size 7
#############################################################################
-# HGDP HETEROZYGOSITY (DONE 2/12/09 angie)
+# HGDP HETEROZYGOSITY (DONE 2/12/09 angie, except for Bantu 3/02/09)
mkdir /hive/data/genomes/hg18/bed/hgdpHzy
cd /hive/data/genomes/hg18/bed/hgdpHzy
foreach continent (african americas easia european mideast oceania sasia)
wget --timestamping http://hgdp.uchicago.edu/data/hzy/$continent.gff.gz
end
- foreach continent (african americas easia european mideast oceania sasia)
+ wget --timestamping http://hgdp.uchicago.edu/data/hzy/allbantu.hzy.gff.gz
+#*** waiting to hear back from Joe about whether that's the right file
+ foreach continent (african allbantu americas easia european mideast oceania sasia)
set bedGraph = `echo $continent \
- | sed -re 's/can$/ca/; s/pean$/pe/; s/asia/Asia/; \
+ | sed -re 's/can$/ca/; s/pean$/pe/; s/asia/Asia/; s/allbantu/bantu/; \
s/(.*)/hgdpHzy\u\1.bedGraph/'`
echo $bedGraph
zcat $continent.gff.gz \
| awk 'BEGIN{OFS="\t";} {print $1, ($4-1), $5, $6;}' \
@@ -26809,10 +26749,12 @@
# some are over the 10Mbase wiggle item size limit.
foreach f (*.bedGraph)
hgLoadBed hg18 $f:r $f -bedGraph=4
end
- # All 7 have same size:
+ # All have same size:
#Loaded 640676 elements of size 4
+ # except for Bantu:
+#Loaded 640698 elements of size 4
#############################################################################
# HGDP FST (DONE 2/12/09 angie)
@@ -26834,9 +26776,9 @@
foreach continent (Bantu Americas E.Asia European MiddleEast Oceania S.Asian)
wget --timestamping \
http://hgdp.uchicago.edu/data/iHS/smoothed$continent.iHS.gff.gz
set bedGraph = `echo $continent \
- | sed -re 's/Bantu/Africa/; s/pean$/pe/; s/\.Asian?/Asia/; \
+ | sed -re 's/pean$/pe/; s/\.Asian?/Asia/; \
s/MiddleEast/Mideast/; s/(.*)/hgdpIhs\1.bedGraph/'`
echo $bedGraph
zcat smoothed$continent.iHS.gff.gz \
| sed -e 's/^chr23/chrX/' \
@@ -26845,9 +26787,9 @@
end
foreach f (*.bedGraph)
hgLoadBed hg18 $f:r $f -bedGraph=4
end
-#Reading hgdpIhsAfrica.bedGraph
+#Reading hgdpIhsBantu.bedGraph
#Loaded 540438 elements of size 4
#Reading hgdpIhsAmericas.bedGraph
#Loaded 422167 elements of size 4
#Reading hgdpIhsEAsia.bedGraph
@@ -26869,19 +26811,18 @@
foreach continent (Bantu Americas E.Asia Europe Mideast Oceania S.Asia)
wget --timestamping \
http://hgdp.uchicago.edu/data/XPEHH/$continent.xpehh.forbrowser.gff.gz
set bedGraph = `echo $continent \
- | sed -re 's/Bantu/Africa/; s/\.Asia?/Asia/; \
- s/(.*)/hgdpXpehh\1.bedGraph/'`
+ | sed -re 's/\.Asia?/Asia/; s/(.*)/hgdpXpehh\1.bedGraph/'`
echo $bedGraph
zcat $continent.xpehh.forbrowser.gff.gz \
| awk 'BEGIN{OFS="\t";} {print $1, ($4-1), $5, $6;}' \
> $bedGraph
end
foreach f (*.bedGraph)
hgLoadBed hg18 $f:r $f -bedGraph=4
end
-#Reading hgdpXpehhAfrica.bedGraph
+#Reading hgdpXpehhBantu.bedGraph
#Loaded 636680 elements of size 4
#Reading hgdpXpehhAmericas.bedGraph
#Loaded 636143 elements of size 4
#Reading hgdpXpehhEAsia.bedGraph
@@ -26896,4 +26837,285 @@
#Loaded 636773 elements of size 4
#############################################################################
+# HAPMAP REL22 RECOMBINATION RATES (PHASE II) (DONE 2/24/09 angie)
+ mkdir -p /hive/data/outside/hapmap/recombination/2008-03_rel22_B36/rates
+ cd /hive/data/outside/hapmap/recombination/2008-03_rel22_B36/
+ wget --timestamping \
+ ftp://ftp.hapmap.org/pub/hapmap/public/recombination/2008-03_rel22_B36/00README.txt
+ cd rates
+ wget --timestamping \
+ ftp://ftp.hapmap.org/pub/hapmap/public/recombination/2008-03_rel22_B36/rates/\*
+
+ # Make bedGraph-formatted files.
+ mkdir -p /hive/data/genomes/hg18/bed/hapmap/recombination/2008-03_rel22_B36
+ cd /hive/data/genomes/hg18/bed/hapmap/recombination/2008-03_rel22_B36
+ cp /dev/null hapmapRecombRate.bedGraph
+ foreach f (/hive/data/outside/hapmap/recombination/2008-03_rel22_B36/rates/*.txt)
+ set chr = `echo $f:t:r | sed -e 's/^.*chr/chr/; s/_b36.*//;'`
+ echo $f $chr
+ perl -wpe 's/^position .*\n// && next; \
+ m/^(\d+) (\d+\.?\d*) .*/ || die $_; $end=$1; $rate=$2; \
+ $start=$end-100 unless (defined $start); \
+ $_ = "'$chr'\t$start\t$end\t$rate\n"; $start = $end;' \
+ $f >> hapmapRecombRate.bedGraph
+ end
+ # Some items are over the 10Mbase wiggle item size limit, so use bedGraph.
+ time hgLoadBed hg18 hapmapRecombRate hapmapRecombRate.bedGraph -bedGraph=4
+#Loaded 3281323 elements of size 4
+#14.688u 1.796s 0:31.99 51.4% 0+0k 0+0io 0pf+0w
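+ # (Optional sanity check, a sketch assuming the bedGraph built above:)
+ # verify that the longest item really exceeds the 10Mbase wiggle limit:
+ awk 'BEGIN{max=0;} {if ($3-$2 > max) max = $3-$2;} END{print max;}' \
+ hapmapRecombRate.bedGraph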
+
+ # There are >3M items... try bigWig! :)
+ wigToBigWig hapmapRecombRate.bedGraph /hive/data/genomes/hg18/chrom.sizes \
+ hapmapRecombRate.bw
+ ln -s `pwd`/hapmapRecombRate.bw /gbdb/hg18/bbi/
+ hgsql hg18 -e 'drop table if exists hapmapRecombRateBW; \
+ create table hapmapRecombRateBW (fileName varchar(255) not null); \
+ insert into hapmapRecombRateBW values ("/gbdb/hg18/bbi/hapmapRecombRate.bw");'
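+ # hgTracks locates the .bw file through the single fileName column in
+ # that table. Optional sanity check (a sketch, assuming bigWigInfo is
+ # built from kent/src):
+ bigWigInfo hapmapRecombRate.bw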
+
+
+#############################################################################
+# HAPMAP REL27 GENOTYPES (MERGED PHASE II+III) (DONE 2/25/09 angie)
+ # First, download release to /hive/data/outside...
+ mkdir -p /hive/data/outside/hapmap/genotypes/2009-02_phaseII+III/{excluded,forward}
+ cd /hive/data/outside/hapmap/genotypes/2009-02_phaseII+III
+ wget --timestamping \
+ ftp://ftp.hapmap.org/pub/hapmap/public/genotypes/2009-02_phaseII+III/00README.txt
+ cd excluded
+ wget --timestamping \
+ ftp://ftp.hapmap.org/pub/hapmap/public/genotypes/2009-02_phaseII+III/excluded/\*
+ cd ../forward
+ wget --timestamping \
+ ftp://ftp.hapmap.org/pub/hapmap/public/genotypes/2009-02_phaseII+III/forward/\*
+
+ # This directory's README refers to the README from the
+ # phaseIII-only 2009_01, which gives the file format and explains
+ # the population codes:
+ wget --timestamping -O 00README_2009-01_phaseIII.txt \
+ ftp://ftp.hapmap.org/pub/hapmap/public/genotypes/2009-01_phaseIII/00README.txt
+
+ # For the details page... this is Coriell's NHGRI panel (all HapMap
+ # populations except CEPH):
+ # http://ccr.coriell.org/Sections/Collections/NHGRI/?SsId=11
+ # Broad, BCM and Sanger have a nice phase 3 writeup.  Here is Broad's
+ # copy: http://www.broad.mit.edu/mpg/hapmap3/
+
+ # Now translate those into hapmapSnps* tables.
+ # NOTE FOR NEXT TIME: make this a cluster job -- it takes ~half an hour
+ # per population!  Run the script on each downloaded file as a separate
+ # job, then concatenate the results (or just feed chr*_$pop to hgLoadBed);
+ # see the sketch below.
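+ # A rough sketch of that cluster job (hypothetical wrapper script
+ # translateOnePop.csh that applies the perl below to one input file;
+ # not run this time -- kept here as a suggestion only):
+ #  cd /hive/data/genomes/hg18/bed/hapmap/genotypes/2009-02_phaseII+III
+ #  mkdir out
+ #  cp /dev/null jobList
+ #  foreach f (/hive/data/outside/hapmap/genotypes/2009-02_phaseII+III/forward/genotypes_chr*_r27_nr.b36_fwd.txt.gz)
+ #    echo ./translateOnePop.csh $f \{check out exists out/$f:t:r:r.bed\} >> jobList
+ #  end
+ #  ssh pk, para make jobList, then concatenate out/chr*_$pop.bed per population.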
+ mkdir -p /hive/data/genomes/hg18/bed/hapmap/genotypes/2009-02_phaseII+III
+ cd /hive/data/genomes/hg18/bed/hapmap/genotypes/2009-02_phaseII+III
+ set sourceDir = /hive/data/outside/hapmap/genotypes/2009-02_phaseII+III/forward
+ foreach pop (ASW CEU CHB CHD GIH JPT LWK MEX MKK TSI YRI)
+ echo $pop
+ zcat $sourceDir/genotypes_chr*_${pop}_r27_nr.b36_fwd.txt.gz \
+ | perl -wpe 'chomp; \
+ if (/^rs# alleles c\w+ pos s\w+ a\w+# c\w+ protLSID assayLSID panelLSID QCcode NA/) { \
+ $_ = ""; # skip header lines \
+ } elsif (s/^(rs\d+) ([ACGT])\/([ACGT]) (chr\w+) (\d+) \+ ncbi_[bB]?36 .* QC\+ //) { \
+ ($rsId, $obs1, $obs2, $chr, $end) = ($1, $2, $3, $4, $5); \
+ %compl = (A=>"T", C=>"G", G=>"C", T=>"A"); \
+ %hom = (); %het = (); \
+ # NOTE: warn when we hit the one trouble-maker rsId (tweaked below): \
+ if ($rsId eq "rs7059622" && "'$pop'" eq "YRI") { warn "Tweaking YRI rs7059622.\n"; } \
+ foreach my $al (split()) { \
+ next if ($al eq "NN"); \
+ $al =~ /^([ACGT])([ACGT])$/ || die "Unrecognized allele string $al"; \
+ ($a1, $a2) = ($1, $2); \
+ # NOTE: one trouble-maker (other pop files have A/C with AC genotypes): \
+ if ($rsId eq "rs7059622" && "'$pop'" eq "YRI") \
+ { $a1 = $compl{$a1}; $a2 = $compl{$a2}; } \
+ # The error that the trouble-maker triggered: \
+ if (($a1 !~ /^[$obs1$obs2]$/) || ($a2 !~ /^[$obs1$obs2]$/)) \
+ { die "$rsId (${chr}_'$pop'): obs $obs1/$obs2 !~ $a1$a2!\n\t"; } \
+ if ($a1 eq $a2) { $hom{$a1}++; } else { $het{$a1}++; $het{$a2}++; } \
+ } \
+ $start = $end - 1; \
+ $hom1 = $hom{$obs1} || 0; $hom2 = $hom{$obs2} || 0; \
+ $het = $het{$obs1} || 0; $het2 = $het{$obs2} || 0; \
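+ # Allele counts: 2 per homozygote, 1 per heterozygote; score is \
+ # 1000 * frequency of $obs2, folded below 500 => minor allele freq: \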
+ $score = (1000 * (2*$hom2 + $het) / (2*($hom1 + $hom2 + $het))); \
+ if ($score >= 500) { $score = 1000 - $score; } \
+ $score = int($score + 0.5); \
+ if ($het != $het2) { die "het{$obs1} ($het{$obs1}) != het{$obs2} ($het{$obs2})"; } \
+ $_ = "$chr\t$start\t$end\t$rsId\t$score\t+\t$obs1/$obs2\t$obs1\t$hom1\t$obs2\t$hom2\t$het\n"; \
+ } else { \
+ die "Unrecognized format:\n$_\n\t"; \
+ }' > hapmapSnps$pop.bed
+ end
+ wc -l hapmapSnps*.bed
+# 1561453 hapmapSnpsASW.bed
+# 4030774 hapmapSnpsCEU.bed
+# 4052336 hapmapSnpsCHB.bed
+# 1306196 hapmapSnpsCHD.bed
+# 1407877 hapmapSnpsGIH.bed
+# 4052423 hapmapSnpsJPT.bed
+# 1529764 hapmapSnpsLWK.bed
+# 1410265 hapmapSnpsMEX.bed
+# 1537638 hapmapSnpsMKK.bed
+# 1419921 hapmapSnpsTSI.bed
+# 3984356 hapmapSnpsYRI.bed
+ foreach pop (ASW CEU CHB CHD GIH JPT LWK MEX MKK TSI YRI)
+ hgLoadBed hg18 hapmapSnps$pop hapmapSnps$pop.bed -renameSqlTable \
+ -sqlTable=$HOME/kent/src/hg/lib/hapmapSnps.sql
+ end
+#Reading hapmapSnpsASW.bed
+#Loaded 1561453 elements of size 12
+#Reading hapmapSnpsCEU.bed
+#Loaded 4030774 elements of size 12
+#Reading hapmapSnpsCHB.bed
+#Loaded 4052336 elements of size 12
+#Reading hapmapSnpsCHD.bed
+#Loaded 1306196 elements of size 12
+#Reading hapmapSnpsGIH.bed
+#Loaded 1407877 elements of size 12
+#Reading hapmapSnpsJPT.bed
+#Loaded 4052423 elements of size 12
+#Reading hapmapSnpsLWK.bed
+#Loaded 1529764 elements of size 12
+#Reading hapmapSnpsMEX.bed
+#Loaded 1410265 elements of size 12
+#Reading hapmapSnpsMKK.bed
+#Loaded 1537638 elements of size 12
+#Reading hapmapSnpsTSI.bed
+#Loaded 1419921 elements of size 12
+#Reading hapmapSnpsYRI.bed
+#Loaded 3984356 elements of size 12
+ rm bed.tab; nice gzip *.bed
+
+
+#############################################################################
+# HAPMAP REL27 ORTHOLOGOUS ALLELES (DONE 3/4/09 angie)
+ # Similar procedure to snp129Ortho, but we make one table per species
+ # because they are independent subtracks of HapMap SNPs.
+ cd /hive/data/genomes/hg18/bed/hapmap/genotypes/2009-02_phaseII+III
+ # Glom all human info that we need for the final table onto the
+ # name, to sneak it through liftOver: rsId|chr|start|end|obs|strand
+ # (The per-population beds were gzipped above, so read them via zcat.)
+ zcat hapmapSnps???.bed.gz \
+ | awk 'BEGIN{OFS="\t";} \
+ {print $1, $2, $3, \
+ $4 "|" $1 "|" $2 "|" $3 "|" $7 "|" $6, \
+ 0, $6;}' \
+ | sort -u -k1,1 -k2n,2n \
+ > hapmapSnpsForLiftOver.bed
+ wc -l hapmapSnpsForLiftOver.bed
+#4165831 hapmapSnpsForLiftOver.bed
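+ # For illustration, a (hypothetical) record in that file looks like:
+ # chr1  1234567  1234568  rs9999999|chr1|1234567|1234568|A/G|+  0  +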
+
+ # Orthologous allele locations:
+ mkdir run.liftOChimp
+ cd run.liftOChimp
+ mkdir split out
+ splitFile ../hapmapSnpsForLiftOver.bed 25000 split/chunk
+ cp /dev/null jobList
+ foreach f (split/chunk*)
+ echo liftOver $f \
+ /hive/data/genomes/hg18/bed/liftOver/hg18ToPanTro2.over.chain.gz \
+ \{check out exists out/panTro2.$f:t.bed\} out/hg18.$f:t.unmapped \
+ >> jobList
+ end
+ ssh pk
+ cd /hive/data/genomes/hg18/bed/hapmap/genotypes/2009-02_phaseII+III/run.liftOChimp
+ para make jobList
+#Completed: 167 of 167 jobs
+#CPU time in finished jobs: 31364s 522.74m 8.71h 0.36d 0.001 y
+#IO & Wait Time: 800s 13.33m 0.22h 0.01d 0.000 y
+#Average job time: 193s 3.21m 0.05h 0.00d
+#Longest finished job: 431s 7.18m 0.12h 0.00d
+#Submission to last job: 442s 7.37m 0.12h 0.01d
+ mkdir ../run.liftOMac
+ cd ../run.liftOMac
+ mkdir out
+ ln -s ../run.liftOChimp/split .
+ cp /dev/null jobList
+ foreach f (split/chunk*)
+ echo liftOver $f \
+ /hive/data/genomes/hg18/bed/liftOver/hg18ToRheMac2.over.chain.gz \
+ \{check out exists out/rheMac2.$f:t.bed\} out/hg18.$f:t.unmapped \
+ >> jobList
+ end
+ para make jobList
+#Completed: 167 of 167 jobs
+#CPU time in finished jobs: 2482s 41.36m 0.69h 0.03d 0.000 y
+#IO & Wait Time: 1361s 22.69m 0.38h 0.02d 0.000 y
+#Average job time: 23s 0.38m 0.01h 0.00d
+#Longest finished job: 33s 0.55m 0.01h 0.00d
+#Submission to last job: 97s 1.62m 0.03h 0.00d
+
+ # Concatenate the liftOver results, sorting by ortho pos in order to
+ # efficiently access 2bit sequence in getOrthoSeq.  getOrthoSeq.pl prints
+ # nameGlom, oChr, oStart, oEnd, oAllele, oStrand; the awk below swizzles
+ # that so a glom of the ortho coords (oChr:oStart:oEnd) is the first
+ # column, and then we sort by that for joining with base quality info.
+ # Ditto for macaque. ~5 minutes per species:
+ cd /hive/data/genomes/hg18/bed/hapmap/genotypes/2009-02_phaseII+III
+ sort -k1,1 -k2n,2n run.liftOChimp/out/panTro2.chunk*.bed \
+ | ~/kent/src/hg/snp/snpLoad/getOrthoSeq.pl /hive/data/genomes/panTro2/panTro2.2bit \
+ | awk 'BEGIN{OFS="\t";} {print $2 ":" $3 ":" $4, $5, $6, $1;}' \
+ | sort > panTro2.orthoGlom.txt
+ sort -k1,1 -k2n,2n run.liftOMac/out/rheMac2.chunk*.bed \
+ | ~/kent/src/hg/snp/snpLoad/getOrthoSeq.pl /hive/data/genomes/rheMac2/rheMac2.2bit \
+ | awk 'BEGIN{OFS="\t";} {print $2 ":" $3 ":" $4, $5, $6, $1;}' \
+ | sort > rheMac2.orthoGlom.txt
+ wc -l panTro2.orthoGlom.txt rheMac2.orthoGlom.txt
+# 4057739 panTro2.orthoGlom.txt
+# 3750076 rheMac2.orthoGlom.txt
+ # Get base qualities -- ~12-16min per species.
+ cut -f 1 panTro2.orthoGlom.txt | sed -e 's/:/\t/g' \
+ | hgWiggle -db=panTro2 -lift=1 -doAscii -bedFile=stdin quality \
+ | varStepToBedGraph.pl stdin \
+ | awk 'BEGIN{OFS="\t";} {print $1 ":" $2 ":" $3, int($4+0.5);}' \
+ | sort > panTro2.baseQuals.txt
+#Processed 4003968 lines input, 4003685 data lines, 47 variable step declarations
+ cut -f 1 rheMac2.orthoGlom.txt | sed -e 's/:/\t/g' \
+ | hgWiggle -db=rheMac2 -lift=1 -doAscii -bedFile=stdin quality \
+ | varStepToBedGraph.pl stdin \
+ | awk 'BEGIN{OFS="\t";} {print $1 ":" $2 ":" $3, int($4+0.5);}' \
+ | sort > rheMac2.baseQuals.txt
+#Processed 3749772 lines input, 3749645 data lines, 21 variable step declarations
+
+ # Join the allele-glom with the base qual-glom and swizzle columns into
+ # the right order for a hapmapAllelesOrtho table.
+ join -a 1 -e 0 panTro2.orthoGlom.txt panTro2.baseQuals.txt \
+ | perl -wpe 'chomp; ($oG, $oA, $oStr, $hG, $bQ) = split; \
+ ($oC, $oS, $oE) = split(":", $oG); \
+ ($rs, $hC, $hS, $hE, $hO, $hStr) = split(/\|/, $hG); \
+ unless (defined $bQ) { \
+ if ($oC =~ /^chr(21|Y|Y_random)$/) { $bQ = 98; } # per panTro2 quality track desc \
+ elsif ($oC eq "chrM") { $bQ = 0; } \
+ else { die "missing qual for $oC: $_\n\t"; } } \
+ $_ = "$hC\t$hS\t$hE\t$rs\t$bQ\t$hStr\t\t$hO\t$oC\t$oS\t$oE\t$oStr\t$oA\n";' \
+ | sort -k1,1 -k2n,2n \
+ > hapmapAllelesChimp.bed
+ wc -l hapmapAllelesChimp.bed
+#4057739 hapmapAllelesChimp.bed
+ join -a 1 -e 0 rheMac2.orthoGlom.txt rheMac2.baseQuals.txt \
+ | perl -wpe 'chomp; ($oG, $oA, $oStr, $hG, $bQ) = split; \
+ ($oC, $oS, $oE) = split(":", $oG); \
+ ($rs, $hC, $hS, $hE, $hO, $hStr) = split(/\|/, $hG); \
+ unless (defined $bQ) { die "missing qual for $oC: $_\n\t"; } \
+ $_ = "$hC\t$hS\t$hE\t$rs\t$bQ\t$hStr\t\t$hO\t$oC\t$oS\t$oE\t$oStr\t$oA\n";' \
+ | sort -k1,1 -k2n,2n \
+ > hapmapAllelesMacaque.bed
+ wc -l hapmapAllelesMacaque.bed
+#3750076 hapmapAllelesMacaque.bed
+
+ # Load tables.
+ cd /hive/data/genomes/hg18/bed/hapmap/genotypes/2009-02_phaseII+III
+ hgLoadBed hg18 hapmapAllelesChimp hapmapAllelesChimp.bed \
+ -tab -renameSqlTable -sqlTable=$HOME/kent/src/hg/lib/hapmapAllelesOrtho.sql
+#Loaded 4057739 elements of size 13
+ hgLoadBed hg18 hapmapAllelesMacaque hapmapAllelesMacaque.bed \
+ -tab -renameSqlTable -sqlTable=$HOME/kent/src/hg/lib/hapmapAllelesOrtho.sql
+
+
+#############################################################################
+# HAPMAP REL27 SUMMARY FOR HGTRACKS FILTERING (DONE 3/5/09 angie)
+ cd /hive/data/genomes/hg18/bed/hapmap/genotypes/2009-02_phaseII+III
+ time hapmapPhaseIIISummary .
+#115.244u 5.009s 2:10.08 92.4% 0+0k 0+0io 2pf+0w
+ time hgLoadBed hg18 hapmapPhaseIIISummary hapmapPhaseIIISummary.bed \
+ -tab -renameSqlTable -sqlTable=$HOME/kent/src/hg/lib/hapmapPhaseIIISummary.sql
+#Loaded 4166007 elements of size 18
+#33.401u 3.275s 1:46.95 34.2% 0+0k 0+0io 0pf+0w
+
+
+#############################################################################