src/hg/makeDb/doc/hg19.txt 1.56

1.56 2009/11/05 00:33:06 hartera
Added segmental duplications provided by Eichler lab.
Index: src/hg/makeDb/doc/hg19.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/hg19.txt,v
retrieving revision 1.55
retrieving revision 1.56
diff -b -B -U 4 -r1.55 -r1.56
--- src/hg/makeDb/doc/hg19.txt	4 Nov 2009 04:17:05 -0000	1.55
+++ src/hg/makeDb/doc/hg19.txt	5 Nov 2009 00:33:06 -0000	1.56
@@ -7864,4 +7864,31 @@
     ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
     ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
 
 ######################
+############################################################################
+# SEGMENTAL DUPLICATIONS (2009-10-31 and 2009-11-04, hartera, DONE)
+    # File emailed by Tin Louie <tinlouie at u.washington.edu>
+    # of Evan Eichler's lab.
+    mkdir /hive/data/genomes/hg19/bed/genomicSuperDups
+    cd /hive/data/genomes/hg19/bed/genomicSuperDups
+   
+    wget ftp://mesh.gs.washington.edu/pub/UCSC/hg19genomicSuperDups.gz
+    gunzip hg19genomicSuperDups.gz
+    # The sed command is necessary to replace "_" with "-" where "_" was
+    # used as the strand.
+    # The awk command keeps only rows where both ($3 - $2) and ($9 - $8)
+    # are at least 1000.  It was needed for recent genomicSuperDups builds
+    # on other species, which contained some too-short regions.  It does
+    # not seem to be necessary here, but it does no harm and may be useful
+    # in future builds.
+    sed -e 's/\t_\t/\t-\t/' hg19genomicSuperDups \
+    | awk '($3 - $2) >= 1000 && ($9 - $8) >= 1000 {print;}' \
+    | hgLoadBed hg19 genomicSuperDups stdin \
+      -sqlTable=$HOME/kent/src/hg/lib/genomicSuperDups.sql
+# Reading stdin
+# Loaded 63463 elements of size 29
+# Sorted
+# Creating table definition for genomicSuperDups
+# Saving bed.tab
+# Loading hg19
+    # Updated details page with suggested text and an additional reference. 
+    # src/hg/makeDb/trackDb/genomicSuperDups.html
+