src/hg/makeDb/doc/hg19.txt 1.56
1.56 2009/11/05 00:33:06 hartera
Added segmental duplications provided by Eichler lab.
Index: src/hg/makeDb/doc/hg19.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/hg19.txt,v
retrieving revision 1.55
retrieving revision 1.56
diff -b -B -U 4 -r1.55 -r1.56
--- src/hg/makeDb/doc/hg19.txt 4 Nov 2009 04:17:05 -0000 1.55
+++ src/hg/makeDb/doc/hg19.txt 5 Nov 2009 00:33:06 -0000 1.56
@@ -7864,4 +7864,31 @@
ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz
ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz
######################
+############################################################################
+# SEGMENTAL DUPLICATIONS (2009-10-31 and 2009-11-04, hartera, DONE)
+ # File emailed from Tin Louie <tinlouie at u.washington.edu>
+ # in Evan Eichler's lab.
+ mkdir /hive/data/genomes/hg19/bed/genomicSuperDups
+ cd /hive/data/genomes/hg19/bed/genomicSuperDups
+
+ wget ftp://mesh.gs.washington.edu/pub/UCSC/hg19genomicSuperDups.gz
+ gunzip hg19genomicSuperDups.gz
+ # The sed command is necessary to replace "_" used as strand with "-".
+ # The awk command was needed for genomicSuperDups files recently built
+ # for other species, which had some too-short (< 1000 base) regions.
+ # It does not seem to be necessary here, but it doesn't hurt and may
+ # be useful in future builds.
+ sed -e 's/\t_\t/\t-\t/' hg19genomicSuperDups \
+ | awk '($3 - $2) >= 1000 && ($9 - $8) >= 1000 {print;}' \
+ | hgLoadBed hg19 genomicSuperDups stdin \
+ -sqlTable=$HOME/kent/src/hg/lib/genomicSuperDups.sql
+# Reading stdin
+# Loaded 63463 elements of size 29
+# Sorted
+# Creating table definition for genomicSuperDups
+# Saving bed.tab
+# Loading hg19
+ # Updated details page with suggested text and an additional reference.
+ # src/hg/makeDb/trackDb/genomicSuperDups.html
+