src/hg/makeDb/doc/hg19.txt 1.57
1.57 2009/11/07 17:35:48 hartera
Re-loaded segmental duplications data after correcting chromosome names.
Index: src/hg/makeDb/doc/hg19.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/hg19.txt,v
retrieving revision 1.56
retrieving revision 1.57
diff -b -B -U 4 -r1.56 -r1.57
--- src/hg/makeDb/doc/hg19.txt 5 Nov 2009 00:33:06 -0000 1.56
+++ src/hg/makeDb/doc/hg19.txt 7 Nov 2009 17:35:48 -0000 1.57
@@ -7866,10 +7866,12 @@
######################
############################################################################
# SEGMENTAL DUPLICATIONS (2009-10-31 and 2009-11-04, hartera, DONE)
- # File emailed from Tin Louie <tinlouie at u.washington.edu>
- # in Evan Eichler's lab.
+# File emailed from Tin Louie <tinlouie at u.washington.edu>
+# in Evan Eichler's lab.
+# (2009-11-07, hartera, DONE) Re-loaded table after correcting some chromosome
+# names.
mkdir /hive/data/genomes/hg19/bed/genomicSuperDups
cd /hive/data/genomes/hg19/bed/genomicSuperDups
wget ftp://mesh.gs.washington.edu/pub/UCSC/hg19genomicSuperDups.gz
@@ -7890,5 +7892,43 @@
# Saving bed.tab
# Loading hg19
# Updated details page with suggested text and an additional reference.
# src/hg/makeDb/trackDb/genomicSuperDups.html
+ # (2009-11-07, hartera)
+ # Fix incorrect chromosomes in data.
+ # The script, runCheckTableCoordsDayOld, found this error in the table:
+ # hg19.genomicSuperDups has 28 records with chrom not described in
+ # chromInfo.
+ awk '{print $1}' hg19genomicSuperDups | sort | uniq > chroms
+ awk '{print $7}' hg19genomicSuperDups | sort | uniq > otherChroms
+ # Found several cases where the final letter of "random" is missing from
+ # the random contig names. These contigs are not concatenated into a
+ # random chrom for hg19.
+ hgsql -Ne 'select chrom from chromInfo;' hg19 | sort | uniq \
+ > chroms.chromInfo
+ # These are the contig names for random chroms that need to be corrected:
+ grep -v random chroms | grep rando | sort | uniq > chromsToCorrect
+ grep -v random otherChroms | grep rando | sort | uniq > otherChromsToCorrect
+ diff chromsToCorrect otherChromsToCorrect
+ # No difference so use chromsToCorrect file.
+ cp hg19genomicSuperDups hg19genomicSuperDups.orig
+ foreach c (`cat chromsToCorrect`)
+ perl -pi.bak -e "s/${c}/${c}m/g" hg19genomicSuperDups
+ end
+ # Check chroms and otherChroms again.
+ awk '{print $1}' hg19genomicSuperDups | sort | uniq > chromsNew
+ awk '{print $7}' hg19genomicSuperDups | sort | uniq > otherChromsNew
+ # These look good now.
+ # Drop the old table and re-load the corrected data.
+ hgsql -e 'drop table genomicSuperDups;' hg19
+
+ sed -e 's/\t_\t/\t-\t/' hg19genomicSuperDups \
+ | awk '($3 - $2) >= 1000 && ($9 - $8) >= 1000 {print;}' \
+ | hgLoadBed hg19 genomicSuperDups stdin \
+ -sqlTable=$HOME/kent/src/hg/lib/genomicSuperDups.sql
+# Reading stdin
+# Loaded 63463 elements of size 29
+# Sorted
+# Creating table definition for genomicSuperDups
+# Saving bed.tab
+# Loading hg19