src/hg/makeDb/doc/hg18.txt 1.411
1.411 2010/04/02 17:26:05 angie
Data update for gwasCatalog.
Index: src/hg/makeDb/doc/hg18.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/hg18.txt,v
retrieving revision 1.410
retrieving revision 1.411
diff -b -B -U 4 -r1.410 -r1.411
--- src/hg/makeDb/doc/hg18.txt 1 Apr 2010 16:37:14 -0000 1.410
+++ src/hg/makeDb/doc/hg18.txt 2 Apr 2010 17:26:05 -0000 1.411
@@ -29566,9 +29566,10 @@
# load the table
hgLoadBed -allowStartEqualEnd hg18 snpArrayIlluminaHumanOmni1_Quad snpArrayIlluminaHumanOmni1_Quad.tab -tab -sqlTable=snpArrayIlluminaHumanOmni1_Quad.sql
#############################################################################
-# NHGRI GWAS CATALOG (DONE 3/1/10)
+# NHGRI GWAS CATALOG (DONE 4/1/10)
+# Updated 3/1/10
# Originally done 1/19/10
# Area of possible future improvement: for SNPs that can't be mapped via our SNP track,
# could some of them be obsolete IDs that have been merged into current IDs?
mkdir /hive/data/genomes/hg18/bed/gwasCatalog
@@ -29576,10 +29577,10 @@
# Done once, don't need to redo:
cut -f 1-4 ../snp130/snp130.bed \
| sort -k4,4 \
> snp130Coords.bed
- mkdir /hive/data/genomes/hg18/bed/gwasCatalog/100301
- cd /hive/data/genomes/hg18/bed/gwasCatalog/100301
+ mkdir /hive/data/genomes/hg18/bed/gwasCatalog/100401
+ cd /hive/data/genomes/hg18/bed/gwasCatalog/100401
wget http://www.genome.gov/admin/gwascatalog.txt
# Column headers:
# 1 Date Added to Catalog
# 2 PubMedID
@@ -29615,11 +29616,8 @@
perl -we 'while (<>) { \
next if (/^\s*$/); \
@w = split("\t"); \
next if ($w[13] !~ /^rs\d+/); \
- if ($w[1] eq "" && $w[2] eq "Marroni" && $w[6] =~ /EUROSPAN Project$/) { \
- $w[1] = 20031603; \
- } \
if ($w[3] =~ /^(\d+)\/(\d+)\/(\d+)$/) { # transform to mysql DATE \
($month, $day, $year) = ($1, $2, $3); \
$w[3] = "$year-$month-$day"; \
} else { die "Cant parse date ($w[3])\t" } \
@@ -29641,9 +29639,9 @@
| sort -k1,1 -k2n,2n \
> gwasCatalog.bed
hgLoadBed hg18 gwasCatalog gwasCatalog.bed \
-tab -sqlTable=$HOME/kent/src/hg/lib/gwasCatalog.sql -notItemRgb -allowStartEqualEnd
-#Loaded 2930 elements of size 22
+#Loaded 3051 elements of size 22
# For David: find examples of risk alleles for which dbSNP observed
# alleles are complementary (A/T or C/G) -- how do we know what strand the
# risk allele is on? -- asking corresponding author Teri Manolio.
@@ -29651,8 +29649,9 @@
from gwasCatalog as gc, snp130 as snp \
where gc.riskAllele rlike "^rs[0-9]+-[ACGT]" and \
gc.name = snp.name and snp.observed in ("C/G", "A/T") \
order by gc.name limit 20;'
+ # count(*) = 150
#############################################################################
# CRG MAPABILITY (2010-01-19 - 2010-01-28, hartera, DONE)