src/hg/makeDb/doc/hg18.txt 1.377
1.377 2009/08/18 22:47:49 angie
Updated snp130 with corrected functional annotations from dbSNP.
Index: src/hg/makeDb/doc/hg18.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/hg18.txt,v
retrieving revision 1.376
retrieving revision 1.377
diff -b -B -U 4 -r1.376 -r1.377
--- src/hg/makeDb/doc/hg18.txt 12 Aug 2009 18:00:02 -0000 1.376
+++ src/hg/makeDb/doc/hg18.txt 18 Aug 2009 22:47:49 -0000 1.377
@@ -22359,9 +22359,10 @@
rm -r run*/split tmp.txt *.orthoGlom.txt
############################################################################
-# dbSNP BUILD 130 (DONE 5/22/09 angie)
+# dbSNP BUILD 130 (UPDATED 8/18/09 angie)
+# Originally done 5/22/09.
# Set up build directory
mkdir -p /hive/data/outside/dbSNP/130/{human,shared}
# Get field encodings -- if there are changes or additions to the
@@ -22681,8 +22682,35 @@
# 4 RefAlleleMismatch
#TODO: go through those above and send some bug reports to dbSNP.
+ # 8/18/09: dbSNP announced a correction to some functional class
+ # annotations (for mRNAs on the - strand, near-gene-3 and near-gene-5 were swapped).
+ cd /hive/data/outside/dbSNP/130/human
+ # This is a list of affected rs IDs, genes, old funcs and new funcs:
+ wget ftp://ftp.ncbi.nih.gov/snp/organisms/human_9606/database/organism_data/b130_update/b130_SNPContigLocusId_36_3_functionClass_13_15_fix.txt
+ wc -l b130_SNPContigLocusId_36_3_functionClass_13_15_fix.txt
+#163147 b130_SNPContigLocusId_36_3_functionClass_13_15_fix.txt
+ # The first 19 lines are the header, leaving 163128 data lines.
+
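+ # Use the info in that file to make a series of sql update commands (one per data line; numeric old/new function codes are mapped to names, and perl dies on an unmapped code):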
+ tail -n +20 b130_SNPContigLocusId_36_3_functionClass_13_15_fix.txt \
+ | perl -we '$fns[6]="intron"; $fns[13]="near-gene-3"; $fns[15]="near-gene-5"; \
+ $fns[41]="nonsense"; $fns[42]="missense"; \
+ $fns[53]="untranslated-3"; $fns[55]="untranslated-5"; \
+ while (<>) { \
+ ($rs,undef,undef,$old,undef,$new) = split(","); \
+ $oldF = $fns[$old]; $newF = $fns[$new]; die if (!(defined $oldF && defined $newF)); \
+ print "UPDATE snp130 set func=(REPLACE(func,\"$oldF\",\"$newF\")) where name=\"rs$rs\";\n"; \
+ }' \
+ > snp130.func_13_15_fix.sql
+ wc -l snp130.func_13_15_fix.sql
+#163128 snp130.func_13_15_fix.sql
+ hgsql hg18 < snp130.func_13_15_fix.sql
+ # The number of rows changed has to be smaller than the number of UPDATE
+ # statements because some of those replacements are for annotations relative
+ # to a different assembly; we have func=unknown for those. E.g. rs437678.
+
#######################################################################
# ORTHOLOGOUS ALLELES IN CHIMP AND MACAQUE FOR SNP130 (DONE 5/15/09 angie)
mkdir /hive/data/genomes/hg18/bed/snp130Ortho