src/hg/makeDb/doc/hg19.txt 1.37
1.37 2009/09/02 21:47:28 angie
Added snp130CodingDbSnp (used dbSNP's provisional SNP coord mapping from hg18).
Index: src/hg/makeDb/doc/hg19.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/hg19.txt,v
retrieving revision 1.36
retrieving revision 1.37
diff -b -B -U 4 -r1.36 -r1.37
--- src/hg/makeDb/doc/hg19.txt 1 Sep 2009 04:27:30 -0000 1.36
+++ src/hg/makeDb/doc/hg19.txt 2 Sep 2009 21:47:28 -0000 1.37
@@ -5793,4 +5793,58 @@
rm -r run*/split tmp.txt *.orthoGlom.txt
##############################################################################
+# DBSNP CODING ANNOTATIONS (DONE 9/1/09 angie)
+
+ # Repeat the coord-remapping performed for snp130 on the hg18 coding anno table.
+ cd /hive/data/outside/dbSNP/130/human/hg19
+ sed -re 's/\trs([0-9]+)\t/\t\1\t/' ../snp130CodingDbSnp.bed \
+ | sort -k4n,4n -k1,1 -k2n,2n > /data/tmp/hg18.snp130Coding.idSorted.bed
+ # reuse /data/tmp/Remap_36_3_37_1.txt mapping file created for snp130 above:
+ perl -we \
+ 'use strict; \
+ # nextMap: read rows of the mapping file (<>) until one is usable and return \
+ # (rsId, oldChr, oldStart, oldEnd, newChr, newStart, newEnd); rsId is undef at EOF. \
+ # Columns 2 and 6-9 of the mapping file are ignored.  Rows whose decremented old end \
+ # is negative, or whose new chrom is "chrUn", are skipped. \
+ sub nextMap { \
+ my ($line, $rsId, $oChr, $oStart, $oEnd, $nChr, $nStart, $nEnd); \
+ do { \
+ $line = <>; \
+ $line = "" unless (defined $line); # at EOF split an empty string: no fields, no warning \
+ ($rsId, undef, $oChr, $oStart, $oEnd, undef,undef,undef,undef, \
+ $nChr, $nStart, $nEnd) = split("\t", $line); \
+ if (defined $nEnd) { \
+ $oChr = "chr$oChr"; $nChr = "chr$nChr"; \
+ } \
+ $oStart--; $oEnd--; $nStart--; $nEnd--; # Yep. 0-based closed vs 1-based closed \
+ } while (defined $rsId && ($oEnd < 0 || (defined $nChr && $nChr eq "chrUn"))); \
+ return ($rsId, $oChr, $oStart, $oEnd, $nChr, $nStart, $nEnd); \
+ } # nextMap \
+ my ($rsId, $oChr, $oStart, $oEnd, $nChr, $nStart, $nEnd) = nextMap(); \
+ # Initialize all three counters; assigning a bare 0 to the list only sets the first one. \
+ my ($rCount, $oCount, $tCount) = (0, 0, 0); \
+ open(my $oldF, "<", "/data/tmp/hg18.snp130Coding.idSorted.bed") \
+ || die "Cannot open /data/tmp/hg18.snp130Coding.idSorted.bed"; \
+ # Read line by line so that EOF ends the loop without handing undef to split. \
+ while (my $bedLine = <$oldF>) { \
+ my ($chr, $s, $e, $id, $tx, $frm, $alCount, $funcs, $als, $codons, $peps) = \
+ split("\t", $bedLine); \
+ my $thisRCount = 0; \
+ # Skip mapping rows for lower rs IDs (assumes both inputs are in numeric rs ID order). \
+ while (defined $rsId && $rsId < $id) { \
+ ($rsId, $oChr, $oStart, $oEnd, $nChr, $nStart, $nEnd) = nextMap(); \
+ } \
+ # Emit one output row per consecutive mapping row that matches this ID and old position. \
+ # No chomp is done above, so the last input field presumably supplies the line ending. \
+ while (defined $oChr && $chr eq $oChr && $s == $oStart && $e == $oEnd && $id == $rsId) { \
+ print join("\t", $nChr, $nStart, $nEnd, "rs$id", $tx, $frm, \
+ $alCount, $funcs, $als, $codons, $peps); \
+ ($rsId, $oChr, $oStart, $oEnd, $nChr, $nStart, $nEnd) = nextMap(); \
+ $thisRCount++; \
+ } \
+ $tCount += $thisRCount; \
+ $rCount++ if ($thisRCount > 0); \
+ $oCount++; \
+ } \
+ close($oldF); print STDERR "Replaced $rCount of $oCount inputs ($tCount outputs).\n";' \
+ /data/tmp/Remap_36_3_37_1.txt \
+ | sort -k1,1 -k2n,2n -k4,4 \
+ > /data/tmp/hg19.snp130Coding.bed
+#Replaced 197921 of 279815 inputs (198493 outputs).
+#160.824u 1.949s 2:43.01 99.8% 0+0k 0+0io 0pf+0w
+ hgLoadBed hg19 snp130CodingDbSnp -sqlTable=$HOME/kent/src/hg/lib/snp125Coding.sql \
+ -renameSqlTable -tab -notItemRgb -allowStartEqualEnd \
+ /data/tmp/hg19.snp130Coding.bed
+#Loaded 198493 elements of size 11
+ mv /data/tmp/hg19.snp130Coding.bed hg19.snp130CodingDbSnp.bed
+
+
+##############################################################################