src/hg/makeDb/doc/hg18.txt 1.376

1.376 2009/08/12 18:00:02 angie
DGV updated to v8; changed the track color scheme to match the DGV browser's.
Index: src/hg/makeDb/doc/hg18.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/hg18.txt,v
retrieving revision 1.375
retrieving revision 1.376
diff -b -B -U 4 -r1.375 -r1.376
--- src/hg/makeDb/doc/hg18.txt	4 Aug 2009 21:41:12 -0000	1.375
+++ src/hg/makeDb/doc/hg18.txt	12 Aug 2009 18:00:02 -0000	1.376
@@ -21253,44 +21253,49 @@
     hgLoadGenePred -genePredExt hg18 vegaPseudoGene pseudo.gp
 
 
 ############################################################################
-# DGV V7 (DATABASE OF GENOMIC VARIANTS) (DONE 3/11/09 angie)
+# DGV V8 (DATABASE OF GENOMIC VARIANTS) (DONE 8/12/09 angie)
+# DGV V7 done 3/11/09
 # DGV V6 thin regions dropped 2/23/09
 # DGV V6 with useless thin regions done 11/12/08
 # DGV V5 done 7/16/08
 # DGV V4 done 5/9/08
     ssh hgwdev
-    mkdir /hive/data/genomes/hg18/bed/dgv.v7
-    cd /hive/data/genomes/hg18/bed/dgv.v7
+    mkdir /hive/data/genomes/hg18/bed/dgv.v8
+    cd /hive/data/genomes/hg18/bed/dgv.v8
     wget --timestamping \
-      http://projects.tcag.ca/variation/downloads/variation.hg18.v7.mar.2009.txt
+      http://projects.tcag.ca/variation/downloads/variation.hg18.v8.aug.2009.txt
     wget --timestamping \
-      http://projects.tcag.ca/variation/downloads/indel.hg18.v7.mar.2009.txt
+      http://projects.tcag.ca/variation/downloads/indel.hg18.v8.aug.2009.txt
+    # Save previous version for comparison:
+    hgsql hg18 -e 'rename table dgv to dgvV7'
     # shuffle fields into bed8+
-    foreach f (*.v7.*.txt)
+    foreach f (*.v8.*.txt)
       tail -n +2 $f \
       | perl -wpe 'chomp; \
         ($id, $landmark, $chr, $start, $end, $varType, \
          undef, undef, undef, $ref, $pmid, $method, \
-         undef, undef, undef, undef, $sample) = split("\t"); \
+         $gain, $loss, undef, undef, $sample) = split("\t"); \
         $id =~ s/^Variation_//; \
-        $start--;  \
+        $start-- unless ($start == 0); \
         $landmark = "" if ($landmark =~ /^chr.*\d\.\.\d/); \
-        $rgb = "255,128,0"; \
-        $rgb = "200,0,0" if ($varType =~ /^Inv/); \
-        $rgb = "0,100,0" if ($varType eq "InDel"); \
+        $rgb = ($varType =~ /^Inv/) ? "100,0,100" : "0,200,0"; \
+        if ($gain ne "" || $loss ne "") { \
+          $gain =~ s/^(NA)? ?$/0/;  $loss =~ s/^(NA)? ?$/0/; \
+          $rgb = "200,0,0" if ($gain > 0 && $loss == 0); \
+          $rgb = "0,0,200" if ($loss > 0 && $gain == 0); \
+        } \
         $_ = join("\t", $chr, $start, $end, $id, 0, "+", \
-                  $start, $end, $rgb, $landmark, $varType, \
+                  $start, $start, $rgb, $landmark, $varType, \
                   $ref, $pmid, $method, $sample) . "\n";' \
           > $f:r.bed
     end
-    hgsql hg18 -e 'rename table dgv to dgvV6'
     hgLoadBed hg18 dgv *.bed \
-      -onServer -sqlTable=$HOME/kent/src/hg/lib/dgv.sql -tab
-#Loaded 38406 elements of size 15
+      -sqlTable=$HOME/kent/src/hg/lib/dgv.sql -tab
+#Loaded 49988 elements of size 15
       hgsql hg18 -NBe 'select count(distinct(pubMedId)) from dgv;'
-#31
+#35
 
 
 ############################################################################
 # AGILENT CGH PROBES (AND MM8, RN4) (Done 2008-05-13, Andy)