src/hg/makeDb/doc/hg17.txt 1.119

1.119 2009/02/23 23:41:35 angie
Remove thin tails from DGV (SAB feedback).
Index: src/hg/makeDb/doc/hg17.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/hg17.txt,v
retrieving revision 1.118
retrieving revision 1.119
diff -b -B -U 4 -r1.118 -r1.119
--- src/hg/makeDb/doc/hg17.txt	9 Jan 2009 18:24:18 -0000	1.118
+++ src/hg/makeDb/doc/hg17.txt	23 Feb 2009 23:41:35 -0000	1.119
@@ -24335,9 +24335,10 @@
 ############################################################################
 
 
 ############################################################################
-# DGV V6 (DATABASE OF GENOMIC VARIANTS) (DONE 11/12/08 angie)
+# DGV V6 (DATABASE OF GENOMIC VARIANTS) (DONE 2/23/09 angie)
+# DGV V6 with useless thin regions done 11/12/08
 # DGV V5 done 8/11/08
 # DGV V4 done 5/9/08
     ssh hgwdev
     mkdir /cluster/data/hg17/bed/dgv.v6
@@ -24348,33 +24349,28 @@
       http://projects.tcag.ca/variation/downloads/indel.hg17.v6.nov.2008.txt
     # shuffle fields into bed8+ (input has one start coord==0, but min 
     # nonzero size of 99 not 100 implies most coords are 1-based):
     foreach f (*.v6.*.txt)
-      tail +2 $f \
+      tail -n +2 $f \
       | perl -wpe 'chomp; \
         ($id, $landmark, $chr, $start, $end, $varType, \
-         $locChr, $locStart, $locEnd, $ref, $pmid, $method, \
+         undef, undef, undef, $ref, $pmid, $method, \
          undef, undef, undef, undef, $sample) = split("\t"); \
-        die "chr $chr != loc $locChr" if ($chr ne $locChr); \
         $id =~ s/^Variation_//; \
-        $chromStart = $start < $locStart ? $start : $locStart; \
-        $chromEnd = $end > $locEnd ? $end : $locEnd; \
-        $thickStart = $locStart > $start ? $locStart : $start; \
-        $thickEnd = $locEnd < $end ? $locEnd : $end; \
-        $chromStart-- unless ($chromStart == 0); \
-        $thickStart-- unless ($thickStart == 0); \
+        $start-- unless ($start == 0); \
         $landmark = "" if ($landmark =~ /^chr.*\d\.\.\d/); \
         $rgb = "255,128,0"; \
         $rgb = "200,0,0" if ($varType =~ /^Inv/); \
         $rgb = "0,100,0" if ($varType eq "InDel"); \
-        $_ = join("\t", $chr, $chromStart, $chromEnd, $id, 0, "+", \
-                  $thickStart, $thickEnd, $rgb, $landmark, $varType, \
+        $_ = join("\t", $chr, $start, $end, $id, 0, "+", \
+                  $start, $end, $rgb, $landmark, $varType, \
                   $ref, $pmid, $method, $sample) . "\n";' \
           > $f:r.bed
     end
     hgsql hg17 -e 'rename table dgv to dgvV5'
     hgLoadBed hg17 dgv *.bed \
       -onServer -sqlTable=$HOME/kent/src/hg/lib/dgv.sql -tab
+#Loaded 17479 elements of size 15
 
 
 ############################################################################
 # KIDD/EICHLER DISCORDANT CLONE ENDS (DONE 6/10/08 angie)