src/hg/makeDb/doc/hg17.txt 1.119
1.119 2009/02/23 23:41:35 angie
Remove thin tails from DGV (SAB feedback).
Index: src/hg/makeDb/doc/hg17.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/hg17.txt,v
retrieving revision 1.118
retrieving revision 1.119
diff -b -B -U 4 -r1.118 -r1.119
--- src/hg/makeDb/doc/hg17.txt 9 Jan 2009 18:24:18 -0000 1.118
+++ src/hg/makeDb/doc/hg17.txt 23 Feb 2009 23:41:35 -0000 1.119
@@ -24335,9 +24335,10 @@
############################################################################
############################################################################
-# DGV V6 (DATABASE OF GENOMIC VARIANTS) (DONE 11/12/08 angie)
+# DGV V6 (DATABASE OF GENOMIC VARIANTS) (DONE 2/23/09 angie)
+# DGV V6 with useless thin regions done 11/12/08
# DGV V5 done 8/11/08
# DGV V4 done 5/9/08
ssh hgwdev
mkdir /cluster/data/hg17/bed/dgv.v6
@@ -24348,33 +24349,28 @@
http://projects.tcag.ca/variation/downloads/indel.hg17.v6.nov.2008.txt
# shuffle fields into bed8+ (input has one start coord==0, but a minimum
# nonzero size of 99, not 100, implies that most coords are 1-based):
foreach f (*.v6.*.txt)
- tail +2 $f \
+ tail -n +2 $f \
| perl -wpe 'chomp; \
($id, $landmark, $chr, $start, $end, $varType, \
- $locChr, $locStart, $locEnd, $ref, $pmid, $method, \
+ undef, undef, undef, $ref, $pmid, $method, \
undef, undef, undef, undef, $sample) = split("\t"); \
- die "chr $chr != loc $locChr" if ($chr ne $locChr); \
$id =~ s/^Variation_//; \
- $chromStart = $start < $locStart ? $start : $locStart; \
- $chromEnd = $end > $locEnd ? $end : $locEnd; \
- $thickStart = $locStart > $start ? $locStart : $start; \
- $thickEnd = $locEnd < $end ? $locEnd : $end; \
- $chromStart-- unless ($chromStart == 0); \
- $thickStart-- unless ($thickStart == 0); \
+ $start-- unless ($start == 0); \
$landmark = "" if ($landmark =~ /^chr.*\d\.\.\d/); \
$rgb = "255,128,0"; \
$rgb = "200,0,0" if ($varType =~ /^Inv/); \
$rgb = "0,100,0" if ($varType eq "InDel"); \
- $_ = join("\t", $chr, $chromStart, $chromEnd, $id, 0, "+", \
- $thickStart, $thickEnd, $rgb, $landmark, $varType, \
+ $_ = join("\t", $chr, $start, $end, $id, 0, "+", \
+ $start, $end, $rgb, $landmark, $varType, \
$ref, $pmid, $method, $sample) . "\n";' \
> $f:r.bed
end
hgsql hg17 -e 'rename table dgv to dgvV5'
hgLoadBed hg17 dgv *.bed \
-onServer -sqlTable=$HOME/kent/src/hg/lib/dgv.sql -tab
+#Loaded 17479 elements of size 15
############################################################################
# KIDD/EICHLER DISCORDANT CLONE ENDS (DONE 6/10/08 angie)