#!/usr/bin/env perl
#
# bedCollapse - combine adjacent bed elements into one element
#
# Provenance: added as src/utils/bedCollapse.pl in commit
# e5f540b8d4762d28f98a6ade2ac84f63205a4333 (hiram, Tue Jul 26 13:14:14 2011
# -0700): "adding a bedCollapse.pl command to the scripts".

# DO NOT EDIT the /cluster/bin/scripts copy of this file --
# edit ~/kent/src/utils/bedCollapse.pl instead.

use strict;
use warnings;

# collapse_sorted_bed($in_fh, $out_fh)
#
# Reads bed lines from $in_fh, which must already be sorted by column 1 and
# then numerically by column 2, and writes one line per run of abutting
# elements to $out_fh as: chrom <tab> start <tab> end <tab> size.
#
# Two consecutive elements are joined only when they are on the same
# chromosome and the second one starts exactly where the current run ends
# (start == end).  Elements that overlap or leave a gap start a new run.
# Only the first three columns are used; anything after them is ignored.
#
# Returns the number of lines written.
sub collapse_sorted_bed {
    my ($in_fh, $out_fh) = @_;

    # An explicit flag marks "a run is open".  Testing the chromosome name's
    # length instead would mistake one-character names ("1", "X") for
    # "no run yet" and silently drop their elements.
    my $have_run = 0;
    my ($chr, $start, $end);
    my $written = 0;

    while (my $line = <$in_fh>) {
        chomp $line;
        my ($c, $s, $e) = split(/\s+/, $line, 4);

        # Blank or short lines carry no interval; skip them rather than
        # feeding undefined values into the numeric comparisons below.
        next unless defined $e;

        if ($have_run && $c eq $chr && $s == $end) {
            # Abuts the open run: extend it and keep reading.
            $end = $e;
            next;
        }

        if ($have_run) {
            printf {$out_fh} "%s\t%d\t%d\t%d\n", $chr, $start, $end, $end - $start;
            $written++;
        }
        ($chr, $start, $end) = ($c, $s, $e);
        $have_run = 1;
    }

    # Flush the last open run.  Nothing is printed for empty input.
    if ($have_run) {
        printf {$out_fh} "%s\t%d\t%d\t%d\n", $chr, $start, $end, $end - $start;
        $written++;
    }

    return $written;
}

# main(@args)
#
# Command-line entry point.  Expects the bed file name as the first
# argument, sorts it with sort(1) and prints the collapsed elements on
# STDOUT.  Returns the process exit code: 255 after printing the usage
# message, 0 on success.  Dies if sort cannot be started or fails.
sub main {
    my @args = @_;

    if (scalar(@args) < 1) {
        printf STDERR "usage: bedCollapse.pl <file.bed>\n";
        printf STDERR "will combine adjacent bed elements into one element\n";
        printf STDERR "This is working on only columns 2 and 3, the column 4\n";
        printf STDERR "output is the size of the element.\n";
        return 255;
    }

    my $file = shift @args;

    # List-form pipe open: the file name is handed to sort as one argument
    # and never passes through a shell, so spaces and metacharacters in the
    # name are harmless.
    open(my $sorted, '-|', 'sort', '-k1,1', '-k2,2n', $file)
        or die "can not read $file";

    collapse_sorted_bed($sorted, \*STDOUT);

    # Closing a pipe handle waits for the child and reports its status;
    # this is where a missing or unreadable input file becomes visible.
    close($sorted)
        or die "sort failed on $file"
            . ($! ? ": $!" : " (exit status " . ($? >> 8) . ")");

    return 0;
}

# Run only when executed as a program, so the subs above can also be loaded
# from another file without side effects.
exit(main(@ARGV)) unless caller;