#!/usr/bin/env perl
#
# bedSingleCover - combine overlapping bed elements into one element
#
# Similar to bedCollapse.pl but even more condensing of any overlapping items.
# Origin: kent commit f7cb19e57b68f0d6212adac2f801ec91cef2be61
#         (hiram, Mon Jan 11 12:37:38 2016 -0800, no redmine),
#         src/utils/bedSingleCover.pl

# DO NOT EDIT the /cluster/bin/scripts copy of this file --
# edit ~/kent/src/utils/bedSingleCover.pl instead.
#
# usage: bedSingleCover.pl file.bed > singleCover.bed
#        bedSingleCover.pl stdin < file.bed > singleCover.bed
#
# The input does not need to be sorted; it is piped through
# 'sort -k1,1 -k2,2n' before merging.  Lines whose first non-blank
# character is '#' are ignored.  Output is four tab-separated columns:
#   chr  start  end  size      (size = end - start)

use strict;
use warnings;

# Write one merged element (chr, start, end, size) to the output handle.
sub _print_item {
    my ($out, $chr, $start, $end) = @_;
    printf {$out} "%s\t%d\t%d\t%d\n", $chr, $start, $end, $end - $start;
    return;
}

# single_cover($in, $out)
#
# Read bed lines from filehandle $in and write the single-coverage
# elements to filehandle $out.
#
#   $in  - readable filehandle; lines must already be ordered by
#          chromosome name and then numerically by start (main() arranges
#          this with sort).  Only the first three whitespace-separated
#          fields of each line are used.
#   $out - writable filehandle receiving "chr\tstart\tend\tsize\n" lines.
#
# Items on the same chromosome whose start is <= the running end are
# folded into the running element, so abutting items (start == previous
# end) are merged as well as truly overlapping ones.
# Comment lines and blank lines are skipped silently; lines without
# integer start/end columns are skipped with a warning on STDERR.
# Nothing is written when the input holds no usable items.
sub single_cover {
    my ($in, $out) = @_;

    # $chr stays undef until the first item has been seen.  Testing
    # definedness (rather than the length of the name) keeps
    # single-character chromosome names such as "1" or "X" working.
    my ($chr, $start, $end);

    while (my $line = <$in>) {
        chomp $line;
        next if $line =~ m/\A\s*(?:#|\z)/;    # comment or blank line

        my ($c, $s, $e) = split ' ', $line, 4;
        unless (defined $e && $s =~ m/\A\d+\z/ && $e =~ m/\A\d+\z/) {
            warn "bedSingleCover: skipping malformed line: $line\n";
            next;
        }

        if (defined $chr && $c eq $chr && $s <= $end) {
            # overlaps (or abuts) the running element: extend it
            $end = $e if $e > $end;
            next;
        }

        # new chromosome, or a gap before this item: flush and restart
        _print_item($out, $chr, $start, $end) if defined $chr;
        ($chr, $start, $end) = ($c, $s, $e);
    }

    # flush the final element, if there was any input at all
    _print_item($out, $chr, $start, $end) if defined $chr;
    return;
}

# main(@args) - command line entry point; returns the process exit status.
sub main {
    my @args = @_;
    my $argc = scalar(@args);

    if ($argc < 1) {
        printf STDERR "usage: bedSingleCover.pl [file.bed] > singleCover.bed\n";
        printf STDERR "will combine overlapping bed elements into one element\n";
        printf STDERR "No need to pre-sort the bed file, it will be sorted here\n";
        printf STDERR "result is four columns: chr start end size\n";
        printf STDERR "where size is the size of the element: size=end-start\n";
        return 255;
    }

    my $bedFile = shift @args;

    # List-form pipe open: no shell is involved, so file names containing
    # spaces or shell metacharacters are passed to sort untouched.
    # With the name "stdin", sort is given no file argument and therefore
    # reads the standard input it inherits from this process.
    my @sortCmd = ('sort', '-k1,1', '-k2,2n');
    my $source  = '/dev/stdin';
    if ($bedFile ne 'stdin') {
        push @sortCmd, '--', $bedFile;
        $source = $bedFile;
    }

    open(my $fh, '-|', @sortCmd) or die "can not read $source: $!";
    single_cover($fh, \*STDOUT);
    # close() on a pipe reports the child's exit status, which is how an
    # unreadable or missing input file is detected.
    close($fh)
        or die "can not read $source: sort exit status " . ($? >> 8) . "\n";
    return 0;
}

# Run only when executed as a script, so the file can also be loaded
# with require for testing single_cover().
exit main(@ARGV) unless caller;

1;