8c084018dedc1241e5220ad9b80f83e957368890 hiram Fri Jun 24 15:27:06 2022 -0700 handle case of empty input file no redmine diff --git src/utils/bedInvert.pl src/utils/bedInvert.pl index 636061f..2e35b68 100755 --- src/utils/bedInvert.pl +++ src/utils/bedInvert.pl @@ -1,95 +1,98 @@ #!/bin/env perl # # bedInvert - invert a bed file, output regions not covered by the bed file # DO NOT EDIT the /cluster/bin/scripts copy of this file -- # edit ~/kent/src/utils/bedInvert.pl instead. use strict; use warnings; my $argc = scalar(@ARGV); if ($argc != 2) { printf STDERR "usage: bedInvert.pl [chrom.sizes] [file.bed] > invert.bed\n"; printf STDERR "will output bed elements for gaps between elements in file.bed\n"; printf STDERR "The file.bed does not have to be sorted, it will be\n"; printf STDERR "\tsorted here.\n"; printf STDERR "The chrom.sizes is used to get the last possible element\n"; printf STDERR "\tto the end of the chromosome.\n"; exit 255 } my ($sizesFile, $bedFile) = @ARGV; my %chromSizes; # key is chrom name, value is length if ($sizesFile =~ m/^stdin$/) { open (FH, "</dev/stdin") or die "can not read chrom.sizes: stdin"; } elsif ($sizesFile =~ m/.gz$/) { open (FH, "zcat $sizesFile|") or die "can not read chrom.sizes: %s", $sizesFile; } else { open (FH, "<$sizesFile") or die "can not read chrom.sizes: %s", $sizesFile; } while (my $line = <FH>) { chomp $line; my ($chr, $size) = split('\s+', $line); $chromSizes{$chr} = $size; } close (FH); my %chromDone; # key is chrom name, value is 1 to indicate done this chrom my $chr = ""; my $start = 0; my $end = 0; my $size = 0; if ($bedFile =~ m/^stdin$/) { open (FH, "grep -v '^ *#' /dev/stdin | sort -k1,1 -k2,2n |") or die "can not read /dev/stdin"; } elsif ($bedFile =~ m/.gz$/) { open (FH, "zcat $bedFile | grep -v '^ *#' | sort -k1,1 -k2,2n|") or die "can not read $bedFile"; } else { open (FH, "grep -v '^ *#' $bedFile | sort -k1,1 -k2,2n|") or die "can not read $bedFile"; } while (my $line = <FH>) { chomp $line; my ($c, $s, $e, $rest) = split('\s+', $line, 4); if (length($chr) > 1) { if ($chr ne $c) { my $chromEnd = $chromSizes{$chr}; $size = $chromEnd - $end; # may be last element previous chrom $chromDone{$chr} = 1; printf "%s\t%d\t%d\t%d\n", $chr, $end, $chromEnd, $size if ($size > 0); $chr = $c; $start = $s; $end = $e; $size = $start - 0; # might be the first gap this new chrom $chromDone{$chr} = 1; printf "%s\t0\t%d\t%d\n", $chr, $start, $size if ($size > 0); } else { if ($s <= $end) { # next element overlaps or joins previous element $end = $e; } else { # there is a gap here from $end to $s $size = $s - $end; $chromDone{$chr} = 1; printf "%s\t%d\t%d\t%d\n", $chr, $end, $s, $size if ($size > 0); $chr = $c; $start = $s; $end = $e; } } } else { # first chromosome, first element $chr = $c; $start = $s; $end = $e; $size = $start - 0; $chromDone{$chr} = 1; # might be the first gap this new chrom printf "%s\t0\t%d\t%d\n", $chr, $start, $size if ($size > 0); } } # might be final gap on last chrom mentioned +# might be an empty input file, no lines read, never set chr +if (length($chr)) { my $chromEnd = $chromSizes{$chr}; $size = $chromEnd - $end; $chromDone{$chr} = 1; printf "%s\t%d\t%d\t%d\n", $chr, $end, $chromEnd, $size if ($size > 0);; +} close (FH); # output gaps on chroms never mentioned yet, they had no elements foreach $chr (keys %chromSizes) { next if (exists($chromDone{$chr})); printf "%s\t0\t%d\t%d\n", $chr, $chromSizes{$chr}, $chromSizes{$chr}; }