ed292c11c5008eec8dd80894e385fead2d77a97c hiram Thu Oct 21 11:59:45 2021 -0700 correct result when the first contig satisifies the measurement no redmine diff --git src/hg/utils/automation/n50.pl src/hg/utils/automation/n50.pl index 8ac8281..2fb6da3 100755 --- src/hg/utils/automation/n50.pl +++ src/hg/utils/automation/n50.pl @@ -1,82 +1,86 @@ #!/usr/bin/env perl # n50.pl - calculate N50 values for a two column file # $Id: n50.pl,v 1.3 2010/02/17 06:48:42 hiram Exp $ use strict; use warnings; my $argc = scalar(@ARGV); if ($argc < 1) { printf STDERR "n50.pl - calculate N50 values from a two column file\n"; printf STDERR "usage:\n\tn50.pl [other.sizes]\n"; printf STDERR "\twhere is a two column file:\n"; printf STDERR "\tcontigName size\n"; exit 255; } my $ix = 0; while (my $sizeFile = shift) { my $sizeCount = 0; my %sizes; # key is contigName, value is size if ($sizeFile eq "stdin") { printf STDERR "#\treading: stdin\n"; while (my $line = <>) { next if ($line =~ m/^\s*#/); ++$sizeCount; chomp ($line); my ($name, $size, $rest) = split('\s+', $line, 3); my $key = sprintf("%s_X_%d", $name, $ix++); $sizes{$key} = $size; } } else { printf STDERR "#\treading: $sizeFile\n"; open (FH, "<$sizeFile") or die "can not read $sizeFile"; while (my $line = ) { next if ($line =~ m/^\s*#/); ++$sizeCount; chomp ($line); my ($name, $size, $rest) = split('\s+', $line, 3); my $key = sprintf("%s_X_%d", $name, $ix++); $sizes{$key} = $size; } close (FH); } my $totalSize = 0; foreach my $key (keys %sizes) { $totalSize += $sizes{$key} } my $n50Size = $totalSize / 2; printf STDERR "#\tcontig count: %d, total size: %d, one half size: %d\n", $sizeCount, $totalSize, $n50Size; my $prevContig = ""; my $prevSize = 0; $totalSize = 0; my $contigCount =0; foreach my $key (sort { $sizes{$b} <=> $sizes{$a} } keys %sizes) { ++$contigCount; $totalSize += $sizes{$key}; if ($totalSize > $n50Size) { my $prevName = $prevContig; $prevName =~ s/_X_[0-9]+//; my $origName = $key; $origName =~ s/_X_[0-9]+//; printf "# cumulative\tN50 count\tcontig\tcontig size\n"; + if (1 == $contigCount) { + printf "%d\t%d\t%s\t%d\n", $totalSize, $contigCount, $origName, $sizes{$key}; + } else { printf "%d\t%d\t%s\t%d\n", $totalSize-$sizes{$key},$contigCount-1,$prevName, $prevSize; + } printf "%d one half size\n", $n50Size; printf "%d\t%d\t%s\t%d\n", $totalSize, $contigCount, $origName, $sizes{$key}; last; } $prevContig = $key; $prevSize = $sizes{$key}; } }