4fdac1a09cb6a154a2f739885b02fc8f0e40a00d
hiram
  Mon Sep 9 09:57:59 2019 -0700
first instance of hub is complete refs #23734

diff --git src/hg/makeDb/doc/VGP/bigDensity.pl src/hg/makeDb/doc/VGP/bigDensity.pl
index 2068256..f747514 100755
--- src/hg/makeDb/doc/VGP/bigDensity.pl
+++ src/hg/makeDb/doc/VGP/bigDensity.pl
@@ -10,30 +10,39 @@
 }
 
 my $winSize = shift;
 my $sizesFile = shift;
 my $genePred = shift;
 
 my %chromSizes;	# key is chr name, value is size
 
 open (FH, "<$sizesFile") or die "can not read $sizesFile";
 while (my $line = <FH>) {
   chomp $line;
   my ($chr, $size) = split('\s+', $line);
   $chromSizes{$chr} = $size;
 }
 close (FH);
+my $largestChrom = "";
+my $sizeLargestChrom = 0;
+foreach my $chr (sort {$chromSizes{$b} <=> $chromSizes{$a}} keys %chromSizes) {
+  $largestChrom = $chr;
+  last;
+}
+$sizeLargestChrom = $chromSizes{$largestChrom};
+
+printf STDERR "# largest chrom: '%s' at %d\n", $largestChrom, $chromSizes{$largestChrom};
 
 my %windows;  # key is chr name, value is pointer to array of counts per window
 #### initialize all window counts to zero
 foreach my $chr (keys %chromSizes) {
   my @a;
   my $arrPtr = \@a;
   $windows{$chr} = $arrPtr;
   my $size = $chromSizes{$chr};
   my $j = 0;
   for (my $i = 0; $i < $size; $i += $winSize) {
      $arrPtr->[$j++] = 0;
   }
 #  printf STDERR "# %s %d %d\n", $chr, $j, $size;
 }
 
@@ -57,43 +66,68 @@
     $thisChr = $chr;
     $chrPtr = $windows{$chr};
   }
   my $windowS = int ($txStart / $winSize);
   $chrPtr->[$windowS] += 1;
   my $windowE = int ($txEnd / $winSize);
   if ($windowE != $windowS) {
      $chrPtr->[$windowE] += 1;
      for (my $i = $windowS + 1; $i < $windowE; ++$i) {
        $chrPtr->[$i] += 1;
      }
   }
 }
 close (FH);
 
-foreach my $chr (sort keys %chromSizes) {
+my $chr = $largestChrom;
+my $sumCounts = 0;
+my $itemCount = 0;
+my @countArray;	# each count is placed into the array for ordering and
+	# quartile analysis
+
+# foreach my $chr (sort keys %chromSizes) {
 my %highestCount;	# key is chrName, value is window number of highest count
 $highestCount{$chr} = -1;
 my $highWater = 0;
 $chrPtr = $windows{$chr};
 my $winCount = scalar(@$chrPtr);
 # printf STDERR "# %s winCount %d\n", $chr, $winCount;
 my $lastEnd = $chromSizes{$chr};
 for (my $i = 0; $i < $winCount; ++$i) {
   my $count = $chrPtr->[$i];
   if ($count > $highWater) {
     $highWater = $count;
     $highestCount{$chr} = $i;
   }
-#  my $end = ($i+1) * $winSize;
-#  $end = $lastEnd if ($end > $lastEnd);
-#  printf "%s\t%d\t%d\t%d\n", $chr, $i*$winSize, $end, $chrPtr->[$i];
+  my $end = ($i+1) * $winSize;
+  $end = $lastEnd if ($end > $lastEnd);
+  if ($count > 0) {
+     $sumCounts += $count;
+     push @countArray, $count;
+     ++$itemCount;
+     printf "%s\t%d\t%d\t%d\t%d\t%d\n", $chr, $i*$winSize, $end, $count, $itemCount, $sumCounts;
+  }
 }
+die "finding no items on largest chrom $largestChrom $sizeLargestChrom" if ($itemCount < 1);
+my @sortedCounts = sort @countArray;
+my $firstQuartile = int ($itemCount / 4);
+my $thirdQuartile = int ($itemCount * 3 / 4);
+my $median = int ($itemCount / 2);
+my $mean = $sumCounts / $itemCount;
+printf STDERR "# 1st quartile: %d\n", $sortedCounts[$firstQuartile];
+printf STDERR "# median %d\n", $sortedCounts[$median];
+printf STDERR "# mean %d = %d / %d\n", $mean, $sumCounts, $itemCount;
+printf STDERR "# 3rd quartile: %d\n", $sortedCounts[$thirdQuartile];
+
+exit 255;
+
 my $highWindow = $highestCount{$chr};
 if ($highWindow > -1) {
   my $start = $highWindow * $winSize;
   my $end = ($highWindow + 1) * $winSize;
   $end = $lastEnd if ($end > $lastEnd);
   printf "%s\t%d\t%d\t%d\n", $chr, $start, $end, $highWater;
 } else {
   printf "%s none\n", $chr;
 }
-}
+
+# }