src/lib/bwgCreate.c 1.21

1.21 2009/11/16 11:01:22 kent
Improving calculations about initial zoom level in wigToBigWig, especially for compressed files.
Index: src/lib/bwgCreate.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/lib/bwgCreate.c,v
retrieving revision 1.20
retrieving revision 1.21
diff -b -B -U 4 -r1.20 -r1.21
--- src/lib/bwgCreate.c	12 Nov 2009 23:15:52 -0000	1.20
+++ src/lib/bwgCreate.c	16 Nov 2009 11:01:22 -0000	1.21
@@ -924,24 +924,37 @@
 struct bbiChromInfo *chromInfoArray;
 int chromCount, maxChromNameSize;
 bwgMakeChromInfo(sectionList, chromSizeHash, &chromCount, &chromInfoArray, &maxChromNameSize);
 
-/* Figure out initial summary level - starting with a summary 10 times the amount
- * of the smallest item.  See if summarized data is smaller than input data, if
+/* Figure out initial summary level - starting with a summary 20 times the amount
+ * of the smallest item.  See if summarized data is smaller than half the input data, if
  * not bump up reduction by a factor of 2 until it is, or until further summarying
  * yeilds no size reduction. */
 int  minRes = bwgAverageResolution(sectionList);
-int initialReduction = minRes*10;
+int initialReduction = minRes*20;
 bits64 fullSize = bwgTotalSectionSize(sectionList);
+bits64 maxReducedSize = fullSize/2;
 struct bbiSummary *firstSummaryList = NULL, *summaryList = NULL;
 bits64 lastSummarySize = 0, summarySize;
 for (;;)
     {
     summaryList = bwgReduceSectionList(sectionList, chromInfoArray, initialReduction);
     bits64 summarySize = bbiTotalSummarySize(summaryList);
-    if (summarySize >= fullSize && summarySize != lastSummarySize)
+    if (doCompress)
         {
-	initialReduction *= 2;
+        summarySize *= 4;	// Compensate for summary not compressing as well as primary data
+	initialReduction *= 4;
+	}
+    if (summarySize >= maxReducedSize && summarySize != lastSummarySize)
+        {
+	/* Need to do more reduction.  First scale reduction by amount that it missed
+	 * being small enough last time, with an extra 10% for good measure.  Then
+	 * just to keep from spinning through loop too many times, make sure this is
+	 * at least 2x the previous reduction. */
+	int nextReduction = 1.1 * initialReduction * summarySize / maxReducedSize;
+	if (nextReduction < initialReduction*2)
+	    nextReduction = initialReduction*2;
+	initialReduction = nextReduction;
 	bbiSummaryFreeList(&summaryList);
 	lastSummarySize = summarySize;
 	}
     else
@@ -954,9 +967,9 @@
 /* Now calculate up to 10 levels of further summary. */
 bits64 reduction = initialReduction;
 for (i=0; i<ArraySize(reduceSummaries)-1; i++)
     {
-    reduction *= 10;
+    reduction *= 4;
     if (reduction > 1000000000)
         break;
     summaryList = bbiReduceSummaryList(reduceSummaries[summaryCount-1], chromInfoArray, 
     	reduction);