src/lib/bwgCreate.c 1.21
1.21 2009/11/16 11:01:22 kent
Improving calculations about initial zoom level in wigToBigWig, especially for compressed files.
Index: src/lib/bwgCreate.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/lib/bwgCreate.c,v
retrieving revision 1.20
retrieving revision 1.21
diff -b -B -U 4 -r1.20 -r1.21
--- src/lib/bwgCreate.c 12 Nov 2009 23:15:52 -0000 1.20
+++ src/lib/bwgCreate.c 16 Nov 2009 11:01:22 -0000 1.21
@@ -924,24 +924,37 @@
struct bbiChromInfo *chromInfoArray;
int chromCount, maxChromNameSize;
bwgMakeChromInfo(sectionList, chromSizeHash, &chromCount, &chromInfoArray, &maxChromNameSize);
-/* Figure out initial summary level - starting with a summary 10 times the amount
- * of the smallest item. See if summarized data is smaller than input data, if
+/* Figure out initial summary level - starting with a summary 20 times the amount
+ * of the smallest item. See if summarized data is smaller than half input data, if
* not bump up reduction by a factor of 2 until it is, or until further summarizing
* yields no size reduction. */
int minRes = bwgAverageResolution(sectionList);
-int initialReduction = minRes*10;
+int initialReduction = minRes*20;
bits64 fullSize = bwgTotalSectionSize(sectionList);
+bits64 maxReducedSize = fullSize/2;
struct bbiSummary *firstSummaryList = NULL, *summaryList = NULL;
bits64 lastSummarySize = 0, summarySize;
for (;;)
{
summaryList = bwgReduceSectionList(sectionList, chromInfoArray, initialReduction);
bits64 summarySize = bbiTotalSummarySize(summaryList);
- if (summarySize >= fullSize && summarySize != lastSummarySize)
+ if (doCompress)
{
- initialReduction *= 2;
+ summarySize *= 4; // Compensate for summary not compressing as well as primary data
+ initialReduction *= 4;
+ }
+ if (summarySize >= maxReducedSize && summarySize != lastSummarySize)
+ {
+ /* Need to do more reduction. First scale reduction by amount that it missed
+ * being small enough last time, with an extra 10% for good measure. Then
+ * just to keep from spinning through loop too many times, make sure this is
+ * at least 2x the previous reduction. */
+ int nextReduction = 1.1 * initialReduction * summarySize / maxReducedSize;
+ if (nextReduction < initialReduction*2)
+ nextReduction = initialReduction*2;
+ initialReduction = nextReduction;
bbiSummaryFreeList(&summaryList);
lastSummarySize = summarySize;
}
else
@@ -954,9 +967,9 @@
/* Now calculate up to 10 levels of further summary. */
bits64 reduction = initialReduction;
for (i=0; i<ArraySize(reduceSummaries)-1; i++)
{
- reduction *= 10;
+ reduction *= 4;
if (reduction > 1000000000)
break;
summaryList = bbiReduceSummaryList(reduceSummaries[summaryCount-1], chromInfoArray,
reduction);