57d153c3caf42b22dd4cff6138a54c1b8545333f kent Fri Mar 8 15:25:37 2013 -0800

Fixing a bug where sometimes zoom summaries would not be written out by
bedGraphToBigWig. I'm seeing a lot of code that can be shared between
bedGraphToBigWig and bedToBigBed. Refactored to share some now, will do
more shortly.

diff --git src/utils/bedGraphToBigWig/bedGraphToBigWig.c src/utils/bedGraphToBigWig/bedGraphToBigWig.c
index 03e509d..c306bff 100644
--- src/utils/bedGraphToBigWig/bedGraphToBigWig.c
+++ src/utils/bedGraphToBigWig/bedGraphToBigWig.c
@@ -358,120 +358,118 @@ void bedGraphToBigWig(char *inName, char *chromSizes, char *outName)
 /* bedGraphToBigWig - Convert a bedGraph program to bigWig.. */
 {
 verboseTimeInit();
 struct lineFile *lf = lineFileOpen(inName, TRUE);
 struct hash *chromSizesHash = bbiChromSizesFromFile(chromSizes);
 verbose(2, "%d chroms in %s\n", chromSizesHash->elCount, chromSizes);
 int minDiff = 0, i;
 double aveSize = 0;
 bits64 bedCount = 0;
 bits32 uncompressBufSize = 0;
 struct bbiChromUsage *usageList = bbiChromUsageFromBedFile(lf, chromSizesHash, NULL, &minDiff, &aveSize, &bedCount);
 verboseTime(2, "pass1");
-verbose(2, "%d chroms in %s\n", slCount(usageList), inName);
+verbose(2, "%d chroms in %s, minDiff=%d, aveSize=%g, bedCount=%lld\n",
+    slCount(usageList), inName, minDiff, aveSize, bedCount);
 
 /* Write out dummy header, zoom offsets. */
 FILE *f = mustOpen(outName, "wb");
 bbiWriteDummyHeader(f);
 bbiWriteDummyZooms(f);
 
 /* Write out dummy total summary. */
 struct bbiSummaryElement totalSum;
 ZeroVar(&totalSum);
 bits64 totalSummaryOffset = ftell(f);
 bbiSummaryElementWrite(f, &totalSum);
 
 /* Write out chromosome/size database. */
 bits64 chromTreeOffset = ftell(f);
 bbiWriteChromInfo(usageList, blockSize, f);
 
 /* Set up to keep track of possible initial reduction levels. */
-int resTryCount = 10, resTry;
-int resIncrement = 4;
-int resScales[resTryCount], resSizes[resTryCount];
-int res = minDiff * 2;
-if (res > 0)
-    {
-    for (resTry = 0; resTry < resTryCount; ++resTry)
-        {
-        resSizes[resTry] = 0;
-        resScales[resTry] = res;
-        res *= resIncrement;
-        }
-    }
-else
-    resTryCount = 0;
+int resScales[bbiMaxZoomLevels], resSizes[bbiMaxZoomLevels];
+int resTryCount = bbiCalcResScalesAndSizes(aveSize, resScales, resSizes);
 
 /* Write out primary full resolution data in sections, collect stats to use for reductions. */
 bits64 dataOffset = ftell(f);
 bits64 sectionCount = bbiCountSectionsNeeded(usageList, itemsPerSlot);
 writeOne(f, sectionCount);
 struct bbiBoundsArray *boundsArray;
 AllocArray(boundsArray, sectionCount);
 lineFileRewind(lf);
 bits32 maxSectionSize = 0;
 writeSections(usageList, lf, itemsPerSlot, boundsArray, sectionCount, f, resTryCount, resScales, resSizes,
     doCompress, &maxSectionSize);
 verboseTime(2, "pass2");
 
 /* Write out primary data index. */
 bits64 indexOffset = ftell(f);
 cirTreeFileBulkIndexToOpenFile(boundsArray, sizeof(boundsArray[0]), sectionCount,
     blockSize, 1, NULL, bbiBoundsArrayFetchKey, bbiBoundsArrayFetchOffset,
     indexOffset, f);
 verboseTime(2, "index write");
 
 /* Declare arrays and vars that track the zoom levels we actually output. */
 bits32 zoomAmounts[bbiMaxZoomLevels];
 bits64 zoomDataOffsets[bbiMaxZoomLevels];
 bits64 zoomIndexOffsets[bbiMaxZoomLevels];
 int zoomLevels = 0;
 
 /* Write out first zoomed section while storing in memory next zoom level. */
-if (minDiff > 0)
+/* This is just a block to make some variables more local. */
     {
+    assert(resTryCount > 0);
     bits64 dataSize = indexOffset - dataOffset;
     int maxReducedSize = dataSize/2;
     int initialReduction = 0, initialReducedCount = 0;
 
     /* Figure out initialReduction for zoom. */
+    int resTry;
     for (resTry = 0; resTry < resTryCount; ++resTry)
         {
         bits64 reducedSize = resSizes[resTry] * sizeof(struct bbiSummaryOnDisk);
         if (doCompress)
             reducedSize /= 2;    // Estimate!
         if (reducedSize <= maxReducedSize)
             {
             initialReduction = resScales[resTry];
             initialReducedCount = resSizes[resTry];
             break;
             }
         }
     verbose(2, "initialReduction %d, initialReducedCount = %d\n",
         initialReduction, initialReducedCount);
 
-    if (initialReduction > 0)
+    /* Force there to always be at least one zoom.  It may waste a little space on small
+     * files, but it makes files more uniform, and avoids special case code for calculating
+     * overall file summary. */
+    if (initialReduction == 0)
+        {
+        initialReduction = resScales[0];
+        initialReducedCount = resSizes[0];
+        }
+
+    /* This is just a block to make some variables more local. */
         {
         struct lm *lm = lmInit(0);
-        int zoomIncrement = 4;
+        int zoomIncrement = bbiResIncrement;
         lineFileRewind(lf);
         struct bbiSummary *rezoomedList = writeReducedOnceReturnReducedTwice(usageList, lf,
             initialReduction, initialReducedCount,
-            resIncrement, blockSize, itemsPerSlot, doCompress, lm,
+            zoomIncrement, blockSize, itemsPerSlot, doCompress, lm,
             f, &zoomDataOffsets[0], &zoomIndexOffsets[0], &totalSum);
         verboseTime(2, "writeReducedOnceReturnReducedTwice");
         zoomAmounts[0] = initialReduction;
         zoomLevels = 1;
 
         int zoomCount = initialReducedCount;
         int reduction = initialReduction * zoomIncrement;
         while (zoomLevels < bbiMaxZoomLevels)
             {
             int rezoomCount = slCount(rezoomedList);
             if (rezoomCount >= zoomCount)
                 break;
             zoomCount = rezoomCount;
             zoomDataOffsets[zoomLevels] = ftell(f);
             zoomIndexOffsets[zoomLevels] = bbiWriteSummaryAndIndex(rezoomedList,
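
Note on the fix: the old code derived the candidate zoom resolutions from minDiff * 2, so a
bedGraph whose items were adjacent (minDiff == 0) got resTryCount == 0, and the old
"if (minDiff > 0)" guard then skipped the zoom-summary pass entirely; that is the bug named
in the commit message. The new shared helper bases the scales on aveSize instead, and the
forced-zoom branch guarantees at least one level. The helper itself is not part of this hunk;
below is a minimal sketch of what it plausibly does, assuming it simply mirrors the deleted
inline loop with aveSize replacing minDiff * 2 (bbiMaxZoomLevels and bbiResIncrement are bbi
library constants referenced elsewhere in the diff; the exact body lives in the bbi library):

int bbiCalcResScalesAndSizes(int aveSize,
    int resScales[bbiMaxZoomLevels], int resSizes[bbiMaxZoomLevels])
/* Sketch: fill resScales with candidate reduction scales, one per possible zoom level,
 * each bbiResIncrement (4x) coarser than the last, starting near the average item span.
 * Zero out resSizes so writeSections can accumulate summary counts into them.
 * Returns the number of candidate levels, always at least one. */
{
int resTry, resTryCount = bbiMaxZoomLevels;
int res = aveSize;
if (res < 1)
    res = 1;    /* unlike minDiff * 2, the starting scale can never be zero */
for (resTry = 0; resTry < resTryCount; ++resTry)
    {
    resSizes[resTry] = 0;
    resScales[resTry] = res;
    res *= bbiResIncrement;
    }
return resTryCount;
}

Because this always returns a positive count, the new assert(resTryCount > 0) at the top of
the zoom block holds unconditionally.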
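
The initialReduction loop in the hunk keeps the finest candidate scale whose zoom summaries
are estimated to take no more than half the full-resolution data (halving the estimate again
when compression is on). A self-contained toy run of that arithmetic; the 32-byte record size
and all counts here are hypothetical, chosen only to show the selection:

#include <stdio.h>

int main(void)
/* Toy demonstration of the initialReduction selection: pick the first candidate
 * scale whose estimated summary size fits in half the full-resolution data. */
{
long dataSize = 10000000;       /* hypothetical full-resolution data bytes */
long maxReducedSize = dataSize/2;
int summarySize = 32;           /* stand-in for sizeof(struct bbiSummaryOnDisk) */
int resScales[10];
long resSizes[10];
int resTry, res = 100;
long count = 2000000;           /* hypothetical summary count at the finest scale */
for (resTry = 0; resTry < 10; ++resTry)
    {
    resScales[resTry] = res;
    resSizes[resTry] = count;
    res *= 4;                   /* each level is 4x coarser... */
    count /= 4;                 /* ...so roughly 4x fewer summaries */
    }
for (resTry = 0; resTry < 10; ++resTry)
    {
    long reducedSize = resSizes[resTry] * summarySize;
    if (reducedSize <= maxReducedSize)
        {
        printf("initialReduction=%d, %ld summaries, %ld bytes\n",
            resScales[resTry], resSizes[resTry], reducedSize);
        break;
        }
    }
return 0;
}

With these numbers the first two scales are rejected (64 MB and 16 MB of summaries against a
5 MB budget) and the third, at scale 1600, is chosen. The forced-zoom change in the hunk means
that even when no scale fits, resScales[0] is used anyway, so at least one zoom level is
always written.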