6648b8b2419629b3d401c01f38086be43a2eeff8
kent
  Fri Mar 8 16:27:46 2013 -0800
Making bedToBigBed and bedGraphToBigWig share more code.  Cure is only half as bad as disease (trading sharing 64 lines for having a function with 16 parameters.)
diff --git src/lib/bbiWrite.c src/lib/bbiWrite.c
index 72c817c..85724bc 100644
--- src/lib/bbiWrite.c
+++ src/lib/bbiWrite.c
@@ -1,15 +1,17 @@
+/* bbiWrite.c - Routines to help write bigWig and bigBed files. See also bbiFile.h */
+
 #include "common.h"
 #include "hash.h"
 #include "linefile.h"
 #include "sqlNum.h"
 #include "zlibFace.h"
 #include "cirTree.h"
 #include "bPlusTree.h"
 #include "bbiFile.h"
 #include "obscure.h"
 
 void bbiWriteDummyHeader(FILE *f)
 /* Write out all-zero header, just to reserve space for it. */
 {
 repeatCharOut(f, 0, 64);
 }
@@ -252,30 +254,115 @@
     resSizes[resTry] = 0;
     resScales[resTry] = res;
     // if aveSize is large, then the initial value of res is large, and we
     // cannot do all 10 levels without overflowing res* integers and other related variables.
     if (res > 1000000000) 
 	{
 	resTryCount = resTry + 1;  
 	verbose(2, "resTryCount reduced from 10 to %d\n", resTryCount);
 	break;
 	}
     res *= resIncrement;
     }
 return resTryCount;
 }
 
+int bbiWriteZoomLevels(
+    struct lineFile *lf,    /* Input file. */
+    FILE *f,		    /* Output. */
+    int blockSize,	    /* Size of index block */
+    int itemsPerSlot,	    /* Number of data points bundled at lowest level. */
+    bbiWriteReducedOnceReturnReducedTwice writeReducedOnceReturnReducedTwice,   /* callback */
+    int fieldCount,	    /* Number of fields in bed (4 for bedGraph) */
+    boolean doCompress,	    /* Do we compress.  Answer really should be yes! */
+    bits64 dataSize,	    /* Size of data on disk (after compression if any). */
+    struct bbiChromUsage *usageList, /* Result from bbiChromUsageFromBedFile */
+    int resTryCount, int resScales[], int resSizes[],   /* How much to zoom at each level */
+    bits32 zoomAmounts[bbiMaxZoomLevels],      /* Fills in amount zoomed at each level. */
+    bits64 zoomDataOffsets[bbiMaxZoomLevels],  /* Fills in where data starts for each zoom level. */
+    bits64 zoomIndexOffsets[bbiMaxZoomLevels], /* Fills in where index starts for each level. */
+    struct bbiSummaryElement *totalSum)
+/* Write out all the zoom levels and return the number of levels written.  Writes 
+ * actual zoom amount and the offsets of the zoomed data and index in the last three
+ * parameters.  Sorry for all the parameters - it was this or duplicate a big chunk of
+ * code between bedToBigBed and bedGraphToBigWig. */
+{
+/* Write out first zoomed section while storing in memory next zoom level. */
+assert(resTryCount > 0);
+int maxReducedSize = dataSize/2;
+int initialReduction = 0, initialReducedCount = 0;
+
+/* Figure out initialReduction for zoom - one that is maxReducedSize or less. */
+int resTry;
+for (resTry = 0; resTry < resTryCount; ++resTry)
+    {
+    bits64 reducedSize = resSizes[resTry] * sizeof(struct bbiSummaryOnDisk);
+    if (doCompress)
+	reducedSize /= 2;	// Estimate!
+    if (reducedSize <= maxReducedSize)
+	{
+	initialReduction = resScales[resTry];
+	initialReducedCount = resSizes[resTry];
+	break;
+	}
+    }
+verbose(2, "initialReduction %d, initialReducedCount = %d\n", 
+    initialReduction, initialReducedCount);
+
+/* Force there to always be at least one zoom.  It may waste a little space on small
+ * files, but it makes files more uniform, and avoids special case code for calculating
+ * overall file summary. */
+if (initialReduction == 0)
+    {
+    initialReduction = resScales[0];
+    initialReducedCount = resSizes[0];
+    }
+
+/* Call routine to make the initial zoom level and also a bit of work towards further levels. */
+struct lm *lm = lmInit(0);
+int zoomIncrement = bbiResIncrement;
+lineFileRewind(lf);
+struct bbiSummary *rezoomedList = writeReducedOnceReturnReducedTwice(usageList, fieldCount,
+	lf, initialReduction, initialReducedCount,
+	zoomIncrement, blockSize, itemsPerSlot, doCompress, lm, 
+	f, &zoomDataOffsets[0], &zoomIndexOffsets[0], totalSum);
+verboseTime(2, "writeReducedOnceReturnReducedTwice");
+zoomAmounts[0] = initialReduction;
+int zoomLevels = 1;
+
+/* Loop around to do any additional levels of zoom. */
+int zoomCount = initialReducedCount;
+int reduction = initialReduction * zoomIncrement;
+while (zoomLevels < bbiMaxZoomLevels)
+    {
+    int rezoomCount = slCount(rezoomedList);
+    if (rezoomCount >= zoomCount)
+	break;
+    zoomCount = rezoomCount;
+    zoomDataOffsets[zoomLevels] = ftell(f);
+    zoomIndexOffsets[zoomLevels] = bbiWriteSummaryAndIndex(rezoomedList, 
+	blockSize, itemsPerSlot, doCompress, f);
+    zoomAmounts[zoomLevels] = reduction;
+    ++zoomLevels;
+    reduction *= zoomIncrement;
+    rezoomedList = bbiSummarySimpleReduce(rezoomedList, reduction, lm);
+    }
+lmCleanup(&lm);
+verboseTime(2, "further reductions");
+return zoomLevels;
+}
+
 
 int bbiCountSectionsNeeded(struct bbiChromUsage *usageList, int itemsPerSlot)
 /* Count up number of sections needed for data. */
 {
 struct bbiChromUsage *usage;
 int count = 0;
 for (usage = usageList; usage != NULL; usage = usage->next)
     {
     int countOne = (usage->itemCount + itemsPerSlot - 1)/itemsPerSlot;
     count += countOne;
     verbose(2, "%s %d, %d blocks of %d\n", usage->name, usage->itemCount, countOne, itemsPerSlot);
     }
 return count;
 }