6648b8b2419629b3d401c01f38086be43a2eeff8 kent Fri Mar 8 16:27:46 2013 -0800

Making bedToBigBed and bedGraphToBigWig share more code. Cure is only half as bad as disease (trading sharing 64 lines for having a function with 16 parameters.)

diff --git src/utils/bedToBigBed/bedToBigBed.c src/utils/bedToBigBed/bedToBigBed.c
index e3b81f4..60e28f7 100644
--- src/utils/bedToBigBed/bedToBigBed.c
+++ src/utils/bedToBigBed/bedToBigBed.c
@@ -2,31 +2,31 @@
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "dystring.h"
 #include "obscure.h"
 #include "asParse.h"
 #include "basicBed.h"
 #include "sig.h"
 #include "rangeTree.h"
 #include "zlibFace.h"
 #include "sqlNum.h"
 #include "bPlusTree.h"
 #include "bigBed.h"
 
-char *version = "2.4";
+char *version = "2.5";
 
 /* Things set directly or indirectly by command lne in main() routine. */
 int blockSize = 256;
 int itemsPerSlot = 512;
 char *extraIndex = NULL;
 int bedN = 0;   /* number of standard bed fields */
 int bedP = 0;   /* number of bed plus fields */
 char *asFile = NULL;
 char *asText = NULL;
 static boolean doCompress = FALSE;
 static boolean tabSep = FALSE;
 
 void usage()
 /* Explain usage and exit. */
 {
@@ -325,31 +325,31 @@
     {
     if (!startsWithWord(chrom, line))
         {
         lineFileReuse(lf);
         break;
         }
     char *row[3];
     chopLine(line, row);
     unsigned start = sqlUnsigned(row[1]);
     unsigned end = sqlUnsigned(row[2]);
     rangeTreeAddToCoverageDepth(tree, start, end);
     }
 return tree;
 }
 
-static struct bbiSummary *writeReducedOnceReturnReducedTwice(struct bbiChromUsage *usageList,
+static struct bbiSummary *bedWriteReducedOnceReturnReducedTwice(struct bbiChromUsage *usageList,
         int fieldCount, struct lineFile *lf, bits32 initialReduction, bits32 initialReductionCount,
         int zoomIncrement, int blockSize, int itemsPerSlot, boolean doCompress,
         struct lm *lm, FILE *f, bits64 *retDataStart, bits64 *retIndexStart,
         struct bbiSummaryElement *totalSum)
 /* Write out data reduced by factor of initialReduction. Also calculate and keep in memory
  * next reduction level. This is more work than some ways, but it keeps us from having to
  * keep the first reduction entirely in memory. */
 {
 struct bbiSummary *twiceReducedList = NULL;
 bits32 doubleReductionSize = initialReduction * zoomIncrement;
 struct bbiChromUsage *usage = usageList;
 struct bbiBoundsArray *boundsArray, *boundsPt, *boundsEnd;
 boundsPt = AllocArray(boundsArray, initialReductionCount);
 boundsEnd = boundsPt + initialReductionCount;
@@ -626,100 +626,37 @@
 verboseTime(1, "pass2 - checking and writing primary data (%lld records, %d fields)",
     (long long)bedCount, fieldCount);
 
 /* Write out primary data index. */
 bits64 indexOffset = ftell(f);
 cirTreeFileBulkIndexToOpenFile(boundsArray, sizeof(boundsArray[0]), blockCount,
     blockSize, 1, NULL, bbiBoundsArrayFetchKey, bbiBoundsArrayFetchOffset,
     indexOffset, f);
 freez(&boundsArray);
 verboseTime(2, "index write");
 
 /* Declare arrays and vars that track the zoom levels we actually output. */
 bits32 zoomAmounts[bbiMaxZoomLevels];
 bits64 zoomDataOffsets[bbiMaxZoomLevels];
 bits64 zoomIndexOffsets[bbiMaxZoomLevels];
 
-int zoomLevels = 0;
-/* Write out first zoomed section while storing in memory next zoom level. */
-/* This is just a block to make some variables more local. */
-    {
-    assert(resTryCount > 0);
-    bits64 dataSize = indexOffset - dataOffset;
-    int maxReducedSize = dataSize/2;
-    int initialReduction = 0, initialReducedCount = 0;
-
-    /* Figure out initialReduction for zoom. */
-    int resTry;
-    for (resTry = 0; resTry < resTryCount; ++resTry)
-        {
-        bits64 reducedSize = resSizes[resTry] * sizeof(struct bbiSummaryOnDisk);
-        if (doCompress)
-            reducedSize /= 2;   // Estimate!
-        if (reducedSize <= maxReducedSize)
-            {
-            initialReduction = resScales[resTry];
-            initialReducedCount = resSizes[resTry];
-            break;
-            }
-        }
-    verbose(2, "initialReduction %d, initialReducedCount = %d\n",
-        initialReduction, initialReducedCount);
-    verbose(2, "dataSize %llu, reducedSize %llu, resScales[0] = %d\n", dataSize, (bits64)(initialReducedCount*sizeof(struct bbiSummaryOnDisk)), resScales[0]);
-
-    /* Force there to always be at least one zoom. It may waste a little space on small
-     * files, but it makes files more uniform, and avoids special case code for calculating
-     * overall file summary. */
-    if (initialReduction == 0)
-        {
-        initialReduction = resScales[0];
-        initialReducedCount = resSizes[0];
-        }
-
-    /* This is just a block to make some variables more local. */
-        {
-        struct lm *lm = lmInit(0);
-        int zoomIncrement = bbiResIncrement;
-        lineFileRewind(lf);
-        struct bbiSummary *rezoomedList = writeReducedOnceReturnReducedTwice(usageList,
-            fieldCount, lf, initialReduction, initialReducedCount,
-            zoomIncrement, blockSize, itemsPerSlot, doCompress, lm,
-            f, &zoomDataOffsets[0], &zoomIndexOffsets[0], &totalSum);
-        verboseTime(1, "pass3 - writeReducedOnceReturnReducedTwice");
-        zoomAmounts[0] = initialReduction;
-        zoomLevels = 1;
-
-        int zoomCount = initialReducedCount;
-        int reduction = initialReduction * zoomIncrement;
-        while (zoomLevels < bbiMaxZoomLevels)
-            {
-            int rezoomCount = slCount(rezoomedList);
-            if (rezoomCount >= zoomCount)
-                break;
-            zoomCount = rezoomCount;
-            zoomDataOffsets[zoomLevels] = ftell(f);
-            zoomIndexOffsets[zoomLevels] = bbiWriteSummaryAndIndex(rezoomedList,
-                blockSize, itemsPerSlot, doCompress, f);
-            zoomAmounts[zoomLevels] = reduction;
-            ++zoomLevels;
-            reduction *= zoomIncrement;
-            rezoomedList = bbiSummarySimpleReduce(rezoomedList, reduction, lm);
-            }
-        lmCleanup(&lm);
-        verboseTime(1, "further reductions");
-        }
-    }
+/* Call monster zoom maker library function that bedGraphToBigWig also uses. */
+int zoomLevels = bbiWriteZoomLevels(lf, f, blockSize, itemsPerSlot,
+    bedWriteReducedOnceReturnReducedTwice, fieldCount,
+    doCompress, indexOffset - dataOffset,
+    usageList, resTryCount, resScales, resSizes,
+    zoomAmounts, zoomDataOffsets, zoomIndexOffsets, &totalSum);
 
 /* Write out extra indexes if need be. */
 if (eim)
     {
     int i;
     for (i=0; i<eim->indexCount; ++i)
         {
         eim->fileOffsets[i] = ftell(f);
         maxBedNameSize = eim->maxFieldSize[i];
         assert(sizeof(struct bbNamedFileChunk) == sizeof(eim->chunkArrayArray[i][0]));
         bptFileBulkIndexToOpenFile(eim->chunkArrayArray[i], sizeof(eim->chunkArrayArray[i][0]),
             bedCount, blockSize, bbNamedFileChunkKey, maxBedNameSize, bbNamedFileChunkVal,
             sizeof(bits64) + sizeof(bits64), f);
         verboseTime(1, "Sorting and writing extra index %d", i);
         }
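
For reference, here is a sketch of the shared library interface that the new bbiWriteZoomLevels() call implies. Only the call site above and the bedWriteReducedOnceReturnReducedTwice signature are confirmed by this diff; the typedef name, the parameter comments, and the exact shape of the declaration below are assumptions about how the bbi library header might spell it, not a quote from kent source.

/* Sketch only -- inferred from the bbiWriteZoomLevels() call above and the old
 * static function's signature; the real declaration lives in the bbi library
 * headers and may differ in typedef name, comments, and formatting. */

typedef struct bbiSummary *bbiWriteReducedOnceReturnReducedTwice(
        struct bbiChromUsage *usageList, int fieldCount,
        struct lineFile *lf, bits32 initialReduction, bits32 initialReductionCount,
        int zoomIncrement, int blockSize, int itemsPerSlot, boolean doCompress,
        struct lm *lm, FILE *f, bits64 *retDataStart, bits64 *retIndexStart,
        struct bbiSummaryElement *totalSum);
/* Callback that writes the first zoom level and returns the next reduction in
 * memory; bedToBigBed and bedGraphToBigWig each supply their own version. */

int bbiWriteZoomLevels(
    struct lineFile *lf,                        /* Input file, rewound and rescanned for zooming */
    FILE *f,                                    /* Output bigBed/bigWig file */
    int blockSize,                              /* Index block size */
    int itemsPerSlot,                           /* Items bundled per index slot */
    bbiWriteReducedOnceReturnReducedTwice *writeReducedOnceReturnReducedTwice,  /* Format-specific callback */
    int fieldCount,                             /* Number of fields in input records */
    boolean doCompress,                         /* Compress zoom sections? */
    bits64 dataSize,                            /* Primary data size (indexOffset - dataOffset above) */
    struct bbiChromUsage *usageList,            /* Per-chromosome usage from the first pass */
    int resTryCount, int resScales[], int resSizes[],   /* Candidate zoom resolutions */
    bits32 zoomAmounts[bbiMaxZoomLevels],       /* Out: reduction factor for each level written */
    bits64 zoomDataOffsets[bbiMaxZoomLevels],   /* Out: file offset of each level's data */
    bits64 zoomIndexOffsets[bbiMaxZoomLevels],  /* Out: file offset of each level's index */
    struct bbiSummaryElement *totalSum);        /* Out: overall summary for the whole file */
/* Returns the number of zoom levels written -- the 16-parameter function the
 * commit message is apologizing for. */

Pulling the zoom-level loop into one shared routine means each converter keeps only its format-specific first-pass reducer, which is where the roughly 64 shared lines mentioned in the commit message come from.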