6648b8b2419629b3d401c01f38086be43a2eeff8 kent Fri Mar 8 16:27:46 2013 -0800

Making bedToBigBed and bedGraphToBigWig share more code. Cure is only half as bad as disease (trading sharing 64 lines for having a function with 16 parameters.)

diff --git src/utils/bedToBigBed/bedToBigBed.c src/utils/bedToBigBed/bedToBigBed.c
index e3b81f4..60e28f7 100644
--- src/utils/bedToBigBed/bedToBigBed.c
+++ src/utils/bedToBigBed/bedToBigBed.c
@@ -2,31 +2,31 @@
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "dystring.h"
 #include "obscure.h"
 #include "asParse.h"
 #include "basicBed.h"
 #include "sig.h"
 #include "rangeTree.h"
 #include "zlibFace.h"
 #include "sqlNum.h"
 #include "bPlusTree.h"
 #include "bigBed.h"
 
-char *version = "2.4";
+char *version = "2.5";
 
 /* Things set directly or indirectly by command lne in main() routine. */
 int blockSize = 256;
 int itemsPerSlot = 512;
 char *extraIndex = NULL;
 int bedN = 0;   /* number of standard bed fields */
 int bedP = 0;   /* number of bed plus fields */
 char *asFile = NULL;
 char *asText = NULL;
 static boolean doCompress = FALSE;
 static boolean tabSep = FALSE;
 
 void usage()
 /* Explain usage and exit. */
 {
@@ -325,31 +325,31 @@
     {
     if (!startsWithWord(chrom, line))
         {
         lineFileReuse(lf);
         break;
         }
     char *row[3];
     chopLine(line, row);
     unsigned start = sqlUnsigned(row[1]);
     unsigned end = sqlUnsigned(row[2]);
     rangeTreeAddToCoverageDepth(tree, start, end);
     }
 return tree;
 }
 
-static struct bbiSummary *writeReducedOnceReturnReducedTwice(struct bbiChromUsage *usageList,
+static struct bbiSummary *bedWriteReducedOnceReturnReducedTwice(struct bbiChromUsage *usageList,
         int fieldCount, struct lineFile *lf, bits32 initialReduction, bits32 initialReductionCount,
         int zoomIncrement, int blockSize, int itemsPerSlot, boolean doCompress,
         struct lm *lm, FILE *f, bits64 *retDataStart, bits64 *retIndexStart,
         struct bbiSummaryElement *totalSum)
 /* Write out data reduced by factor of initialReduction. Also calculate and keep in memory
  * next reduction level. This is more work than some ways, but it keeps us from having to
  * keep the first reduction entirely in memory. */
 {
 struct bbiSummary *twiceReducedList = NULL;
 bits32 doubleReductionSize = initialReduction * zoomIncrement;
 struct bbiChromUsage *usage = usageList;
 struct bbiBoundsArray *boundsArray, *boundsPt, *boundsEnd;
 boundsPt = AllocArray(boundsArray, initialReductionCount);
 boundsEnd = boundsPt + initialReductionCount;
@@ -626,100 +626,37 @@
 verboseTime(1, "pass2 - checking and writing primary data (%lld records, %d fields)",
     (long long)bedCount, fieldCount);
 
 /* Write out primary data index. */
 bits64 indexOffset = ftell(f);
 cirTreeFileBulkIndexToOpenFile(boundsArray, sizeof(boundsArray[0]), blockCount,
     blockSize, 1, NULL, bbiBoundsArrayFetchKey, bbiBoundsArrayFetchOffset,
     indexOffset, f);
 freez(&boundsArray);
 verboseTime(2, "index write");
 
 /* Declare arrays and vars that track the zoom levels we actually output. */
 bits32 zoomAmounts[bbiMaxZoomLevels];
 bits64 zoomDataOffsets[bbiMaxZoomLevels];
 bits64 zoomIndexOffsets[bbiMaxZoomLevels];
 
-int zoomLevels = 0;
-/* Write out first zoomed section while storing in memory next zoom level. */
-/* This is just a block to make some variables more local. */
-    {
-    assert(resTryCount > 0);
-    bits64 dataSize = indexOffset - dataOffset;
-    int maxReducedSize = dataSize/2;
-    int initialReduction = 0, initialReducedCount = 0;
-
-    /* Figure out initialReduction for zoom. */
-    int resTry;
-    for (resTry = 0; resTry < resTryCount; ++resTry)
-        {
-        bits64 reducedSize = resSizes[resTry] * sizeof(struct bbiSummaryOnDisk);
-        if (doCompress)
-            reducedSize /= 2;   // Estimate!
-        if (reducedSize <= maxReducedSize)
-            {
-            initialReduction = resScales[resTry];
-            initialReducedCount = resSizes[resTry];
-            break;
-            }
-        }
-    verbose(2, "initialReduction %d, initialReducedCount = %d\n",
-        initialReduction, initialReducedCount);
-    verbose(2, "dataSize %llu, reducedSize %llu, resScales[0] = %d\n", dataSize, (bits64)(initialReducedCount*sizeof(struct bbiSummaryOnDisk)), resScales[0]);
-
-    /* Force there to always be at least one zoom. It may waste a little space on small
-     * files, but it makes files more uniform, and avoids special case code for calculating
-     * overall file summary. */
-    if (initialReduction == 0)
-        {
-        initialReduction = resScales[0];
-        initialReducedCount = resSizes[0];
-        }
-
-    /* This is just a block to make some variables more local. */
-        {
-        struct lm *lm = lmInit(0);
-        int zoomIncrement = bbiResIncrement;
-        lineFileRewind(lf);
-        struct bbiSummary *rezoomedList = writeReducedOnceReturnReducedTwice(usageList,
-            fieldCount, lf, initialReduction, initialReducedCount,
-            zoomIncrement, blockSize, itemsPerSlot, doCompress, lm,
-            f, &zoomDataOffsets[0], &zoomIndexOffsets[0], &totalSum);
-        verboseTime(1, "pass3 - writeReducedOnceReturnReducedTwice");
-        zoomAmounts[0] = initialReduction;
-        zoomLevels = 1;
-
-        int zoomCount = initialReducedCount;
-        int reduction = initialReduction * zoomIncrement;
-        while (zoomLevels < bbiMaxZoomLevels)
-            {
-            int rezoomCount = slCount(rezoomedList);
-            if (rezoomCount >= zoomCount)
-                break;
-            zoomCount = rezoomCount;
-            zoomDataOffsets[zoomLevels] = ftell(f);
-            zoomIndexOffsets[zoomLevels] = bbiWriteSummaryAndIndex(rezoomedList,
-                blockSize, itemsPerSlot, doCompress, f);
-            zoomAmounts[zoomLevels] = reduction;
-            ++zoomLevels;
-            reduction *= zoomIncrement;
-            rezoomedList = bbiSummarySimpleReduce(rezoomedList, reduction, lm);
-            }
-        lmCleanup(&lm);
-        verboseTime(1, "further reductions");
-        }
-    }
+/* Call monster zoom maker library function that bedGraphToBigWig also uses. */
+int zoomLevels = bbiWriteZoomLevels(lf, f, blockSize, itemsPerSlot,
+    bedWriteReducedOnceReturnReducedTwice, fieldCount,
+    doCompress, indexOffset - dataOffset,
+    usageList, resTryCount, resScales, resSizes,
+    zoomAmounts, zoomDataOffsets, zoomIndexOffsets, &totalSum);
 
 /* Write out extra indexes if need be. */
 if (eim)
     {
     int i;
     for (i=0; i<eim->indexCount; ++i)
         {
         eim->fileOffsets[i] = ftell(f);
         maxBedNameSize = eim->maxFieldSize[i];
         assert(sizeof(struct bbNamedFileChunk) == sizeof(eim->chunkArrayArray[i][0]));
         bptFileBulkIndexToOpenFile(eim->chunkArrayArray[i], sizeof(eim->chunkArrayArray[i][0]),
             bedCount, blockSize, bbNamedFileChunkKey, maxBedNameSize, bbNamedFileChunkVal,
             sizeof(bits64) + sizeof(bits64), f);
         verboseTime(1, "Sorting and writing extra index %d", i);
         }
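
For reference, here is a sketch of the shared library interface that the new bbiWriteZoomLevels() call implies. Only the call site above and the bedWriteReducedOnceReturnReducedTwice signature are confirmed by this diff; the typedef name, the parameter comments, and the exact shape of the declaration below are assumptions about how the bbi library header might spell it, not a quote from kent source.

/* Sketch only -- inferred from the bbiWriteZoomLevels() call above and the old
 * static function's signature; the real declaration lives in the bbi library
 * headers and may differ in typedef name, comments, and formatting. */

typedef struct bbiSummary *bbiWriteReducedOnceReturnReducedTwice(
        struct bbiChromUsage *usageList, int fieldCount,
        struct lineFile *lf, bits32 initialReduction, bits32 initialReductionCount,
        int zoomIncrement, int blockSize, int itemsPerSlot, boolean doCompress,
        struct lm *lm, FILE *f, bits64 *retDataStart, bits64 *retIndexStart,
        struct bbiSummaryElement *totalSum);
/* Callback that writes the first zoom level and returns the next reduction in
 * memory; bedToBigBed and bedGraphToBigWig each supply their own version. */

int bbiWriteZoomLevels(
    struct lineFile *lf,                        /* Input file, rewound and rescanned for zooming */
    FILE *f,                                    /* Output bigBed/bigWig file */
    int blockSize,                              /* Index block size */
    int itemsPerSlot,                           /* Items bundled per index slot */
    bbiWriteReducedOnceReturnReducedTwice *writeReducedOnceReturnReducedTwice,  /* Format-specific callback */
    int fieldCount,                             /* Number of fields in input records */
    boolean doCompress,                         /* Compress zoom sections? */
    bits64 dataSize,                            /* Primary data size (indexOffset - dataOffset above) */
    struct bbiChromUsage *usageList,            /* Per-chromosome usage from the first pass */
    int resTryCount, int resScales[], int resSizes[],   /* Candidate zoom resolutions */
    bits32 zoomAmounts[bbiMaxZoomLevels],       /* Out: reduction factor for each level written */
    bits64 zoomDataOffsets[bbiMaxZoomLevels],   /* Out: file offset of each level's data */
    bits64 zoomIndexOffsets[bbiMaxZoomLevels],  /* Out: file offset of each level's index */
    struct bbiSummaryElement *totalSum);        /* Out: overall summary for the whole file */
/* Returns the number of zoom levels written -- the 16-parameter function the
 * commit message is apologizing for. */

Pulling the zoom-level loop into one shared routine means each converter keeps only its format-specific first-pass reducer, which is where the roughly 64 shared lines mentioned in the commit message come from.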