57d153c3caf42b22dd4cff6138a54c1b8545333f kent Fri Mar 8 15:25:37 2013 -0800

Fixing a bug where sometimes zoom summaries would not be written out by
bedGraphToBigWig. I'm seeing a lot of code that can be shared between
bedGraphToBigWig and bedToBigBed. Refactored to share some now, will do
more shortly.

diff --git src/utils/bedGraphToBigWig/bedGraphToBigWig.c src/utils/bedGraphToBigWig/bedGraphToBigWig.c
index 03e509d..c306bff 100644
--- src/utils/bedGraphToBigWig/bedGraphToBigWig.c
+++ src/utils/bedGraphToBigWig/bedGraphToBigWig.c
@@ -358,120 +358,118 @@ void bedGraphToBigWig(char *inName, char *chromSizes, char *outName)
 /* bedGraphToBigWig - Convert a bedGraph program to bigWig.. */
 {
 verboseTimeInit();
 struct lineFile *lf = lineFileOpen(inName, TRUE);
 struct hash *chromSizesHash = bbiChromSizesFromFile(chromSizes);
 verbose(2, "%d chroms in %s\n", chromSizesHash->elCount, chromSizes);
 int minDiff = 0, i;
 double aveSize = 0;
 bits64 bedCount = 0;
 bits32 uncompressBufSize = 0;
 struct bbiChromUsage *usageList = bbiChromUsageFromBedFile(lf, chromSizesHash, NULL, &minDiff, &aveSize, &bedCount);
 verboseTime(2, "pass1");
-verbose(2, "%d chroms in %s\n", slCount(usageList), inName);
+verbose(2, "%d chroms in %s, minDiff=%d, aveSize=%g, bedCount=%lld\n",
+    slCount(usageList), inName, minDiff, aveSize, bedCount);
 
 /* Write out dummy header, zoom offsets. */
 FILE *f = mustOpen(outName, "wb");
 bbiWriteDummyHeader(f);
 bbiWriteDummyZooms(f);
 
 /* Write out dummy total summary. */
 struct bbiSummaryElement totalSum;
 ZeroVar(&totalSum);
 bits64 totalSummaryOffset = ftell(f);
 bbiSummaryElementWrite(f, &totalSum);
 
 /* Write out chromosome/size database. */
 bits64 chromTreeOffset = ftell(f);
 bbiWriteChromInfo(usageList, blockSize, f);
 
 /* Set up to keep track of possible initial reduction levels. */
-int resTryCount = 10, resTry;
-int resIncrement = 4;
-int resScales[resTryCount], resSizes[resTryCount];
-int res = minDiff * 2;
-if (res > 0)
-    {
-    for (resTry = 0; resTry < resTryCount; ++resTry)
-        {
-        resSizes[resTry] = 0;
-        resScales[resTry] = res;
-        res *= resIncrement;
-        }
-    }
-else
-    resTryCount = 0;
+int resScales[bbiMaxZoomLevels], resSizes[bbiMaxZoomLevels];
+int resTryCount = bbiCalcResScalesAndSizes(aveSize, resScales, resSizes);
 
 /* Write out primary full resolution data in sections, collect stats to use for reductions. */
 bits64 dataOffset = ftell(f);
 bits64 sectionCount = bbiCountSectionsNeeded(usageList, itemsPerSlot);
 writeOne(f, sectionCount);
 struct bbiBoundsArray *boundsArray;
 AllocArray(boundsArray, sectionCount);
 lineFileRewind(lf);
 bits32 maxSectionSize = 0;
 writeSections(usageList, lf, itemsPerSlot, boundsArray, sectionCount, f, resTryCount, resScales, resSizes,
     doCompress, &maxSectionSize);
 verboseTime(2, "pass2");
 
 /* Write out primary data index. */
 bits64 indexOffset = ftell(f);
 cirTreeFileBulkIndexToOpenFile(boundsArray, sizeof(boundsArray[0]), sectionCount,
     blockSize, 1, NULL, bbiBoundsArrayFetchKey, bbiBoundsArrayFetchOffset,
     indexOffset, f);
 verboseTime(2, "index write");
 
 /* Declare arrays and vars that track the zoom levels we actually output. */
 bits32 zoomAmounts[bbiMaxZoomLevels];
 bits64 zoomDataOffsets[bbiMaxZoomLevels];
 bits64 zoomIndexOffsets[bbiMaxZoomLevels];
 int zoomLevels = 0;
 
 /* Write out first zoomed section while storing in memory next zoom level. */
-if (minDiff > 0)
+/* This is just a block to make some variables more local. */
     {
+    assert(resTryCount > 0);
     bits64 dataSize = indexOffset - dataOffset;
     int maxReducedSize = dataSize/2;
     int initialReduction = 0, initialReducedCount = 0;
 
     /* Figure out initialReduction for zoom. */
+    int resTry;
     for (resTry = 0; resTry < resTryCount; ++resTry)
         {
         bits64 reducedSize = resSizes[resTry] * sizeof(struct bbiSummaryOnDisk);
         if (doCompress)
             reducedSize /= 2;    // Estimate!
         if (reducedSize <= maxReducedSize)
             {
             initialReduction = resScales[resTry];
             initialReducedCount = resSizes[resTry];
             break;
             }
         }
     verbose(2, "initialReduction %d, initialReducedCount = %d\n",
         initialReduction, initialReducedCount);
 
-    if (initialReduction > 0)
+    /* Force there to always be at least one zoom.  It may waste a little space on small
+     * files, but it makes files more uniform, and avoids special case code for calculating
+     * overall file summary. */
+    if (initialReduction == 0)
+        {
+        initialReduction = resScales[0];
+        initialReducedCount = resSizes[0];
+        }
+
+    /* This is just a block to make some variables more local. */
         {
         struct lm *lm = lmInit(0);
-        int zoomIncrement = 4;
+        int zoomIncrement = bbiResIncrement;
         lineFileRewind(lf);
         struct bbiSummary *rezoomedList = writeReducedOnceReturnReducedTwice(usageList, lf,
             initialReduction, initialReducedCount,
-            resIncrement, blockSize, itemsPerSlot, doCompress, lm,
+            zoomIncrement, blockSize, itemsPerSlot, doCompress, lm,
             f, &zoomDataOffsets[0], &zoomIndexOffsets[0], &totalSum);
         verboseTime(2, "writeReducedOnceReturnReducedTwice");
         zoomAmounts[0] = initialReduction;
         zoomLevels = 1;
 
         int zoomCount = initialReducedCount;
         int reduction = initialReduction * zoomIncrement;
         while (zoomLevels < bbiMaxZoomLevels)
             {
             int rezoomCount = slCount(rezoomedList);
             if (rezoomCount >= zoomCount)
                 break;
             zoomCount = rezoomCount;
             zoomDataOffsets[zoomLevels] = ftell(f);
             zoomIndexOffsets[zoomLevels] = bbiWriteSummaryAndIndex(rezoomedList,
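
Note on the fix: the old code derived the candidate zoom resolutions from minDiff * 2, so a
bedGraph whose items were adjacent (minDiff == 0) got resTryCount == 0, and the old
"if (minDiff > 0)" guard then skipped the zoom-summary pass entirely; that is the bug named
in the commit message. The new shared helper bases the scales on aveSize instead, and the
forced-zoom branch guarantees at least one level. The helper itself is not part of this hunk;
below is a minimal sketch of what it plausibly does, assuming it simply mirrors the deleted
inline loop with aveSize replacing minDiff * 2 (bbiMaxZoomLevels and bbiResIncrement are bbi
library constants referenced elsewhere in the diff; the exact body lives in the bbi library):

int bbiCalcResScalesAndSizes(int aveSize,
    int resScales[bbiMaxZoomLevels], int resSizes[bbiMaxZoomLevels])
/* Sketch: fill resScales with candidate reduction scales, one per possible zoom level,
 * each bbiResIncrement (4x) coarser than the last, starting near the average item span.
 * Zero out resSizes so writeSections can accumulate summary counts into them.
 * Returns the number of candidate levels, always at least one. */
{
int resTry, resTryCount = bbiMaxZoomLevels;
int res = aveSize;
if (res < 1)
    res = 1;    /* unlike minDiff * 2, the starting scale can never be zero */
for (resTry = 0; resTry < resTryCount; ++resTry)
    {
    resSizes[resTry] = 0;
    resScales[resTry] = res;
    res *= bbiResIncrement;
    }
return resTryCount;
}

Because this always returns a positive count, the new assert(resTryCount > 0) at the top of
the zoom block holds unconditionally.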
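
The initialReduction loop in the hunk keeps the finest candidate scale whose zoom summaries
are estimated to take no more than half the full-resolution data (halving the estimate again
when compression is on). A self-contained toy run of that arithmetic; the 32-byte record size
and all counts here are hypothetical, chosen only to show the selection:

#include <stdio.h>

int main(void)
/* Toy demonstration of the initialReduction selection: pick the first candidate
 * scale whose estimated summary size fits in half the full-resolution data. */
{
long dataSize = 10000000;       /* hypothetical full-resolution data bytes */
long maxReducedSize = dataSize/2;
int summarySize = 32;           /* stand-in for sizeof(struct bbiSummaryOnDisk) */
int resScales[10];
long resSizes[10];
int resTry, res = 100;
long count = 2000000;           /* hypothetical summary count at the finest scale */
for (resTry = 0; resTry < 10; ++resTry)
    {
    resScales[resTry] = res;
    resSizes[resTry] = count;
    res *= 4;                   /* each level is 4x coarser... */
    count /= 4;                 /* ...so roughly 4x fewer summaries */
    }
for (resTry = 0; resTry < 10; ++resTry)
    {
    long reducedSize = resSizes[resTry] * summarySize;
    if (reducedSize <= maxReducedSize)
        {
        printf("initialReduction=%d, %ld summaries, %ld bytes\n",
            resScales[resTry], resSizes[resTry], reducedSize);
        break;
        }
    }
return 0;
}

With these numbers the first two scales are rejected (64 MB and 16 MB of summaries against a
5 MB budget) and the third, at scale 1600, is chosen. The forced-zoom change in the hunk means
that even when no scale fits, resScales[0] is used anyway, so at least one zoom level is
always written.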