src/lib/bigBed.c 1.16
1.16 2009/04/20 23:21:04 mikep
Removed timing messages. Added 'sorted' option to speed up creation of gigabyte-sized beds.
Index: src/lib/bigBed.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/lib/bigBed.c,v
retrieving revision 1.15
retrieving revision 1.16
diff -b -B -U 4 -r1.15 -r1.16
--- src/lib/bigBed.c 20 Apr 2009 23:16:18 -0000 1.15
+++ src/lib/bigBed.c 20 Apr 2009 23:21:04 -0000 1.16
@@ -16,10 +16,8 @@
#include "udc.h"
#include "bbiFile.h"
#include "bigBed.h"
-#define TIMING 0
-
struct bbiFile *bigBedFileOpen(char *fileName)
/* Open up big bed file. */
{
return bbiFileOpen(fileName, bigBedSig, "big bed");
@@ -312,65 +310,47 @@
/* Parse it and do sanity check. */
as = asParseFile(asFileName);
if (as->next != NULL)
errAbort("Can only handle .as files containing a single object.");
- if (TIMING)
- uglyTime("Parsed file %s", asFileName);
}
/* Load in chromosome sizes. */
struct hash *chromHash = bbiChromSizesFromFile(chromSizes);
verbose(1, "Read %d chromosomes and sizes from %s\n", chromHash->elCount, chromSizes);
-if (TIMING)
- uglyTime("bbiChromSizesFromFile()");
/* Load and sort input file. */
bits64 fullSize;
struct ppBed *pb, *pbList = ppBedLoadAll(inName, chromHash, chromHash->lm, as,
definedFieldCount, &fullSize, &fieldCount);
if (definedFieldCount == 0)
definedFieldCount = fieldCount;
-if (TIMING)
- uglyTime("ppBedLoadAll()");
bits64 pbCount = slCount(pbList);
-if (TIMING)
- uglyTime("slCount()");
verbose(1, "Read %llu items from %s\n", pbCount, inName);
if (!sorted)
{
slSort(&pbList, ppBedCmp);
- if (TIMING)
- uglyTime("slSort()");
}
/* Make chromosomes. */
int chromCount, maxChromNameSize;
struct bbiChromInfo *chromInfoArray;
makeChromInfo(pbList, chromHash, &chromCount, &chromInfoArray, &maxChromNameSize);
verbose(1, "%d of %d chromosomes used (%4.2f%%)\n", chromCount, chromHash->elCount,
100.0*chromCount/chromHash->elCount);
-if (TIMING)
- uglyTime("makeChromInfo()");
/* Calculate first meaningful reduction size and make first reduction. */
bits16 summaryCount = 0;
bits16 version = 1;
int initialReduction = ppBedAverageSize(pbList)*10;
-if (TIMING)
- uglyTime("ppBedAverageSize()");
verbose(2, "averageBedSize=%d\n", initialReduction/10);
if (initialReduction > 10000)
initialReduction = 10000;
bits64 lastSummarySize = 0, summarySize;
struct bbiSummary *summaryList, *firstSummaryList;
for (;;)
{
summaryList = summaryOnDepth(pbList, chromInfoArray, initialReduction);
- if (TIMING)
- uglyTime("summaryOnDepth(initialReduction=%d)", initialReduction);
summarySize = bbiTotalSummarySize(summaryList);
- if (TIMING)
- uglyTime("bbiTotalSummarySize(summarySize=%d,fullSize=%ld,lastSummarySize=%ld)", summarySize, fullSize, lastSummarySize);
if (summarySize >= fullSize && summarySize != lastSummarySize)
{
initialReduction *= 2;
bbiSummaryFreeList(&summaryList);
@@ -383,10 +363,8 @@
reduceSummaries[0] = firstSummaryList = summaryList;
reductionAmounts[0] = initialReduction;
verbose(1, "Initial zoom reduction x%d data size %4.2f%%\n",
initialReduction, 100.0 * summarySize/fullSize);
-if (TIMING)
- uglyTime("first summary list");
/* Now calculate up to 10 levels of further summary. */
bits64 reduction = initialReduction;
for (i=0; i<ArraySize(reduceSummaries)-1; i++)
@@ -395,13 +373,9 @@
if (reduction > 1000000000)
break;
summaryList = bbiReduceSummaryList(reduceSummaries[summaryCount-1], chromInfoArray,
reduction);
- if (TIMING)
- uglyTime("bbiReduceSummaryList(reduction=%ld)", reduction);
summarySize = bbiTotalSummarySize(summaryList);
- if (TIMING)
- uglyTime("bbiTotalSummarySize(summarySize=%ld,summaryCount=%ld,lastSummarySize=%ld)", summarySize, summaryCount, lastSummarySize);
if (summarySize != lastSummarySize)
{
reduceSummaries[summaryCount] = summaryList;
reductionAmounts[summaryCount] = reduction;
@@ -411,10 +385,8 @@
if (summaryItemCount <= chromCount)
break;
}
-if (TIMING)
- uglyTime("reduce summaries");
/* Open output file and write out fixed size header, including some dummy values for
* offsets we'll fill in later. */
FILE *f = mustOpen(outName, "wb");
@@ -442,10 +414,8 @@
reductionDataOffsetPos[i] = ftell(f);
writeOne(f, reserved64); // Fill in with data offset later
writeOne(f, reserved64); // Fill in with index offset later
}
-if (TIMING)
- uglyTime("write summary headers");
/* Optionally write out .as file. */
if (asFileName != NULL)
{
@@ -466,10 +436,8 @@
bptFileBulkIndexToOpenFile(chromInfoArray, sizeof(chromInfoArray[0]), chromCount, chromBlockSize,
bbiChromInfoKey, maxChromNameSize, bbiChromInfoVal,
sizeof(chromInfoArray[0].id) + sizeof(chromInfoArray[0].size),
f);
-if (TIMING)
- uglyTime("write chromosome tree");
/* Write the main data. */
dataOffset = ftell(f);
writeOne(f, pbCount);
@@ -487,10 +455,8 @@
}
putc(0, f);
}
-if (TIMING)
- uglyTime("write the main data");
/* Write the main index. */
indexOffset = ftell(f);
struct ppBed **pbArray;
AllocArray(pbArray, pbCount);
@@ -499,20 +465,16 @@
cirTreeFileBulkIndexToOpenFile(pbArray, sizeof(pbArray[0]), pbCount,
blockSize, itemsPerSlot, NULL, ppBedFetchKey, ppBedFetchOffset,
indexOffset, f);
freez(&pbArray);
-if (TIMING)
- uglyTime("write the main index");
/* Write out summary sections. */
verbose(2, "bigBedFileCreate writing %d summaries\n", summaryCount);
for (i=0; i<summaryCount; ++i)
{
reductionDataOffsets[i] = ftell(f);
reductionIndexOffsets[i] = bbiWriteSummaryAndIndex(reduceSummaries[i], blockSize, itemsPerSlot, f);
}
-if (TIMING)
- uglyTime("write summary sections");
/* Go back and fill in offsets properly in header. */
fseek(f, dataOffsetPos, SEEK_SET);
writeOne(f, dataOffset);
@@ -533,10 +495,8 @@
writeOne(f, reductionDataOffsets[i]);
writeOne(f, reductionIndexOffsets[i]);
}
-if (TIMING)
- uglyTime("filled in header and done.");
carefulClose(&f);
freez(&chromInfoArray);
}