src/utils/bedGraphToBigWig/bedGraphToBigWig.c 1.18
1.18 2009/11/07 19:29:35 kent
Adding totalSummary to big files. Should be helpful in quickly calculating ranges to be displayed by default.
Index: src/utils/bedGraphToBigWig/bedGraphToBigWig.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/utils/bedGraphToBigWig/bedGraphToBigWig.c,v
retrieving revision 1.17
retrieving revision 1.18
diff -b -B -U 4 -r1.17 -r1.18
--- src/utils/bedGraphToBigWig/bedGraphToBigWig.c 6 Nov 2009 19:45:20 -0000 1.17
+++ src/utils/bedGraphToBigWig/bedGraphToBigWig.c 7 Nov 2009 19:29:35 -0000 1.18
@@ -177,9 +177,10 @@
static struct bbiSummary *writeReducedOnceReturnReducedTwice(struct bbiChromUsage *usageList,
struct lineFile *lf, bits32 initialReduction, bits32 initialReductionCount,
int zoomIncrement, int blockSize, int itemsPerSlot,
- struct lm *lm, FILE *f, bits64 *retDataStart, bits64 *retIndexStart)
+ struct lm *lm, FILE *f, bits64 *retDataStart, bits64 *retIndexStart,
+ struct bbiSummaryElement *totalSum)
/* Write out data reduced by factor of initialReduction. Also calculate and keep in memory
* next reduction level. This is more work than some ways, but it keeps us from having to
* keep the first reduction entirely in memory. */
{
@@ -192,8 +193,9 @@
boundsEnd = boundsPt + initialReductionCount;
*retDataStart = ftell(f);
writeOne(f, initialReductionCount);
+boolean firstRow = TRUE;
for (;;)
{
/* Get next line of input if any. */
char *row[5];
@@ -212,8 +214,27 @@
bits32 start = sqlUnsigned(row[1]);
bits32 end = sqlUnsigned(row[2]);
float val = sqlFloat(row[3]);
+ /* Update total summary stuff. */
+ bits32 size = end-start;
+ if (firstRow)
+ {
+ totalSum->validCount = size;
+ totalSum->minVal = totalSum->maxVal = val;
+ totalSum->sumData = val*size;
+ totalSum->sumSquares = val*size*size;
+ firstRow = FALSE;
+ }
+ else
+ {
+ totalSum->validCount += size;
+ if (val < totalSum->minVal) totalSum->minVal = val;
+ if (val > totalSum->maxVal) totalSum->maxVal = val;
+ totalSum->sumData += val*size;
+ totalSum->sumSquares += val*size*size;
+ }
+
/* If new chromosome output existing block. */
if (differentString(chrom, usage->name))
{
usage = usage->next;
@@ -256,8 +277,9 @@
sum->sumData += val * overlap;
sum->sumSquares += val*val * overlap;
bbiOutputOneSummaryFurtherReduce(sum, &twiceReducedList, doubleReductionSize,
&boundsPt, boundsEnd, usage->size, lm, f);
+ size -= overlap;
/* Move summary to next part. */
sum->start = start = sum->end;
sum->end = start + initialReduction;
@@ -267,9 +289,8 @@
sum->validCount = 0;
}
/* Add to summary. */
- bits32 size = end - start;
sum->validCount += size;
if (sum->minVal > val) sum->minVal = val;
if (sum->maxVal < val) sum->maxVal = val;
sum->sumData += val * size;
@@ -306,8 +327,14 @@
FILE *f = mustOpen(outName, "wb");
bbiWriteDummyHeader(f);
bbiWriteDummyZooms(f);
+/* Write out dummy total summary. */
+struct bbiSummaryElement totalSum;
+ZeroVar(&totalSum);
+bits64 totalSummaryOffset = ftell(f);
+bbiSummaryElementWrite(f, &totalSum);
+
/* Write out chromosome/size database. */
bits64 chromTreeOffset = ftell(f);
bbiWriteChromInfo(usageList, blockSize, f);
@@ -380,9 +407,9 @@
lineFileRewind(lf);
struct bbiSummary *rezoomedList = writeReducedOnceReturnReducedTwice(usageList,
lf, initialReduction, initialReducedCount,
resIncrement, blockSize, itemsPerSlot, lm,
- f, &zoomDataOffsets[0], &zoomIndexOffsets[0]);
+ f, &zoomDataOffsets[0], &zoomIndexOffsets[0], &totalSum);
verboseTime(2, "writeReducedOnceReturnReducedTwice");
zoomAmounts[0] = initialReduction;
zoomLevels = 1;
@@ -412,19 +439,24 @@
rewind(f);
bits32 sig = bigWigSig;
bits16 version = bbiCurrentVersion;
bits16 summaryCount = zoomLevels;
+bits32 reserved16 = 0;
bits32 reserved32 = 0;
-bits32 reserved64 = 0;
+bits64 reserved64 = 0;
/* Write fixed header */
writeOne(f, sig);
writeOne(f, version);
writeOne(f, summaryCount);
writeOne(f, chromTreeOffset);
writeOne(f, dataOffset);
writeOne(f, indexOffset);
-for (i=0; i<8; ++i)
+writeOne(f, reserved16); // fieldCount
+writeOne(f, reserved16); // definedFieldCount
+writeOne(f, reserved64); // autoSqlOffset
+writeOne(f, totalSummaryOffset);
+for (i=0; i<3; ++i)
writeOne(f, reserved32);
/* Write summary headers with data. */
verbose(2, "Writing %d levels of zoom\n", zoomLevels);
@@ -444,8 +476,12 @@
writeOne(f, reserved64);
writeOne(f, reserved64);
}
+/* Write total summary. */
+fseek(f, totalSummaryOffset, SEEK_SET);
+bbiSummaryElementWrite(f, &totalSum);
+
lineFileClose(&lf);
carefulClose(&f);
}