src/utils/bedGraphToBigWig/bedGraphToBigWig.c 1.18

1.18 2009/11/07 19:29:35 kent
Adding totalSummary to big files. Should be helpful in quickly calculating ranges to be displayed by default.
Index: src/utils/bedGraphToBigWig/bedGraphToBigWig.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/utils/bedGraphToBigWig/bedGraphToBigWig.c,v
retrieving revision 1.17
retrieving revision 1.18
diff -b -B -U 4 -r1.17 -r1.18
--- src/utils/bedGraphToBigWig/bedGraphToBigWig.c	6 Nov 2009 19:45:20 -0000	1.17
+++ src/utils/bedGraphToBigWig/bedGraphToBigWig.c	7 Nov 2009 19:29:35 -0000	1.18
@@ -177,9 +177,10 @@
 
 static struct bbiSummary *writeReducedOnceReturnReducedTwice(struct bbiChromUsage *usageList, 
 	struct lineFile *lf, bits32 initialReduction, bits32 initialReductionCount, 
 	int zoomIncrement, int blockSize, int itemsPerSlot, 
-	struct lm *lm, FILE *f, bits64 *retDataStart, bits64 *retIndexStart)
+	struct lm *lm, FILE *f, bits64 *retDataStart, bits64 *retIndexStart,
+	struct bbiSummaryElement *totalSum)
 /* Write out data reduced by factor of initialReduction.  Also calculate and keep in memory
  * next reduction level.  This is more work than some ways, but it keeps us from having to
  * keep the first reduction entirely in memory. */
 {
@@ -192,8 +193,9 @@
 boundsEnd = boundsPt + initialReductionCount;
 
 *retDataStart = ftell(f);
 writeOne(f, initialReductionCount);
+boolean firstRow = TRUE;
 for (;;)
     {
     /* Get next line of input if any. */
     char *row[5];
@@ -212,8 +214,27 @@
     bits32 start = sqlUnsigned(row[1]);
     bits32 end = sqlUnsigned(row[2]);
     float val = sqlFloat(row[3]);
 
+    /* Update total summary stuff. */
+    bits32 size = end-start;
+    if (firstRow)
+	{
+        totalSum->validCount = size;
+	totalSum->minVal = totalSum->maxVal = val;
+	totalSum->sumData = val*size;
+	totalSum->sumSquares = val*size*size;
+	firstRow = FALSE;
+	}
+    else
+        {
+	totalSum->validCount += size;
+	if (val < totalSum->minVal) totalSum->minVal = val;
+	if (val > totalSum->maxVal) totalSum->maxVal = val;
+	totalSum->sumData += val*size;
+	totalSum->sumSquares += val*size*size;
+	}
+
     /* If new chromosome output existing block. */
     if (differentString(chrom, usage->name))
         {
 	usage = usage->next;
@@ -256,8 +277,9 @@
 	sum->sumData += val * overlap;
 	sum->sumSquares += val*val * overlap;
 	bbiOutputOneSummaryFurtherReduce(sum, &twiceReducedList, doubleReductionSize, 
 		&boundsPt, boundsEnd, usage->size, lm, f);
+	size -= overlap;
 
 	/* Move summary to next part. */
 	sum->start = start = sum->end;
 	sum->end = start + initialReduction;
@@ -267,9 +289,8 @@
 	sum->validCount = 0;
 	}
 
     /* Add to summary. */
-    bits32 size = end - start;
     sum->validCount += size;
     if (sum->minVal > val) sum->minVal = val;
     if (sum->maxVal < val) sum->maxVal = val;
     sum->sumData += val * size;
@@ -306,8 +327,14 @@
 FILE *f = mustOpen(outName, "wb");
 bbiWriteDummyHeader(f);
 bbiWriteDummyZooms(f);
 
+/* Write out dummy total summary. */
+struct bbiSummaryElement totalSum;
+ZeroVar(&totalSum);
+bits64 totalSummaryOffset = ftell(f);
+bbiSummaryElementWrite(f, &totalSum);
+
 /* Write out chromosome/size database. */
 bits64 chromTreeOffset = ftell(f);
 bbiWriteChromInfo(usageList, blockSize, f);
 
@@ -380,9 +407,9 @@
 	lineFileRewind(lf);
 	struct bbiSummary *rezoomedList = writeReducedOnceReturnReducedTwice(usageList, 
 		lf, initialReduction, initialReducedCount,
 		resIncrement, blockSize, itemsPerSlot, lm, 
-		f, &zoomDataOffsets[0], &zoomIndexOffsets[0]);
+		f, &zoomDataOffsets[0], &zoomIndexOffsets[0], &totalSum);
 	verboseTime(2, "writeReducedOnceReturnReducedTwice");
 	zoomAmounts[0] = initialReduction;
 	zoomLevels = 1;
 
@@ -412,19 +439,24 @@
 rewind(f);
 bits32 sig = bigWigSig;
 bits16 version = bbiCurrentVersion;
 bits16 summaryCount = zoomLevels;
+bits32 reserved16 = 0;
 bits32 reserved32 = 0;
-bits32 reserved64 = 0;
+bits64 reserved64 = 0;
 
 /* Write fixed header */
 writeOne(f, sig);
 writeOne(f, version);
 writeOne(f, summaryCount);
 writeOne(f, chromTreeOffset);
 writeOne(f, dataOffset);
 writeOne(f, indexOffset);
-for (i=0; i<8; ++i)
+writeOne(f, reserved16);	// fieldCount
+writeOne(f, reserved16);	// definedFieldCount
+writeOne(f, reserved64);	// autoSqlOffset
+writeOne(f, totalSummaryOffset);
+for (i=0; i<3; ++i)
     writeOne(f, reserved32);
 
 /* Write summary headers with data. */
 verbose(2, "Writing %d levels of zoom\n", zoomLevels);
@@ -444,8 +476,12 @@
     writeOne(f, reserved64);
     writeOne(f, reserved64);
     }
 
+/* Write total summary. */
+fseek(f, totalSummaryOffset, SEEK_SET);
+bbiSummaryElementWrite(f, &totalSum);
+
 lineFileClose(&lf);
 carefulClose(&f);
 }