src/lib/bwgQuery.c 1.23
1.23 2009/11/12 23:15:52 kent
First cut of compressed bigWig/bigBed stuff. So far read side should be complete including Genome Browser, Table Browser, wigToBigWig and bigWig utility functions. Still to do bedGraphToBigWig and bedToBigBed.
Index: src/lib/bwgQuery.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/lib/bwgQuery.c,v
retrieving revision 1.22
retrieving revision 1.23
diff -b -B -U 4 -r1.22 -r1.23
--- src/lib/bwgQuery.c 10 Nov 2009 05:46:05 -0000 1.22
+++ src/lib/bwgQuery.c 12 Nov 2009 23:15:52 -0000 1.23
@@ -13,8 +13,9 @@
#include "bPlusTree.h"
#include "cirTree.h"
#include "rangeTree.h"
#include "udc.h"
+#include "zlibFace.h"
#include "bbiFile.h"
#include "bwgInternal.h"
#include "bigWig.h"
#include "bigBed.h"
@@ -38,9 +39,10 @@
UBYTE reserved; /* Always zero for now. */
bits16 itemCount; /* Number of items in block. */
};
-void bwgSectionHeadRead(struct bbiFile *bwf, struct bwgSectionHead *head)
+#ifdef OLD
+static void bwgSectionHeadRead(struct bbiFile *bwf, struct bwgSectionHead *head)
/* Read section header. */
{
struct udcFile *udc = bwf->udc;
boolean isSwapped = bwf->isSwapped;
@@ -52,10 +54,11 @@
head->type = udcGetChar(udc);
head->reserved = udcGetChar(udc);
head->itemCount = udcReadBits16(udc, isSwapped);
}
+#endif /* OLD */
-void bwgSectionHeadFromMem(char **pPt, struct bwgSectionHead *head, boolean isSwapped)
+static void bwgSectionHeadFromMem(char **pPt, struct bwgSectionHead *head, boolean isSwapped)
/* Read section header. */
{
char *pt = *pPt;
head->chromId = memReadBits32(&pt, isSwapped);
@@ -68,16 +71,14 @@
head->itemCount = memReadBits16(&pt, isSwapped);
*pPt = pt;
}
-static int bigWigBlockDumpIntersectingRange(struct bbiFile *bwf, char *chrom,
- bits32 rangeStart, bits32 rangeEnd, int maxCount, FILE *out)
+static int bigWigBlockDumpIntersectingRange(boolean isSwapped, char *blockPt, char *blockEnd,
+ char *chrom, bits32 rangeStart, bits32 rangeEnd, int maxCount, FILE *out)
/* Print out info on parts of block that intersect start-end, block starting at current position. */
{
-boolean isSwapped = bwf->isSwapped;
-struct udcFile *udc = bwf->udc;
struct bwgSectionHead head;
-bwgSectionHeadRead(bwf, &head);
+bwgSectionHeadFromMem(&blockPt, &head, isSwapped);
bits16 i;
float val;
int outCount = 0;
@@ -87,11 +88,11 @@
{
fprintf(out, "#bedGraph section %s:%u-%u\n", chrom, head.start, head.end);
for (i=0; i<head.itemCount; ++i)
{
- bits32 start = udcReadBits32(udc, isSwapped);
- bits32 end = udcReadBits32(udc, isSwapped);
- udcMustReadOne(udc, val);
+ bits32 start = memReadBits32(&blockPt, isSwapped);
+ bits32 end = memReadBits32(&blockPt, isSwapped);
+ val = memReadFloat(&blockPt, isSwapped);
if (rangeIntersection(rangeStart, rangeEnd, start, end) > 0)
{
fprintf(out, "%s\t%u\t%u\t%g\n", chrom, start, end, val);
++outCount;
@@ -105,10 +106,10 @@
{
fprintf(out, "variableStep chrom=%s span=%u\n", chrom, head.itemSpan);
for (i=0; i<head.itemCount; ++i)
{
- bits32 start = udcReadBits32(udc, isSwapped);
- udcMustReadOne(udc, val);
+ bits32 start = memReadBits32(&blockPt, isSwapped);
+ val = memReadFloat(&blockPt, isSwapped);
if (rangeIntersection(rangeStart, rangeEnd, start, start+head.itemSpan) > 0)
{
fprintf(out, "%u\t%g\n", start+1, val);
++outCount;
@@ -123,15 +124,15 @@
boolean gotStart = FALSE;
bits32 start = head.start;
for (i=0; i<head.itemCount; ++i)
{
- udcMustReadOne(udc, val);
+ val = memReadFloat(&blockPt, isSwapped);
if (rangeIntersection(rangeStart, rangeEnd, start, start+head.itemSpan) > 0)
{
if (!gotStart)
{
fprintf(out, "fixedStep chrom=%s start=%u step=%u span=%u\n",
- chrom, start, head.itemStep, head.itemSpan);
+ chrom, start+1, head.itemStep, head.itemSpan);
gotStart = TRUE;
}
fprintf(out, "%g\n", val);
++outCount;
@@ -145,18 +146,12 @@
default:
internalErr();
break;
}
+assert(blockPt == blockEnd);
return outCount;
}
-void bigWigBlockDump(struct bbiFile *bwf, char *chrom, FILE *out)
-/* Print out info on block starting at current position. */
-{
-bigWigBlockDumpIntersectingRange(bwf, chrom, 0, BIGNUM, 0, out);
-}
-
-
struct bbiInterval *bigWigIntervalQuery(struct bbiFile *bwf, char *chrom, bits32 start, bits32 end,
struct lm *lm)
/* Get data for interval. Return list allocated out of lm. */
{
@@ -165,24 +160,50 @@
bbiAttachUnzoomedCir(bwf);
struct bbiInterval *el, *list = NULL;
struct fileOffsetSize *blockList = bbiOverlappingBlocks(bwf, bwf->unzoomedCir,
chrom, start, end, NULL);
-struct fileOffsetSize *block;
+struct fileOffsetSize *block, *beforeGap, *afterGap;
struct udcFile *udc = bwf->udc;
boolean isSwapped = bwf->isSwapped;
float val;
int i;
-// slSort(&blockList, fileOffsetSizeCmp);
-struct fileOffsetSize *mergedBlocks = fileOffsetSizeMerge(blockList);
-for (block = mergedBlocks; block != NULL; block = block->next)
- {
- udcSeek(udc, block->offset);
- char *blockBuf = needLargeMem(block->size);
- udcRead(udc, blockBuf, block->size);
- char *blockPt = blockBuf, *blockEnd = blockBuf + block->size;
- while (blockPt < blockEnd)
+/* Set up for uncompression optionally. */
+char *uncompressBuf = NULL;
+if (bwf->uncompressBufSize > 0)
+ uncompressBuf = needLargeMem(bwf->uncompressBufSize);
+
+/* This loop is a little complicated because we merge the read requests for efficiency, but we
+ * have to then go back through the data one unmerged block at a time. */
+for (block = blockList; block != NULL; )
+ {
+ /* Find contigious blocks and read them into mergedBuf. */
+ fileOffsetSizeFindGap(block, &beforeGap, &afterGap);
+ bits64 mergedOffset = block->offset;
+ bits64 mergedSize = beforeGap->offset + beforeGap->size - mergedOffset;
+ udcSeek(udc, mergedOffset);
+ char *mergedBuf = needLargeMem(mergedSize);
+ udcMustRead(udc, mergedBuf, mergedSize);
+ char *blockBuf = mergedBuf;
+
+ /* Loop through individual blocks within merged section. */
+ for (;block != afterGap; block = block->next)
+ {
+ /* Uncompress if necessary. */
+ char *blockPt, *blockEnd;
+ if (uncompressBuf)
+ {
+ blockPt = uncompressBuf;
+ int uncSize = zUncompress(blockBuf, block->size, uncompressBuf, bwf->uncompressBufSize);
+ blockEnd = blockPt + uncSize;
+ }
+ else
{
+ blockPt = blockBuf;
+ blockEnd = blockPt + block->size;
+ }
+
+ /* Deal with insides of block. */
struct bwgSectionHead head;
bwgSectionHeadFromMem(&blockPt, &head, isSwapped);
switch (head.type)
{
@@ -252,11 +273,14 @@
default:
internalErr();
break;
}
+ assert(blockPt == blockEnd);
+ blockBuf += block->size;
}
+ freeMem(mergedBuf);
}
-slFreeList(&mergedBlocks);
+freeMem(uncompressBuf);
slFreeList(&blockList);
slReverse(&list);
return list;
}
@@ -270,24 +294,65 @@
errAbort("Trying to do bigWigIntervalDump on a non big-wig file.");
bbiAttachUnzoomedCir(bwf);
struct fileOffsetSize *blockList = bbiOverlappingBlocks(bwf, bwf->unzoomedCir,
chrom, start, end, NULL);
-struct fileOffsetSize *block;
+struct fileOffsetSize *block, *beforeGap, *afterGap;
struct udcFile *udc = bwf->udc;
int printCount = 0;
-for (block = blockList; block != NULL; block = block->next)
- {
- udcSeek(udc, block->offset);
- int oneCount = bigWigBlockDumpIntersectingRange(bwf, chrom, start, end, maxCount, out);
+/* Set up for uncompression optionally. */
+char *uncompressBuf = NULL;
+if (bwf->uncompressBufSize > 0)
+ uncompressBuf = needLargeMem(bwf->uncompressBufSize);
+
+/* This loop is a little complicated because we merge the read requests for efficiency, but we
+ * have to then go back through the data one unmerged block at a time. */
+for (block = blockList; block != NULL; )
+ {
+ /* Find contigious blocks and read them into mergedBuf. */
+ fileOffsetSizeFindGap(block, &beforeGap, &afterGap);
+ bits64 mergedOffset = block->offset;
+ bits64 mergedSize = beforeGap->offset + beforeGap->size - mergedOffset;
+ udcSeek(udc, mergedOffset);
+ char *mergedBuf = needLargeMem(mergedSize);
+ udcMustRead(udc, mergedBuf, mergedSize);
+ char *blockBuf = mergedBuf;
+
+ /* Loop through individual blocks within merged section. */
+ for (;block != afterGap; block = block->next)
+ {
+ /* Uncompress if necessary. */
+ char *blockPt, *blockEnd;
+ if (uncompressBuf)
+ {
+ blockPt = uncompressBuf;
+ int uncSize = zUncompress(blockBuf, block->size, uncompressBuf, bwf->uncompressBufSize);
+ blockEnd = blockPt + uncSize;
+ }
+ else
+ {
+ blockPt = blockBuf;
+ blockEnd = blockPt + block->size;
+ }
+
+ /* Do the actual dump. */
+ int oneCount = bigWigBlockDumpIntersectingRange(bwf->isSwapped, blockPt, blockEnd,
+ chrom, start, end, maxCount, out);
+
+ /* Keep track of how many dumped, not exceeding maximum. */
printCount += oneCount;
if (maxCount != 0)
{
if (oneCount >= maxCount)
break;
maxCount -= oneCount;
}
+ blockBuf += block->size;
}
+ freeMem(mergedBuf);
+ }
+freeMem(uncompressBuf);
+
slFreeList(&blockList);
return printCount;
}