src/lib/bigBed.c 1.23
1.23 2009/11/13 19:02:38 kent
Adding compression to bigBed. Improving bigWigInfo and bigBedInfo a little.
Index: src/lib/bigBed.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/lib/bigBed.c,v
retrieving revision 1.22
retrieving revision 1.23
diff -b -B -U 4 -r1.22 -r1.23
--- src/lib/bigBed.c 5 Nov 2009 19:29:31 -0000 1.22
+++ src/lib/bigBed.c 13 Nov 2009 19:02:38 -0000 1.23
@@ -11,8 +11,9 @@
#include "cirTree.h"
#include "bPlusTree.h"
#include "basicBed.h"
#include "asParse.h"
+#include "zlibFace.h"
#include "sig.h"
#include "udc.h"
#include "bbiFile.h"
#include "bigBed.h"
@@ -625,29 +626,59 @@
struct bigBedInterval *el, *list = NULL;
bits32 chromId;
struct fileOffsetSize *blockList = bbiOverlappingBlocks(bbi, bbi->unzoomedCir,
chrom, start, end, &chromId);
-struct fileOffsetSize *block;
-struct udcFile *f = bbi->udc;
+struct fileOffsetSize *block, *beforeGap, *afterGap;
+struct udcFile *udc = bbi->udc;
boolean isSwapped = bbi->isSwapped;
struct dyString *dy = dyStringNew(32);
-for (block = blockList; block != NULL; block = block->next)
+
+/* Allocate an uncompression buffer only if bbi->uncompressBufSize is nonzero. */
+char *uncompressBuf = NULL;
+if (bbi->uncompressBufSize > 0)
+ uncompressBuf = needLargeMem(bbi->uncompressBufSize);
+
+for (block = blockList; block != NULL; )
+ {
+ /* Find contiguous blocks and read them into mergedBuf. */
+ fileOffsetSizeFindGap(block, &beforeGap, &afterGap);
+ bits64 mergedOffset = block->offset;
+ bits64 mergedSize = beforeGap->offset + beforeGap->size - mergedOffset;
+ udcSeek(udc, mergedOffset);
+ char *mergedBuf = needLargeMem(mergedSize);
+ udcMustRead(udc, mergedBuf, mergedSize);
+ char *blockBuf = mergedBuf;
+
+ /* Loop through individual blocks within merged section. */
+ for (;block != afterGap; block = block->next)
+ {
+ /* Uncompress if necessary. */
+ char *blockPt, *blockEnd;
+ if (uncompressBuf)
+ {
+ blockPt = uncompressBuf;
+ int uncSize = zUncompress(blockBuf, block->size, uncompressBuf, bbi->uncompressBufSize);
+ blockEnd = blockPt + uncSize;
+ }
+ else
{
- bits64 endPos = block->offset + block->size;
- udcSeek(f, block->offset);
- while (udcTell(f) < endPos)
+ blockPt = blockBuf;
+ blockEnd = blockPt + block->size;
+ }
+
+ while (blockPt < blockEnd)
{
++itemCount;
if (maxItems > 0 && itemCount > maxItems)
break;
/* Read next record into local variables. */
- bits32 chr = udcReadBits32(f, isSwapped); // Read and discard chromId
- bits32 s = udcReadBits32(f, isSwapped);
- bits32 e = udcReadBits32(f, isSwapped);
+ bits32 chr = memReadBits32(&blockPt, isSwapped); // Read and discard chromId
+ bits32 s = memReadBits32(&blockPt, isSwapped);
+ bits32 e = memReadBits32(&blockPt, isSwapped);
int c;
dyStringClear(dy);
- while ((c = udcGetChar(f)) >= 0)
+ while ((c = *blockPt++) >= 0)
{
if (c == 0)
break;
dyStringAppendC(dy, c);
@@ -665,9 +696,14 @@
}
}
if (maxItems > 0 && itemCount > maxItems)
break;
+ blockBuf += block->size;
+ }
+ if (maxItems > 0 && itemCount > maxItems)
+ break;
}
+freeMem(uncompressBuf);
slFreeList(&blockList);
slReverse(&list);
return list;
}