src/lib/bigBed.c 1.23

1.23 2009/11/13 19:02:38 kent
Adding compression to bigBed. Improving bigWigInfo and bigBedInfo a little.
Index: src/lib/bigBed.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/lib/bigBed.c,v
retrieving revision 1.22
retrieving revision 1.23
diff -b -B -U 4 -r1.22 -r1.23
--- src/lib/bigBed.c	5 Nov 2009 19:29:31 -0000	1.22
+++ src/lib/bigBed.c	13 Nov 2009 19:02:38 -0000	1.23
@@ -11,8 +11,9 @@
 #include "cirTree.h"
 #include "bPlusTree.h"
 #include "basicBed.h"
 #include "asParse.h"
+#include "zlibFace.h"
 #include "sig.h"
 #include "udc.h"
 #include "bbiFile.h"
 #include "bigBed.h"
@@ -625,29 +626,59 @@
 struct bigBedInterval *el, *list = NULL;
 bits32 chromId;
 struct fileOffsetSize *blockList = bbiOverlappingBlocks(bbi, bbi->unzoomedCir, 
 	chrom, start, end, &chromId);
-struct fileOffsetSize *block;
-struct udcFile *f = bbi->udc;
+struct fileOffsetSize *block, *beforeGap, *afterGap;
+struct udcFile *udc = bbi->udc;
 boolean isSwapped = bbi->isSwapped;
 struct dyString *dy = dyStringNew(32);
-for (block = blockList; block != NULL; block = block->next)
+
+/* Optionally set up for uncompression. */
+char *uncompressBuf = NULL;
+if (bbi->uncompressBufSize > 0)
+    uncompressBuf = needLargeMem(bbi->uncompressBufSize);
+
+for (block = blockList; block != NULL; )
+    {
+    /* Find contiguous blocks and read them into mergedBuf. */
+    fileOffsetSizeFindGap(block, &beforeGap, &afterGap);
+    bits64 mergedOffset = block->offset;
+    bits64 mergedSize = beforeGap->offset + beforeGap->size - mergedOffset;
+    udcSeek(udc, mergedOffset);
+    char *mergedBuf = needLargeMem(mergedSize);
+    udcMustRead(udc, mergedBuf, mergedSize);
+    char *blockBuf = mergedBuf;
+
+    /* Loop through individual blocks within merged section. */
+    for (;block != afterGap; block = block->next)
+        {
+	/* Uncompress if necessary. */
+	char *blockPt, *blockEnd;
+	if (uncompressBuf)
+	    {
+	    blockPt = uncompressBuf;
+	    int uncSize = zUncompress(blockBuf, block->size, uncompressBuf, bbi->uncompressBufSize);
+	    blockEnd = blockPt + uncSize;
+	    }
+	else
     {
-    bits64 endPos = block->offset + block->size;
-    udcSeek(f, block->offset);
-    while (udcTell(f) < endPos)
+	    blockPt = blockBuf;
+	    blockEnd = blockPt + block->size;
+	    }
+
+	while (blockPt < blockEnd)
         {
 	++itemCount;
 	if (maxItems > 0 && itemCount > maxItems)
 	    break;
 
 	/* Read next record into local variables. */
-	bits32 chr = udcReadBits32(f, isSwapped);	// Read and discard chromId
-	bits32 s = udcReadBits32(f, isSwapped);
-	bits32 e = udcReadBits32(f, isSwapped);
+	    bits32 chr = memReadBits32(&blockPt, isSwapped);	// Read and discard chromId
+	    bits32 s = memReadBits32(&blockPt, isSwapped);
+	    bits32 e = memReadBits32(&blockPt, isSwapped);
 	int c;
 	dyStringClear(dy);
-	while ((c = udcGetChar(f)) >= 0)
+	    while ((c = *blockPt++) >= 0)
 	    {
 	    if (c == 0)
 	        break;
 	    dyStringAppendC(dy, c);
@@ -665,9 +696,14 @@
 	    }
 	}
     if (maxItems > 0 && itemCount > maxItems)
         break;
+	blockBuf += block->size;
+        }
+    if (maxItems > 0 && itemCount > maxItems)
+        break;
     }
+freeMem(uncompressBuf);
 slFreeList(&blockList);
 slReverse(&list);
 return list;
 }