3d890482f243af7cdb51d56b5efde7c43e2ed59e
braney
Wed Dec 15 13:13:52 2010 -0800
add routine to let validateFiles check for the signature at the beginning and end of bigWigs
diff --git src/lib/bwgQuery.c src/lib/bwgQuery.c
index c002808..ba12585 100644
--- src/lib/bwgQuery.c
+++ src/lib/bwgQuery.c
@@ -1,403 +1,409 @@
/* bwgQuery - implements the query side of bigWig. See bwgInternal.h for definition of file
* format. */
#include "common.h"
#include "linefile.h"
#include "hash.h"
#include "localmem.h"
#include "options.h"
#include "sig.h"
#include "sqlNum.h"
#include "obscure.h"
#include "dystring.h"
#include "bPlusTree.h"
#include "cirTree.h"
#include "rangeTree.h"
#include "udc.h"
#include "zlibFace.h"
#include "bbiFile.h"
#include "bwgInternal.h"
#include "bigWig.h"
#include "bigBed.h"
static char const rcsid[] = "$Id: bwgQuery.c,v 1.24 2010/06/03 18:08:37 kent Exp $";
struct bbiFile *bigWigFileOpen(char *fileName)
/* Open up big wig file. */
{
return bbiFileOpen(fileName, bigWigSig, "big wig");
}
+boolean bigWigFileCheckSigs(char *fileName)
+/* check file signatures at beginning and end of file */
+{
+return bbiFileCheckSigs(fileName, bigWigSig, "big wig");
+}
+
struct bwgSectionHead
/* A header from a bigWig file section */
{
bits32 chromId; /* Chromosome short identifier. */
bits32 start,end; /* Range covered. */
bits32 itemStep; /* For some section types, the # of bases between items. */
bits32 itemSpan; /* For some section types, the # of bases in each item. */
UBYTE type; /* Type byte. */
UBYTE reserved; /* Always zero for now. */
bits16 itemCount; /* Number of items in block. */
};
#ifdef OLD
static void bwgSectionHeadRead(struct bbiFile *bwf, struct bwgSectionHead *head)
/* Read section header. */
{
struct udcFile *udc = bwf->udc;
boolean isSwapped = bwf->isSwapped;
head->chromId = udcReadBits32(udc, isSwapped);
head->start = udcReadBits32(udc, isSwapped);
head->end = udcReadBits32(udc, isSwapped);
head->itemStep = udcReadBits32(udc, isSwapped);
head->itemSpan = udcReadBits32(udc, isSwapped);
head->type = udcGetChar(udc);
head->reserved = udcGetChar(udc);
head->itemCount = udcReadBits16(udc, isSwapped);
}
#endif /* OLD */
static void bwgSectionHeadFromMem(char **pPt, struct bwgSectionHead *head, boolean isSwapped)
/* Read section header. */
{
char *pt = *pPt;
head->chromId = memReadBits32(&pt, isSwapped);
head->start = memReadBits32(&pt, isSwapped);
head->end = memReadBits32(&pt, isSwapped);
head->itemStep = memReadBits32(&pt, isSwapped);
head->itemSpan = memReadBits32(&pt, isSwapped);
head->type = *pt++;
head->reserved = *pt++;
head->itemCount = memReadBits16(&pt, isSwapped);
*pPt = pt;
}
static int bigWigBlockDumpIntersectingRange(boolean isSwapped, char *blockPt, char *blockEnd,
char *chrom, bits32 rangeStart, bits32 rangeEnd, int maxCount, FILE *out)
/* Print out info on parts of block that intersect start-end, block starting at current position. */
{
struct bwgSectionHead head;
bwgSectionHeadFromMem(&blockPt, &head, isSwapped);
bits16 i;
float val;
int outCount = 0;
switch (head.type)
{
case bwgTypeBedGraph:
{
fprintf(out, "#bedGraph section %s:%u-%u\n", chrom, head.start, head.end);
for (i=0; i
0)
{
fprintf(out, "%s\t%u\t%u\t%g\n", chrom, start, end, val);
++outCount;
if (maxCount != 0 && outCount >= maxCount)
break;
}
}
break;
}
case bwgTypeVariableStep:
{
fprintf(out, "variableStep chrom=%s span=%u\n", chrom, head.itemSpan);
for (i=0; i 0)
{
fprintf(out, "%u\t%g\n", start+1, val);
++outCount;
if (maxCount != 0 && outCount >= maxCount)
break;
}
}
break;
}
case bwgTypeFixedStep:
{
boolean gotStart = FALSE;
bits32 start = head.start;
for (i=0; i 0)
{
if (!gotStart)
{
fprintf(out, "fixedStep chrom=%s start=%u step=%u span=%u\n",
chrom, start+1, head.itemStep, head.itemSpan);
gotStart = TRUE;
}
fprintf(out, "%g\n", val);
++outCount;
if (maxCount != 0 && outCount >= maxCount)
break;
}
start += head.itemStep;
}
break;
}
default:
internalErr();
break;
}
assert(blockPt == blockEnd);
return outCount;
}
struct bbiInterval *bigWigIntervalQuery(struct bbiFile *bwf, char *chrom, bits32 start, bits32 end,
struct lm *lm)
/* Get data for interval. Return list allocated out of lm. */
{
if (bwf->typeSig != bigWigSig)
errAbort("Trying to do bigWigIntervalQuery on a non big-wig file.");
bbiAttachUnzoomedCir(bwf);
struct bbiInterval *el, *list = NULL;
struct fileOffsetSize *blockList = bbiOverlappingBlocks(bwf, bwf->unzoomedCir,
chrom, start, end, NULL);
struct fileOffsetSize *block, *beforeGap, *afterGap;
struct udcFile *udc = bwf->udc;
boolean isSwapped = bwf->isSwapped;
float val;
int i;
/* Set up for uncompression optionally. */
char *uncompressBuf = NULL;
if (bwf->uncompressBufSize > 0)
uncompressBuf = needLargeMem(bwf->uncompressBufSize);
/* This loop is a little complicated because we merge the read requests for efficiency, but we
* have to then go back through the data one unmerged block at a time. */
for (block = blockList; block != NULL; )
{
/* Find contigious blocks and read them into mergedBuf. */
fileOffsetSizeFindGap(block, &beforeGap, &afterGap);
bits64 mergedOffset = block->offset;
bits64 mergedSize = beforeGap->offset + beforeGap->size - mergedOffset;
udcSeek(udc, mergedOffset);
char *mergedBuf = needLargeMem(mergedSize);
udcMustRead(udc, mergedBuf, mergedSize);
char *blockBuf = mergedBuf;
/* Loop through individual blocks within merged section. */
for (;block != afterGap; block = block->next)
{
/* Uncompress if necessary. */
char *blockPt, *blockEnd;
if (uncompressBuf)
{
blockPt = uncompressBuf;
int uncSize = zUncompress(blockBuf, block->size, uncompressBuf, bwf->uncompressBufSize);
blockEnd = blockPt + uncSize;
}
else
{
blockPt = blockBuf;
blockEnd = blockPt + block->size;
}
/* Deal with insides of block. */
struct bwgSectionHead head;
bwgSectionHeadFromMem(&blockPt, &head, isSwapped);
switch (head.type)
{
case bwgTypeBedGraph:
{
for (i=0; i end) e = end;
if (s < e)
{
lmAllocVar(lm, el);
el->start = s;
el->end = e;
el->val = val;
slAddHead(&list, el);
}
}
break;
}
case bwgTypeVariableStep:
{
for (i=0; i end) e = end;
if (s < e)
{
lmAllocVar(lm, el);
el->start = s;
el->end = e;
el->val = val;
slAddHead(&list, el);
}
}
break;
}
case bwgTypeFixedStep:
{
bits32 s = head.start;
bits32 e = s + head.itemSpan;
for (i=0; i end) clippedE = end;
if (clippedS < clippedE)
{
lmAllocVar(lm, el);
el->start = clippedS;
el->end = clippedE;
el->val = val;
slAddHead(&list, el);
}
s += head.itemStep;
e += head.itemStep;
}
break;
}
default:
internalErr();
break;
}
assert(blockPt == blockEnd);
blockBuf += block->size;
}
freeMem(mergedBuf);
}
freeMem(uncompressBuf);
slFreeList(&blockList);
slReverse(&list);
return list;
}
int bigWigIntervalDump(struct bbiFile *bwf, char *chrom, bits32 start, bits32 end, int maxCount,
FILE *out)
/* Print out info on bigWig parts that intersect chrom:start-end. Set maxCount to 0 if you
* don't care how many are printed. Returns number printed. */
{
if (bwf->typeSig != bigWigSig)
errAbort("Trying to do bigWigIntervalDump on a non big-wig file.");
bbiAttachUnzoomedCir(bwf);
struct fileOffsetSize *blockList = bbiOverlappingBlocks(bwf, bwf->unzoomedCir,
chrom, start, end, NULL);
struct fileOffsetSize *block, *beforeGap, *afterGap;
struct udcFile *udc = bwf->udc;
int printCount = 0;
/* Set up for uncompression optionally. */
char *uncompressBuf = NULL;
if (bwf->uncompressBufSize > 0)
uncompressBuf = needLargeMem(bwf->uncompressBufSize);
/* This loop is a little complicated because we merge the read requests for efficiency, but we
* have to then go back through the data one unmerged block at a time. */
for (block = blockList; block != NULL; )
{
/* Find contigious blocks and read them into mergedBuf. */
fileOffsetSizeFindGap(block, &beforeGap, &afterGap);
bits64 mergedOffset = block->offset;
bits64 mergedSize = beforeGap->offset + beforeGap->size - mergedOffset;
udcSeek(udc, mergedOffset);
char *mergedBuf = needLargeMem(mergedSize);
udcMustRead(udc, mergedBuf, mergedSize);
char *blockBuf = mergedBuf;
/* Loop through individual blocks within merged section. */
for (;block != afterGap; block = block->next)
{
/* Uncompress if necessary. */
char *blockPt, *blockEnd;
if (uncompressBuf)
{
blockPt = uncompressBuf;
int uncSize = zUncompress(blockBuf, block->size, uncompressBuf, bwf->uncompressBufSize);
blockEnd = blockPt + uncSize;
}
else
{
blockPt = blockBuf;
blockEnd = blockPt + block->size;
}
/* Do the actual dump. */
int oneCount = bigWigBlockDumpIntersectingRange(bwf->isSwapped, blockPt, blockEnd,
chrom, start, end, maxCount, out);
/* Keep track of how many dumped, not exceeding maximum. */
printCount += oneCount;
if (maxCount != 0)
{
if (oneCount >= maxCount)
break;
maxCount -= oneCount;
}
blockBuf += block->size;
}
freeMem(mergedBuf);
}
freeMem(uncompressBuf);
slFreeList(&blockList);
return printCount;
}
boolean bigWigSummaryArray(struct bbiFile *bwf, char *chrom, bits32 start, bits32 end,
enum bbiSummaryType summaryType, int summarySize, double *summaryValues)
/* Fill in summaryValues with data from indicated chromosome range in bigWig file.
* Be sure to initialize summaryValues to a default value, which will not be touched
* for regions without data in file. (Generally you want the default value to either
* be 0.0 or nan("") depending on the application.) Returns FALSE if no data
* at that position. */
{
boolean ret = bbiSummaryArray(bwf, chrom, start, end, bigWigIntervalQuery,
summaryType, summarySize, summaryValues);
return ret;
}
boolean bigWigSummaryArrayExtended(struct bbiFile *bwf, char *chrom, bits32 start, bits32 end,
int summarySize, struct bbiSummaryElement *summary)
/* Get extended summary information for summarySize evenely spaced elements into
* the summary array. */
{
boolean ret = bbiSummaryArrayExtended(bwf, chrom, start, end, bigWigIntervalQuery,
summarySize, summary);
return ret;
}
double bigWigSingleSummary(struct bbiFile *bwf, char *chrom, int start, int end,
enum bbiSummaryType summaryType, double defaultVal)
/* Return the summarized single value for a range. */
{
double arrayOfOne = defaultVal;
bigWigSummaryArray(bwf, chrom, start, end, summaryType, 1, &arrayOfOne);
return arrayOfOne;
}
boolean isBigWig(char *fileName)
/* Peak at a file to see if it's bigWig */
{
FILE *f = mustOpen(fileName, "rb");
bits32 sig;
mustReadOne(f, sig);
fclose(f);
if (sig == bigWigSig)
return TRUE;
sig = byteSwap32(sig);
return sig == bigWigSig;
}