da00911fc0d5f9f1025aca0928f4bdabd620d1b4
angie
Fri May 10 15:11:28 2013 -0700
Bugfix: for genome-wide queries, annoStreamBig{Bed,Wig} need to do a series of per-chrom bbi queries.
diff --git src/lib/annoStreamBigWig.c src/lib/annoStreamBigWig.c
index c5b90f1..fb67696 100644
--- src/lib/annoStreamBigWig.c
+++ src/lib/annoStreamBigWig.c
@@ -1,135 +1,159 @@
/* annoStreamBigWig -- subclass of annoStreamer for bigWig file or URL */
#include "annoStreamBigWig.h"
#include "bigWig.h"
char *annoRowBigWigAsText =
"table annoRowBigWig\n"
"\"autoSql description of a single annoRowBigWig value, for filtering\"\n"
" (\n"
" float value; \"data value for this range\"\n"
" )\n"
;
struct annoStreamBigWig
{
struct annoStreamer streamer; // Parent class members & methods
// Private members
struct bbiFile *bbi; // bbi handle for bigBed file/URL.
struct lm *intervalQueryLm; // localmem object for bigWigIntervalQuery
struct bbiInterval *intervalList; // results of bigWigIntervalQuery
struct bbiInterval *nextInterval; // next result to be translated into row
+ struct bbiChromInfo *chromList; // list of chromosomes for which bbi actually has data
+ struct bbiChromInfo *queryChrom; // most recently queried chrom (or NULL) for whole-genome
};
static void asbwSetRegion(struct annoStreamer *vSelf, char *chrom, uint regionStart, uint regionEnd)
/* Set region -- and free localmem from previous query if necessary. */
{
annoStreamerSetRegion(vSelf, chrom, regionStart, regionEnd);
struct annoStreamBigWig *self = (struct annoStreamBigWig *)vSelf;
self->nextInterval = self->intervalList = NULL;
lmCleanup(&(self->intervalQueryLm));
}
static void asbwDoQuery(struct annoStreamBigWig *self)
/* Store results of an interval query. [Would be nice to make a streaming version of this.] */
{
-struct annoStreamer *streamer = &(self->streamer);
+struct annoStreamer *sSelf = &(self->streamer);
if (self->intervalQueryLm == NULL)
self->intervalQueryLm = lmInit(0);
-self->intervalList = bigWigIntervalQuery(self->bbi, streamer->chrom,
- streamer->regionStart, streamer->regionEnd,
+if (sSelf->chrom != NULL)
+ {
+ self->intervalList = bigWigIntervalQuery(self->bbi, sSelf->chrom,
+ sSelf->regionStart, sSelf->regionEnd,
self->intervalQueryLm);
+ }
+else
+ {
+ // Genome-wide query: break it into chrom-by-chrom queries.
+ if (self->queryChrom == NULL)
+ self->queryChrom = self->chromList;
+ else
+ self->queryChrom = self->queryChrom->next;
+ if (self->queryChrom == NULL)
+ {
+ self->chromList = NULL; // EOF, don't start over!
+ self->intervalList = NULL;
+ }
+ else
+ {
+ char *chrom = self->queryChrom->name;
+ uint end = self->queryChrom->size;
+ self->intervalList = bigWigIntervalQuery(self->bbi, chrom, 0, end, self->intervalQueryLm);
+ }
+ }
self->nextInterval = self->intervalList;
}
static struct annoRow *annoRowFromContigBbiIntervals(char *name, char *chrom,
struct bbiInterval *startIv, struct bbiInterval *endIv,
boolean rightJoinFail, struct lm *callerLm)
/* Given a range of non-NULL contiguous bbiIntervals (i.e. no gaps between intervals),
* translate into annoRow with annoVector as data. */
{
float *vals;
int baseCount = endIv->end - startIv->start;
AllocArray(vals, baseCount);
int vecOff = 0;
struct bbiInterval *iv;
for (iv = startIv; iv != endIv->next; iv = iv->next)
{
int i;
for (i = 0; i < (iv->end - iv->start); i++)
vals[vecOff++] = iv->val;
if (vecOff > baseCount)
errAbort("annoStreamBigWig %s: overflowed baseCount (%s:%d-%d)",
name, chrom, startIv->start, endIv->end);
}
return annoRowWigNew(chrom, startIv->start, endIv->end, rightJoinFail, vals, callerLm);
}
-static struct annoRow *asbwNextRow(struct annoStreamer *vSelf, struct lm *callerLm)
+static struct annoRow *asbwNextRow(struct annoStreamer *sSelf, struct lm *callerLm)
/* Return a single annoRow, or NULL if there are no more items. */
{
-struct annoStreamBigWig *self = (struct annoStreamBigWig *)vSelf;
+struct annoStreamBigWig *self = (struct annoStreamBigWig *)sSelf;
if (self->intervalList == NULL)
asbwDoQuery(self);
if (self->nextInterval == NULL)
return NULL;
// Skip past any left-join failures until we get a right-join failure, a passing interval, or EOF.
boolean rightFail = FALSE;
struct bbiInterval *startIv = self->nextInterval;
-while (annoFilterWigValueFails(vSelf->filters, self->nextInterval->val, &rightFail))
+while (annoFilterWigValueFails(sSelf->filters, self->nextInterval->val, &rightFail))
{
if (rightFail)
break;
startIv = self->nextInterval = self->nextInterval->next;
if (self->nextInterval == NULL)
return NULL;
}
+char *chrom = sSelf->chrom ? sSelf->chrom : self->queryChrom->name;
if (rightFail)
- return annoRowFromContigBbiIntervals(vSelf->name, vSelf->chrom,
- startIv, startIv, rightFail, callerLm);
+ return annoRowFromContigBbiIntervals(sSelf->name, chrom, startIv, startIv, rightFail,
+ callerLm);
struct bbiInterval *endIv = startIv, *iv;
int maxCount = 16 * 1024, count;
for (iv = startIv->next, count = 0; iv != NULL && count < maxCount; iv = iv->next, count++)
{
// collect contiguous intervals; then make annoRow with vector.
- if (annoFilterWigValueFails(vSelf->filters, iv->val, &rightFail))
+ if (annoFilterWigValueFails(sSelf->filters, iv->val, &rightFail))
break;
if (iv->start == endIv->end)
endIv = iv;
else
break;
}
self->nextInterval = endIv->next;
-return annoRowFromContigBbiIntervals(vSelf->name, vSelf->chrom,
- startIv, endIv, rightFail, callerLm);
+return annoRowFromContigBbiIntervals(sSelf->name, chrom, startIv, endIv, rightFail, callerLm);
}
static void asbwClose(struct annoStreamer **pVSelf)
/* Close bbi handle and free self. */
{
if (pVSelf == NULL)
return;
struct annoStreamBigWig *self = *(struct annoStreamBigWig **)pVSelf;
bigWigFileClose(&(self->bbi));
self->intervalList = NULL;
lmCleanup(&(self->intervalQueryLm));
annoStreamerFree(pVSelf);
}
struct annoStreamer *annoStreamBigWigNew(char *fileOrUrl, struct annoAssembly *aa)
/* Create an annoStreamer (subclass) object from a file or URL. */
{
struct bbiFile *bbi = bigWigFileOpen(fileOrUrl);
struct asObject *asObj = asParseText(annoRowBigWigAsText);
struct annoStreamBigWig *self = NULL;
AllocVar(self);
struct annoStreamer *streamer = &(self->streamer);
annoStreamerInit(streamer, aa, asObj, fileOrUrl);
streamer->rowType = arWig;
streamer->setRegion = asbwSetRegion;
streamer->nextRow = asbwNextRow;
streamer->close = asbwClose;
self->bbi = bbi;
return (struct annoStreamer *)self;
}