481ed3546001933ed4ceb889fc34696e68bef234 angie Fri Dec 14 17:05:55 2012 -0800 Feature #6152 (Variant Annotation Integrator): added annoStreamBigWigand moved up annoGrateWig from hg/lib to lib. Tested with comparison to hgTables intersection, bitwise. Output is not identical because hgTables merges adjacent bases and splits on value changes, while annoGrator prints original item and comma-sep list of per-base values. diff --git src/lib/annoStreamBigWig.c src/lib/annoStreamBigWig.c new file mode 100644 index 0000000..ed4fe27 --- /dev/null +++ src/lib/annoStreamBigWig.c @@ -0,0 +1,133 @@ +/* annoStreamBigWig -- subclass of annoStreamer for bigWig file or URL */ + +#include "annoStreamBigWig.h" +#include "bigWig.h" + +char *annoRowBigWigAsText = +"table annoRowBigWig\n" +"\"autoSql description of a single annoRowBigWig value, for filtering\"\n" +" (\n" +" float value; \"data value for this range\"\n" +" )\n" + ; + +struct annoStreamBigWig + { + struct annoStreamer streamer; // Parent class members & methods + // Private members + struct bbiFile *bbi; // bbi handle for bigBed file/URL. + struct lm *intervalQueryLm; // localmem object for bigWigIntervalQuery + struct bbiInterval *intervalList; // results of bigWigIntervalQuery + struct bbiInterval *nextInterval; // next result to be translated into row + }; + + +static void asbwSetRegion(struct annoStreamer *vSelf, char *chrom, uint regionStart, uint regionEnd) +/* Set region -- and free localmem from previous query if necessary. */ +{ +annoStreamerSetRegion(vSelf, chrom, regionStart, regionEnd); +struct annoStreamBigWig *self = (struct annoStreamBigWig *)vSelf; +self->nextInterval = self->intervalList = NULL; +lmCleanup(&(self->intervalQueryLm)); +} + +static void asbwDoQuery(struct annoStreamBigWig *self) +/* Store results of an interval query. [Would be nice to make a streaming version of this.] */ +{ +struct annoStreamer *streamer = &(self->streamer); +if (self->intervalQueryLm == NULL) + self->intervalQueryLm = lmInit(0); +self->intervalList = bigWigIntervalQuery(self->bbi, streamer->chrom, + streamer->regionStart, streamer->regionEnd, + self->intervalQueryLm); +self->nextInterval = self->intervalList; +} + +static struct annoRow *annoRowFromContigBbiIntervals(char *chrom, + struct bbiInterval *startIv, struct bbiInterval *endIv, + boolean rightJoinFail) +/* Given a range of non-NULL contiguous bbiIntervals (i.e. no gaps between intervals), + * translate into annoRow with annoVector as data. */ +{ +float *vals; +int baseCount = endIv->end - startIv->start; +AllocArray(vals, baseCount); +int vecOff = 0; +struct bbiInterval *iv; +for (iv = startIv; iv != endIv->next; iv = iv->next) + { + int i; + for (i = 0; i < (iv->end - iv->start); i++) + vals[vecOff++] = iv->val; + if (vecOff > baseCount) + errAbort("annoRowFromContigBbiIntervals: overflowed baseCount (%s:%d-%d)", + chrom, startIv->start, endIv->end); + } +return annoRowWigNew(chrom, startIv->start, endIv->end, rightJoinFail, vals); +} + +static struct annoRow *asbwNextRow(struct annoStreamer *vSelf) +/* Return a single annoRow, or NULL if there are no more items. */ +{ +struct annoStreamBigWig *self = (struct annoStreamBigWig *)vSelf; +if (self->intervalList == NULL) + asbwDoQuery(self); +if (self->nextInterval == NULL) + return NULL; +// Skip past any left-join failures until we get a right-join failure, a passing interval, or EOF. +boolean rightFail = FALSE; +struct bbiInterval *startIv = self->nextInterval; +while (annoFilterWigValueFails(vSelf->filters, self->nextInterval->val, &rightFail)) + { + if (rightFail) + break; + startIv = self->nextInterval = self->nextInterval->next; + if (self->nextInterval == NULL) + return NULL; + } +if (rightFail) + return annoRowFromContigBbiIntervals(vSelf->chrom, startIv, startIv, rightFail); +struct bbiInterval *endIv = startIv, *iv; +int maxCount = 16 * 1024, count; +for (iv = startIv->next, count = 0; iv != NULL && count < maxCount; iv = iv->next, count++) + { + // collect contiguous intervals; then make annoRow with vector. + if (annoFilterWigValueFails(vSelf->filters, iv->val, &rightFail)) + break; + if (iv->start == endIv->end) + endIv = iv; + else + break; + } +self->nextInterval = endIv->next; +return annoRowFromContigBbiIntervals(vSelf->chrom, startIv, endIv, rightFail); +} + +static void asbwClose(struct annoStreamer **pVSelf) +/* Close bbi handle and free self. */ +{ +if (pVSelf == NULL) + return; +struct annoStreamBigWig *self = *(struct annoStreamBigWig **)pVSelf; +bigWigFileClose(&(self->bbi)); +self->intervalList = NULL; +lmCleanup(&(self->intervalQueryLm)); +annoStreamerFree(pVSelf); +} + +struct annoStreamer *annoStreamBigWigNew(char *fileOrUrl) +/* Create an annoStreamer (subclass) object from a file or URL. */ +{ +struct bbiFile *bbi = bigWigFileOpen(fileOrUrl); +struct asObject *asObj = asParseText(annoRowBigWigAsText); +struct annoStreamBigWig *self = NULL; +AllocVar(self); +struct annoStreamer *streamer = &(self->streamer); +annoStreamerInit(streamer, asObj); +streamer->rowType = arWig; +streamer->setRegion = asbwSetRegion; +streamer->nextRow = asbwNextRow; +streamer->close = asbwClose; +self->bbi = bbi; +return (struct annoStreamer *)self; +}