da00911fc0d5f9f1025aca0928f4bdabd620d1b4 angie Fri May 10 15:11:28 2013 -0700 Bugfix: for genome-wide queries, annoStreamBig{Bed,Wig} need to do a series of per-chrom bbi queries. diff --git src/lib/annoStreamBigBed.c src/lib/annoStreamBigBed.c index 1b8814f..4350c3f 100644 --- src/lib/annoStreamBigBed.c +++ src/lib/annoStreamBigBed.c @@ -1,96 +1,122 @@ /* annoStreamBigBed -- subclass of annoStreamer for bigBed file or URL */ #include "annoStreamBigBed.h" #include "bigBed.h" #include "localmem.h" #include "sqlNum.h" struct annoStreamBigBed { struct annoStreamer streamer; // Parent class members & methods // Private members struct bbiFile *bbi; // bbi handle for bigBed file/URL. struct lm *intervalQueryLm; // localmem object for bigBedIntervalQuery struct bigBedInterval *intervalList; // results of bigBedIntervalQuery struct bigBedInterval *nextInterval; // next result to be translated into row + struct bbiChromInfo *chromList; // list of chromosomes for which bbi actually has data + struct bbiChromInfo *queryChrom; // most recently queried chrom (or NULL) for whole-genome int maxItems; // max items returned from bigBedIntervalQuery char **row; // storage for results of bigBedIntervalToRow char *startBuf; // storage for stringified start from bigBedIntervalToRow char *endBuf; // storage for stringified end from bigBedIntervalToRow }; static void asbbSetRegion(struct annoStreamer *vSelf, char *chrom, uint regionStart, uint regionEnd) /* Set region -- and free localmem from previous query if necessary. */ { annoStreamerSetRegion(vSelf, chrom, regionStart, regionEnd); struct annoStreamBigBed *self = (struct annoStreamBigBed *)vSelf; self->nextInterval = self->intervalList = NULL; lmCleanup(&(self->intervalQueryLm)); } static void asbbDoQuery(struct annoStreamBigBed *self) /* Store results of an interval query. [Would be nice to make a streaming version of this.] */ { -struct annoStreamer *streamer = &(self->streamer); +struct annoStreamer *sSelf = &(self->streamer); if (self->intervalQueryLm == NULL) self->intervalQueryLm = lmInit(0); -self->intervalList = bigBedIntervalQuery(self->bbi, streamer->chrom, - streamer->regionStart, streamer->regionEnd, +if (sSelf->chrom != NULL) + { + self->intervalList = bigBedIntervalQuery(self->bbi, sSelf->chrom, + sSelf->regionStart, sSelf->regionEnd, + self->maxItems, self->intervalQueryLm); + } +else + { + // Genome-wide query: break it into chrom-by-chrom queries. + if (self->queryChrom == NULL) + self->queryChrom = self->chromList; + else + self->queryChrom = self->queryChrom->next; + if (self->queryChrom == NULL) + { + self->chromList = NULL; // EOF, don't start over! + self->intervalList = NULL; + } + else + { + char *chrom = self->queryChrom->name; + uint end = self->queryChrom->size; + self->intervalList = bigBedIntervalQuery(self->bbi, chrom, 0, end, self->maxItems, self->intervalQueryLm); + } + } self->nextInterval = self->intervalList; } static char **nextRowUnfiltered(struct annoStreamBigBed *self) /* Convert the next available interval into a row of words, or return NULL. */ { -struct annoStreamer *streamer = &(self->streamer); +struct annoStreamer *sSelf = &(self->streamer); if (self->nextInterval == NULL) return NULL; -int fieldCount = bigBedIntervalToRow(self->nextInterval, streamer->chrom, +char *chrom = sSelf->chrom ? sSelf->chrom : self->queryChrom->name; +int fieldCount = bigBedIntervalToRow(self->nextInterval, chrom, self->startBuf, self->endBuf, - self->row, streamer->numCols+1); -if (fieldCount != streamer->numCols) + self->row, sSelf->numCols+1); +if (fieldCount != sSelf->numCols) errAbort("annoStreamBigBed %s: expected %d columns, got %d", - streamer->name, streamer->numCols, fieldCount); + sSelf->name, sSelf->numCols, fieldCount); self->nextInterval = self->nextInterval->next; return self->row; } static struct annoRow *asbbNextRow(struct annoStreamer *vSelf, struct lm *lm) /* Return a single annoRow, or NULL if there are no more items. */ { struct annoStreamBigBed *self = (struct annoStreamBigBed *)vSelf; if (self->intervalList == NULL) asbbDoQuery(self); char **row = nextRowUnfiltered(self); if (row == NULL) return NULL; // Skip past any left-join failures until we get a right-join failure, a passing row, or EOF. boolean rightFail = FALSE; while (annoFilterRowFails(vSelf->filters, row, vSelf->numCols, &rightFail)) { if (rightFail) break; row = nextRowUnfiltered(self); if (row == NULL) return NULL; } +char *chrom = row[0]; uint chromStart = sqlUnsigned(row[1]); uint chromEnd = sqlUnsigned(row[2]); -return annoRowFromStringArray(vSelf->chrom, chromStart, chromEnd, rightFail, row, vSelf->numCols, - lm); +return annoRowFromStringArray(chrom, chromStart, chromEnd, rightFail, row, vSelf->numCols, lm); } static void asbbClose(struct annoStreamer **pVSelf) /* Close bbi handle and free self. */ { if (pVSelf == NULL) return; struct annoStreamBigBed *self = *(struct annoStreamBigBed **)pVSelf; bigBedFileClose(&(self->bbi)); self->intervalList = NULL; lmCleanup(&(self->intervalQueryLm)); freeMem(self->row); freeMem(self->startBuf); freeMem(self->endBuf); annoStreamerFree(pVSelf); @@ -104,17 +130,18 @@ struct bbiFile *bbi = bigBedFileOpen(fileOrUrl); struct asObject *asObj = bigBedAsOrDefault(bbi); struct annoStreamBigBed *self = NULL; AllocVar(self); struct annoStreamer *streamer = &(self->streamer); annoStreamerInit(streamer, aa, asObj, fileOrUrl); streamer->rowType = arWords; streamer->setRegion = asbbSetRegion; streamer->nextRow = asbbNextRow; streamer->close = asbbClose; self->bbi = bbi; self->maxItems = maxItems; AllocArray(self->row, streamer->numCols + 1); self->startBuf = needMem(32); self->endBuf = needMem(32); +self->chromList = bbiChromList(bbi); return (struct annoStreamer *)self; }