97052b54a39bd15ffd0f610bdb16ccd0ea65ca53 angie Mon Jun 10 14:37:22 2013 -0700 Added a chunked-query mode to annoStreamDb, similar to that of annoStreamBigBed, so that we can quickly drain sqlResults from smaller queries instead of keeping a genome-wide sqlResult open for the whole time we're processing -- that was making mysql have mysterious hangs on hgwdev. (http://bugs.mysql.com/bug.php?id=50399) diff --git src/lib/annoStreamBigBed.c src/lib/annoStreamBigBed.c index 278d4e1..f18cb4a 100644 --- src/lib/annoStreamBigBed.c +++ src/lib/annoStreamBigBed.c @@ -26,65 +26,69 @@ }; // For performance reasons, even if !useMaxItems (no limit), we need to limit the // number of intervals that are slurped into memory for a bigBedIntervalQuery, so // we don't sit waiting too long when a chromosome has millions of intervals. #define ASBB_CHUNK_SIZE 100000 static void asbbSetRegion(struct annoStreamer *vSelf, char *chrom, uint regionStart, uint regionEnd) /* Set region -- and free localmem from previous query if necessary. */ { annoStreamerSetRegion(vSelf, chrom, regionStart, regionEnd); struct annoStreamBigBed *self = (struct annoStreamBigBed *)vSelf; self->nextInterval = self->intervalList = NULL; self->queryChrom = NULL; self->eof = FALSE; +self->doNextChunk = FALSE; lmCleanup(&(self->intervalQueryLm)); } static void updateNextChunkState(struct annoStreamBigBed *self, int queryMaxItems) /* If the just-fetched interval list was limited to ASBB_CHUNK_SIZE, set doNextChunk * and trim the last interval(s) so that when we query the next chunk, we don't get * repeat rows due to querying a start coord that was already returned. 
*/ { if (queryMaxItems == ASBB_CHUNK_SIZE) { int itemCount = slCount(self->intervalList); if (itemCount == ASBB_CHUNK_SIZE) { self->doNextChunk = TRUE; struct bigBedInterval *lastIv = NULL, *iv; for (iv = self->intervalList; iv->next != NULL; iv = iv->next) { if (iv->start != iv->next->start) { lastIv = iv; self->nextChunkStart = iv->next->start; } } lastIv->next = NULL; } + else + self->doNextChunk = FALSE; } else self->doNextChunk = FALSE; } static void asbbDoQuery(struct annoStreamBigBed *self, char *minChrom, uint minEnd) /* Store results of an interval query. */ { struct annoStreamer *sSelf = &(self->streamer); -if (sSelf->chrom != NULL && self->intervalList != NULL) +if (sSelf->chrom != NULL && self->intervalList != NULL && !self->doNextChunk) + // We're doing a region query, we already got some rows, and don't need another chunk: self->eof = TRUE; if (self->useMaxItems) { int lastIntervalCount = slCount(self->intervalList); self->maxItems -= lastIntervalCount; if (self->maxItems <= 0) self->eof = TRUE; } self->nextInterval = self->intervalList = NULL; lmCleanup(&self->intervalQueryLm); if (self->eof) return; self->intervalQueryLm = lmInit(0); int queryMaxItems = ASBB_CHUNK_SIZE; if (self->useMaxItems && self->maxItems < queryMaxItems)