70668f5dd4d6bbcf079a1025649e17b9885335c7 angie Mon May 13 10:17:53 2013 -0700 Added 2 new args to annoStreamer:nextRow: minChrom and minEnd (which could also be called regionStart depending on point of view). Streamers may use those hints to skip over data that precedes minChrom and minEnd, to avoid the overhead of creating annoRows that annoGrators will then have to skip over. When primary data are sparse and grator data are very dense, this saves significant memory and user-cycles. Unfortunately mysql can still be the bottleneck for elapsed time. Room for improvement in annoStreamDb: when assembly has a reasonably small number of sequences (<1000), genome-wide queries could be internally broken down into per-seq queries; that would let us skip over chroms that precede minChrom. refs #6152 diff --git src/hg/lib/annoStreamWig.c src/hg/lib/annoStreamWig.c index e9242fb..3e944e7 100644 --- src/hg/lib/annoStreamWig.c +++ src/hg/lib/annoStreamWig.c @@ -74,39 +74,40 @@ { value = BIN_TO_VALUE(wigBuf[i], wiggle->lowerLimit, wiggle->dataRange); if (annoFilterWigValueFails(self->streamer.filters, value, retRightFail)) value = NAN; else validCount++; } int bpOffset = i * wiggle->span; for (j = 0; j < wiggle->span; j++) vector[bpOffset + j] = value; } if (retValidCount != NULL) *retValidCount = validCount; } -static struct annoRow *aswNextRow(struct annoStreamer *vSelf, struct lm *callerLm) +static struct annoRow *aswNextRow(struct annoStreamer *vSelf, char *minChrom, uint minEnd, + struct lm *callerLm) /* Return an annoRow encoding the next chunk of wiggle data, or NULL if there are no more items. 
*/ { struct annoStreamWig *self = (struct annoStreamWig *)vSelf; struct annoRow *rowOut = NULL; boolean done = FALSE; while (!done) { - struct annoRow *wigRow = self->wigStr->nextRow(self->wigStr, callerLm); + struct annoRow *wigRow = self->wigStr->nextRow(self->wigStr, minChrom, minEnd, callerLm); if (wigRow == NULL) return NULL; struct wiggle wiggle; wiggleStaticLoad((char **)wigRow->data, &wiggle); checkWibFile(self, wiggle.file); // translate wigRow + bytes to float vector boolean rightFail = FALSE; int validCount = 0; int bpLen = wiggle.chromEnd - wiggle.chromStart; float vector[bpLen]; getFloatArray(self, &wiggle, &rightFail, &validCount, vector); if (rightFail || validCount > 0) { rowOut = annoRowWigNew(wigRow->chrom, wigRow->start, wigRow->end, rightFail, vector, callerLm);