236814bc49d19c34bea7b90c7e879c64d2507cf7 angie Thu Aug 27 14:06:14 2015 -0700 If querying in a region, restrict wiggle values to the part of the region that overlaps the primary item, don't just use the bounds of the db rows that overlap start/end of region. fixes #15952 diff --git src/lib/annoGrateWig.c src/lib/annoGrateWig.c index dd816b5..39d8682 100644 --- src/lib/annoGrateWig.c +++ src/lib/annoGrateWig.c @@ -5,43 +5,43 @@ #include "annoGrateWig.h" #include "annoStreamBigWig.h" struct annoGrateWig { struct annoGrator grator; // external annoGrator interface struct annoGrator *mySource; // internal annoGrator of wigDb rows struct lm *lm; // localmem for rows from mySource int lmRowCount; // counter for periodic cleanup enum annoGrateWigMode mode; // output: either one annoRow with average value in range // or one or more annoRows excluding bases with NANs }; static struct annoRow *normalizeRows(const struct annoRow *rowsIn, - uint primaryStart, uint primaryEnd, struct lm *callerLm) + uint regionStart, uint regionEnd, struct lm *callerLm) /* This takes a series of wiggle chunks coming from .wig/database rows and makes it into * zero or more tidy little NAN-less annoRows. Trim rowIn to the bounds of - * primary, trim NANs from beginning and break into multiple rows where there + * region, trim NANs from beginning and break into multiple rows where there * are NANs in the middle. If the rowIn is contiguous with the row at the * head of outList, expand that row to include rowIn's data. */ { struct annoRow *rowOutList = NULL; const struct annoRow *rowIn; for (rowIn = rowsIn; rowIn != NULL; rowIn = rowIn->next) { - uint start = max(rowIn->start, primaryStart); - uint end = min(rowIn->end, primaryEnd); + uint start = max(rowIn->start, regionStart); + uint end = min(rowIn->end, regionEnd); float *vector = rowIn->data; while (end > start) { uint offset = start - rowIn->start; if (isnan(vector[offset])) start++; else { // If there is a NAN before end, that's the end of this row: uint thisEnd = start; while (thisEnd < end && ! isnan(vector[thisEnd - rowIn->start])) thisEnd++; struct annoRow *headRow = rowOutList; if (headRow == NULL || rowIn->start > headRow->end) { @@ -64,47 +64,47 @@ headRow->data = lmAllocMoreMem(callerLm, headRow->data, oldLen*sizeof(vector[0]), newLen*sizeof(vector[0])); headRow->end = thisEnd; float *newData = (float *)rowIn->data + (oldEnd - rowIn->start); float *newSpace = (float *)headRow->data + oldLen; CopyArray(newData, newSpace, (thisEnd - oldEnd)); } start = thisEnd; } } } slReverse(&rowOutList); return rowOutList; } -static struct annoRow *averageRows(struct annoRow *rowList, uint primaryStart, uint primaryEnd, +static struct annoRow *averageRows(struct annoRow *rowList, uint regionStart, uint regionEnd, struct lm *callerLm) -/* Return an annoRow with the average value of floats across all row->data (within primary item's - * region), ignoring missing data and NANs. Return NULL if rowList is NULL or if all bases - * overlapping the primary item are NAN (unlikely, but could happen). */ +/* Return an annoRow with the average value of floats across all row->data (within region), + * ignoring missing data and NANs. Return NULL if rowList is NULL or if all bases overlapping + * the region are NAN (unlikely, but could happen). */ { if (rowList == NULL) return NULL; int count = 0; double sum = 0.0; -uint start = max(rowList->start, primaryStart); +uint start = max(rowList->start, regionStart); uint end = start; struct annoRow *row; for (row = rowList; row != NULL; row = row->next) { - uint rowStart = max(row->start, primaryStart); - uint rowEnd = min(row->end, primaryEnd); + uint rowStart = max(row->start, regionStart); + uint rowEnd = min(row->end, regionEnd); if (rowEnd > rowStart) { float *vector = row->data; int iStart = rowStart - row->start; int iEnd = rowEnd - row->start; int i; for (i = iStart; i < iEnd; i++) { // skip NAN values so they don't convert sum to a NAN: if (! isnan(vector[i])) { sum += vector[i]; count++; } } @@ -126,34 +126,46 @@ struct annoGrateWig *self = (struct annoGrateWig *)gSelf; // Cleanup internal lm every so often: if (self->lmRowCount >= 4096) { lmCleanup(&(self->lm)); self->lmRowCount = 0; } if (self->lm == NULL) self->lm = lmInit(0); struct annoRow *rowsIn = annoGratorIntegrate(self->mySource, primaryData, retRJFilterFailed, self->lm); self->lmRowCount += slCount(rowsIn); if (retRJFilterFailed && *retRJFilterFailed) return NULL; struct annoRow *primaryRow = primaryData->rowList; +struct annoStreamer *sSelf = (struct annoStreamer *)gSelf; +uint regionStart, regionEnd; +if (isNotEmpty(sSelf->chrom)) + { + regionStart = max(primaryRow->start, sSelf->regionStart); + regionEnd = min(primaryRow->end, sSelf->regionEnd); + } +else + { + regionStart = primaryRow->start; + regionEnd = primaryRow->end; + } if (self->mode == agwmAverage) - return averageRows(rowsIn, primaryRow->start, primaryRow->end, callerLm); + return averageRows(rowsIn, regionStart, regionEnd, callerLm); else if (self->mode == agwmPerBase) - return normalizeRows(rowsIn, primaryRow->start, primaryRow->end, callerLm); + return normalizeRows(rowsIn, regionStart, regionEnd, callerLm); else errAbort("agwIntegrate: invalid self->mode %d", self->mode); return NULL; } static void appendAverageToValueLabel(struct asObject *asObj) /* When we are averaging wiggle values, tweak the "value" column label to remind the user. */ { struct asColumn *col; for (col = asObj->columnList; col != NULL; col = col->next) { if (sameString(col->name, "value")) { freez(&col->name); col->name = cloneString("valueAverage");