236814bc49d19c34bea7b90c7e879c64d2507cf7
angie
  Thu Aug 27 14:06:14 2015 -0700
If querying in a region, restrict wiggle values to the part of the region
that overlaps the primary item, don't just use the bounds of the db rows
that overlap start/end of region.
fixes #15952

diff --git src/lib/annoGrateWig.c src/lib/annoGrateWig.c
index dd816b5..39d8682 100644
--- src/lib/annoGrateWig.c
+++ src/lib/annoGrateWig.c
@@ -1,200 +1,212 @@
 /* annoGrateWig -- subclass of annoGrator for bigWig or wiggle data */
 
 /* Copyright (C) 2013 The Regents of the University of California 
  * See README in this or parent directory for licensing information. */
 
 #include "annoGrateWig.h"
 #include "annoStreamBigWig.h"
 
 struct annoGrateWig
 {
     struct annoGrator grator;		// external annoGrator interface
     struct annoGrator *mySource;	// internal annoGrator of wigDb rows
     struct lm *lm;			// localmem for rows from mySource
     int lmRowCount;			// counter for periodic cleanup
     enum annoGrateWigMode mode;         // output: either one annoRow with average value in range
                                         // or one or more annoRows excluding bases with NANs
 };
 
 static struct annoRow *normalizeRows(const struct annoRow *rowsIn,
-                                     uint primaryStart, uint primaryEnd, struct lm *callerLm)
+                                     uint regionStart, uint regionEnd, struct lm *callerLm)
 /* This takes a series of wiggle chunks coming from .wig/database rows and makes it into
  * zero or more tidy little NAN-less annoRows.  Trim rowIn to the bounds of
- * primary, trim NANs from beginning and break into multiple rows where there
+ * region, trim NANs from beginning and break into multiple rows where there
  * are NANs in the middle.  If the rowIn is contiguous with the row at the
  * head of outList, expand that row to include rowIn's data. */
 {
 struct annoRow *rowOutList = NULL;
 const struct annoRow *rowIn;
 for (rowIn = rowsIn;  rowIn != NULL;  rowIn = rowIn->next)
     {
-    uint start = max(rowIn->start, primaryStart);
-    uint end = min(rowIn->end, primaryEnd);
+    uint start = max(rowIn->start, regionStart);
+    uint end = min(rowIn->end, regionEnd);
     float *vector = rowIn->data;
     while (end > start)
         {
         uint offset = start - rowIn->start;
         if (isnan(vector[offset]))
             start++;
         else
             {
             // If there is a NAN before end, that's the end of this row:
             uint thisEnd = start;
             while (thisEnd < end && ! isnan(vector[thisEnd - rowIn->start]))
                 thisEnd++;
             struct annoRow *headRow = rowOutList;
             if (headRow == NULL || rowIn->start > headRow->end)
                 {
                 // allocate a new row
                 struct annoRow *rowOut = annoRowWigVecNew(rowIn->chrom, start, thisEnd, FALSE,
                                                           vector + offset, callerLm);
                 slAddHead(&rowOutList, rowOut);
                 }
             else
                 {
                 // glom new data onto headRow
                 if (thisEnd < headRow->end)
                     errAbort("Expected thisEnd (%u) to be greater than or equal to "
                              "headRow->end (%u)",
                              thisEnd, headRow->end);
                 uint oldEnd = headRow->end;
                 uint oldLen = oldEnd - headRow->start;
                 uint newLen = thisEnd - headRow->start;
                 //#*** TODO: precompute how many bytes we will need, so we don't have to realloc!!
                 headRow->data = lmAllocMoreMem(callerLm, headRow->data, oldLen*sizeof(vector[0]),
                                                newLen*sizeof(vector[0]));
                 headRow->end = thisEnd;
                 float *newData = (float *)rowIn->data + (oldEnd - rowIn->start);
                 float *newSpace = (float *)headRow->data + oldLen;
                 CopyArray(newData, newSpace, (thisEnd - oldEnd));
                 }
             start = thisEnd;
             }
         }
     }
 slReverse(&rowOutList);
 return rowOutList;
 }
 
-static struct annoRow *averageRows(struct annoRow *rowList, uint primaryStart, uint primaryEnd,
+static struct annoRow *averageRows(struct annoRow *rowList, uint regionStart, uint regionEnd,
                                    struct lm *callerLm)
-/* Return an annoRow with the average value of floats across all row->data (within primary item's
- * region), ignoring missing data and NANs.  Return NULL if rowList is NULL or if all bases
- * overlapping the primary item are NAN (unlikely, but could happen). */
+/* Return an annoRow with the average value of floats across all row->data (within region),
+ * ignoring missing data and NANs.  Return NULL if rowList is NULL or if all bases overlapping
+ * the region are NAN (unlikely, but could happen). */
 {
 if (rowList == NULL)
     return NULL;
 int count = 0;
 double sum = 0.0;
-uint start = max(rowList->start, primaryStart);
+uint start = max(rowList->start, regionStart);
 uint end = start;
 struct annoRow *row;
 for (row = rowList;  row != NULL;  row = row->next)
     {
-    uint rowStart = max(row->start, primaryStart);
-    uint rowEnd = min(row->end, primaryEnd);
+    uint rowStart = max(row->start, regionStart);
+    uint rowEnd = min(row->end, regionEnd);
     if (rowEnd > rowStart)
         {
         float *vector = row->data;
         int iStart = rowStart - row->start;
         int iEnd = rowEnd - row->start;
         int i;
         for (i = iStart;  i < iEnd;  i++)
             {
             // skip NAN values so they don't convert sum to a NAN:
             if (! isnan(vector[i]))
                 {
                 sum += vector[i];
                 count++;
                 }
             }
         end = rowEnd;
         }
     }
 if (count > 0)
     {
     double average = (sum / (double)count);
     return annoRowWigSingleNew(rowList->chrom, start, end, FALSE, average, callerLm);
     }
 return NULL;
 }
 
 static struct annoRow *agwIntegrate(struct annoGrator *gSelf, struct annoStreamRows *primaryData,
 				    boolean *retRJFilterFailed, struct lm *callerLm)
 /* Return wig annoRows that overlap primaryRow position, with NANs weeded out. */
 {
 struct annoGrateWig *self = (struct annoGrateWig *)gSelf;
 // Cleanup internal lm every so often:
 if (self->lmRowCount >= 4096)
     {
     lmCleanup(&(self->lm));
     self->lmRowCount = 0;
     }
 if (self->lm == NULL)
     self->lm = lmInit(0);
 struct annoRow *rowsIn = annoGratorIntegrate(self->mySource, primaryData, retRJFilterFailed,
 					     self->lm);
 self->lmRowCount += slCount(rowsIn);
 if (retRJFilterFailed && *retRJFilterFailed)
     return NULL;
 struct annoRow *primaryRow = primaryData->rowList;
+struct annoStreamer *sSelf = (struct annoStreamer *)gSelf;
+uint regionStart, regionEnd;
+if (isNotEmpty(sSelf->chrom))
+    {
+    regionStart = max(primaryRow->start, sSelf->regionStart);
+    regionEnd = min(primaryRow->end, sSelf->regionEnd);
+    }
+else
+    {
+    regionStart = primaryRow->start;
+    regionEnd = primaryRow->end;
+    }
 if (self->mode == agwmAverage)
-    return averageRows(rowsIn, primaryRow->start, primaryRow->end, callerLm);
+    return averageRows(rowsIn, regionStart, regionEnd, callerLm);
 else if (self->mode == agwmPerBase)
-    return normalizeRows(rowsIn, primaryRow->start, primaryRow->end, callerLm);
+    return normalizeRows(rowsIn, regionStart, regionEnd, callerLm);
 else
     errAbort("agwIntegrate: invalid self->mode %d", self->mode);
 return NULL;
 }
 
 static void appendAverageToValueLabel(struct asObject *asObj)
 /* When we are averaging wiggle values, tweak the "value" column label to remind the user. */
 {
 struct asColumn *col;
 for (col = asObj->columnList;  col != NULL;  col = col->next)
     {
     if (sameString(col->name, "value"))
         {
         freez(&col->name);
         col->name = cloneString("valueAverage");
         }
     }
 }
 
 struct annoGrator *annoGrateWigNew(struct annoStreamer *wigSource, enum annoGrateWigMode mode)
 /* Create an annoGrator subclass for source with wiggle or bigWig data.
  * If mode is agwmAverage, then this produces at most one annoRow per primary item
  * with type arWigSingle, containing the average of all overlapping non-NAN values.
  * If mode is agwmPerBase, then this produces a list of annoRows split by NANs, with type
  * arWigVec (vector of per-base values). */
 {
 if (wigSource->rowType != arWigVec)
     errAbort("annoGrateWigNew: expected source->rowType arWigVec (%d), got %d from source %s",
 	     arWigVec, wigSource->rowType, wigSource->name);
 struct annoGrateWig *self;
 AllocVar(self);
 struct annoGrator *gSelf = (struct annoGrator *)self;
 annoGratorInit(gSelf, wigSource);
 gSelf->integrate = agwIntegrate;
 self->mySource = annoGratorNew(wigSource);
 self->mode = mode;
 struct annoStreamer *sSelf = (struct annoStreamer *)gSelf;
 if (mode == agwmAverage)
     {
     sSelf->rowType = arWigSingle;
     appendAverageToValueLabel(sSelf->asObj);
     }
 else if (mode == agwmPerBase)
     sSelf->rowType = arWigVec;
 else
     errAbort("annoGrateWigNew: unrecognized enum annoGrateWigMode %d", mode);
 return gSelf;
 }
 
 struct annoGrator *annoGrateBigWigNew(char *fileOrUrl, struct annoAssembly *aa,
                                       enum annoGrateWigMode mode)
 /* Create an annoGrator subclass for bigWig file or URL.  See above description of mode. */
 {
 struct annoStreamer *bigWigSource = annoStreamBigWigNew(fileOrUrl, aa);
 return annoGrateWigNew(bigWigSource, mode);
 }