5e9d8f2db0ad369f097de9e31a6fc6a6a5302db8 braney Fri Feb 7 12:54:23 2014 -0800 fix problem of disappearing narrow peak discussed in #12558 bynormalizing the statics to reflect that base counts are integers. diff --git src/lib/bbiRead.c src/lib/bbiRead.c index af27dd4..7b3f8f5 100644 --- src/lib/bbiRead.c +++ src/lib/bbiRead.c @@ -427,67 +427,80 @@ } } } assert(blockPt == blockEnd); blockBuf += block->size; } freeMem(mergedBuf); } freeMem(uncompressBuf); slFreeList(&blockList); cirTreeFileDetach(&ctf); slReverse(&sumList); return sumList; } +static int normalizeCount(struct bbiSummaryElement *el, double countFactor, + double minVal, double maxVal, double sumData, double sumSquares) +/* normalize statistics to be based on an integer number of valid bases + * Integer value is the smallest integer not less than countFactor */ +{ +bits32 validCount = ceil(countFactor); +double normFactor = (double)validCount/countFactor; + +el->validCount = validCount; +el->minVal = minVal; +el->maxVal = maxVal; +el->sumData = sumData * normFactor; +el->sumSquares = sumSquares * normFactor; + +return validCount; +} + static bits32 bbiSummarySlice(struct bbiFile *bbi, bits32 baseStart, bits32 baseEnd, struct bbiSummary *sumList, struct bbiSummaryElement *el) /* Update retVal with the average value if there is any data in interval. Return number * of valid data bases in interval. */ { bits32 validCount = 0; if (sumList != NULL) { double minVal = sumList->minVal; double maxVal = sumList->maxVal; double sumData = 0, sumSquares = 0; + double countFactor = 0.0; struct bbiSummary *sum; for (sum = sumList; sum != NULL && sum->start < baseEnd; sum = sum->next) { int overlap = rangeIntersection(baseStart, baseEnd, sum->start, sum->end); if (overlap > 0) { double overlapFactor = (double)overlap / (sum->end - sum->start); - validCount += sum->validCount * overlapFactor; + countFactor += sum->validCount * overlapFactor; sumData += sum->sumData * overlapFactor; sumSquares += sum->sumSquares * overlapFactor; if (maxVal < sum->maxVal) maxVal = sum->maxVal; if (minVal > sum->minVal) minVal = sum->minVal; } } - if (validCount > 0) - { - el->validCount = validCount; - el->minVal = minVal; - el->maxVal = maxVal; - el->sumData = sumData; - el->sumSquares = sumSquares; - } + + if (countFactor > 0) + validCount = normalizeCount(el, countFactor, minVal, maxVal, sumData, sumSquares); } return validCount; } static int bbiChromId(struct bbiFile *bbi, char *chrom) /* Return chromosome Id */ { struct bbiChromIdSize idSize; if (!bptFileFind(bbi->chromBpt, chrom, strlen(chrom), &idSize, sizeof(idSize))) return -1; chromIdSizeHandleSwapped(bbi->isSwapped, &idSize); return idSize.chromId; } static boolean bbiSummaryArrayFromZoom(struct bbiZoomLevel *zoom, struct bbiFile *bbi, @@ -520,77 +533,75 @@ result = TRUE; /* Next time round start where we left off. */ baseStart = baseEnd; } slFreeList(&sumList); } return result; } static bits32 bbiIntervalSlice(struct bbiFile *bbi, bits32 baseStart, bits32 baseEnd, struct bbiInterval *intervalList, struct bbiSummaryElement *el) /* Update retVal with the average value if there is any data in interval. Return number * of valid data bases in interval. */ { -double validCount = 0; +bits32 validCount = 0; if (intervalList != NULL) { + double countFactor = 0; struct bbiInterval *interval; double sumData = 0, sumSquares = 0; double minVal = intervalList->val; double maxVal = intervalList->val; for (interval = intervalList; interval != NULL && interval->start < baseEnd; interval = interval->next) { int overlap = rangeIntersection(baseStart, baseEnd, interval->start, interval->end); if (overlap > 0) { int intervalSize = interval->end - interval->start; double overlapFactor = (double)overlap / intervalSize; double intervalWeight = intervalSize * overlapFactor; - validCount += intervalWeight; + countFactor += intervalWeight; sumData += interval->val * intervalWeight; sumSquares += interval->val * interval->val * intervalWeight; if (maxVal < interval->val) maxVal = interval->val; if (minVal > interval->val) minVal = interval->val; } } - el->validCount = round(validCount); - el->minVal = minVal; - el->maxVal = maxVal; - el->sumData = sumData; - el->sumSquares = sumSquares; + + validCount = normalizeCount(el, countFactor, minVal, maxVal, sumData, sumSquares); } -return round(validCount); +return validCount; } static boolean bbiSummaryArrayFromFull(struct bbiFile *bbi, char *chrom, bits32 start, bits32 end, BbiFetchIntervals fetchIntervals, int summarySize, struct bbiSummaryElement *summary) /* Summarize data, not using zoom. */ { struct bbiInterval *intervalList = NULL, *interval; struct lm *lm = lmInit(0); intervalList = (*fetchIntervals)(bbi, chrom, start, end, lm); boolean result = FALSE; -if (intervalList != NULL); +if (intervalList != NULL) { int i; bits32 baseStart = start, baseEnd; bits32 baseCount = end - start; interval = intervalList; for (i=0; iend <= baseStart)