40ed3e546ef868bffe4c338d93d6528138ecfc44 angie Wed Sep 30 09:25:12 2015 -0700 Add test cases (and bug fixes!) to make sure that we get the desired behavior when an insertion is adjacent to a non-insertion, and when an insertion falls at the start or end of the search region (if applicable): - Include insertions that fall at the start or end of the search region - If the primary row is an insertion, keep secondary non-insertion rows to the left and right. - If the primary row is a non-insertion, keep secondary insertion rows at its start and end. (i.e. keep insertions at boundaries -- but don't let any non-insertions slip through) The VCF logic is more complicated because VCF indels always include an extra base to the left, so they appear to start before they actually do, and can be interspersed with non-indels that start there. diff --git src/lib/bigBed.c src/lib/bigBed.c index 8548f2d..da33ede 100644 --- src/lib/bigBed.c +++ src/lib/bigBed.c @@ -28,33 +28,36 @@ boolean bigBedFileCheckSigs(char *fileName) /* check file signatures at beginning and end of file */ { return bbiFileCheckSigs(fileName, bigBedSig, "big bed"); } struct bigBedInterval *bigBedIntervalQuery(struct bbiFile *bbi, char *chrom, bits32 start, bits32 end, int maxItems, struct lm *lm) /* Get data for interval. Return list allocated out of lm. Set maxItems to maximum * number of items to return, or to 0 for all items. */ { struct bigBedInterval *el, *list = NULL; int itemCount = 0; bbiAttachUnzoomedCir(bbi); +// Find blocks with padded start and end to make sure we include zero-length insertions: +bits32 paddedStart = (start > 0) ? start-1 : start; +bits32 paddedEnd = end+1; bits32 chromId; struct fileOffsetSize *blockList = bbiOverlappingBlocks(bbi, bbi->unzoomedCir, - chrom, start, end, &chromId); + chrom, paddedStart, paddedEnd, &chromId); struct fileOffsetSize *block, *beforeGap, *afterGap; struct udcFile *udc = bbi->udc; boolean isSwapped = bbi->isSwapped; /* Set up for uncompression optionally. */ char *uncompressBuf = NULL; if (bbi->uncompressBufSize > 0) uncompressBuf = needLargeMem(bbi->uncompressBufSize); char *mergedBuf = NULL; for (block = blockList; block != NULL; ) { /* Find contigious blocks and read them into mergedBuf. */ fileOffsetSizeFindGap(block, &beforeGap, &afterGap); bits64 mergedOffset = block->offset; @@ -72,39 +75,42 @@ if (uncompressBuf) { blockPt = uncompressBuf; int uncSize = zUncompress(blockBuf, block->size, uncompressBuf, bbi->uncompressBufSize); blockEnd = blockPt + uncSize; } else { blockPt = blockBuf; blockEnd = blockPt + block->size; } while (blockPt < blockEnd) { /* Read next record into local variables. */ - bits32 chr = memReadBits32(&blockPt, isSwapped); // Read and discard chromId + bits32 chr = memReadBits32(&blockPt, isSwapped); bits32 s = memReadBits32(&blockPt, isSwapped); bits32 e = memReadBits32(&blockPt, isSwapped); /* calculate length of rest of bed fields */ int restLen = strlen(blockPt); /* If we're actually in range then copy it into a new element and add to list. */ - if (chr == chromId && s < end && e > start) + if (chr == chromId && + ((s < end && e > start) + // Make sure to include zero-length insertion elements at start or end: + || (s == e && (s == end || e == start)))) { ++itemCount; if (maxItems > 0 && itemCount > maxItems) break; lmAllocVar(lm, el); el->start = s; el->end = e; if (restLen > 0) el->rest = lmCloneStringZ(lm, blockPt, restLen); el->chromId = chromId; slAddHead(&list, el); } // move blockPt pointer to end of previous bed