d59b7f02e464c5b53c8be588a3b5cfd197148eab braney Mon Apr 22 13:34:00 2013 -0700 Fix a couple of problems that Max found in bigBed handling. The first was that spaces were causing problems in custom track bigBed support (the memory being chopped at spaces was not cloned). The second was that bigBedIntervalQuery was adding one character at a time to a dyString that ended up growing to megabytes, which was very slow and timed out in hgc. diff --git src/lib/bigBed.c src/lib/bigBed.c index 87d910a..75130c9 100644 --- src/lib/bigBed.c +++ src/lib/bigBed.c @@ -31,31 +31,30 @@ struct bigBedInterval *bigBedIntervalQuery(struct bbiFile *bbi, char *chrom, bits32 start, bits32 end, int maxItems, struct lm *lm) /* Get data for interval. Return list allocated out of lm. Set maxItems to maximum * number of items to return, or to 0 for all items. */ { struct bigBedInterval *el, *list = NULL; int itemCount = 0; bbiAttachUnzoomedCir(bbi); bits32 chromId; struct fileOffsetSize *blockList = bbiOverlappingBlocks(bbi, bbi->unzoomedCir, chrom, start, end, &chromId); struct fileOffsetSize *block, *beforeGap, *afterGap; struct udcFile *udc = bbi->udc; boolean isSwapped = bbi->isSwapped; -struct dyString *dy = dyStringNew(32); /* Set up for uncompression optionally. */ char *uncompressBuf = NULL; if (bbi->uncompressBufSize > 0) uncompressBuf = needLargeMem(bbi->uncompressBufSize); for (block = blockList; block != NULL; ) { /* Find contigious blocks and read them into mergedBuf. 
*/ fileOffsetSizeFindGap(block, &beforeGap, &afterGap); bits64 mergedOffset = block->offset; bits64 mergedSize = beforeGap->offset + beforeGap->size - mergedOffset; udcSeek(udc, mergedOffset); char *mergedBuf = needLargeMem(mergedSize); udcMustRead(udc, mergedBuf, mergedSize); @@ -72,66 +71,62 @@ int uncSize = zUncompress(blockBuf, block->size, uncompressBuf, bbi->uncompressBufSize); blockEnd = blockPt + uncSize; } else { blockPt = blockBuf; blockEnd = blockPt + block->size; } while (blockPt < blockEnd) { /* Read next record into local variables. */ bits32 chr = memReadBits32(&blockPt, isSwapped); // Read and discard chromId bits32 s = memReadBits32(&blockPt, isSwapped); bits32 e = memReadBits32(&blockPt, isSwapped); - int c; - dyStringClear(dy); - // TODO - can simplify this probably just to for (;;) {if ((c = *blockPt++) == 0) ... - while ((c = *blockPt++) >= 0) - { - if (c == 0) - break; - dyStringAppendC(dy, c); - } + + /* calculate length of rest of bed fields */ + int restLen = strlen(blockPt); /* If we're actually in range then copy it into a new element and add to list. */ if (chr == chromId && s < end && e > start) { ++itemCount; if (maxItems > 0 && itemCount > maxItems) break; lmAllocVar(lm, el); el->start = s; el->end = e; - if (dy->stringSize > 0) - el->rest = lmCloneString(lm, dy->string); + if (restLen > 0) + el->rest = lmCloneStringZ(lm, blockPt, restLen); el->chromId = chromId; slAddHead(&list, el); } + + // move blockPt pointer to end of previous bed + blockPt += restLen + 1; } if (maxItems > 0 && itemCount > maxItems) break; blockBuf += block->size; } if (maxItems > 0 && itemCount > maxItems) break; freez(&mergedBuf); } freeMem(uncompressBuf); -dyStringFree(&dy); slFreeList(&blockList); slReverse(&list); return list; } int bigBedIntervalToRow(struct bigBedInterval *interval, char *chrom, char *startBuf, char *endBuf, char **row, int rowSize) /* Convert bigBedInterval into an array of chars equivalent to what you'd get by * parsing the bed file. 
The startBuf and endBuf are used to hold the ascii representation of * start and end. Note that the interval->rest string will have zeroes inserted as a side effect. */ { int fieldCount = 3; sprintf(startBuf, "%u", interval->start); sprintf(endBuf, "%u", interval->end);