1156e1e9fd3c14d702772fdd0ff3978049f9e764 kent Tue Mar 29 14:33:06 2011 -0700 Having bigWigValsOnChromFetchData read bigWig directly rather than going through bigWigIntervalQuery in hopes of speeding up particularly on things that have a value for each base and are stored as fixedStep. It does speed it up by 2x when you are not too i/o bound. diff --git src/lib/bwgValsOnChrom.c src/lib/bwgValsOnChrom.c index 15892dd..1a96b7a 100644 --- src/lib/bwgValsOnChrom.c +++ src/lib/bwgValsOnChrom.c @@ -1,72 +1,214 @@ /* bwgValsOnChrom - implements the bigWigValsOnChrom access to bigWig. */ #include "common.h" #include "linefile.h" #include "hash.h" #include "localmem.h" +#include "bits.h" +#include "sig.h" +#include "udc.h" +#include "zlibFace.h" +#include "bbiFile.h" +#include "bwgInternal.h" #include "bigWig.h" struct bigWigValsOnChrom *bigWigValsOnChromNew() /* Allocate new empty bigWigValsOnChromStructure. */ { return needMem(sizeof(struct bigWigValsOnChrom)); } void bigWigValsOnChromFree(struct bigWigValsOnChrom **pChromVals) /* Free up bigWigValsOnChrom */ { struct bigWigValsOnChrom *chromVals = *pChromVals; if (chromVals != NULL) { freeMem(chromVals->chrom); freeMem(chromVals->valBuf); freeMem(chromVals->covBuf); freez(pChromVals); } } +static void fetchIntoBuf(struct bbiFile *bwf, char *chrom, bits32 start, bits32 end, + struct bigWigValsOnChrom *chromVals) +/* Get data for interval. Return list allocated out of lm. */ +{ +/* A lot of code duplicated with bigWigIntervalQuery, but here the clipping + * is simplified since always working across full chromosome, and the output is + * different. Since both of these are in inner loops and speed critical, it's hard + * to factor out without perhaps making it worse than the bit of duplication. 
*/ +if (bwf->typeSig != bigWigSig) + errAbort("Trying to do fetchIntoBuf on a non big-wig file."); +bbiAttachUnzoomedCir(bwf); +struct fileOffsetSize *blockList = bbiOverlappingBlocks(bwf, bwf->unzoomedCir, + chrom, start, end, NULL); +struct fileOffsetSize *block, *beforeGap, *afterGap; +struct udcFile *udc = bwf->udc; +boolean isSwapped = bwf->isSwapped; +float val; +int i; +Bits *covBuf = chromVals->covBuf; +double *valBuf = chromVals->valBuf; + +/* Set up for uncompression optionally. */ +char *uncompressBuf = NULL; +if (bwf->uncompressBufSize > 0) + uncompressBuf = needLargeMem(bwf->uncompressBufSize); + +/* This loop is a little complicated because we merge the read requests for efficiency, but we + * have to then go back through the data one unmerged block at a time. */ +for (block = blockList; block != NULL; ) + { + /* Find contiguous blocks and read them into mergedBuf. */ + fileOffsetSizeFindGap(block, &beforeGap, &afterGap); + bits64 mergedOffset = block->offset; + bits64 mergedSize = beforeGap->offset + beforeGap->size - mergedOffset; + udcSeek(udc, mergedOffset); + char *mergedBuf = needLargeMem(mergedSize); + udcMustRead(udc, mergedBuf, mergedSize); + char *blockBuf = mergedBuf; + + /* Loop through individual blocks within merged section. */ + for (;block != afterGap; block = block->next) + { + /* Uncompress if necessary. */ + char *blockPt, *blockEnd; + if (uncompressBuf) + { + blockPt = uncompressBuf; + int uncSize = zUncompress(blockBuf, block->size, uncompressBuf, bwf->uncompressBufSize); + blockEnd = blockPt + uncSize; + } + else + { + blockPt = blockBuf; + blockEnd = blockPt + block->size; + } + + /* Deal with insides of block. */ + struct bwgSectionHead head; + bwgSectionHeadFromMem(&blockPt, &head, isSwapped); + switch (head.type) + { + case bwgTypeBedGraph: + { + for (i=0; i