aa973757572023a6df9e84bddd65ff0a98c82375 kent Tue Feb 26 11:34:17 2013 -0800 Adding bptKeyAtPos and bptStringKeyAtPos methods so can go from a chromosome index back to a chromosome name quickly without loading all the chromosomes. diff --git src/lib/bPlusTree.c src/lib/bPlusTree.c index 4af4144..86398c3 100644 --- src/lib/bPlusTree.c +++ src/lib/bPlusTree.c @@ -150,31 +150,31 @@ /* Scan info for first file offset. */ bits64 fileOffset = udcReadBits64(bpt->udc, isSwapped); /* Loop through remainder. */ for (i=1; i<childCount; ++i) { udcMustRead(bpt->udc, keyBuf, bpt->keySize); if (memcmp(key, keyBuf, bpt->keySize) < 0) break; fileOffset = udcReadBits64(bpt->udc, isSwapped); } return rFind(bpt, fileOffset, key, val); } } -void rTraverse(struct bptFile *bpt, bits64 blockStart, void *context, +static void rTraverse(struct bptFile *bpt, bits64 blockStart, void *context, void (*callback)(void *context, void *key, int keySize, void *val, int valSize) ) /* Recursively go across tree, calling callback at leaves. */ { /* Seek to start of block. */ udcSeek(bpt->udc, blockStart); /* Read block header. */ UBYTE isLeaf; UBYTE reserved; bits16 i, childCount; udcMustReadOne(bpt->udc, isLeaf); udcMustReadOne(bpt->udc, reserved); boolean isSwapped = bpt->isSwapped; childCount = udcReadBits16(bpt->udc, isSwapped); @@ -191,30 +191,97 @@ else { bits64 fileOffsets[childCount]; /* Loop through to get file offsets of children. */ for (i=0; i<childCount; ++i) { udcMustRead(bpt->udc, keyBuf, bpt->keySize); fileOffsets[i] = udcReadBits64(bpt->udc, isSwapped); } /* Loop through recursing on child offsets. */ for (i=0; i<childCount; ++i) rTraverse(bpt, fileOffsets[i], context, callback); } } +static bits64 bptDataStart(struct bptFile *bpt) +/* Return offset of first bit of data (as opposed to index) in file. In hind sight I wish + * this were stored in the header, but fortunately it's not that hard to compute. */ +{ +bits64 offset = bpt->rootOffset; +for (;;) + { + /* Seek to block start */ + udcSeek(bpt->udc, offset); + + /* Read block header, break if we are leaf. */ + UBYTE isLeaf; + UBYTE reserved; + bits16 childCount; + udcMustReadOne(bpt->udc, isLeaf); + if (isLeaf) + break; + udcMustReadOne(bpt->udc, reserved); + boolean isSwapped = bpt->isSwapped; + childCount = udcReadBits16(bpt->udc, isSwapped); + + /* Read and discard first key. */ + char keyBuf[bpt->keySize]; + udcMustRead(bpt->udc, keyBuf, bpt->keySize); + + /* Get file offset of sub-block. */ + offset = udcReadBits64(bpt->udc, isSwapped); + } +return offset; +} + +static bits64 bptDataOffset(struct bptFile *bpt, bits64 itemPos) +/* Return position of file of data corresponding to given itemPos. For first piece of + * data pass in 0. */ +{ +if (itemPos >= bpt->itemCount) + errAbort("Item index %lld greater than item count %lld in %s", + itemPos, bpt->itemCount, bpt->fileName); +bits64 blockPos = itemPos/bpt->blockSize; +bits32 insidePos = itemPos - blockPos*bpt->blockSize; +int blockHeaderSize = 4; +bits64 itemByteSize = bpt->valSize + bpt->keySize; +bits64 blockByteSize = bpt->blockSize * itemByteSize + blockHeaderSize; +bits64 blockOffset = blockByteSize*blockPos + bptDataStart(bpt); +bits64 itemOffset = blockOffset + blockHeaderSize + itemByteSize * insidePos; +return itemOffset; +} + +void bptKeyAtPos(struct bptFile *bpt, bits64 itemPos, void *result) +/* Fill in result with the key at given itemPos. For first piece of data itemPos is 0 + * Result must be at least bpt->keySize. If result is a string it won't be zero terminated + * by this routine. Use bptStringKeyAtPos instead. */ +{ +bits64 offset = bptDataOffset(bpt, itemPos); +udcSeek(bpt->udc, offset); +udcMustRead(bpt->udc, result, bpt->keySize); +} + +void bptStringKeyAtPos(struct bptFile *bpt, bits64 itemPos, char *result, int maxResultSize) +/* Fill in result with the key at given itemPos. The maxResultSize should be 1+bpt->keySize + * to accommodate zero termination of string. */ +{ +assert(maxResultSize > bpt->keySize); +bptKeyAtPos(bpt, itemPos, result); +result[bpt->keySize] = 0; +} + boolean bptFileFind(struct bptFile *bpt, void *key, int keySize, void *val, int valSize) /* Find value associated with key. Return TRUE if it's found. * Parameters: * bpt - file handle returned by bptFileOpen * key - pointer to key string, which needs to be bpt->keySize long * val - pointer to where to put retrieved value */ { /* Check key size vs. file key size, and act appropriately. If need be copy key to a local * buffer and zero-extend it. */ if (keySize > bpt->keySize) return FALSE; char keyBuf[keySize]; if (keySize != bpt->keySize) {