a20c0a7ba762e49942095aff62bb6418c106d1bc kent Tue Feb 26 22:51:45 2013 -0800 Adding support for multiple values with same key to bPlusTree. diff --git src/lib/bPlusTree.c src/lib/bPlusTree.c index 86398c3..fcffba8 100644 --- src/lib/bPlusTree.c +++ src/lib/bPlusTree.c @@ -150,30 +150,89 @@ /* Scan info for first file offset. */ bits64 fileOffset = udcReadBits64(bpt->udc, isSwapped); /* Loop through remainder. */ for (i=1; i<childCount; ++i) { udcMustRead(bpt->udc, keyBuf, bpt->keySize); if (memcmp(key, keyBuf, bpt->keySize) < 0) break; fileOffset = udcReadBits64(bpt->udc, isSwapped); } return rFind(bpt, fileOffset, key, val); } } +static void rFindMulti(struct bptFile *bpt, bits64 blockStart, void *key, struct slRef **pList) +/* Find values corresponding to key and add them to pList */ +{ +/* Seek to start of block. */ +udcSeek(bpt->udc, blockStart); + +/* Read block header. */ +UBYTE isLeaf; +UBYTE reserved; +bits16 i, childCount; +udcMustReadOne(bpt->udc, isLeaf); +udcMustReadOne(bpt->udc, reserved); +boolean isSwapped = bpt->isSwapped; +childCount = udcReadBits16(bpt->udc, isSwapped); + +int keySize = bpt->keySize; +UBYTE keyBuf[keySize]; /* Place to put a key, buffered on stack. */ +UBYTE valBuf[bpt->valSize]; /* Place to put a value, buffered on stack. */ + +if (isLeaf) + { + for (i=0; i<childCount; ++i) + { + udcMustRead(bpt->udc, keyBuf, keySize); + udcMustRead(bpt->udc, valBuf, bpt->valSize); + if (memcmp(key, keyBuf, keySize) == 0) + { + void *val = cloneMem(valBuf, bpt->valSize); + refAdd(pList, val); + } + } + } +else + { + /* Read first key and first file offset. */ + udcMustRead(bpt->udc, keyBuf, keySize); + bits64 lastFileOffset = udcReadBits64(bpt->udc, isSwapped); + bits64 fileOffset = lastFileOffset; + int lastCmp = memcmp(key, keyBuf, keySize); + + /* Loop through remainder.
*/ + for (i=1; i<childCount; ++i) + { + udcMustRead(bpt->udc, keyBuf, keySize); + fileOffset = udcReadBits64(bpt->udc, isSwapped); + int cmp = memcmp(key, keyBuf, keySize); + if (lastCmp >= 0 && cmp <= 0) + rFindMulti(bpt, lastFileOffset, key, pList); + if (cmp < 0) + return; + lastCmp = cmp; + lastFileOffset = fileOffset; + } + /* If made it all the way to end, do last one too. */ + rFindMulti(bpt, fileOffset, key, pList); + } +} + + static void rTraverse(struct bptFile *bpt, bits64 blockStart, void *context, void (*callback)(void *context, void *key, int keySize, void *val, int valSize) ) /* Recursively go across tree, calling callback at leaves. */ { /* Seek to start of block. */ udcSeek(bpt->udc, blockStart); /* Read block header. */ UBYTE isLeaf; UBYTE reserved; bits16 i, childCount; udcMustReadOne(bpt->udc, isLeaf); udcMustReadOne(bpt->udc, reserved); boolean isSwapped = bpt->isSwapped; childCount = udcReadBits16(bpt->udc, isSwapped); @@ -258,57 +317,80 @@ { bits64 offset = bptDataOffset(bpt, itemPos); udcSeek(bpt->udc, offset); udcMustRead(bpt->udc, result, bpt->keySize); } void bptStringKeyAtPos(struct bptFile *bpt, bits64 itemPos, char *result, int maxResultSize) /* Fill in result with the key at given itemPos. The maxResultSize should be 1+bpt->keySize * to accommodate zero termination of string. */ { assert(maxResultSize > bpt->keySize); bptKeyAtPos(bpt, itemPos, result); result[bpt->keySize] = 0; } -boolean bptFileFind(struct bptFile *bpt, void *key, int keySize, void *val, int valSize) -/* Find value associated with key. Return TRUE if it's found. -* Parameters: -* bpt - file handle returned by bptFileOpen -* key - pointer to key string, which needs to be bpt->keySize long -* val - pointer to where to put retrieved value -*/ +static boolean bptFileFindMaybeMulti(struct bptFile *bpt, void *key, int keySize, int valSize, + boolean multi, void *singleVal, struct slRef **multiVal) +/* Do either a single or multiple find depending in multi parameter.
Only one of singleVal + * or multiVal should be non-NULL, depending on the same parameter. */ { /* Check key size vs. file key size, and act appropriately. If need be copy key to a local * buffer and zero-extend it. */ if (keySize > bpt->keySize) return FALSE; char keyBuf[keySize]; if (keySize != bpt->keySize) { memcpy(keyBuf, key, keySize); memset(keyBuf+keySize, 0, bpt->keySize - keySize); key = keyBuf; } /* Make sure the valSize matches what's in file. */ if (valSize != bpt->valSize) errAbort("Value size mismatch between bptFileFind (valSize=%d) and %s (valSize=%d)", valSize, bpt->fileName, bpt->valSize); -/* Call recursive finder. */ -return rFind(bpt, bpt->rootOffset, key, val); +if (multi) + { + rFindMulti(bpt, bpt->rootOffset, key, multiVal); + return *multiVal != NULL; + } +else + return rFind(bpt, bpt->rootOffset, key, singleVal); +} + +boolean bptFileFind(struct bptFile *bpt, void *key, int keySize, void *val, int valSize) +/* Find value associated with key. Return TRUE if it's found. +* Parameters: +* bpt - file handle returned by bptFileOpen +* key - pointer to key string, which needs to be bpt->keySize long +* val - pointer to where to put retrieved value +*/ +{ +return bptFileFindMaybeMulti(bpt, key, keySize, valSize, FALSE, val, NULL); +} + +struct slRef *bptFileFindMultiple(struct bptFile *bpt, void *key, int keySize, int valSize) +/* Find all values associated with key. Store this in ->val item of returned list. + * Do a slRefFreeListAndVals() on list when done. */ +{ +struct slRef *list = NULL; +bptFileFindMaybeMulti(bpt, key, keySize, valSize, TRUE, NULL, &list); +slReverse(&list); +return list; } void bptFileTraverse(struct bptFile *bpt, void *context, void (*callback)(void *context, void *key, int keySize, void *val, int valSize) ) /* Traverse bPlusTree on file, calling supplied callback function at each * leaf item. 
*/ { return rTraverse(bpt, bpt->rootOffset, context, callback); } /* This section of code deals with making balanced b+ trees given a sorted array as input. * The difficult part is mostly just calculating the offsets of various things. As an example * if you had the sorted array: * 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27