a20c0a7ba762e49942095aff62bb6418c106d1bc
kent
  Tue Feb 26 22:51:45 2013 -0800
Adding support for multiple values with same key to bPlusTree.
diff --git src/inc/bPlusTree.h src/inc/bPlusTree.h
index a177aa5..d24f933 100644
--- src/inc/bPlusTree.h
+++ src/inc/bPlusTree.h
@@ -1,123 +1,127 @@
 /* bptFile - B+ Trees.  These are a method of indexing data similar to binary trees, but 
  * with many children rather than just two at each node. They work well when stored on disk,
  * since typically only two or three disk accesses are needed to locate any particular
  * piece of data.  This implementation is *just* for disk based storage.  For memory
  * use the rbTree instead. Currently the implementation is just useful for data warehouse
  * type applications.  That is it implements a function to create a b+ tree from bulk data
  * (bptFileCreate) and a function to lookup a value given a key (bptFileFind) but not functions
  * to add or delete individual items.
  *
  *
  * The layout of the file on disk is:
  *    header
  *    root node
  *    (other nodes)
  * In general when the tree is first built the higher level nodes are stored before the
  * lower level nodes.  It is possible if a b+ tree is dynamically updated for this to
  * no longer be strictly true, but actually currently the b+ tree code here doesn't implement
  * dynamic updates - it just creates a b+ tree from a sorted list.
  *
  * Each node can be one of two types - index or leaf.  The index nodes contain pointers
  * to child nodes.  The leaf nodes contain the actual data. 
  *
  * The layout of the file header is:
  *       <magic number>  4 bytes - The value bptSig (0x78CA8C91)
  *       <block size>    4 bytes - Number of children per block (not byte size of block)
  *       <key size>      4 bytes - Number of significant bytes in key
  *       <val size>      4 bytes - Number of bytes in value
  *       <item count>    8 bytes - Number of items in index
  *       <reserved2>     4 bytes - Always 0 for now
  *       <reserved3>     4 bytes - Always 0 for now
  * The magic number may be byte-swapped, in which case all numbers in the file
  * need to be byte-swapped. 
  *
  * The nodes start with a header:
  *       <is leaf>       1 byte  - 1 for leaf nodes, 0 for index nodes.
  *       <reserved>      1 byte  - Always 0 for now.
  *       <count>         2 bytes - The number of children/items in node
  * This is followed by count items.  For the index nodes the items are
  *       <key>           key size bytes - always written most significant byte first
  *       <offset>        8 bytes - Offset of child node in index file.
  * For leaf nodes the items are
  *       <key>           key size bytes - always written most significant byte first
  *       <val>           val sized bytes - the value associated with the key.
  * Note in general the leaf nodes may not be the same size as the index nodes, though in
  * the important case where the values are file offsets they will be.
  */
 
 #ifndef BPLUSTREE_H
 #define BPLUSTREE_H
 
 struct bptFile
 /* B+ tree index file handle. */
     {
     struct bptFile *next;	/* Next in list of index files if any. */
     char *fileName;		/* Name of file - for error reporting. */
     struct udcFile *udc;			/* Open file pointer. */
     bits32 blockSize;		/* Size of block. */
     bits32 keySize;		/* Size of keys. */
     bits32 valSize;		/* Size of values. */
     bits64 itemCount;		/* Number of items indexed. */
     boolean isSwapped;		/* If TRUE need to byte swap everything. */
     bits64 rootOffset;		/* Offset of root block. */
     };
 
 struct bptFile *bptFileOpen(char *fileName);
 /* Open up index file - reading header and verifying things. */
 
 void bptFileClose(struct bptFile **pBpt);
 /* Close down and deallocate index file. */
 
 struct bptFile *bptFileAttach(char *fileName, struct udcFile *udc);
 /* Open up index file on previously open file, with header at current file position. */
 
 void bptFileDetach(struct bptFile **pBpt);
 /* Detach and free up bptFile opened with bptFileAttach. */
 
 boolean bptFileFind(struct bptFile *bpt, void *key, int keySize, void *val, int valSize);
 /* Find value associated with key.  Return TRUE if it's found. 
 *  Parameters:
 *     bpt - file handle returned by bptFileOpen
 *     key - pointer to key string
 *     keySize - size of key.  Normally just strlen(key)
 *     val - pointer to where to put retrieved value
 *     valSize - size of memory buffer that will hold val.  Should match bpt->valSize.
 */
 
+struct slRef *bptFileFindMultiple(struct bptFile *bpt, void *key, int keySize, int valSize);
+/* Find all values associated with key.  Store this in ->val item of returned list. 
+ * Do a slRefFreeListAndVals() on list when done. */
+
 void bptFileTraverse(struct bptFile *bpt, void *context,
     void (*callback)(void *context, void *key, int keySize, void *val, int valSize) );
 /* Traverse bPlusTree on file, calling supplied callback function at each
  * leaf item. */
 
 void bptKeyAtPos(struct bptFile *bpt, bits64 itemPos, void *result);
 /* Fill in result with the key at given itemPos.  For first piece of data itemPos is 0 
  * and for last piece is bpt->itemCount - 1.  Result must be at least bpt->keySize.  
  * If result is a string it won't be zero terminated
  * by this routine.  Use bptStringKeyAtPos instead. */
 
 void bptStringKeyAtPos(struct bptFile *bpt, bits64 itemPos, char *result, int maxResultSize);
 /* Fill in result with the key at given itemPos.  The maxResultSize should be 1+bpt->keySize
  * to accommodate zero termination of string. */
 
 void bptFileCreate(
 	void *itemArray, 	/* Sorted array of things to index. */
 	int itemSize, 		/* Size of each element in array. */
 	bits64 itemCount, 	/* Number of elements in array. */
 	bits32 blockSize,	/* B+ tree block size - # of children for each node. */
 	void (*fetchKey)(const void *va, char *keyBuf),  /* Given item, copy key to keyBuf */ 
 	bits32 keySize,					 /* Size of key */
 	void* (*fetchVal)(const void *va), 		 /* Given item, return pointer to value */
 	bits32 valSize, 				 /* Size of value */
 	char *fileName);                                 /* Name of output file. */
 /* Create a b+ tree index file from a sorted array. */
 
 void bptFileBulkIndexToOpenFile(void *itemArray, int itemSize, bits64 itemCount, bits32 blockSize,
 	void (*fetchKey)(const void *va, char *keyBuf), bits32 keySize,
 	void* (*fetchVal)(const void *va), bits32 valSize, FILE *f);
 /* Create a b+ tree index from a sorted array, writing output starting at current position
  * of an already open file.  See bptFileCreate for explanation of parameters. */
 
 #define bptFileHeaderSize 32
 #define bptBlockHeaderSize 4
 
 #endif /* BPLUSTREE_H */