aa973757572023a6df9e84bddd65ff0a98c82375
kent
  Tue Feb 26 11:34:17 2013 -0800
Adding bptKeyAtPos and bptStringKeyAtPos methods so can go from a chromosome index back to a chromosome name quickly without loading all the chromosomes.
diff --git src/lib/bPlusTree.c src/lib/bPlusTree.c
index 4af4144..86398c3 100644
--- src/lib/bPlusTree.c
+++ src/lib/bPlusTree.c
@@ -150,31 +150,31 @@
     /* Scan info for first file offset. */
     bits64 fileOffset = udcReadBits64(bpt->udc, isSwapped);
 
     /* Loop through remainder. */
     for (i=1; i<childCount; ++i)
 	{
 	udcMustRead(bpt->udc, keyBuf, bpt->keySize);
 	if (memcmp(key, keyBuf, bpt->keySize) < 0)
 	    break;
 	fileOffset = udcReadBits64(bpt->udc, isSwapped);
 	}
     return rFind(bpt, fileOffset, key, val);
     }
 }
 
-void rTraverse(struct bptFile *bpt, bits64 blockStart, void *context, 
+static void rTraverse(struct bptFile *bpt, bits64 blockStart, void *context, 
     void (*callback)(void *context, void *key, int keySize, void *val, int valSize) )
 /* Recursively go across tree, calling callback at leaves. */
 {
 /* Seek to start of block. */
 udcSeek(bpt->udc, blockStart);
 
 /* Read block header. */
 UBYTE isLeaf;
 UBYTE reserved;
 bits16 i, childCount;
 udcMustReadOne(bpt->udc, isLeaf);
 udcMustReadOne(bpt->udc, reserved);
 boolean isSwapped = bpt->isSwapped;
 childCount = udcReadBits16(bpt->udc, isSwapped);
 
@@ -191,30 +191,97 @@
 else
     {
     bits64 fileOffsets[childCount];
     /* Loop through to get file offsets of children. */
     for (i=0; i<childCount; ++i)
 	{
 	udcMustRead(bpt->udc, keyBuf, bpt->keySize);
 	fileOffsets[i] = udcReadBits64(bpt->udc, isSwapped);
 	}
     /* Loop through recursing on child offsets. */
     for (i=0; i<childCount; ++i)
 	rTraverse(bpt, fileOffsets[i], context, callback);
     }
 }
 
+static bits64 bptDataStart(struct bptFile *bpt)
+/* Return offset of first bit of data (as opposed to index) in file.  In hind sight I wish
+ * this were stored in the header, but fortunately it's not that hard to compute. */
+{
+bits64 offset = bpt->rootOffset;
+for (;;)
+    {
+    /* Seek to block start */
+    udcSeek(bpt->udc, offset);
+
+    /* Read block header,  break if we are leaf. */
+    UBYTE isLeaf;
+    UBYTE reserved;
+    bits16 childCount;
+    udcMustReadOne(bpt->udc, isLeaf);
+    if (isLeaf)
+         break;
+    udcMustReadOne(bpt->udc, reserved);
+    boolean isSwapped = bpt->isSwapped;
+    childCount = udcReadBits16(bpt->udc, isSwapped);
+
+    /* Read and discard first key. */
+    char keyBuf[bpt->keySize];
+    udcMustRead(bpt->udc, keyBuf, bpt->keySize);
+
+    /* Get file offset of sub-block. */
+    offset = udcReadBits64(bpt->udc, isSwapped);
+    }
+return offset;
+}
+
+static bits64 bptDataOffset(struct bptFile *bpt, bits64 itemPos)
+/* Return position of file of data corresponding to given itemPos.  For first piece of
+ * data pass in 0. */
+{
+if (itemPos >= bpt->itemCount)
+    errAbort("Item index %lld greater than item count %lld in %s", 
+	itemPos, bpt->itemCount, bpt->fileName);
+bits64 blockPos = itemPos/bpt->blockSize;
+bits32 insidePos = itemPos - blockPos*bpt->blockSize;
+int blockHeaderSize = 4;
+bits64 itemByteSize = bpt->valSize + bpt->keySize;
+bits64 blockByteSize = bpt->blockSize * itemByteSize + blockHeaderSize;
+bits64 blockOffset = blockByteSize*blockPos + bptDataStart(bpt);
+bits64 itemOffset = blockOffset + blockHeaderSize + itemByteSize * insidePos;
+return itemOffset;
+}
+
+void bptKeyAtPos(struct bptFile *bpt, bits64 itemPos, void *result)
+/* Fill in result with the key at given itemPos.  For first piece of data itemPos is 0 
+ * Result must be at least bpt->keySize.  If result is a string it won't be zero terminated
+ * by this routine.  Use bptStringKeyAtPos instead. */
+{
+bits64 offset = bptDataOffset(bpt, itemPos);
+udcSeek(bpt->udc, offset);
+udcMustRead(bpt->udc, result, bpt->keySize);
+}
+
+void bptStringKeyAtPos(struct bptFile *bpt, bits64 itemPos, char *result, int maxResultSize)
+/* Fill in result with the key at given itemPos.  The maxResultSize should be 1+bpt->keySize
+ * to accommodate zero termination of string. */
+{
+assert(maxResultSize > bpt->keySize);
+bptKeyAtPos(bpt, itemPos, result);
+result[bpt->keySize] = 0;
+}
+
 boolean bptFileFind(struct bptFile *bpt, void *key, int keySize, void *val, int valSize)
 /* Find value associated with key.  Return TRUE if it's found. 
 *  Parameters:
 *     bpt - file handle returned by bptFileOpen
 *     key - pointer to key string, which needs to be bpt->keySize long
 *     val - pointer to where to put retrieved value
 */
 {
 /* Check key size vs. file key size, and act appropriately.  If need be copy key to a local
  * buffer and zero-extend it. */
 if (keySize > bpt->keySize)
     return FALSE;
 char keyBuf[keySize];
 if (keySize != bpt->keySize)
     {