12d0f12bd304787c52cab0780e367d36b020f84e
kent
  Tue Feb 26 12:11:18 2013 -0800
Adding name index to bigBed files.  The write side I _think_ is working.  Still developing read side.
diff --git src/utils/bigBedNamedItems/bigBedNamedItems.c src/utils/bigBedNamedItems/bigBedNamedItems.c
new file mode 100644
index 0000000..889c2c5
--- /dev/null
+++ src/utils/bigBedNamedItems/bigBedNamedItems.c
@@ -0,0 +1,120 @@
+/* bigBedNamedItems - Extract item(s) of given name(s) from bigBed. */
+#include "common.h"
+#include "linefile.h"
+#include "hash.h"
+#include "options.h"
+#include "localmem.h"
+#include "udc.h"
+#include "bPlusTree.h"
+#include "bigBed.h"
+#include "obscure.h"
+#include "zlibFace.h"
+
+void usage()
+/* Explain usage and exit.  The program takes no options, so none are listed. */
+{
+errAbort(
+  "bigBedNamedItems - Extract item(s) of given name(s) from bigBed\n"
+  "usage:\n"
+  "   bigBedNamedItems file.bb name output.bed\n"
+  "The file.bb must contain a name index; matching items are written\n"
+  "to output.bed in bed format.\n"
+  );
+}
+
+/* Options accepted on the command line: none so far, only the terminating entry. */
+static struct optionSpec options[] = {
+    { NULL, 0 },
+};
+
+void bigBedAttachNameIndex(struct bbiFile *bbi)
+/* Make sure bbi->nameBpt is set, attaching the name index on the first call.
+ * Aborts if bbi->nameIndexOffset is zero. */
+{
+if (bbi->nameBpt != NULL)
+    return;    /* Already attached, nothing to do. */
+if (bbi->nameIndexOffset == 0)
+    errAbort("%s has no name index", bbi->fileName);
+udcSeek(bbi->udc, bbi->nameIndexOffset);
+bbi->nameBpt = bptFileAttach(bbi->fileName, bbi->udc);
+}
+
+struct bigBedInterval *bigBedNameQuery(struct bbiFile *bbi, char *name)
+/* Look up name in the name index of bbi and read the data block it points to.
+ * NOTE: turning that block into intervals is not implemented yet, so this always
+ * returns NULL; it only reports the block's chromosome and first item via uglyf. */
+{
+bigBedAttachNameIndex(bbi);
+boolean isSwapped = bbi->isSwapped;
+struct offsetSize {bits64 offset; bits64 size;} block;
+if (bptFileFind(bbi->nameBpt, name, strlen(name), &block, sizeof(block)))
+    {
+    if (isSwapped)
+	{
+	block.offset = byteSwap64(block.offset);
+	block.size = byteSwap64(block.size);
+	}
+    uglyf("Whoohoo, found matching block at offset %lld\n", block.offset);
+
+    /* Read in raw data; udcMustRead aborts rather than returning a short read. */
+    udcSeek(bbi->udc, block.offset);
+    char *rawData = needLargeMem(block.size);
+    udcMustRead(bbi->udc, rawData, block.size);
+
+    /* Optionally uncompress data, and set data pointer to uncompressed version. */
+    char *uncompressedData = NULL;
+    char *data = NULL;
+    if (bbi->uncompressBufSize > 0)
+	{
+	data = uncompressedData = needLargeMem(bbi->uncompressBufSize);
+	size_t uncSize = zUncompress(rawData, block.size, uncompressedData, bbi->uncompressBufSize);
+	assert(uncSize <= bbi->uncompressBufSize);
+	}
+    else
+        data = rawData;
+
+    /* Read chromosome index, start, end */
+    int chromIx = memReadBits32(&data, isSwapped);
+    int firstItemStart = memReadBits32(&data, isSwapped);   // Start of first item, not useful
+    int firstItemEnd = memReadBits32(&data, isSwapped);	    // End of first item, not useful
+
+    uglyf("Got chromIx %d, start %d, end %d\n", chromIx, firstItemStart, firstItemEnd);
+    char nameBuf[bbi->chromBpt->keySize+1];
+    bptStringKeyAtPos(bbi->chromBpt, chromIx, nameBuf, sizeof(nameBuf));
+    uglyf("Chrom is %s\n", nameBuf);
+
+    /* Clean up temporary buffers. */
+    freez(&rawData);
+    freez(&uncompressedData);
+    }
+return NULL;
+}
+
+void bigBedNamedItems(char *bigBedFile, char *name, char *outFile)
+/* bigBedNamedItems - Write items named 'name' from bigBedFile to outFile, one per line. */
+{
+struct bbiFile *bbi = bigBedFileOpen(bigBedFile);
+struct bigBedInterval *interval, *intervalList = bigBedNameQuery(bbi, name);
+FILE *f = mustOpen(outFile, "w");
+char *chromName = "uglyFoo";	/* Placeholder: the real chromosome name is not looked up yet. */
+for (interval = intervalList; interval != NULL; interval = interval->next)
+    {
+    fprintf(f, "%s\t%u\t%u", chromName, interval->start, interval->end);
+    if (interval->rest != NULL)
+	fprintf(f, "\t%s\n", interval->rest);
+    else
+	fprintf(f, "\n");
+    }
+carefulClose(&f);
+bigBedFileClose(&bbi);	/* Release the bigBed handle opened above. */
+}
+
+int main(int argc, char *argv[])
+/* Process command line: after optionInit, require exactly three arguments. */
+{
+optionInit(&argc, argv, options);
+if (argc - 1 != 3)    /* Need file.bb, name and output.bed. */
+    usage();
+bigBedNamedItems(argv[1], argv[2], argv[3]);
+return 0;
+}