cebe26298f76146852d44f9010c57afe644d3060 kent Tue Feb 26 13:03:24 2013 -0800 First cut at read side of named index seems to work. Still needs testing and polishing. diff --git src/utils/bigBedNamedItems/bigBedNamedItems.c src/utils/bigBedNamedItems/bigBedNamedItems.c index 889c2c5..ff9afc2 100644 --- src/utils/bigBedNamedItems/bigBedNamedItems.c +++ src/utils/bigBedNamedItems/bigBedNamedItems.c @@ -1,120 +1,127 @@ /* bigBedNamedItems - Extract item(s) of given name(s) from bigBed. */ #include "common.h" #include "linefile.h" #include "hash.h" #include "options.h" #include "localmem.h" #include "udc.h" #include "bPlusTree.h" #include "bigBed.h" #include "obscure.h" #include "zlibFace.h" void usage() /* Explain usage and exit. */ { errAbort( - "bigBedNamedItems - Extract item(s) of given name(s) from bigBed\n" + "bigBedNamedItems - Extract item of given name from bigBed\n" "usage:\n" " bigBedNamedItems file.bb name output.bed\n" "options:\n" " -xxx=XXX\n" ); } /* Command line validation table. */ static struct optionSpec options[] = { {NULL, 0}, }; void bigBedAttachNameIndex(struct bbiFile *bbi) /* Attach name index part of bbiFile to bbi */ { if (bbi->nameBpt == NULL) { if (bbi->nameIndexOffset == 0) errAbort("%s has no name index", bbi->fileName); udcSeek(bbi->udc, bbi->nameIndexOffset); bbi->nameBpt = bptFileAttach(bbi->fileName, bbi->udc); } } -struct bigBedInterval *bigBedNameQuery(struct bbiFile *bbi, char *name) -/* Return (possibly empty) list of items of given name in bigBed */ +boolean bigBedNameQuery(struct bbiFile *bbi, char *name, FILE *f) +/* Write item matching name to file. Return TRUE if anything written. */ { bigBedAttachNameIndex(bbi); boolean isSwapped = bbi->isSwapped; struct offsetSize {bits64 offset; bits64 size;} block; +boolean didWrite = FALSE; if (bptFileFind(bbi->nameBpt, name, strlen(name), &block, sizeof(block))) { if (bbi->isSwapped) { block.offset = byteSwap64(block.offset); block.size = byteSwap64(block.size); } - uglyf("Whoohoo, found matching block at offset %lld\n", block.offset); - /* Read in raw data */ udcSeek(bbi->udc, block.offset); char *rawData = needLargeMem(block.size); udcRead(bbi->udc, rawData, block.size); /* Optionally uncompress data, and set data pointer to uncompressed version. */ char *uncompressedData = NULL; char *data = NULL; if (bbi->uncompressBufSize > 0) { data = uncompressedData = needLargeMem(bbi->uncompressBufSize); size_t uncSize = zUncompress(rawData, block.size, uncompressedData, bbi->uncompressBufSize); assert(uncSize <= bbi->uncompressBufSize); } else data = rawData; - /* Read chromosome index, start, end */ - int chromIx = memReadBits32(&data, isSwapped); - int firstItemStart = memReadBits32(&data, isSwapped); // Start of first item, not useful - int firstItemEnd = memReadBits32(&data, isSwapped); // End of first item, not useful + /* Set up for "memRead" routines to more or less treat memory block like file */ + char *blockPt = data, *blockEnd = data + block.size; + struct dyString *dy = dyStringNew(32); // Keep bits outside of chrom/start/end here - uglyf("Got chromIx %d, start %d, end %d\n", chromIx, firstItemStart, firstItemEnd); - char nameBuf[bbi->chromBpt->keySize+1]; - bptStringKeyAtPos(bbi->chromBpt, chromIx, nameBuf, sizeof(nameBuf)); - uglyf("Chrom is %s\n", nameBuf); + + /* Read next record into local variables. */ + while (blockPt < blockEnd) + { + bits32 chromIx = memReadBits32(&blockPt, isSwapped); + bits32 s = memReadBits32(&blockPt, isSwapped); + bits32 e = memReadBits32(&blockPt, isSwapped); + int c; + dyStringClear(dy); + while ((c = *blockPt++) >= 0) + { + if (c == 0) + break; + dyStringAppendC(dy, c); + } + if (startsWithWordByDelimiter(name, '\t', dy->string)) + { + char chromName[bbi->chromBpt->keySize+1]; + bptStringKeyAtPos(bbi->chromBpt, chromIx, chromName, sizeof(chromName)); + fprintf(f, "%s\t%u\t%u\t%s\n", chromName, s, e, dy->string); + didWrite = TRUE; + } + } /* Clean up temporary buffers. */ - freez(&rawData); + dyStringFree(&dy); freez(&uncompressedData); - data = NULL; + freez(&rawData); } -return NULL; +return didWrite; } void bigBedNamedItems(char *bigBedFile, char *name, char *outFile) /* bigBedNamedItems - Extract item(s) of given name(s) from bigBed. */ { struct bbiFile *bbi = bigBedFileOpen(bigBedFile); -struct bigBedInterval *interval, *intervalList = bigBedNameQuery(bbi, name); FILE *f = mustOpen(outFile, "w"); -char *chromName = "uglyFoo"; -for (interval = intervalList; interval != NULL; interval = interval->next) - { - fprintf(f, "%s\t%u\t%u", chromName, interval->start, interval->end); - char *rest = interval->rest; - if (rest != NULL) - fprintf(f, "\t%s\n", rest); - else - fprintf(f, "\n"); - } +bigBedNameQuery(bbi, name, f); carefulClose(&f); } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); if (argc != 4) usage(); bigBedNamedItems(argv[1], argv[2], argv[3]); return 0; }