ba300e9f4268b4e271699dab04c02a345918b31e kent Wed Feb 27 09:57:52 2013 -0800 Refactoring in preparation for moving much of this to library. diff --git src/utils/bigBedNamedItems/bigBedNamedItems.c src/utils/bigBedNamedItems/bigBedNamedItems.c index a16e7a7..09c8c70 100644 --- src/utils/bigBedNamedItems/bigBedNamedItems.c +++ src/utils/bigBedNamedItems/bigBedNamedItems.c @@ -78,37 +78,37 @@ fos->size = byteSwap64(lastOffsetSize.size); } else { fos->offset = lastOffsetSize.offset; fos->size = lastOffsetSize.size; } slAddHead(&fosList, fos); } } slRefFreeListAndVals(&blockList); slReverse(&fosList); return fosList; } -boolean bigBedNameQuery(struct bbiFile *bbi, char *name, FILE *f) -/* Write item matching name to file. Return TRUE if anything written. */ +struct bigBedInterval *bigBedNameQuery(struct bbiFile *bbi, char *name, struct lm *lm) +/* Return list of intervals matching name. These intervals will be allocated out of lm. */ { bigBedAttachNameIndex(bbi); boolean isSwapped = bbi->isSwapped; struct fileOffsetSize *fos, *fosList = bigBedChunksMatchingName(bbi, name); -boolean didWrite = FALSE; +struct bigBedInterval *interval, *intervalList = NULL; for (fos = fosList; fos != NULL; fos = fos->next) { /* Read in raw data */ udcSeek(bbi->udc, fos->offset); char *rawData = needLargeMem(fos->size); udcRead(bbi->udc, rawData, fos->size); /* Optionally uncompress data, and set data pointer to uncompressed version. 
*/ char *uncompressedData = NULL; char *data = NULL; int dataSize = 0; if (bbi->uncompressBufSize > 0) { data = uncompressedData = needLargeMem(bbi->uncompressBufSize); dataSize = zUncompress(rawData, fos->size, uncompressedData, bbi->uncompressBufSize); @@ -128,49 +128,71 @@ while (blockPt < blockEnd) { bits32 chromIx = memReadBits32(&blockPt, isSwapped); bits32 s = memReadBits32(&blockPt, isSwapped); bits32 e = memReadBits32(&blockPt, isSwapped); int c; dyStringClear(dy); while ((c = *blockPt++) >= 0) { if (c == 0) break; dyStringAppendC(dy, c); } if (startsWithWordByDelimiter(name, '\t', dy->string)) { - char chromName[bbi->chromBpt->keySize+1]; - bptStringKeyAtPos(bbi->chromBpt, chromIx, chromName, sizeof(chromName)); - fprintf(f, "%s\t%u\t%u\t%s\n", chromName, s, e, dy->string); - didWrite = TRUE; + lmAllocVar(lm, interval); + interval->start = s; + interval->end = e; + interval->rest = cloneString(dy->string); + interval->chromId = chromIx; + slAddHead(&intervalList, interval); } } /* Clean up temporary buffers. */ dyStringFree(&dy); freez(&uncompressedData); freez(&rawData); } slFreeList(&fosList); -return didWrite; +slReverse(&intervalList); +return intervalList; +} + +void bigBedIntervalListToBedFile(struct bbiFile *bbi, struct bigBedInterval *intervalList, FILE *f) +/* Write out big bed interval list to bed file, looking up the chromosome name from each interval's chromId. */ +{ +char chromName[bbi->chromBpt->keySize+1]; +int chromId = -1; +struct bigBedInterval *interval; +for (interval = intervalList; interval != NULL; interval = interval->next) + { + if (interval->chromId != chromId) + { + chromId = interval->chromId; + bptStringKeyAtPos(bbi->chromBpt, chromId, chromName, sizeof(chromName)); + } + fprintf(f, "%s\t%u\t%u\t%s\n", chromName, interval->start, interval->end, interval->rest); + } } void bigBedNamedItems(char *bigBedFile, char *name, char *outFile) /* bigBedNamedItems - Extract item(s) of given name(s) from bigBed. 
*/ { struct bbiFile *bbi = bigBedFileOpen(bigBedFile); FILE *f = mustOpen(outFile, "w"); -bigBedNameQuery(bbi, name, f); +struct lm *lm = lmInit(0); +struct bigBedInterval *intervalList = bigBedNameQuery(bbi, name, lm); +bigBedIntervalListToBedFile(bbi, intervalList, f); carefulClose(&f); } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); if (argc != 4) usage(); bigBedNamedItems(argv[1], argv[2], argv[3]); return 0; }