ce2018972e3c694b7f17794271465c041df0d2d9 braney Fri Dec 20 17:13:50 2013 -0800 impose kentian order on the style. Remove -clip options which did nothing. diff --git src/utils/bigWigCat/bigWigCat.c src/utils/bigWigCat/bigWigCat.c index ea196d7..a8be6dd 100644 --- src/utils/bigWigCat/bigWigCat.c +++ src/utils/bigWigCat/bigWigCat.c @@ -7,40 +7,40 @@ #include "options.h" #include "bigWig.h" #include "bwgInternal.h" #include "zlibFace.h" #include "errabort.h" #include "sqlNum.h" #include "sig.h" #include "bPlusTree.h" #include "cirTree.h" #include "bbiFile.h" #include "udc.h" static int blockSize = 256; static int itemsPerSlot = 1024; -static boolean clipDontDie = FALSE; static boolean doCompress = FALSE; -static struct optionSpec options[] = { +static struct optionSpec options[] = +{ {"itemsPerSlot", OPTION_INT}, - {"clip", OPTION_BOOLEAN}, {NULL, 0}, }; -struct bwmSection { +struct bwmSection +{ bits32 chromId; /* Chromosome name. */ bits32 start,end; /* Range of chromosome covered. */ bits64 fileOffset; /* Offset of section in file. */ }; static struct cirTreeRange bwmSectionFetchKey(const void *va, void *context) /* Fetch bwmSection key for r-tree */ { struct cirTreeRange res; const struct bwmSection *a = (struct bwmSection *)va; res.chromIx = a->chromId; res.start = a->start; res.end = a->end; return res; } @@ -91,31 +91,30 @@ in->chromId = byteSwap32(in->chromId); in->start = byteSwap32(in->start); in->end = byteSwap32(in->end); in->validCount = byteSwap32(in->validCount); in->minVal = byteSwapFloat(in->minVal); in->maxVal = byteSwapFloat(in->maxVal); in->sumData = byteSwapFloat(in->sumData); in->sumSquares = byteSwapFloat(in->sumSquares); } } static void bbiWriteFileSummary(bits32 reductionLevel, struct bbiFile * input, FILE *f, struct bwmSection ** summary) /* Write out summary and index to summary uncompressed, returning start position of * summary index. 
*/ { - struct udcFile *udc = input->udc; struct bbiZoomLevel *zoom; for (zoom = input->levelList; zoom; zoom = zoom->next) if (zoom->reductionLevel == reductionLevel) break; udcSeek(udc, zoom->indexOffset); struct cirTreeFile *ctf = cirTreeFileAttach(input->fileName, udc); struct fileOffsetSize *blockList = cirTreeEnumerateBlocks(ctf); struct fileOffsetSize *block, *beforeGap, *afterGap; char *uncompressBuf = NULL; if (input->uncompressBufSize > 0) uncompressBuf = needLargeMem(input->uncompressBufSize); /* This loop is a little complicated because we merge the read requests for efficiency, but we @@ -170,60 +169,63 @@ (*summary)->end = dSum->end; (*summary)->fileOffset = filePos; (*summary)++; blockPt += sizeof(*dSum); } assert(blockPt == blockEnd); blockBuf += block->size; } freeMem(mergedBuf); } freeMem(uncompressBuf); slFreeList(&blockList); cirTreeFileDetach(&ctf); } -static bits32 countSummaryElementsInFile(bits32 reductionLevel, struct bbiFile * inputFile) { + +static bits32 countSummaryElementsInFile(bits32 reductionLevel, struct bbiFile * inputFile) +{ struct udcFile *udc = inputFile->udc; struct bbiZoomLevel *zoom; for (zoom = inputFile->levelList; zoom; zoom = zoom->next) if (zoom->reductionLevel == reductionLevel) break; udcSeek(udc, zoom->indexOffset); struct cirTreeFile *ctf = cirTreeFileAttach(inputFile->fileName, udc); bits32 res = ctf->itemCount; cirTreeFileDetach(&ctf); return res; } -static bits32 countSummaryElements(bits32 reductionLevel, struct bbiFile ** inputFiles, int inputFilesCount) { +static bits32 countSummaryElements(bits32 reductionLevel, struct bbiFile ** inputFiles, int inputFilesCount) +{ int i; bits32 res = 0; for (i = 0; i < inputFilesCount; i++) res += countSummaryElementsInFile(reductionLevel, inputFiles[i]); return res; } -void bbiWriteSummary(bits32 reductionLevel, struct bbiFile ** inputFiles, int inputFilesCount, FILE *f, struct bwmSection ** summary) { +void bbiWriteSummary(bits32 reductionLevel, struct bbiFile ** 
inputFiles, int inputFilesCount, FILE *f, struct bwmSection ** summary) +{ int i; - for (i = 0; i < inputFilesCount; i++) { +for (i = 0; i < inputFilesCount; i++) bbiWriteFileSummary(reductionLevel, inputFiles[i], f, summary); } -} bits64 bwmWriteSummaryAndIndex(bits32 reductionLevel, struct bbiFile ** inputFiles, int inputFilesCount, int blockSize, int itemsPerSlot, FILE *f) /* Write out summary and index to summary, returning start position of * summary index. */ { bits32 count = countSummaryElements(reductionLevel, inputFiles, inputFilesCount); struct bwmSection * summaryPtr, * summaryArray; AllocArray(summaryArray, count); summaryPtr = summaryArray; bbiWriteSummary(reductionLevel, inputFiles, inputFilesCount, f, &summaryPtr); bits64 indexOffset = ftell(f); @@ -328,41 +330,43 @@ // Get maximum uncompressed size bits32 uncSizeOne = bwf->uncompressBufSize; if (uncSizeOne > uncompressBufSize) uncompressBufSize = uncSizeOne; char *uncompressBuf = NULL; if (bwf->uncompressBufSize > 0) uncompressBuf = needLargeMem(bwf->uncompressBufSize); // Copy paste all the blocks struct bbiChromInfo *chrom, *chromList = bbiChromList(bwf); for (chrom = chromList; chrom != NULL; chrom = chrom->next) { struct fileOffsetSize *block, *blockList = bbiOverlappingBlocks(bwf, bwf->unzoomedCir, chrom->name, 0, chrom->size, NULL); - for (block = blockList; block != NULL; ) { + for (block = blockList; block != NULL; ) + { struct fileOffsetSize *beforeGap, *afterGap; fileOffsetSizeFindGap(block, &beforeGap, &afterGap); bits64 mergedOffset = block->offset; bits64 mergedSize = beforeGap->offset + beforeGap->size - mergedOffset; udcSeek(udc, mergedOffset); char *mergedBuf = needLargeMem(mergedSize); udcMustRead(udc, mergedBuf, mergedSize); char *blockBuf = mergedBuf; - for (;block != afterGap; block = block->next) { + for (;block != afterGap; block = block->next) + { struct bwgSectionHead head; char * blockPt = blockBuf; if (uncompressBuf) { blockPt = uncompressBuf; zUncompress(blockBuf, 
block->size, uncompressBuf, bwf->uncompressBufSize); } else { blockPt = blockBuf; } bwgSectionHeadFromMem(&blockPt, &head, bwf->isSwapped); section->chromId = chrom->id; section->start = head.start; section->end = head.end; @@ -379,43 +383,45 @@ } freeMem(uncompressBuf); slFreeList(chromList); } /* Write out index */ indexOffset = ftell(f); cirTreeFileBulkIndexToOpenFile(sectionArray, sizeof(sectionArray[0]), sectionCount, blockSize, 1, NULL, bwmSectionFetchKey, bwmSectionFetchOffset, indexOffset, f); freez(&sectionArray); /* Write out summary sections. */ verbose(2, "bwgCreate writing %d summaries\n", summaryCount); i = 0; -for (zoom = inBbiFiles[0]->levelList; zoom; zoom = zoom->next) { +for (zoom = inBbiFiles[0]->levelList; zoom; zoom = zoom->next) + { reductionDataOffsets[i] = ftell(f); reductionIndexOffsets[i++] = bwmWriteSummaryAndIndex(zoom->reductionLevel, inBbiFiles, inBbiFilesCount, blockSize, itemsPerSlot, f); } /* Calculate summary */ struct bbiSummaryElement sum = bbiTotalSummary(inBbiFiles[0]); totalSum.validCount = sum.validCount; totalSum.minVal = sum.minVal; totalSum.maxVal = sum.maxVal; totalSum.sumData = sum.sumData; totalSum.sumSquares = sum.sumSquares; -for (index = 1; index < inBbiFilesCount; index++) { +for (index = 1; index < inBbiFilesCount; index++) + { sum = bbiTotalSummary(inBbiFiles[index]); totalSum.validCount += sum.validCount; if (totalSum.minVal > sum.minVal) totalSum.minVal = sum.minVal; if (totalSum.maxVal > sum.maxVal) totalSum.maxVal = sum.maxVal; totalSum.sumData += sum.sumData; totalSum.sumSquares += sum.sumSquares; } /* Write real summary */ fseek(f, totalSummaryOffset, SEEK_SET); bbiSummaryElementWrite(f, &totalSum); /* Go back and fill in offsets properly in header. */ fseek(f, dataOffsetPos, SEEK_SET); @@ -441,190 +447,203 @@ { fseek(f, reductionDataOffsetPos[i], SEEK_SET); writeOne(f, reductionDataOffsets[i]); writeOne(f, reductionIndexOffsets[i]); } /* Write end signature. 
*/ fseek(f, 0L, SEEK_END); writeOne(f, sig); /* Clean up */ freez(&chromInfoArray); carefulClose(&f); } -void checkCompression(char ** inNames, struct bbiFile ** inFiles, int inNamesCount) { +void checkCompression(char ** inNames, struct bbiFile ** inFiles, int inNamesCount) +{ int index; doCompress = inFiles[0]->uncompressBufSize > 0; for (index = 1; index < inNamesCount; index++) if ((inFiles[index]->uncompressBufSize > 0) != doCompress) errAbort("Some of the files are compressed, some are not.\n"); } -void checkBlockSize(char ** inNames, struct bbiFile ** inFiles, int inNamesCount) { +void checkBlockSize(char ** inNames, struct bbiFile ** inFiles, int inNamesCount) +{ int index; blockSize = inFiles[0]->unzoomedCir->blockSize; for (index = 1; index < inNamesCount; index++) if (inFiles[index]->unzoomedCir->blockSize != blockSize) errAbort("Not all files have the same block size.\n"); } -void checkChromosomes(char ** inNames, struct bbiFile ** inFiles, int inNamesCount) { +void checkChromosomes(char ** inNames, struct bbiFile ** inFiles, int inNamesCount) +{ int index; struct bbiChromInfo *chromList0 = bbiChromList(inFiles[0]); - for (index = 1; index < inNamesCount; index++) { +for (index = 1; index < inNamesCount; index++) + { struct bbiChromInfo *chrom0, *chrom, *chromList = bbiChromList(inFiles[index]); for (chrom = chromList, chrom0 = chromList0; chrom && chrom0; chrom = chrom->next, chrom0 = chrom0->next) if (chrom0->size != chrom->size || strcmp(chrom0->name, chrom->name)) errAbort("The bigwig files do not have the same chromosome details.\n"); if (chrom || chrom0) errAbort("The bigwigs files do not have the same number of chromosomes.\n"); slFreeList(chromList); } slFreeList(chromList0); } -void checkReductions(char ** inNames, struct bbiFile ** inFiles, int inNamesCount) { +void checkReductions(char ** inNames, struct bbiFile ** inFiles, int inNamesCount) +{ int index; - for (index = 1; index < inNamesCount; index++) { +for (index = 1; index < 
inNamesCount; index++) + { struct bbiZoomLevel *zoom, *zoom0; - for (zoom = inFiles[index]->levelList, zoom0 = inFiles[0]->levelList; zoom && zoom0; zoom = zoom->next, zoom0 = zoom0->next) { + for (zoom = inFiles[index]->levelList, zoom0 = inFiles[0]->levelList; zoom && zoom0; zoom = zoom->next, zoom0 = zoom0->next) + { if (zoom->reductionLevel != zoom0->reductionLevel) errAbort("The bigwig files do not have the same reduction levels\n"); } if (zoom || zoom0) errAbort("The bigwig files do not all have the same number of reduction levels.\n"); } } -struct fileNamePair { +struct fileNamePair +{ char * name; struct bbiFile * file; }; -int comparePairs(const void * a, const void * b) { +int comparePairs(const void * a, const void * b) +{ struct fileNamePair * A , * B; A = (struct fileNamePair *) a; B = (struct fileNamePair *) b; if (A->file->unzoomedCir->startChromIx > B->file->unzoomedCir->startChromIx) return 1; else if (B->file->unzoomedCir->startChromIx > A->file->unzoomedCir->startChromIx) return -1; else return (int) (A->file->unzoomedCir->startBase - B->file->unzoomedCir->startBase); } -void sortFilesByCoords(char ** inNames, struct bbiFile ** inFiles, int inNamesCount) { +void sortFilesByCoords(char ** inNames, struct bbiFile ** inFiles, int inNamesCount) +{ struct fileNamePair * pairs; AllocArray(pairs, inNamesCount); int index; - for (index = 0; index < inNamesCount; index++) { +for (index = 0; index < inNamesCount; index++) + { pairs[index].name = inNames[index]; pairs[index].file = inFiles[index]; } qsort(pairs, inNamesCount, sizeof(struct fileNamePair), comparePairs); - for (index = 0; index < inNamesCount; index++) { +for (index = 0; index < inNamesCount; index++) + { inNames[index] = pairs[index].name; inFiles[index] = pairs[index].file; } // Clean up freez(&pairs); } -char filesOverlap(struct bbiFile * A, struct bbiFile * B) { +char filesOverlap(struct bbiFile * A, struct bbiFile * B) +{ if (A->unzoomedCir->endChromIx < B->unzoomedCir->startChromIx) 
return 0; else if (A->unzoomedCir->endChromIx > B->unzoomedCir->startChromIx) return 1; else return A->unzoomedCir->endBase >= B->unzoomedCir->startBase; } -void checkOverlaps(char ** inNames, struct bbiFile ** inFiles, int inNamesCount) { +void checkOverlaps(char ** inNames, struct bbiFile ** inFiles, int inNamesCount) +{ int index; for (index = 1; index < inNamesCount; index++) if (filesOverlap(inFiles[index-1], inFiles[index])) errAbort("Files %s and %s overlap, cannot continue!\n", inNames[index-1], inNames[index]); } -void checkFileSettings(char ** inNames, struct bbiFile ** inFiles, int inNamesCount) { +void checkFileSettings(char ** inNames, struct bbiFile ** inFiles, int inNamesCount) +{ checkCompression(inNames, inFiles, inNamesCount); checkBlockSize(inNames, inFiles, inNamesCount); checkChromosomes(inNames, inFiles, inNamesCount); sortFilesByCoords(inNames, inFiles, inNamesCount); checkOverlaps(inNames, inFiles, inNamesCount); checkReductions(inNames, inFiles, inNamesCount); } void bigWigCat( char **inNames, /* Input files in big wig format. */ int inNamesCount, /* Number of input files */ char *outName) /* Merge multiple non-overlapping bigwig files * into a single big wig file. */ { /* This code needs to agree with code in two other places currently - bigBedFileCreate, * and bbiFileOpen. I'm thinking of refactoring to share at least between * bigBedFileCreate and bigWigFileCreate. It'd be great so it could be structured * so that it could send the input in one chromosome at a time, and send in the zoom * stuff only after all the chromosomes are done. This'd potentially reduce the memory * footprint by a factor of 2 or 4. Still, for now it works. 
-JK */ struct lm *lm = lmInit(0); struct bbiFile ** inBbiFiles; AllocArray(inBbiFiles, inNamesCount); int i; -for (i = 0; i < inNamesCount; i++) { +for (i = 0; i < inNamesCount; i++) + { inBbiFiles[i] = bigWigFileOpen(inNames[i]); bbiAttachUnzoomedCir(inBbiFiles[i]); } checkFileSettings(inNames, inBbiFiles, inNamesCount); MergedBwgCreate(inBbiFiles, inNamesCount, blockSize, itemsPerSlot, doCompress, outName); lmCleanup(&lm); } void usage() /* Explain usage and exit. */ { errAbort( "bigWigCat v %d - merge non-overlapping bigWig files\n" "directly into bigWig format\n" "usage:\n" " mergeBigWigs out.bw in1.bw in2.bw ...\n" "Where in*.bw is in big wig format\n" "and out.bw is the output indexed big wig file.\n" "options:\n" " -itemsPerSlot=N - Number of data points bundled at lowest level. Default %d\n" - " -clip - If set just issue warning messages rather than dying if wig\n" - " file contains items off end of chromosome.\n" , bbiCurrentVersion, itemsPerSlot ); } int main(int argc, char *argv[]) { optionInit(&argc, argv, options); /* Process command line. */ optionInit(&argc, argv, options); itemsPerSlot = optionInt("itemsPerSlot", itemsPerSlot); -clipDontDie = optionExists("clip"); if (argc < 4) usage(); char * outName = argv[1]; char ** inNames = argv + 2; int inNamesCount = argc - 2; bigWigCat(inNames, inNamesCount, outName); if (verboseLevel() > 1) printVmPeak(); return 0; }