36bf509e635a8946d0c566f307bf68a9b960a375 kent Sat Mar 19 14:20:45 2011 -0700 Big wig merge utility seems to work. diff --git src/utils/bigWigMerge/bigWigMerge.c src/utils/bigWigMerge/bigWigMerge.c new file mode 100644 index 0000000..77e4059 --- /dev/null +++ src/utils/bigWigMerge/bigWigMerge.c @@ -0,0 +1,226 @@ +/* bigWigMerge - Merge together multiple bigWigs into a single one.. */ +#include "common.h" +#include "linefile.h" +#include "hash.h" +#include "options.h" +#include "localmem.h" +#include "bbiFile.h" +#include "bigWig.h" + +static char const rcsid[] = "$Id: newProg.c,v 1.30 2010/03/24 21:18:33 hiram Exp $"; + +void usage() +/* Explain usage and exit. */ +{ +errAbort( + "bigWigMerge - Merge together multiple bigWigs into a single output bedGraph.\n" + "You'll have to run bedGraphToBigWig to make the output bigWig.\n" + "The signal values are just added together to merge them\n" + "usage:\n" + " bigWigMerge in1.bw in2.bw .. inN.bw out.bedGraph\n" + "options:\n" + " -threshold=0.N - don't output values at or below this threshold. Default is 0.0\n" + " -adjust=0.N - add adjustment to each value\n" + ); +} + +double clThreshold = 0.0; +double clAdjust = 0.0; + +static struct optionSpec options[] = { + {"threshold", OPTION_DOUBLE}, + {"adjust", OPTION_DOUBLE}, + {NULL, 0}, +}; + +struct slName *getAllChroms(struct bbiFile *fileList) +/* Read chromosomes from all files and make sure they agree, and return merged list. */ +{ +struct bbiFile *file; +struct hash *hash = hashNew(0); +struct slName *nameList = NULL; +for (file = fileList; file != NULL; file = file->next) + { + struct bbiChromInfo *info, *infoList = bbiChromList(file); + for (info = infoList; info != NULL; info = info->next) + { + struct bbiChromInfo *oldInfo = hashFindVal(hash, info->name); + if (oldInfo != NULL) + { + if (info->size != oldInfo->size) + errAbort("ERROR: Merging from different assemblies? " + "Chromosome %s is %d in %s but %d in another file", + info->name, (int)(info->size), file->fileName, (int)oldInfo->size); + } + else + { + hashAdd(hash, info->name, info); + slNameAddHead(&nameList, info->name); + } + } + } +slSort(&nameList, slNameCmpStringsWithEmbeddedNumbers); +return nameList; +} + +boolean allStartEndSame(struct bbiInterval *ivList) +/* Return true if all items in list start and end the same place. */ +{ +int start = ivList->start, end = ivList->end; +struct bbiInterval *iv; +for (iv = ivList->next; iv != NULL; iv = iv->next) + { + if (iv->start != start || iv->end != end) + return FALSE; + } +return TRUE; +} + +int doublesTheSame(double *pt, int size) +/* Return count of numbers at start that are the same as first number. */ +{ +int sameCount = 1; +int i; +double x = pt[0]; +for (i=1; i<size; ++i) + { + if (pt[i] != x) + break; + ++sameCount; + } +return sameCount; +} + +#ifdef OLD +void outputRangeAsBedGraph(FILE *f, char *chrom, struct range *range, + int *pBufAlloc, double **pBuf) +/* Output range (who's val is a list of bbiIntervals) to a bedGraph file. */ +{ +struct bbiInterval *iv, *ivList = range->val; +if (ivList->next == NULL) /* Special case of just one. */ + { + fprintf(f, "%s\t%d\t%d\t%g\n", chrom, ivList->start, ivList->end, ivList->val); + } +else if (allStartEndSame(ivList)) + { + double sum = 0; + for (iv = ivList; iv != NULL; iv = iv->next) + sum += ivList->val; + fprintf(f, "%s\t%d\t%d\t%g\n", chrom, ivList->start, ivList->end, sum); + } +else + { + /* Make sure that merge buffer is big enough */ + int start = range->start; + int size = range->end - start; + if (size > *pBufAlloc) + { + int newAlloc = *pBufAlloc * 2; + if (newAlloc < size) + newAlloc = size; + *pBuf = needHugeMemResize(*pBuf, newAlloc); + } + + /* Set bits of merge buffer we'll use to zero. */ + int i; + double *buf = *pBuf; + for (i=0; i<size; ++i) + buf[i] = 0.0; + + /* Loop through ivList folding into mergeBuf. */ + for (iv = ivList; iv != NULL; iv = iv->next) + { + double val = iv->val; + for (i=iv->start; i < iv->end; ++i) + buf[i-start] += val; + } + + /* Output each range of same values as a bedGraph item */ + int sameCount; + for (i=0; i<size; i += sameCount) + { + sameCount = doublesTheSame(buf+i, size-i); + fprintf(f, "%s\t%d\t%d\t%g\n", + chrom, start + i, start + i + sameCount, buf[i]); + } + } +} +#endif /* OLD */ + +void bigWigMerge(int inCount, char *inFiles[], char *outFile) +/* bigWigMerge - Merge together multiple bigWigs into a single one.. */ +{ +/* Make a list of open bigWig files. */ +struct bbiFile *inFile, *inFileList = NULL; +int i; +for (i=0; i<inCount; ++i) + { + inFile = bigWigFileOpen(inFiles[i]); + slAddTail(&inFileList, inFile); + } + +FILE *f = mustOpen(outFile, "w"); + +struct slName *chrom, *chromList = getAllChroms(inFileList); +verbose(1, "Got %d chromosomes from %d bigWigs\n", slCount(chromList), slCount(inFileList)); +double *mergeBuf = NULL; +int mergeBufSize = 0; +for (chrom = chromList; chrom != NULL; chrom = chrom->next) + { + struct lm *lm = lmInit(0); + + /* Make sure merge buffer is big enough. */ + int chromSize = bbiChromSize(inFileList, chrom->name); + verbose(2, "Processing %s (%d bases)\n", chrom->name, chromSize); + if (chromSize > mergeBufSize) + { + mergeBufSize = chromSize; + freeMem(mergeBuf); + mergeBuf = needHugeMem(mergeBufSize * sizeof(double)); + } + int i; + for (i=0; i<chromSize; ++i) + mergeBuf[i] = 0.0; + + /* Loop through each input file grabbing data and merging it in. */ + for (inFile = inFileList; inFile != NULL; inFile = inFile->next) + { + struct bbiInterval *ivList = bigWigIntervalQuery(inFile, chrom->name, 0, chromSize, lm); + verbose(3, "Got %d intervals in %s\n", slCount(ivList), inFile->fileName); + struct bbiInterval *iv; + for (iv = ivList; iv != NULL; iv = iv->next) + { + double val = iv->val; + int end = iv->end; + for (i=iv->start; i < end; ++i) + mergeBuf[i] += val; + } + } + + /* Output each range of same values as a bedGraph item */ + int sameCount; + for (i=0; i<chromSize; i += sameCount) + { + sameCount = doublesTheSame(mergeBuf+i, chromSize-i); + double val = mergeBuf[i] + clAdjust; + if (val > clThreshold) + fprintf(f, "%s\t%d\t%d\t%g\n", chrom->name, i, i + sameCount, val); + } + + lmCleanup(&lm); + } + +carefulClose(&f); +} + +int main(int argc, char *argv[]) +/* Process command line. */ +{ +optionInit(&argc, argv, options); +clThreshold = optionDouble("threshold", clThreshold); +clAdjust = optionDouble("adjust", clAdjust); +if (argc < 4) + usage(); +bigWigMerge(argc-2, argv+1, argv[argc-1]); +return 0; +}