1fbd3bc156dcec78f3f89d715acdf9a264abdbeb kent Thu Aug 11 12:27:30 2011 -0700 Adding sampleAroundCenter option. diff --git src/utils/bigWigAverageOverBed/bigWigAverageOverBed.c src/utils/bigWigAverageOverBed/bigWigAverageOverBed.c index 0a1e820..ad6d8ca 100644 --- src/utils/bigWigAverageOverBed/bigWigAverageOverBed.c +++ src/utils/bigWigAverageOverBed/bigWigAverageOverBed.c @@ -1,52 +1,56 @@ /* bigWigAverageOverBed - Compute average score of big wig over each bed, which may have introns. */ #include "common.h" #include "linefile.h" #include "hash.h" #include "localmem.h" #include "options.h" #include "verbose.h" #include "basicBed.h" #include "bigWig.h" #include "bits.h" static char const rcsid[] = "$Id: newProg.c,v 1.30 2010/03/24 21:18:33 hiram Exp $"; char *bedOut = NULL; +int sampleAroundCenter = 0; void usage() /* Explain usage and exit. */ { errAbort( "bigWigAverageOverBed - Compute average score of big wig over each bed, which may have introns.\n" "usage:\n" " bigWigAverageOverBed in.bw in.bed out.tab\n" "The output columns are:\n" " name - name field from bed, which should be unique\n" " size - size of bed (sum of exon sizes\n" " covered - # bases within exons covered by bigWig\n" " sum - sum of values over all bases covered\n" " mean0 - average over bases with non-covered bases counting as zeroes\n" " mean - average over just covered bases\n" "Options:\n" " -bedOut=out.bed - Make output bed that is echo of input bed but with mean column appended\n" + " -sampleAroundCenter=N - Take sample at region N bases wide centered around bed item, rather\n" + " than the usual sample in the bed item.\n" ); } static struct optionSpec options[] = { {"bedOut", OPTION_STRING}, + {"sampleAroundCenter", OPTION_INT}, {NULL, 0}, }; void checkUniqueNames(struct bed *bedList) /* Make sure all names in bedList are unique */ { struct hash *hash = hashNew(16); struct bed *bed; for (bed = bedList; bed != NULL; bed = bed->next) { char *name = bed->name; if (hashLookup(hash, name) != NULL) errAbort("%s duplicated in input bed", name); else hashAdd(hash, name, NULL); @@ -93,43 +97,53 @@ } } void averageFetchingEachBlock(struct bbiFile *bbi, struct bed *bedList, int fieldCount, FILE *f, FILE *bedF) /* Do the averaging fetching each block from bedList from bigWig. Fastest for short bedList. */ { struct lm *lm = lmInit(0); struct bed *bed; for (bed = bedList; bed != NULL; bed = bed->next) { int coverage = 0; double sum = 0.0; int size = 0; + if (sampleAroundCenter > 0) + { + int center = (bed->chromStart + bed->chromEnd)/2; + int left = center - (sampleAroundCenter/2); + addBigWigIntervalInfo(bbi, lm, bed->chrom, left, left+sampleAroundCenter, + &size, &coverage, &sum); + } + else + { if (fieldCount < 12) addBigWigIntervalInfo(bbi, lm, bed->chrom, bed->chromStart, bed->chromEnd, &size, &coverage, &sum); else { int i; for (i=0; iblockCount; ++i) { int start = bed->chromStart + bed->chromStarts[i]; int end = start + bed->blockSizes[i]; addBigWigIntervalInfo(bbi, lm, bed->chrom, start, end, &size, &coverage, &sum); } } + } /* Print out result, fudging mean to 0 if no coverage at all. */ double mean = 0; if (coverage > 0) mean = sum/coverage; fprintf(f, "%s\t%d\t%d\t%g\t%g\t%g\n", bed->name, size, coverage, sum, sum/size, mean); optionallyPrintBedPlus(bedF, bed, fieldCount, mean); } } int bedCmpChrom(const void *va, const void *vb) /* Compare strings such as chromosome names that may have embedded numbers, * so that chr4 comes before chr14 */ { const struct bed *a = *((struct bed **)va); @@ -180,45 +194,55 @@ /* Figure out which chromosome we're working on, and the last bed using it. */ char *chrom = bedList->chrom; nextChrom = nextChromInList(bedList); verbose(2, "Processing %s\n", chrom); if (bigWigValsOnChromFetchData(chromVals, chrom, bbi)) { double *valBuf = chromVals->valBuf; Bits *covBuf = chromVals->covBuf; /* Loop through beds doing sums and outputting. */ for (bed = bedList; bed != nextChrom; bed = bed->next) { int size = 0, coverage = 0; double sum = 0.0; + if (sampleAroundCenter > 0) + { + int center = (bed->chromStart + bed->chromEnd)/2; + int left = center - (sampleAroundCenter/2); + addBufIntervalInfo(valBuf, covBuf, left, left+sampleAroundCenter, + &size, &coverage, &sum); + } + else + { if (fieldCount < 12) { addBufIntervalInfo(valBuf, covBuf, bed->chromStart, bed->chromEnd, &size, &coverage, &sum); } else { int i; for (i=0; iblockCount; ++i) { int start = bed->chromStart + bed->chromStarts[i]; int end = start + bed->blockSizes[i]; addBufIntervalInfo(valBuf, covBuf, start, end, &size, &coverage, &sum); } } + } /* Print out result, fudging mean to 0 if no coverage at all. */ double mean = 0; if (coverage > 0) mean = sum/coverage; fprintf(f, "%s\t%d\t%d\t%g\t%g\t%g\n", bed->name, size, coverage, sum, sum/size, mean); optionallyPrintBedPlus(bedF, bed, fieldCount, mean); } verboseDot(); } else { /* If no bigWig data on this chromosome, just output as if coverage is 0 */ for (bed = bedList; bed != nextChrom; bed = bed->next) { @@ -261,18 +285,19 @@ averageFetchingEachBlock(bbi, bedList, fieldCount, f, bedF); else averageFetchingEachChrom(bbi, &bedList, fieldCount, f, bedF); carefulClose(&bedF); carefulClose(&f); } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); if (argc != 4) usage(); bedOut = optionVal("bedOut", bedOut); +sampleAroundCenter = optionInt("sampleAroundCenter", sampleAroundCenter); bigWigAverageOverBed(argv[1], argv[2], argv[3]); return 0; }