1fbd3bc156dcec78f3f89d715acdf9a264abdbeb
kent
  Thu Aug 11 12:27:30 2011 -0700
Adding sampleAroundCenter option.
diff --git src/utils/bigWigAverageOverBed/bigWigAverageOverBed.c src/utils/bigWigAverageOverBed/bigWigAverageOverBed.c
index 0a1e820..ad6d8ca 100644
--- src/utils/bigWigAverageOverBed/bigWigAverageOverBed.c
+++ src/utils/bigWigAverageOverBed/bigWigAverageOverBed.c
@@ -1,52 +1,56 @@
 /* bigWigAverageOverBed - Compute average score of big wig over each bed, which may have introns. */
 
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "localmem.h"
 #include "options.h"
 #include "verbose.h"
 #include "basicBed.h"
 #include "bigWig.h"
 #include "bits.h"
 
 static char const rcsid[] = "$Id: newProg.c,v 1.30 2010/03/24 21:18:33 hiram Exp $";
 
 char *bedOut = NULL;
+int sampleAroundCenter = 0;
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "bigWigAverageOverBed - Compute average score of big wig over each bed, which may have introns.\n"
   "usage:\n"
   "   bigWigAverageOverBed in.bw in.bed out.tab\n"
   "The output columns are:\n"
   "   name - name field from bed, which should be unique\n"
   "   size - size of bed (sum of exon sizes\n"
   "   covered - # bases within exons covered by bigWig\n"
   "   sum - sum of values over all bases covered\n"
   "   mean0 - average over bases with non-covered bases counting as zeroes\n"
   "   mean - average over just covered bases\n"
   "Options:\n"
   "   -bedOut=out.bed - Make output bed that is echo of input bed but with mean column appended\n"
+  "   -sampleAroundCenter=N - Take sample at region N bases wide centered around bed item, rather\n"
+  "                     than the usual sample in the bed item.\n"
   );
 }
 
 static struct optionSpec options[] = {
    {"bedOut", OPTION_STRING},
+   {"sampleAroundCenter", OPTION_INT},
    {NULL, 0},
 };
 
 void checkUniqueNames(struct bed *bedList)
 /* Make sure all names in bedList are unique */
 {
 struct hash *hash = hashNew(16);
 struct bed *bed;
 for (bed = bedList; bed != NULL; bed = bed->next)
     {
     char *name = bed->name;
     if (hashLookup(hash, name) != NULL)
         errAbort("%s duplicated in input bed", name);
     else
         hashAdd(hash, name, NULL);
@@ -93,43 +97,53 @@
     }
 }
 
 void averageFetchingEachBlock(struct bbiFile *bbi, struct bed *bedList, int fieldCount, 
 	FILE *f, FILE *bedF)
 /* Do the averaging fetching each block from bedList from bigWig.  Fastest for short bedList. */
 {
 struct lm *lm = lmInit(0);
 struct bed *bed;
 for (bed = bedList; bed != NULL; bed = bed->next)
     {
     int coverage = 0;
     double sum = 0.0;
     int size = 0;
 
+    if (sampleAroundCenter > 0)
+        {
+	int center = (bed->chromStart + bed->chromEnd)/2;
+	int left = center - (sampleAroundCenter/2);
+	addBigWigIntervalInfo(bbi, lm, bed->chrom, left, left+sampleAroundCenter, 
+		&size, &coverage, &sum);
+	}
+    else
+	{
     if (fieldCount < 12)
 	addBigWigIntervalInfo(bbi, lm, bed->chrom, bed->chromStart, bed->chromEnd, 
 		&size, &coverage, &sum);
     else
 	{
 	int i;
 	for (i=0; i<bed->blockCount; ++i)
 	    {
 	    int start = bed->chromStart + bed->chromStarts[i];
 	    int end = start + bed->blockSizes[i];
 	    addBigWigIntervalInfo(bbi, lm, bed->chrom, start, end, &size, &coverage, &sum);
 	    }
 	}
+	}
 
     /* Print out result, fudging mean to 0 if no coverage at all. */
     double mean = 0;
     if (coverage > 0)
 	 mean = sum/coverage;
     fprintf(f, "%s\t%d\t%d\t%g\t%g\t%g\n", bed->name, size, coverage, sum, sum/size, mean);
     optionallyPrintBedPlus(bedF, bed, fieldCount, mean);
     }
 }
 
 int bedCmpChrom(const void *va, const void *vb)
 /* Compare strings such as chromosome names that may have embedded numbers,
  * so that chr4 comes before chr14 */
 {
 const struct bed *a = *((struct bed **)va);
@@ -180,45 +194,55 @@
     /* Figure out which chromosome we're working on, and the last bed using it. */
     char *chrom = bedList->chrom;
     nextChrom = nextChromInList(bedList);
     verbose(2, "Processing %s\n", chrom);
 
     if (bigWigValsOnChromFetchData(chromVals, chrom, bbi))
 	{
 	double *valBuf = chromVals->valBuf;
 	Bits *covBuf = chromVals->covBuf;
 
 	/* Loop through beds doing sums and outputting. */
 	for (bed = bedList; bed != nextChrom; bed = bed->next)
 	    {
 	    int size = 0, coverage = 0;
 	    double sum = 0.0;
+	    if (sampleAroundCenter > 0)
+		{
+		int center = (bed->chromStart + bed->chromEnd)/2;
+		int left = center - (sampleAroundCenter/2);
+		addBufIntervalInfo(valBuf, covBuf, left, left+sampleAroundCenter,
+		    &size, &coverage, &sum);
+		}
+	    else
+		{
 	    if (fieldCount < 12)
 		{
 		addBufIntervalInfo(valBuf, covBuf, bed->chromStart, bed->chromEnd,
 		    &size, &coverage, &sum);
 		}
 	    else
 		{
 		int i;
 		for (i=0; i<bed->blockCount; ++i)
 		    {
 		    int start = bed->chromStart + bed->chromStarts[i];
 		    int end = start + bed->blockSizes[i];
 		    addBufIntervalInfo(valBuf, covBuf, start, end, &size, &coverage, &sum);
 		    }
 		}
+		}
 
 	    /* Print out result, fudging mean to 0 if no coverage at all. */
 	    double mean = 0;
 	    if (coverage > 0)
 		 mean = sum/coverage;
 	    fprintf(f, "%s\t%d\t%d\t%g\t%g\t%g\n", bed->name, size, coverage, sum, sum/size, mean);
 	    optionallyPrintBedPlus(bedF, bed, fieldCount, mean);
 	    }
 	verboseDot();
 	}
     else
         {
 	/* If no bigWig data on this chromosome, just output as if coverage is 0 */
 	for (bed = bedList; bed != nextChrom; bed = bed->next)
 	    {
@@ -261,18 +285,19 @@
     averageFetchingEachBlock(bbi, bedList, fieldCount, f, bedF);
 else
     averageFetchingEachChrom(bbi, &bedList, fieldCount, f, bedF);
 
 carefulClose(&bedF);
 carefulClose(&f);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 if (argc != 4)
     usage();
 bedOut = optionVal("bedOut", bedOut);
+sampleAroundCenter = optionInt("sampleAroundCenter", sampleAroundCenter);
 bigWigAverageOverBed(argv[1], argv[2], argv[3]);
 return 0;
 }