src/hg/regulate/regBedStats/regBedStats.c 1.2
1.2 2010/03/10 19:45:19 kent
Moving doubleBoxWhiskerCalc to library.
Index: src/hg/regulate/regBedStats/regBedStats.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/regulate/regBedStats/regBedStats.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -b -B -U 1000000 -r1.1 -r1.2
--- src/hg/regulate/regBedStats/regBedStats.c	10 Mar 2010 19:41:55 -0000	1.1
+++ src/hg/regulate/regBedStats/regBedStats.c	10 Mar 2010 19:45:19 -0000	1.2
@@ -1,144 +1,109 @@
 /* regBedStats - Go through bed files and calculate a bunch of statistics on them. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "hmmstats.h"
 #include "obscure.h"
 #include "sqlNum.h"
 
 static char const rcsid[] = "$Id$";
 
 int chromColIx = 0, startColIx=1, endColIx=2, scoreColIx=4;
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "regBedStats - Go through bed files and calculate a bunch of statistics on them\n"
   "usage:\n"
   "   regBedStats fileList output\n"
   "options:\n"
   "   -chromColIx=N (default %d)\n"
   "   -startColIx=N (default %d)\n"
   "   -endColIx=N (default %d)\n"
   "   -scoreColIx=N (default %d)\n"
   , chromColIx, startColIx, endColIx, scoreColIx
   );
 }
 
 static struct optionSpec options[] = {
    {"chromColIx", OPTION_INT},
    {"startColIx", OPTION_INT},
    {"endColIx", OPTION_INT},
    {"scoreColIx", OPTION_INT},
    {NULL, 0},
 };
 
-void doubleBoxWhiskerCalc(int count, double *array, double *retMin, 
-	double *retQ1, double *retMedian, double *retQ3, double *retMax)
-/* Calculate what you need to draw a box and whiskers plot from an array of doubles. */
-{
-double median;
-doubleSort(count, array);
-*retMin = array[0];
-*retQ1 = array[(count+2)/4];
-int halfCount = count>>1;
-if ((count&1) == 1)
-    *retMedian = array[halfCount];
-else
-    {
-    *retMedian = (array[halfCount] + array[halfCount-1]) * 0.5;
-    }
-*retQ3 = array[(3*count+2)/4];
-*retMax = array[count-1];
-}
-
-void slDoubleBoxWhiskerCalc(struct slDouble *list, double *retMin, 
-	double *retQ1, double *retMedian, double *retQ3, double *retMax)
-/* Calculate what you need to draw a box and whiskers plot from a list of slDoubles. */
-{
-int i,count = slCount(list);
-struct slDouble *el;
-double *array, minVal, q1, median, q3, maxVal;
-if (count == 0)
-    errAbort("Can't take do slDoubleBoxWhiskerCalc of empty list");
-AllocArray(array,count);
-for (i=0, el=list; i<count; ++i, el=el->next)
-    array[i] = el->val;
-doubleBoxWhiskerCalc(count, array, retMin, retQ1, retMedian, retQ3, retMax);
-freeMem(array);
-}
-
 void printStats(FILE *f, struct slDouble *list)
 /* Print out stats on list: ave +-std min 1/4 median 3/4 max */
 {
 int count = 0;
 struct slDouble *el;
 double sum=0, sumSquared=0;
 for (el = list; el != NULL; el = el->next)
     {
     sum += el->val;
     sumSquared += el->val * el->val;
     count += 1;
     }
 double ave = sum/count;
 double std = calcStdFromSums(sum, sumSquared, count);
 double minVal, q1, median, q3, maxVal;
 slDoubleBoxWhiskerCalc(list, &minVal, &q1, &median, &q3, &maxVal);
 fprintf(f, "\t%g+-%g [%g %g %g %g %g]", ave, std, minVal, q1, median, q3, maxVal);
 }
 
 void bedFileStats(char *bedFile, int colCount, FILE *f)
 /* Collect stats on sizes of things in a bed file, and scores too. */
 {
 struct lineFile *lf = lineFileOpen(bedFile, TRUE);
 struct slDouble *sizeList=NULL, *scoreList=NULL, *el;
 char *row[colCount];
 while (lineFileNextRow(lf, row, colCount))
     {
     int size = sqlUnsigned(row[endColIx]) - sqlUnsigned(row[startColIx]);
     el = slDoubleNew(size);
     slAddHead(&sizeList, el);
     double score = sqlDouble(row[scoreColIx]);
     el = slDoubleNew(score);
     slAddHead(&scoreList, el);
     }
 fprintf(f, "%s\t%d\tsize:", bedFile, slCount(scoreList));
 printStats(f, sizeList);
 fprintf(f, "\tscore:");
 printStats(f, scoreList);
 fprintf(f, "\n");
 lineFileClose(&lf);
 }
 
 void regBedStats(char *fileOfFiles, char *output)
 /* regBedStats - Go through bed files and calculate a bunch of statistics on them. */
 {
 struct slName *in, *inList = readAllLines(fileOfFiles);
 FILE *f = mustOpen(output, "w");
 int colCount = max(chromColIx, startColIx);
 colCount = max(colCount, endColIx);
 colCount = max(colCount, scoreColIx);
 colCount += 1;
 for (in = inList; in != NULL; in = in->next)
     {
     bedFileStats(in->name, colCount, f);
     }
 carefulClose(&f);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 if (argc != 3)
     usage();
 chromColIx = optionInt("chromColIx", chromColIx);
 startColIx = optionInt("startColIx", startColIx);
 endColIx = optionInt("endColIx", endColIx);
 scoreColIx = optionInt("scoreColIx", scoreColIx);
 
 regBedStats(argv[1], argv[2]);
 return 0;
 }