src/hg/regulate/regBedStats/regBedStats.c 1.2
1.2 2010/03/10 19:45:19 kent
Moving doubleBoxWhiskerCalc to library.
Index: src/hg/regulate/regBedStats/regBedStats.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/regulate/regBedStats/regBedStats.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -b -B -U 1000000 -r1.1 -r1.2
--- src/hg/regulate/regBedStats/regBedStats.c 10 Mar 2010 19:41:55 -0000 1.1
+++ src/hg/regulate/regBedStats/regBedStats.c 10 Mar 2010 19:45:19 -0000 1.2
@@ -1,144 +1,109 @@
/* regBedStats - Go through bed files and calculate a bunch of statistics on them. */
#include "common.h"
#include "linefile.h"
#include "hash.h"
#include "options.h"
#include "hmmstats.h"
#include "obscure.h"
#include "sqlNum.h"
static char const rcsid[] = "$Id$";
int chromColIx = 0, startColIx=1, endColIx=2, scoreColIx=4;
void usage()
/* Explain usage and exit. */
{
errAbort(
"regBedStats - Go through bed files and calculate a bunch of statistics on them\n"
"usage:\n"
" regBedStats fileList output\n"
"options:\n"
" -chromColIx=N (default %d)\n"
" -startColIx=N (default %d)\n"
" -endColIx=N (default %d)\n"
" -scoreColIx=N (default %d)\n"
, chromColIx, startColIx, endColIx, scoreColIx
);
}
static struct optionSpec options[] = {
{"chromColIx", OPTION_INT},
{"startColIx", OPTION_INT},
{"endColIx", OPTION_INT},
{"scoreColIx", OPTION_INT},
{NULL, 0},
};
-void doubleBoxWhiskerCalc(int count, double *array, double *retMin,
- double *retQ1, double *retMedian, double *retQ3, double *retMax)
-/* Calculate what you need to draw a box and whiskers plot from an array of doubles. */
-{
-double median;
-doubleSort(count, array);
-*retMin = array[0];
-*retQ1 = array[(count+2)/4];
-int halfCount = count>>1;
-if ((count&1) == 1)
- *retMedian = array[halfCount];
-else
- {
- *retMedian = (array[halfCount] + array[halfCount-1]) * 0.5;
- }
-*retQ3 = array[(3*count+2)/4];
-*retMax = array[count-1];
-}
-
-void slDoubleBoxWhiskerCalc(struct slDouble *list, double *retMin,
- double *retQ1, double *retMedian, double *retQ3, double *retMax)
-/* Calculate what you need to draw a box and whiskers plot from a list of slDoubles. */
-{
-int i,count = slCount(list);
-struct slDouble *el;
-double *array, minVal, q1, median, q3, maxVal;
-if (count == 0)
- errAbort("Can't take do slDoubleBoxWhiskerCalc of empty list");
-AllocArray(array,count);
-for (i=0, el=list; i<count; ++i, el=el->next)
- array[i] = el->val;
-doubleBoxWhiskerCalc(count, array, retMin, retQ1, retMedian, retQ3, retMax);
-freeMem(array);
-}
-
void printStats(FILE *f, struct slDouble *list)
/* Print out stats on list: ave +-std min 1/4 median 3/4 max */
{
int count = 0;
struct slDouble *el;
double sum=0, sumSquared=0;
for (el = list; el != NULL; el = el->next)
{
sum += el->val;
sumSquared += el->val * el->val;
count += 1;
}
double ave = sum/count;
double std = calcStdFromSums(sum, sumSquared, count);
double minVal, q1, median, q3, maxVal;
slDoubleBoxWhiskerCalc(list, &minVal, &q1, &median, &q3, &maxVal);
fprintf(f, "\t%g+-%g [%g %g %g %g %g]", ave, std, minVal, q1, median, q3, maxVal);
}
void bedFileStats(char *bedFile, int colCount, FILE *f)
/* Collect stats on sizes of things in a bed file, and scores too. */
{
struct lineFile *lf = lineFileOpen(bedFile, TRUE);
struct slDouble *sizeList=NULL, *scoreList=NULL, *el;
char *row[colCount];
while (lineFileNextRow(lf, row, colCount))
{
int size = sqlUnsigned(row[endColIx]) - sqlUnsigned(row[startColIx]);
el = slDoubleNew(size);
slAddHead(&sizeList, el);
double score = sqlDouble(row[scoreColIx]);
el = slDoubleNew(score);
slAddHead(&scoreList, el);
}
fprintf(f, "%s\t%d\tsize:", bedFile, slCount(scoreList));
printStats(f, sizeList);
fprintf(f, "\tscore:");
printStats(f, scoreList);
fprintf(f, "\n");
lineFileClose(&lf);
}
void regBedStats(char *fileOfFiles, char *output)
/* regBedStats - Go through bed files and calculate a bunch of statistics on them. */
{
struct slName *in, *inList = readAllLines(fileOfFiles);
FILE *f = mustOpen(output, "w");
int colCount = max(chromColIx, startColIx);
colCount = max(colCount, endColIx);
colCount = max(colCount, scoreColIx);
colCount += 1;
for (in = inList; in != NULL; in = in->next)
{
bedFileStats(in->name, colCount, f);
}
carefulClose(&f);
}
int main(int argc, char *argv[])
/* Process command line. */
{
optionInit(&argc, argv, options);
if (argc != 3)
usage();
chromColIx = optionInt("chromColIx", chromColIx);
startColIx = optionInt("startColIx", startColIx);
endColIx = optionInt("endColIx", endColIx);
scoreColIx = optionInt("scoreColIx", scoreColIx);
regBedStats(argv[1], argv[2]);
return 0;
}