de438b6041d00b72be32bcdec70436a9d201baf2 braney Sat May 21 12:17:40 2016 -0700 libify the guts of ave so I can use it in hgc diff --git src/utils/ave/ave.c src/utils/ave/ave.c index 418a4b0..c989841 100644 --- src/utils/ave/ave.c +++ src/utils/ave/ave.c @@ -1,125 +1,68 @@ /* ave - Compute average and basic stats. */ #include "common.h" #include "linefile.h" #include "hash.h" #include "options.h" #include "sqlNum.h" #include "hmmstats.h" +#include "aveStats.h" #include static int col = 1; static bool tableOut = FALSE; static bool noQuartiles = FALSE; void usage() /* Explain usage and exit. */ { errAbort( "ave - Compute average and basic stats\n" "usage:\n" " ave file\n" "options:\n" " -col=N Which column to use. Default 1\n" " -tableOut - output by columns (default output in rows)\n" " -noQuartiles - only calculate min,max,mean,standard deviation\n" " - for large data sets that will not fit in memory." ); } -int cmpDouble(const void *va, const void *vb) -/* Compare two slNames. */ -{ -const double *a = va; -const double *b = vb; -double diff = *a - *b; -if (diff < 0) - return -1; -else if (diff > 0) - return 1; -else - return 0; -} - void showStats(double *array, int count) /* Compute stats on sorted array */ { -double val, minVal = DBL_MAX, maxVal = -DBL_MAX; -double total = 0, average; -int i; -int q1Index, q3Index; /* quartile positions */ -double q1, q3; /* quartile values */ -double oneVar, totalVar = 0; - -for (i=0; i val) minVal = val; - if (maxVal < val) maxVal = val; - total += val; - } -average = total/count; - -q1Index = (count+1)/4; /* one fourth, rounded down */ -q3Index = (3*(count+1))/4; /* three fourths, rounded down */ -if (q1Index < (count-1)) - { - double range = array[q1Index+1] - array[q1Index]; - q1 = array[q1Index] + - ((((double)count+1.0)/4.0)-(double)q1Index)*range; - } -else - q1 = array[q1Index]; -if (q3Index < (count-1)) - { - double range = array[q3Index+1] - array[q3Index]; - q3 = array[q3Index] + - ((3.0*((double)count+1.0)/4.0)-(double)q3Index)*range; - } -else - q3 = array[q3Index]; - -for (i=0; i 1) - var /= count-1; - double stdDev = sqrt(var); if (tableOut) { printf("# min Q1 median Q3 max mean N sum stddev\n"); - printf("%g %g %g %g %g %g %d %g %g\n", minVal, q1, array[count/2], - q3, maxVal, average, count, total, stdDev); + printf("%g %g %g %g %g %g %d %g %g\n", as->minVal, as->q1, as->median, + as->q3, as->maxVal, as->average, as->count, as->total, as->stdDev); } else { - printf("Q1 %f\n", q1); - printf("median %f\n", array[count/2]); - printf("Q3 %f\n", q3); - printf("average %f\n", average); - printf("min %f\n", minVal); - printf("max %f\n", maxVal); - printf("count %d\n", count); - printf("total %f\n", total); - printf("standard deviation %f\n", stdDev); + printf("Q1 %f\n", as->q1); + printf("median %f\n", as->median); + printf("Q3 %f\n", as->q3); + printf("average %f\n", as->average); + printf("min %f\n", as->minVal); + printf("max %f\n", as->maxVal); + printf("count %d\n", as->count); + printf("total %f\n", as->total); + printf("standard deviation %f\n", as->stdDev); } } void aveNoQuartiles(char *fileName) /* aveNoQuartiles - Compute only min,max,mean,stdDev no quartiles */ { bits64 count = 0; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *words[128], *word; int wordCount; int wordIx = col-1; double sumData = 0.0, sumSquares = 0.0; double minVal = DBL_MAX, maxVal = -DBL_MAX; while ((wordCount = lineFileChop(lf, words)) > 0) @@ -171,31 +114,30 @@ while ((wordCount = lineFileChop(lf, words)) > 0) { if (count >= alloc) { alloc <<= 1; ExpandArray(array, count, alloc); } word = words[wordIx]; if (word[0] == '-' || isdigit(word[0])) { array[count++] = atof(word); } } if (count == 0) errAbort("No numerical data column %d of %s", col, fileName); -qsort(array, count, sizeof(array[0]), cmpDouble); showStats(array, count); } int main(int argc, char *argv[]) /* Process command line. */ { optionHash(&argc, argv); if (argc != 2) usage(); col = optionInt("col", col); tableOut = optionExists("tableOut"); noQuartiles = optionExists("noQuartiles"); if (noQuartiles) aveNoQuartiles(argv[1]); else