a44421a79fb36cc2036fe116b97ea3bc9590cd0c
braney
  Fri Dec 2 09:34:39 2011 -0800
removed rcsid (#295)
diff --git src/utils/ave/ave.c src/utils/ave/ave.c
index 8cd04e0..418a4b0 100644
--- src/utils/ave/ave.c
+++ src/utils/ave/ave.c
@@ -1,206 +1,205 @@
 /* ave - Compute average and basic stats. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "sqlNum.h"
 #include "hmmstats.h"
 #include <float.h>
 
-static char const rcsid[] = "$Id: ave.c,v 1.10 2009/11/18 20:07:23 hiram Exp $";
 
 static int col = 1;
 static bool tableOut = FALSE;
 static bool noQuartiles = FALSE;
 
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "ave - Compute average and basic stats\n"
   "usage:\n"
   "   ave file\n"
   "options:\n"
   "   -col=N Which column to use.  Default 1\n"
   "   -tableOut - output by columns (default output in rows)\n"
   "   -noQuartiles - only calculate min,max,mean,standard deviation\n"
   "                - for large data sets that will not fit in memory."
   );
 }
 
 int cmpDouble(const void *va, const void *vb)
 /* Compare two slNames. */
 {
 const double *a = va;
 const double *b = vb;
 double diff = *a - *b;
 if (diff < 0)
    return -1;
 else if (diff > 0)
    return 1;
 else
    return 0;
 }
 
 void showStats(double *array, int count)
 /* Compute stats on sorted array */
 {
 double val, minVal = DBL_MAX, maxVal = -DBL_MAX;
 double total = 0, average;
 int i;
 int q1Index, q3Index;		/*	quartile positions	*/
 double q1, q3;			/*	quartile values	*/
 double oneVar, totalVar = 0;
 
 for (i=0; i<count; ++i)
     {
     val = array[i];
     if (minVal > val) minVal = val;
     if (maxVal < val) maxVal = val;
     total += val;
     }
 average = total/count;
 
 q1Index = (count+1)/4;		/*	one fourth, rounded down	*/
 q3Index = (3*(count+1))/4;	/*	three fourths, rounded down	*/
 if (q1Index < (count-1))
     {
     double range = array[q1Index+1] - array[q1Index];
     q1 = array[q1Index] +
 	((((double)count+1.0)/4.0)-(double)q1Index)*range;
     }
 else
     q1 = array[q1Index];
 if (q3Index < (count-1))
     {
     double range = array[q3Index+1] - array[q3Index];
     q3 = array[q3Index] +
 	((3.0*((double)count+1.0)/4.0)-(double)q3Index)*range;
     }
 else
     q3 = array[q3Index];
 
 for (i=0; i<count; ++i)
     {
     val = array[i];
     oneVar = (average-val);
     totalVar += oneVar*oneVar;
     }
 
     double var = totalVar;
     if (count > 1)
 	var /= count-1;
     double stdDev = sqrt(var);
 if (tableOut)
     {
     printf("# min Q1 median Q3 max mean N sum stddev\n");
     printf("%g %g %g %g %g %g %d %g %g\n", minVal, q1, array[count/2],
 	q3, maxVal, average, count, total, stdDev);
     }
 else
     {
     printf("Q1 %f\n", q1);
     printf("median %f\n", array[count/2]);
     printf("Q3 %f\n", q3);
     printf("average %f\n", average);
     printf("min %f\n", minVal);
     printf("max %f\n", maxVal);
     printf("count %d\n", count);
     printf("total %f\n", total);
     printf("standard deviation %f\n", stdDev);
     }
 }
 
 void aveNoQuartiles(char *fileName)
 /* aveNoQuartiles - Compute only min,max,mean,stdDev no quartiles */
 {
 bits64 count = 0;
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 char *words[128], *word;
 int wordCount;
 int wordIx = col-1;
 double sumData = 0.0, sumSquares = 0.0;
 double minVal = DBL_MAX, maxVal = -DBL_MAX;
 
 while ((wordCount = lineFileChop(lf, words)) > 0)
     {
     word = words[wordIx];
     if (word[0] == '-' || isdigit(word[0]))
         {
 	double val = sqlDouble(word);
 	if (minVal > val) minVal = val;
 	if (maxVal < val) maxVal = val;
 	sumData += val;
 	sumSquares += val * val;
 	++count;
 	}
     }
 if (count == 0)
     errAbort("No numerical data column %d of %s", col, fileName);
 double average = sumData/count;
 double stdDev = calcStdFromSums(sumData, sumSquares, count);
 if (tableOut)
     {
     printf("# min max mean N sum stddev\n");
     printf("%g %g %g %llu %g %g\n",
 	minVal, maxVal, average, count, sumData, stdDev);
     }
 else
     {
     printf("average %f\n", average);
     printf("min %f\n", minVal);
     printf("max %f\n", maxVal);
     printf("count %llu\n", count);
     printf("total %f\n", sumData);
     printf("standard deviation %f\n", stdDev);
     }
 }
 
 void ave(char *fileName)
 /* ave - Compute average and basic stats. */
 {
 int count = 0;
 size_t alloc = 1024;
 double *array;
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 char *words[128], *word;
 int wordCount;
 int wordIx = col-1;
 
 AllocArray(array, alloc);
 while ((wordCount = lineFileChop(lf, words)) > 0)
     {
     if (count >= alloc)
         {
 	alloc <<= 1;
 	ExpandArray(array, count, alloc);
 	}
     word = words[wordIx];
     if (word[0] == '-' || isdigit(word[0]))
         {
 	array[count++] = atof(word);
 	}
     }
 if (count == 0)
     errAbort("No numerical data column %d of %s", col, fileName);
 qsort(array, count, sizeof(array[0]), cmpDouble);
 showStats(array, count);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionHash(&argc, argv);
 if (argc != 2)
     usage();
 col = optionInt("col", col);
 tableOut = optionExists("tableOut");
 noQuartiles = optionExists("noQuartiles");
 if (noQuartiles)
     aveNoQuartiles(argv[1]);
 else
     ave(argv[1]);
 
 return 0;
 }