e70152e44cc66cc599ff6b699eb8adc07f3e656a
kent
  Sat May 24 21:09:34 2014 -0700
Adding Copyright NNNN Regents of the University of California to all files I believe with reasonable certainty were developed under UCSC employ or as part of Genome Browser copyright assignment.
diff --git src/utils/tTestTwo/tTestTwo.c src/utils/tTestTwo/tTestTwo.c
index 01431c8..15ed77c 100644
--- src/utils/tTestTwo/tTestTwo.c
+++ src/utils/tTestTwo/tTestTwo.c
@@ -1,136 +1,139 @@
 /* tTestTwo - Do statistical t-test on two sample sets and return probability that the two samples came from the same population. */
+
+/* Copyright (C) 2006 The Regents of the University of California 
+ * See README in this or parent directory for licensing information. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 
 /* Which column to read from aFile and bFile respectively.  These are the
  * one based values given with -aCol and -bCol; both default to 1. */
 int aCol = 1;
 int bCol = 1;
 void usage()
 /* Hand the command line help text to errAbort. */
 {
 static char *helpText =
   "tTestTwo - Do statistical t-test on two sample sets and return \n"
   "probability that the two samples came from the same population.\n"
   "usage:\n"
   "   tTestTwo aFile bFile\n"
   "Where aFile and bFile both contain columns of floating point numbers.\n"
   "By default the first column will be used.  Use the options below for\n"
   "other columns. The aFile and bFile can be the same\n"
   "options:\n"
   "   -aCol=N - which column to use from aFile (one based)\n"
   "   -bCol=N - which column to use from bFile (one based)\n";
 /* The text has no format directives, so printing it through %s
  * yields the same message as using it as the format itself. */
 errAbort("%s", helpText);
 }
 
 /* Command line options this program declares, passed to optionInit in main.
  * Both are integer valued and are read back with optionInt.  The final
  * {NULL, 0} entry is presumably the end-of-list marker optionInit expects -
  * confirm against options.h. */
 static struct optionSpec options[] = {
    {"aCol", OPTION_INT},
    {"bCol", OPTION_INT},
    {NULL, 0},
 };
 
 void readSamples(char *fileName, int colIx, double **retSamples, int *retCount)
 /* Read in a column from a file of samples. 
  *    fileName   - file to read
  *    colIx      - zero based index of the column to parse as a double
  *    retSamples - returns a dynamically allocated array of the values read;
  *                 the caller is responsible for freeing it
  *    retCount   - returns the number of values in retSamples
  * Each row is required to have at least colIx+1 fields. */
 {
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 double *samples;
 int alloced = 1024;
 int count = 0;
 int rowSize = colIx+1;	/* Only need fields up to and including colIx. */
 char **row;
 AllocArray(row, rowSize);
 AllocArray(samples, alloced);
 
 while (lineFileNextRow(lf, row, rowSize))
     {
     if (count >= alloced)
         {
         /* Out of room - double the array. */
         int newAlloc = alloced+alloced;
         ExpandArray(samples, alloced, newAlloc);
         alloced = newAlloc;
         }
     samples[count] = lineFileNeedDouble(lf, row, colIx);
     count += 1;
     }
 
 /* All values have been copied into samples, so release the scratch row
  * array and close the file rather than leaking them. */
 freeMem(row);
 lineFileClose(&lf);
 
 *retSamples = samples;
 *retCount = count;
 uglyf("Read %d samples from %s\n", count, fileName);
 }
 
 void calcSumSquares(double *samples, int count,
 	double *retSum, double *retSumSquares)
 /* Add up the first count values of samples, and also their squares.
  * The two totals are returned in retSum and retSumSquares.  Both are
  * zero when count is zero or less. */
 {
 double total = 0;
 double totalOfSquares = 0;
 double *pt = samples;
 int remaining;
 for (remaining = count; remaining > 0; --remaining)
     {
     double val = *pt++;
     total += val;
     totalOfSquares += val*val;
     }
 *retSum = total;
 *retSumSquares = totalOfSquares;
 }
 
 void statsCalcMeanAndSd(double *samples, int count,
 	double *retMean, double *retSd)
 /* Calculate mean and standard deviation of sample. 
  *    samples - array of count values
  *    count   - number of values; aborts if less than two
  *    retMean - returns sum/count
  *    retSd   - returns standard deviation using a count-1 denominator */
 {
 double sum = 0, sumSquares = 0;
 double sumSquaredDev;
 if (count < 2)
     errAbort("Need at least two samples in statsCalcMeanAndSd");
 calcSumSquares(samples, count, &sum, &sumSquares);
 *retMean = sum/count;
 
 /* The difference below is mathematically never negative, but when the
  * samples are all (nearly) equal rounding can leave it a hair under zero,
  * which would make sqrt return NaN.  Clamp to zero in that case. */
 sumSquaredDev = sumSquares - sum*sum/count;
 if (sumSquaredDev < 0)
     sumSquaredDev = 0;
 *retSd = sqrt(sumSquaredDev/(count-1));
 uglyf("%d samples.  Mean %f, standard dev %f\n", count, *retMean, *retSd);
 }
 
 double statsIndependentT(double *aSamples, int aCount,
 	double *bSamples, int bCount)
 /* Compute the two sample t statistic (pooled variance form) for aSamples
  * vs. bSamples, and report it along with the degrees of freedom through
  * uglyf.  
  * NOTE: the conversion of t to a probability is not implemented here; the
  * function currently always returns 0. */
 {
 double aSum, aSumSquares;
 double bSum, bSumSquares;
 int degreesOfFreedom = aCount + bCount - 2;
 calcSumSquares(aSamples, aCount, &aSum, &aSumSquares);
 calcSumSquares(bSamples, bCount, &bSum, &bSumSquares);
 
 /* Sum of squared deviations from the mean, both samples combined. */
 double pooledSquares = 
 	aSumSquares - aSum*aSum/aCount + bSumSquares - bSum*bSum/bCount;
 /* Squared standard error of the difference between the two means. */
 double errSquared = 
 	pooledSquares / degreesOfFreedom * (1.0/aCount + 1.0/bCount);
 double meanDiff = aSum/aCount - bSum/bCount;
 double t = meanDiff / sqrt(errSquared);
 uglyf("t %f, degrees of freedom %d\n", t, aCount+bCount-2);
 return 0;	//uglyf
 }
 
 void tTestTwo(char *aFile, char *bFile)
 /* tTestTwo - Do statistical t-test on two sample sets and return probability 
  * that the two samples came from the same population. 
  *    aFile - file to read first sample set from, using column aCol
  *    bFile - file to read second sample set from, using column bCol
  * Results are reported through the debugging output of the helper
  * routines; the value returned by statsIndependentT is not used. */
 {
 double *aSamples, *bSamples;
 int aCount, bCount;
 double aMean, bMean;
 double aSd, bSd;
 uglyTime(NULL);
 /* aCol/bCol are one based, readSamples wants zero based. */
 readSamples(aFile, aCol-1, &aSamples, &aCount);
 readSamples(bFile, bCol-1, &bSamples, &bCount);
 uglyTime("Read samples");
 statsCalcMeanAndSd(aSamples, aCount, &aMean, &aSd);
 statsCalcMeanAndSd(bSamples, bCount, &bMean, &bSd);
 uglyTime("Did SD");
 statsIndependentT(aSamples, aCount, bSamples, bCount);
 uglyTime("T test");
 
 /* The sample arrays were allocated by readSamples on our behalf. */
 freeMem(aSamples);
 freeMem(bSamples);
 }
 
 int main(int argc, char *argv[])
 /* Parse the command line, check the column options, and run the test on
  * the two files named there. */
 {
 char *aFile, *bFile;
 optionInit(&argc, argv, options);
 /* After option processing exactly two file names should remain. */
 if (argc != 3)
     usage();
 aCol = optionInt("aCol", aCol);
 bCol = optionInt("bCol", bCol);
 /* Columns are one based, so anything below 1 is rejected. */
 if (!(aCol > 0 && bCol > 0))
     errAbort("aCol and bCol must be positive");
 aFile = argv[1];
 bFile = argv[2];
 tTestTwo(aFile, bFile);
 return 0;
 }