src/hg/instinct/inc/hgStatsLib.h 1.6

1.6 2009/06/04 03:47:23 jsanborn
added copyright notices, removed cluster library
Index: src/hg/instinct/inc/hgStatsLib.h
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/inc/hgStatsLib.h,v
retrieving revision 1.5
retrieving revision 1.6
diff -b -B -U 1000000 -r1.5 -r1.6
--- src/hg/instinct/inc/hgStatsLib.h	17 Mar 2009 22:51:03 -0000	1.5
+++ src/hg/instinct/inc/hgStatsLib.h	4 Jun 2009 03:47:23 -0000	1.6
@@ -1,200 +1,204 @@
+/********************************************************************************/
+/* Copyright 2007-2009 -- The Regents of the University of California           */
+/********************************************************************************/
+
 /*************** statistics functions ************************/
 
 #ifndef STATS_H
 #define STATS_H
 
 boolean aveDiff( float data1[], unsigned long n1, float data2[], unsigned long n2, float *r, float *prob);
 /* return 0 if fail */
 
 boolean ttest(float data1[], unsigned long n1, float data2[], unsigned long n2,
 	      float *t, float *prob);
 /* t-test copied from NC, return 0 if fail */
 
 boolean expressionCoherence(float data1[], unsigned long n1, float data2[], unsigned long n2,float *t, float *prob);
 /* Coherence test, return 0 if fail */
 
 boolean fishersLinearDisc(float data1[], unsigned long n1, float data2[], unsigned long n2,float *t, float *prob);
 /* Fisher's linear discriminant, return 0 if fail */
 
 boolean fishersExact(float data1[], unsigned long n1, float data2[], unsigned long n2,float *t, float *prob);
 /* Fisher's exact test, return 0 if fail */
 
 boolean levene(float data1[], unsigned long n1, float data2[], unsigned long n2,float *f, float *prob);
 /* homogeneity of variance test, return 0 if fail */
 
 boolean brownForsythe(float data1[], unsigned long n1, float data2[], unsigned long n2,float *f, float *prob);
 /* homogeneity of variance test, return 0 if fail */
 
 boolean fishersMeta(struct slDouble *data, float *chi2, float *prob);
 /* fisher's metaanalysis, return 0 if fail */
 
 boolean fishersMetaSigned(struct slDouble *data, float *chi2, float *prob);
 /* Fisher's meta-analysis (signed variant), return 0 if fail */
 
 boolean stoufferMeta(struct slDouble *data, float *norm, float *prob);
 /* stouffer's metaanalysis, return 0 if fail */
 
 boolean mudholkarMeta(struct slDouble *data, float *tval, float *prob);
 /* mudholkar's metaanalysis, return 0 if fail */
 
 boolean symmUniMeta(struct slDouble *data, float *chi2, float *prob);
 /* symmetric uniform metaanalysis, return 0 if fail */
 double zScore(double p);  /* NOTE(review): presumably maps a p-value to a normal z-score -- confirm against the implementation */
 double pvcQuadraticHelper(int n, double* pvalues, double* subcomb);  /* NOTE(review): helper over n p-values; the role of subcomb is not visible from this header -- confirm */
 int cmp_double_asc(const void* pa, const void* pb);  /* qsort-style comparator signature; presumably orders doubles ascending -- confirm */
 
 boolean jarqueBera(float data1[], unsigned long n1, float data2[], unsigned long n2,
          float *z, float *prob);
 
 boolean wilcoxon(float data1[], unsigned long n1, float data2[], unsigned long n2,
 		 float *u, float *prob);
 
 /* This is a wrapper function that runs an adapted version of the Mann-Whitney-Wilcoxon test implemented by ALGLIB.
  * It computes the z value of the U statistic and the two-tailed p-value.
  * Returns 0 on failure, 1 on success.
  * The Mann-Whitney U test (Mann-Whitney-Wilcoxon test) is adapted from the ALGLIB implementation;
  * see the copyright notice below. */
 
 /*************************************************************************
 Mann-Whitney U-test
 
 This test checks hypotheses about whether X  and  Y  are  samples  of  two
 continuous distributions of the same shape  and  same  median  or  whether
 their medians are different.
 
 The following tests are performed:
     * two-tailed test (null hypothesis - the medians are equal)
     * left-tailed test (null hypothesis - the median of the  first  sample
       is greater than or equal to the median of the second sample)
     * right-tailed test (null hypothesis - the median of the first  sample
       is less than or equal to the median of the second sample).
 
 Requirements:
     * the samples are independent
     * X and Y are continuous distributions (or discrete distributions well-
       approximating continuous distributions)
     * distributions of X and Y have the  same  shape.  The  only  possible
       difference is their position (i.e. the value of the median)
     * the number of elements in each sample is not less than 5
     * the scale of measurement should be ordinal, interval or ratio  (i.e.
       the test could not be applied to nominal variables).
 
 The test is non-parametric and doesn't require distributions to be normal.
 
 Input parameters:
     X   -   sample 1. Array whose index goes from 0 to N-1.
     N   -   size of the sample. N>=5
     Y   -   sample 2. Array whose index goes from 0 to M-1.
     M   -   size of the sample. M>=5
 
 Output parameters:
     BothTails   -   p-value for two-tailed test.
                     If BothTails is less than the given significance level
                     the null hypothesis is rejected.
     LeftTail    -   p-value for left-tailed test.
                     If LeftTail is less than the given significance level,
                     the null hypothesis is rejected.
     RightTail   -   p-value for right-tailed test.
                     If RightTail is less than the given significance level
                     the null hypothesis is rejected.
 
 To calculate p-values, special approximation is used. This method lets  us
 calculate p-values with satisfactory  accuracy  in  interval  [0.0001, 1].
 There is no approximation outside the [0.0001, 1] interval. Therefore,  if
 the significance level lies outside this interval, the test returns 0.0001.
 
 Relative precision of approximation of p-value:
 
 N          M          Max.err.   Rms.err.
 5..10      N..10      1.4e-02    6.0e-04
 5..10      N..100     2.2e-02    5.3e-06
 10..15     N..15      1.0e-02    3.2e-04
 10..15     N..100     1.0e-02    2.2e-05
 15..100    N..100     6.1e-03    2.7e-06
 
 For N,M>100 accuracy checks weren't put into  practice,  but  taking  into
 account characteristics of asymptotic approximation used, precision should
 not be sharply different from the values for interval [5, 100].
 
   -- ALGLIB --
      Copyright 09.04.2007 by Bochkanov Sergey
 *************************************************************************/
 
 /*************************************************************************
 
 Jarque-Bera test
 
 
 
 This test checks the hypothesis that a given sample X is  drawn  from  a
 
 normally distributed random variable.
 
 
 
 Requirements:
 
     * the number of elements in the sample is not less than 5.
 
 
 
 Input parameters:
 
     X   -   sample. Array whose index goes from 0 to N-1.
 
     N   -   size of the sample. N>=5
 
 
 
 Output parameters:
 
     BothTails   -   p-value for two-tailed test.
 
                     If BothTails is less than the given significance level
 
                     the null hypothesis is rejected.
 
     LeftTail    -   p-value for left-tailed test.
 
                     If LeftTail is less than the given significance level,
 
                     the null hypothesis is rejected.
 
     RightTail   -   p-value for right-tailed test.
 
                     If RightTail is less than the given significance level
 
                     the null hypothesis is rejected.
 
 
 
 Accuracy of the approximation used (5<=N<=1951):
 
 
 
 p-value         relative error (5<=N<=1951)
 
 [1, 0.1]            < 1%
 
 [0.1, 0.01]         < 2%
 
 [0.01, 0.001]       < 6%
 
 [0.001, 0]          wasn't measured
 
 
 
 For N>1951 accuracy wasn't measured but it shouldn't be sharply  different
 
 from table values.
 
 
 
   -- ALGLIB --
 
      Copyright 09.04.2007 by Bochkanov Sergey
 
 *************************************************************************/
 
 
 #endif /* STATS_H */