src/hg/instinct/lib/hgHeatmapLib.c 1.65

1.65 2010/02/18 05:58:51 larrym
add adjustBenjaminiHochberg
Index: src/hg/instinct/lib/hgHeatmapLib.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/lib/hgHeatmapLib.c,v
retrieving revision 1.64
retrieving revision 1.65
diff -b -B -U 4 -r1.64 -r1.65
--- src/hg/instinct/lib/hgHeatmapLib.c	10 Feb 2010 21:35:35 -0000	1.64
+++ src/hg/instinct/lib/hgHeatmapLib.c	18 Feb 2010 05:58:51 -0000	1.65
@@ -1,14 +1,8 @@
 /********************************************************************************/
 /* Copyright 2007-2009 -- The Regents of the University of California           */
 /********************************************************************************/
 
-/* hgHeatmap is a CGI script that produces a web page containing
- * a graphic with all chromosomes in genome, and a heatmap or two
- * on top of them. This module just contains the main routine,
- * the routine that dispatches to various page handlers, and shared
- * utility functions. */
-
 #include "common.h"
 #include "float.h"
 #include "bed.h"
 #include "hash.h"
@@ -1415,10 +1409,11 @@
     hashElFreeList(&hashList);
 return ptArray;
 }
 
-void hgStatsAdjust(struct hgStats *hsList, double corr)
+static void hgStatsAdjustUtil(struct hgStats *hsList, double corr, boolean onlyOne)
 {
+// onlyOne == TRUE means you want to adjust just one record.
 if (!hsList)
     return;
 
 struct hgStats *hs;
@@ -1440,11 +1435,18 @@
 	hs->outputVal -= corr;
 	if (hs->outputVal > 0)
 	    hs->outputVal = 0.0;
 	}
+    if(onlyOne)
+        break;
     }
 }
 
+void hgStatsAdjust(struct hgStats *hsList, double corr)
+{
+hgStatsAdjustUtil(hsList, corr, FALSE);
+}
+
 void listAdjustBonferroni(struct analysisResult *arList)
 {
 if (!arList)
     return;
@@ -1494,9 +1496,9 @@
 }
 
 
 
-/* Calculate the differnce of means between two subsets 
+/* Calculate the difference of means between two subsets 
  * Clean memory of returned pointer after use */
 struct analysisResult *diffAveSubgroup (struct genoHeatmap *gh, 
 					int subsetNum, char *raName, 
 					boolean (*func)(float data1[], unsigned long n1, 
@@ -2496,4 +2498,121 @@
 	return fResult;
 
 }
 */
+
+struct floatList {
+    struct floatList *next;
+    struct hgStats *hs;
+};
+
+int floatListRevCmp(const void *va, const void *vb)
+{
+// reversed (descending) sort callback - handles fact that 
+const struct floatList *a = *((struct floatList **)va);
+const struct floatList *b = *((struct floatList **)vb);
+if(a->hs->prob > b->hs->prob)
+    // bigger -log() means the value is smaller
+    return 1;
+else if(a->hs->prob < b->hs->prob)
+    return -1;
+else
+    return 0;
+}
+
+static void adjustBenjaminiHochbergUtil(struct floatList *list, int len)
+{
+// len is length of list (saves a slCount).
+struct floatList *ele;
+double logLen = log10(len);
+int i;
+float min = -1;
+
+// sort list into descending order (really ascending -log(p-values))
+
+slSort(&list, floatListRevCmp);
+for (i = len, ele=list;ele;ele = ele->next, i--)
+    {
+    if(i < len)
+        {
+        // fprintf(stderr, "%i\t%f\t%f\t%f\n", i, ele->hs->prob, log10(i), logLen);
+        hgStatsAdjustUtil(ele->hs, log10(i) - logLen, TRUE);
+        }
+    }
+
+// We imitate R's implementation of B-H (p.adjust(p, "BH")) and ensure the following property:
+// If a p_i <= p_j in original list, then p_i <= p_j in the adjusted list.
+
+for (ele=list;ele;ele=ele->next)
+    {
+    if(min >= 0 && ele->hs->prob < min)
+        ele->hs->prob = min;
+    else
+        min = ele->hs->prob;
+    }
+}
+
+void listAdjustBenjaminiHochberg(struct analysisResult *arList)
+{
+if (!arList)
+    return;
+
+// fprintf(stderr, "Hello from listAdjustBenjaminiHochberg\n");
+struct analysisResult *ar = arList;  // I don't think this is ever a list.
+struct floatList *list = NULL;
+
+struct hgStats *hsList = ar->stats; 
+int len = 0;
+
+for(;hsList;hsList = hsList->next)
+    {
+    struct floatList *sl;
+    AllocVar(sl);
+    sl->hs = hsList;
+    slAddHead(&list, sl);
+    len++;
+    }
+// fprintf(stderr, "len: %d vs slCount: %d\n", len, slCount(list));
+adjustBenjaminiHochbergUtil(list, len);
+slFreeList(&list);
+}
+
+void hashAdjustBenjaminiHochberg(struct analysisResultHash *arHash)
+{
+if (!arHash)
+    return;
+struct hash *hash = arHash->hash;
+if (!hash)
+    return;
+
+// fprintf(stderr, "Hello from hashAdjustBenjaminiHochberg\n");
+struct floatList *list = NULL;
+struct hashEl *el;
+struct hashCookie hc = hashFirst(hash);
+int len = 0;
+while ((el = hashNext(&hc)) != NULL)
+    {
+    struct floatList *sl;
+    struct hgStats *hs = el->val;
+    AllocVar(sl);
+    sl->hs = hs;
+    slAddHead(&list, sl);
+    len++;
+    }
+adjustBenjaminiHochbergUtil(list, len);
+slFreeList(&list);
+}
+
+void adjustBenjaminiHochberg(struct genoHeatmap *gh)
+{
+/* Implement Benjamini-Hochberg FDR control algorithm:
+
+Benjamini, Y., and Hochberg, Y. (1995). Controlling the false discovery rate:
+a practical and powerful approach to multiple testing. Journal of the Royal
+Statistical Society Series B, 57, 289
+*/
+
+if (gh->anaResult)
+    listAdjustBenjaminiHochberg(gh->anaResult);
+else if (gh->anaResultHash)
+    hashAdjustBenjaminiHochberg(gh->anaResultHash);
+}