src/hg/instinct/hgGeneset/hgStats.c 1.4

1.4 2010/01/29 00:34:07 jsanborn
added 2D clustering
Index: src/hg/instinct/hgGeneset/hgStats.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/hgGeneset/hgStats.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -b -B -U 4 -r1.3 -r1.4
--- src/hg/instinct/hgGeneset/hgStats.c	28 Jan 2010 23:45:26 -0000	1.3
+++ src/hg/instinct/hgGeneset/hgStats.c	29 Jan 2010 00:34:07 -0000	1.4
@@ -228,16 +228,18 @@
 
 void initializeRawData(int rows, int columns, 
 		       struct rawData *rdList, struct mapSettings *settings, 
 		       double **data, int **mask, double *arrayweight, 
-		       double *geneorder, char **genename)
+		       double *geneorder, char **genename, 
+		       double *sampleorder, char **samplename)
 /* 
  * Code was adapted from LOAD function found in ../cluster/data.c to prepare 
  * data for input to treecluster program. 
  * Copyright (C) 2002 Michiel Jan Laurens de Hoon.
  */
 {
-if (!rdList || !settings || !mask || !arrayweight || !geneorder || !genename)
+if (!rdList || !settings || !mask || !arrayweight || !geneorder || !genename
+    || !sampleorder || !samplename)
     return;
 
 int column, row;
 
@@ -271,12 +273,26 @@
 	continue;
     genename[val] = cloneString(name);
     }
 
+cookie = hashFirst(settings->x_index);
+while ((el = hashNext(&cookie)) != NULL)
+    {
+    char *name = el->name;
+    int val    = ptToInt(el->val);
+
+    if (val >= columns)
+	continue;
+    samplename[val] = cloneString(name);
+    }
+
 /* Fill the gene weights with the default value of 1.0 */
 for (row = 0; row < rows; row++) 
     geneorder[row] = row;
 
+for (column = 0; column < columns; column++)
+    sampleorder[column] = column;
+
 int prevSampleId  = -1;
 int prevFeatureId = -1;
 char id[128];
 struct rawData *rd;
@@ -451,21 +467,22 @@
 return newGeneOrder;
 }
 
 
-struct slName *clusterDataByGene(struct rawData *rdList, struct mapSettings *settings,
-				 char method, char metric)
+void clusterData(struct rawData *rdList, struct mapSettings *settings,
+		 char method, char metric, 
+		 struct slName **geneOrder, struct slName **sampleOrder)
 /* Set up data structures for cluster software, perform hierarchical clustering, and 
  * return a new sorted list of gene names according to clustering */
 {
 if (rdList == NULL || settings == NULL)
-    return NULL;
+    return;
 
 int rows    = hashNumEntries(settings->y_index);
 int columns = hashNumEntries(settings->x_index);
 
 if (rows > MAX_ROWS || columns > MAX_COLUMNS)
-    return NULL;
+    return;
 
 /* Allocate space for array weights */
 double *arrayweight = needMem(columns*sizeof(double));
 
@@ -477,15 +494,19 @@
 double *geneorder = needMem(rows*sizeof(double));
 int *geneindex = needMem(rows*sizeof(int));
 char **genename = needMem(rows*sizeof(char*));
 
-initializeRawData(rows, columns, rdList, settings, data, mask, arrayweight, geneorder, genename); 
+double *sampleorder = needMem(columns*sizeof(double));
+int *sampleindex    = needMem(columns*sizeof(int));
+char **samplename   = needMem(columns*sizeof(char*));
 
-/* Perform hierarchical clustering. */
-Node* tree = treecluster(rows, columns, data, mask, arrayweight, 0, metric, method, NULL);
+initializeRawData(rows, columns, rdList, settings, data, mask, arrayweight, geneorder, genename, 
+		  sampleorder, samplename);
 
-if (tree == NULL)
-    return NULL;
+/* Perform hierarchical clustering in gene direction*/
+Node* tree = treecluster(rows, columns, data, mask, arrayweight, 0, metric, method, NULL);
+if (!tree)
+    return;
 
 const int nNodes = rows - 1;
 double *nodeorder = needMem(nNodes*sizeof(double));
 int *nodecounts = needMem(nNodes*sizeof(int));
@@ -496,10 +517,31 @@
 int i;
 struct slName *newGeneOrder = NULL;
 for (i = 0; i < rows; i++)
     slNameAddHead(&newGeneOrder, genename[geneindex[i]]);
-
 slReverse(&newGeneOrder);
+*geneOrder = newGeneOrder;
+
+freez(&nodecounts);
+freez(&nodeorder);
+
+/* Perform hierarchical clustering in sample direction. */
+tree = treecluster(rows, columns, data, mask, arrayweight, 1, metric, method, NULL);
+if (!tree)
+    return;
+
+const int ncNodes  = columns - 1;
+nodeorder  = needMem(ncNodes*sizeof(double));
+nodecounts = needMem(ncNodes*sizeof(int));
+getNodeOrderFromTree(tree, ncNodes, geneorder, nodeorder, nodecounts, metric); 
+
+TreeSort(ncNodes, columns, sampleindex, sampleorder, nodeorder, nodecounts, tree);
+
+struct slName *newSampleOrder = NULL;
+for (i = 0; i < columns; i++)
+    slNameAddHead(&newSampleOrder, samplename[sampleindex[i]]);
+slReverse(&newSampleOrder);
+*sampleOrder = newSampleOrder;
 
 freez(&nodecounts);
 freez(&nodeorder);
 freez(&tree);
@@ -508,10 +550,8 @@
 freez(&genename);
 freez(&arrayweight);
 freez(&data);
 freez(&mask);
-
-return newGeneOrder;
 }
 
 void outputGenePCA(int rows, int columns, struct slName *genes, 
 		   double eigenvalues[], double** eigenvectors, int order[])