src/hg/instinct/hgGeneset/hgStats.c 1.4
1.4 2010/01/29 00:34:07 jsanborn
added 2D clustering
Index: src/hg/instinct/hgGeneset/hgStats.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/hgGeneset/hgStats.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -b -B -U 4 -r1.3 -r1.4
--- src/hg/instinct/hgGeneset/hgStats.c 28 Jan 2010 23:45:26 -0000 1.3
+++ src/hg/instinct/hgGeneset/hgStats.c 29 Jan 2010 00:34:07 -0000 1.4
@@ -228,16 +228,18 @@
void initializeRawData(int rows, int columns,
struct rawData *rdList, struct mapSettings *settings,
double **data, int **mask, double *arrayweight,
- double *geneorder, char **genename)
+ double *geneorder, char **genename,
+ double *sampleorder, char **samplename)
/*
* Code was adapted from LOAD function found in ../cluster/data.c to prepare
* data for input to treecluster program.
* Copyright (C) 2002 Michiel Jan Laurens de Hoon.
*/
{
-if (!rdList || !settings || !mask || !arrayweight || !geneorder || !genename)
+if (!rdList || !settings || !mask || !arrayweight || !geneorder || !genename
+ || !sampleorder || !samplename)
return;
int column, row;
@@ -271,12 +273,26 @@
continue;
genename[val] = cloneString(name);
}
+cookie = hashFirst(settings->x_index);
+while ((el = hashNext(&cookie)) != NULL)
+ {
+ char *name = el->name;
+ int val = ptToInt(el->val);
+
+ if (val >= columns)
+ continue;
+ samplename[val] = cloneString(name);
+ }
+
/* Fill the gene weights with the default value of 1.0 */
for (row = 0; row < rows; row++)
geneorder[row] = row;
+for (column = 0; column < columns; column++)
+ sampleorder[column] = column;
+
int prevSampleId = -1;
int prevFeatureId = -1;
char id[128];
struct rawData *rd;
@@ -451,21 +467,22 @@
return newGeneOrder;
}
-struct slName *clusterDataByGene(struct rawData *rdList, struct mapSettings *settings,
- char method, char metric)
+void clusterData(struct rawData *rdList, struct mapSettings *settings,
+ char method, char metric,
+ struct slName **geneOrder, struct slName **sampleOrder)
/* Set up data structures for cluster software, perform hierarchical clustering, and
* return a new sorted list of gene names according to clustering */
{
if (rdList == NULL || settings == NULL)
- return NULL;
+ return;
int rows = hashNumEntries(settings->y_index);
int columns = hashNumEntries(settings->x_index);
if (rows > MAX_ROWS || columns > MAX_COLUMNS)
- return NULL;
+ return;
/* Allocate space for array weights */
double *arrayweight = needMem(columns*sizeof(double));
@@ -477,15 +494,19 @@
double *geneorder = needMem(rows*sizeof(double));
int *geneindex = needMem(rows*sizeof(int));
char **genename = needMem(rows*sizeof(char*));
-initializeRawData(rows, columns, rdList, settings, data, mask, arrayweight, geneorder, genename);
+double *sampleorder = needMem(columns*sizeof(double));
+int *sampleindex = needMem(columns*sizeof(int));
+char **samplename = needMem(columns*sizeof(char*));
-/* Perform hierarchical clustering. */
-Node* tree = treecluster(rows, columns, data, mask, arrayweight, 0, metric, method, NULL);
+initializeRawData(rows, columns, rdList, settings, data, mask, arrayweight, geneorder, genename,
+ sampleorder, samplename);
-if (tree == NULL)
- return NULL;
+/* Perform hierarchical clustering in gene direction*/
+Node* tree = treecluster(rows, columns, data, mask, arrayweight, 0, metric, method, NULL);
+if (!tree)
+ return;
const int nNodes = rows - 1;
double *nodeorder = needMem(nNodes*sizeof(double));
int *nodecounts = needMem(nNodes*sizeof(int));
@@ -496,10 +517,31 @@
int i;
struct slName *newGeneOrder = NULL;
for (i = 0; i < rows; i++)
slNameAddHead(&newGeneOrder, genename[geneindex[i]]);
-
slReverse(&newGeneOrder);
+*geneOrder = newGeneOrder;
+
+freez(&nodecounts);
+freez(&nodeorder);
+
+/* Perform hierarchical clustering in sample direction. */
+tree = treecluster(rows, columns, data, mask, arrayweight, 1, metric, method, NULL);
+if (!tree)
+ return;
+
+const int ncNodes = columns - 1;
+nodeorder = needMem(ncNodes*sizeof(double));
+nodecounts = needMem(ncNodes*sizeof(int));
+getNodeOrderFromTree(tree, ncNodes, geneorder, nodeorder, nodecounts, metric);
+
+TreeSort(ncNodes, columns, sampleindex, sampleorder, nodeorder, nodecounts, tree);
+
+struct slName *newSampleOrder = NULL;
+for (i = 0; i < columns; i++)
+ slNameAddHead(&newSampleOrder, samplename[sampleindex[i]]);
+slReverse(&newSampleOrder);
+*sampleOrder = newSampleOrder;
freez(&nodecounts);
freez(&nodeorder);
freez(&tree);
@@ -508,10 +550,8 @@
freez(&genename);
freez(&arrayweight);
freez(&data);
freez(&mask);
-
-return newGeneOrder;
}
void outputGenePCA(int rows, int columns, struct slName *genes,
double eigenvalues[], double** eigenvectors, int order[])