src/hg/instinct/hgGeneset/hgStats.h 1.1
1.1 2010/01/28 22:59:07 jsanborn
added clustering
Index: src/hg/instinct/hgGeneset/hgStats.h
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/hgGeneset/hgStats.h,v
retrieving revision 1.2
retrieving revision 1.1
diff -b -B -U 1000000 -r1.2 -r1.1
--- src/hg/instinct/hgGeneset/hgStats.h 29 Jan 2010 00:34:07 -0000 1.2
+++ src/hg/instinct/hgGeneset/hgStats.h 28 Jan 2010 22:59:07 -0000 1.1
@@ -1,80 +1,78 @@
#ifndef HGSTATS_H
#define HGSTATS_H
/* hgStats.h - Include file used by hgStats.
* hgStats is a collection of routines that set up hgHeatmap data for statistical
* analysis by cluster 3.0.
*
* Much of the code in these routines was adapted from the cluster 3.0 files
* "data.c" and "command.c".
*/
#include "cluster.h"
#include "hgHeatmapLib.h"
#include "vGfx.h"
#include "hgGenesets.h"
/* A set of data values together with its range and display color.
 * Elements chain through 'next' so several regions can share one list. */
struct dataVector
{
    struct dataVector *next;   /* next region in the linked list */
    int count;                 /* how many data values there are */
    double value;              /* data value(s) at these positions.
                                * NOTE(review): the earlier comment called this an
                                * array, yet the field is a plain double - confirm. */
    double min;                /* smallest data value in the set */
    double max;                /* largest data value in the set */
    Color color;               /* color used for the data point */
};
/* Summary of PCA results, kept as a singly linked list.
 * TODO: develop this more fully to keep general summary of PCA results. */
struct pcaData
{
    struct pcaData *next;   /* next element in the list */
    int numComponents;      /* presumably the number of entries in 'components' - confirm */
    double *components;     /* component values */
};
/* Plot layout constants.  The fixed graph dimensions below are currently
 * disabled (left commented out). */
//#define GRAPH_WIDTH 300 /* actual graphing area size */
//#define GRAPH_HEIGHT 300 /* left and bottom text will add to this */
#define PLOT_MARGIN 2 /* margin around the graph and around everything (presumably pixels - confirm) */
#define DOT_SIZE 2 /* size of a plotted point (presumably pixels - confirm) */
void getNodeOrderFromTree(Node *tree, int nNodes, const double *order,
                          double *nodeorder, int *nodecounts, char metric);
/* Derive the new gene order from the tree, for display.
 * NOTE(review): nodeorder and nodecounts look like caller-supplied output
 * arrays - confirm their required sizes against the definition. */
void initializeData(int rows, int columns,
                    struct hash *geneHash, struct slName *genes,
                    double **data, int **mask, double *arrayweight,
                    double *geneorder, char **genename);
/* Set up the data structures used by the cluster routines and populate them
 * with hgHeatmap data. */
void initializeRawData(int rows, int columns,
struct rawData *rdList, struct mapSettings *settings,
double **data, int **mask, double *arrayweight,
- double *geneorder, char **genename,
- double *sampleorder, char **samplename);
+ double *geneorder, char **genename);
-void clusterData(struct rawData *rdList, struct mapSettings *settings,
- char method, char metric,
- struct slName **geneOrder, struct slName **sampleOrder);
+struct slName *clusterDataByGene(struct rawData *rdList, struct mapSettings *settings,
+ char method, char metric);
struct slName *clusterGeneSet(struct hash *geneHash, struct slName *genes,
                              char method, char metric);
/* Prepare the data structures the cluster software needs, run hierarchical
 * clustering, and return a new list of gene names sorted according to the
 * clustering. */
Node *clusterSamplesByGeneSet(struct hash *geneHash, struct slName *allGenes,
                              char method, char metric, int transpose,
                              int *nRows, int *nCols);
/* Cluster along the samples and return the resulting tree.
 * NOTE(review): nRows and nCols appear to be output parameters - confirm. */
void pcaGeneSet(struct hash *geneHash, struct slName *genes,
                int rows, int columns,
                double **data, char **genename,
                double **u, double **v, double *w, int *svdorder);
/* Prepare the data structures the cluster software needs and run PCA on the
 * gene set. */
struct hash *performPCAandPlot(struct vGfx *vg,
                               int totalWidth, int totalHeight,
                               int startX, int startY,
                               struct hash *geneHash, struct slName *genes,
                               struct featureColor *fcList);
/* NOTE(review): undocumented in the original.  Judging by the name and
 * parameters, this runs PCA on the gene set and draws the result into vg
 * within the given area; the returned hash is presumably the same statHash
 * performPCA produces - confirm against the definition. */
struct hash *performPCA(struct hash *geneHash,
                        struct slName *genes);
/* Return the statHash holding the 1st and 2nd PCA components of the input
 * gene set, without doing anything else. */
#endif /* HGSTATS_H */