src/hg/instinct/hgGeneset/hgStats.h 1.1

1.1 2010/01/28 22:59:07 jsanborn
added clustering
Index: src/hg/instinct/hgGeneset/hgStats.h
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/hgGeneset/hgStats.h,v
retrieving revision 1.2
retrieving revision 1.1
diff -b -B -U 1000000 -r1.2 -r1.1
--- src/hg/instinct/hgGeneset/hgStats.h	29 Jan 2010 00:34:07 -0000	1.2
+++ src/hg/instinct/hgGeneset/hgStats.h	28 Jan 2010 22:59:07 -0000	1.1
@@ -1,80 +1,78 @@
 #ifndef HGSTATS_H
 #define HGSTATS_H
 
 /* hgStats.h - Include file used by hgStats.
  * hgStats is a collection of routines that set up hgHeatmap data for statistical
  * analysis by cluster 3.0.
  *
  * Much of the code in these routines was adapted from the cluster 3.0 files "data.c" or "command.c".
  */
 
 #include "cluster.h"
 #include "hgHeatmapLib.h"
 #include "vGfx.h"
 #include "hgGenesets.h"
 
 struct dataVector    /* data value(s) with min/max bounds and a plot color; chained through next */
 {
     struct dataVector *next;    /* next element in linked list (for multiple regions) */
     int count;          /* number of data values */
     double value;       /* data value; NOTE(review): declared as a scalar double although the old comment said "array of data values" - confirm intent */
     double min;         /* minimum data value in the set */
     double max;         /* maximum data value in the set */
     Color color;        /* color of data point */
 };            
 
 struct pcaData    /* TODO: develop this more fully to keep a general summary of PCA results */
 {
     struct pcaData *next;    /* next element in linked list */
     int numComponents;       /* presumably the number of entries in components - TODO confirm */
     double *components;      /* presumably the PCA component values; allocation and ownership are not visible in this header */
 };
 
 //#define GRAPH_WIDTH     300     /*      actual graphing area size (define is currently commented out) */
 //#define GRAPH_HEIGHT    300     /*      left and bottom text will add to this (define is currently commented out) */
 #define PLOT_MARGIN     2       /*      margin around graph and around everything */
 #define DOT_SIZE        2       /*      size of plotted point   */
 
 void getNodeOrderFromTree(Node* tree, int nNodes, const double* order, 
 			  double *nodeorder, int *nodecounts, char metric);
 /* Get new order of genes from Tree for display. NOTE(review): nodeorder and nodecounts are non-const and look like outputs - confirm in the implementation */
 
 void initializeData(int rows, int columns, struct hash *geneHash, struct slName *genes, 
 		    double **data, int **mask, double *arrayweight, 
 		    double *geneorder, char **genename);
 /* Initialize data structures and fill them with hgHeatmap data for the cluster routines. NOTE(review): rows/columns presumably size data and mask - confirm */
 
 void initializeRawData(int rows, int columns,
 		       struct rawData *rdList, struct mapSettings *settings,
 		       double **data, int **mask, double *arrayweight,
-		       double *geneorder, char **genename, 
-		       double *sampleorder, char **samplename);
+		       double *geneorder, char **genename);
 /* NOTE(review): undocumented; presumably the rawData-based counterpart of initializeData - confirm. Rev 1.2 additionally takes sampleorder and samplename. */
-void clusterData(struct rawData *rdList, struct mapSettings *settings,
-		 char method, char metric, 
-		 struct slName **geneOrder, struct slName **sampleOrder);
+struct slName *clusterDataByGene(struct rawData *rdList, struct mapSettings *settings,
+				 char method, char metric);
 /* Rev 1.2 declares void clusterData() with geneOrder/sampleOrder pointer-to-pointer arguments; rev 1.1 declares clusterDataByGene(), which returns a pointer to struct slName. */
 struct slName *clusterGeneSet(struct hash *geneHash, 
 			      struct slName *genes, char method, char metric);
 /* Set up data structures for the cluster software, perform hierarchical clustering, and 
  * return a new list of gene names sorted according to the clustering */
 
 Node *clusterSamplesByGeneSet(struct hash *geneHash, struct slName *allGenes,
 			      char method, char metric, int transpose, int *nRows, int *nCols);
 /* Performs clustering along samples and returns the tree. NOTE(review): nRows/nCols are pointers, presumably filled in for the caller - confirm */
 
 void pcaGeneSet(struct hash *geneHash, struct slName *genes, 
 		int rows, int columns, double **data, char **genename, 
 		double **u, double **v, double *w, int *svdorder);
 /* Set up data structures for the cluster software and perform PCA on the gene set. NOTE(review): u, v, w and svdorder presumably receive the results - confirm */
 
 struct hash *performPCAandPlot(struct vGfx *vg, int totalWidth, int totalHeight, 
 		    int startX, int startY, struct hash *geneHash, 
 		    struct slName *genes, struct featureColor *fcList); 
 /* NOTE(review): undocumented; going by the name and the vg/size/start arguments, presumably runs PCA on the gene set and draws the plot - confirm */
 struct hash *performPCA(struct hash *geneHash, struct slName *genes);
 /* Simply obtain the statHash containing the 1st and 2nd PCA components of the input gene set */
 
 
  
 #endif /* HGSTATS_H */