src/hg/instinct/hgGeneset/hgGenesets.c 1.5

1.5 2010/01/28 22:59:07 jsanborn
added clustering
Index: src/hg/instinct/hgGeneset/hgGenesets.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/hgGeneset/hgGenesets.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -b -B -U 4 -r1.4 -r1.5
--- src/hg/instinct/hgGeneset/hgGenesets.c	22 Jan 2010 05:11:28 -0000	1.4
+++ src/hg/instinct/hgGeneset/hgGenesets.c	28 Jan 2010 22:59:07 -0000	1.5
@@ -19,11 +19,11 @@
 #include "featuresLib.h" 
 #include "json.h"
 #include "bioIntDb.h"
 #include "bioIntDriver.h"
+#include "hgStats.h"
 #include "hgGenesets.h"
 
-
 static char const rcsid[] = "$Id$";
 /* ---- Global variables. ---- */
 struct cart *cart;	         /* This holds cgi and other variables between clicks. */
 struct hash *oldVars;	         /* Old cart hash. */
@@ -42,8 +42,55 @@
 }
 
 /****** BEGIN HELPER FUNCTIONS *******/
 
+char getClusterMethod(char *method)
+/* Map a clustering method name to its single-character code.
+ * Returns '-' when the name matches none of the known methods. */
+{
+static char *names[] = {"single", "maximum", "average", "centroid"};
+static char codes[] = {'s', 'm', 'a', 'c'};
+int count = sizeof(codes) / sizeof(codes[0]);
+int i;
+
+for (i = 0; i < count; i++)
+    {
+    if (sameWord(method, names[i]))
+        return codes[i];
+    }
+return '-';
+}
+  
+char getClusterMetric(char *metric)
+/* Map a distance metric name to its single-character code.
+ * Returns '-' when the name matches none of the known metrics. */
+{
+static struct
+    {
+    char *name;
+    char code;
+    } known[] =
+    {
+    {"euclidean",     'e'},
+    {"cityblock",     'b'},
+    {"correlation",   'c'},
+    {"absolute",      'a'},
+    {"uncentered",    'u'},
+    {"absuncentered", 'x'},
+    {"spearmans",     's'},
+    {"kendallstau",   'k'},
+    };
+int count = sizeof(known) / sizeof(known[0]);
+int i;
+for (i = 0; i < count; i++)
+    {
+    if (sameWord(metric, known[i].name))
+        return known[i].code;
+    }
+return '-';
+}
+
+
 struct analyses *getAnalysesById(struct sqlConnection *conn, int analysis_id)
 {
 char query[256];
 safef(query, sizeof(query), 
@@ -256,8 +303,30 @@
 char *query = dyStringCannibalize(&dy);
 return analysisFeaturesLoadByQuery(conn, query);
 }
 
+char *getAllIdsInDataset(struct sqlConnection *conn,
+			 struct datasets *da, char *field)
+/* Return the distinct values of field in da->data_table joined by commas.
+ * The result is detached via dyStringCannibalize; nothing here frees it. */
+{
+struct dyString *dy = dyStringNew(100);
+dyStringPrintf(dy, "select DISTINCT %s from %s;", field, da->data_table);
+char *query = dyStringCannibalize(&dy);
+struct slName *sl, *slList = sqlQuickList(conn, query);
+freeMem(query);   // query text is not used again after the list is loaded
+
+dy = dyStringNew(100);
+for (sl = slList; sl; sl = sl->next)
+    {
+    dyStringPrintf(dy, "%s", sl->name);
+    if (sl->next)
+	dyStringPrintf(dy, ",");
+    }
+slFreeList(&slList);   // every name has already been written into dy
+return dyStringCannibalize(&dy);
+}
+
 char *getNumAnalysisFeatureIdsInDataset(struct sqlConnection *conn,
 					 struct datasets *da, 
 					 int numFeatures)
 {
@@ -748,34 +817,69 @@
 
 return rdList;
 }
 
+void clusterRawData(struct rawData *rdList, struct mapSettings *settings, 
+		    char *metricStr, char *methodStr)
+/* Order names via clusterDataByGene and rebuild settings->x_index so each
+ * name maps to its position in that order.  Aborts on a bad metric/method. */
+{
+char metric = getClusterMetric(metricStr);
+char method = getClusterMethod(methodStr);
+if (method == '-' || metric == '-')
+    errAbort("Invalid clustering method or metric string.\n");
+
+struct slName *sl, *ordered = clusterDataByGene(rdList, settings, method, metric);
+if (ordered == NULL)
+    return;   // no ordering produced: keep the existing x_index intact
+
+// Replace the old index with one that follows the clustered order.
+hashFree(&settings->x_index);
+settings->x_index = hashNew(0);
+int numFeatures = 0;
+for (sl = ordered; sl; sl = sl->next)
+    {
+    if (hashLookup(settings->x_index, sl->name) == NULL)   // skip repeats
+	hashAddInt(settings->x_index, sl->name, numFeatures++);
+    }
+// NOTE(review): ordered is not freed; its ownership is not visible here.
+}
+
+
+
+
 void drawHeatmap()
 {
 struct sqlConnection *conn = hAllocConnProfile(localDb, db);
 
 int width  = cartUsualInt(cart, hghWidth, DEFAULT_HEATMAP_WIDTH);
 int height = cartUsualInt(cart, hghHeight, DEFAULT_HEATMAP_HEIGHT);
-
+char *sampleIds  = cartOptionalString(cart, hghSampleIds);
+char *featureIds = cartOptionalString(cart, hghFeatureIds);
 int dataset_id   = cartUsualInt(cart, hghDatasetId, -1);
+
+char *metric = cartOptionalString(cart, hghClusterMetric);
+char *method = cartOptionalString(cart, hghClusterMethod);
+
 struct datasets *da = getDatasetById(conn, dataset_id);
 if (!da)
     errAbort("No dataset matching id = %d\n", dataset_id);
 
-char *sampleIds  = cartOptionalString(cart, hghSampleIds);
 if (!sampleIds)
-    errAbort("%s is required\n", hghSampleIds);
+    {
+    sampleIds = getAllIdsInDataset(conn, da, "sample_id");
+    //errAbort("%s is required\n", hghSampleIds);
+    }
 
-char *featureIds = cartOptionalString(cart, hghFeatureIds);
 if (!featureIds)
     errAbort("%s is required\n", hghFeatureIds);
 
 struct samples *samples = getSamplesByIds(conn, sampleIds);
 if (!samples)
     errAbort("No samples matching ids = %s\n", sampleIds);
 
 // To test LOTS of features
-// featureIds = getNumAnalysisFeatureIdsInDataset(conn, da, 1000);
+//featureIds = getNumAnalysisFeatureIdsInDataset(conn, da, 1000);
 struct analysisFeatures *afList = getAnalysisFeaturesByIds(conn, featureIds);
 if (!afList)
     errAbort("No features matching ids = %s\n", featureIds);
 
@@ -790,8 +894,11 @@
 struct rawData *rdList = getRawData(conn, da, samples, afList);
 if (!rdList)
     errAbort("No data matching input parameters.");
 
+if (metric && method)
+    clusterRawData(rdList, settings, metric, method);
+
 char *filename = heatmapGif(conn, rdList, settings);
 struct json *js = newJson();
 jsonAddString(js, "image", filename);
 if (js)