src/hg/instinct/hgGeneset/hgGenesets.c 1.5
1.5 2010/01/28 22:59:07 jsanborn
added clustering
Index: src/hg/instinct/hgGeneset/hgGenesets.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/hgGeneset/hgGenesets.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -b -B -U 4 -r1.4 -r1.5
--- src/hg/instinct/hgGeneset/hgGenesets.c 22 Jan 2010 05:11:28 -0000 1.4
+++ src/hg/instinct/hgGeneset/hgGenesets.c 28 Jan 2010 22:59:07 -0000 1.5
@@ -19,11 +19,11 @@
#include "featuresLib.h"
#include "json.h"
#include "bioIntDb.h"
#include "bioIntDriver.h"
+#include "hgStats.h"
#include "hgGenesets.h"
-
static char const rcsid[] = "$Id$";
/* ---- Global variables. ---- */
struct cart *cart; /* This holds cgi and other variables between clicks. */
struct hash *oldVars; /* Old cart hash. */
@@ -42,8 +42,55 @@
}
/****** BEGIN HELPER FUNCTIONS *******/
+char getClusterMethod(char *method)
+/* Translate a clustering method name into its one-letter code, comparing
+ * with sameWord: "single" -> 's', "maximum" -> 'm', "average" -> 'a',
+ * "centroid" -> 'c'.  Any other name yields '-', which clusterRawData
+ * treats as invalid. */
+{
+static const struct { char *name; char code; } methods[] =
+    {
+    {"single",   's'},
+    {"maximum",  'm'},
+    {"average",  'a'},
+    {"centroid", 'c'},
+    };
+int i, count = sizeof(methods) / sizeof(methods[0]);
+
+for (i = 0; i < count; i++)
+    {
+    if (sameWord(method, methods[i].name))
+        return methods[i].code;
+    }
+return '-';
+}
+
+char getClusterMetric(char *metric)
+/* Translate a distance metric name into its one-letter code, comparing
+ * with sameWord.  Names are tried in the order listed in the table below;
+ * any name not in the table yields '-', which clusterRawData treats as
+ * invalid. */
+{
+static const struct { char *name; char code; } metrics[] =
+    {
+    {"euclidean",     'e'},
+    {"cityblock",     'b'},
+    {"correlation",   'c'},
+    {"absolute",      'a'},
+    {"uncentered",    'u'},
+    {"absuncentered", 'x'},
+    {"spearmans",     's'},
+    {"kendallstau",   'k'},
+    };
+int i, count = sizeof(metrics) / sizeof(metrics[0]);
+
+for (i = 0; i < count; i++)
+    {
+    if (sameWord(metric, metrics[i].name))
+        return metrics[i].code;
+    }
+return '-';
+}
+
+
struct analyses *getAnalysesById(struct sqlConnection *conn, int analysis_id)
{
char query[256];
safef(query, sizeof(query),
@@ -256,8 +303,30 @@
char *query = dyStringCannibalize(&dy);
return analysisFeaturesLoadByQuery(conn, query);
}
+char *getAllIdsInDataset(struct sqlConnection *conn,
+                         struct datasets *da, char *field)
+/* Return a comma-separated string holding every distinct value of column
+ * 'field' in the table named by da->data_table.  The string is empty when
+ * the query returns no rows.  The caller owns the returned string. */
+{
+struct dyString *dy = dyStringNew(100);
+dyStringPrintf(dy, "select DISTINCT %s from %s;",
+               field, da->data_table);
+struct slName *sl, *slList = sqlQuickList(conn, dy->string);
+
+/* Reuse the same buffer for the id list, so the query text is not leaked. */
+dyStringClear(dy);
+for (sl = slList; sl; sl = sl->next)
+    {
+    dyStringAppend(dy, sl->name);
+    if (sl->next)
+        dyStringAppendC(dy, ',');
+    }
+
+/* The names have been copied into dy; release the query result. */
+slFreeList(&slList);
+
+return dyStringCannibalize(&dy);
+}
+
char *getNumAnalysisFeatureIdsInDataset(struct sqlConnection *conn,
struct datasets *da,
int numFeatures)
{
@@ -748,34 +817,69 @@
return rdList;
}
+void clusterRawData(struct rawData *rdList, struct mapSettings *settings,
+                    char *metricStr, char *methodStr)
+/* Cluster rdList using the named metric and method, then rebuild
+ * settings->x_index so that each distinct name returned by
+ * clusterDataByGene maps to its position in the clustered order.
+ * Calls errAbort when either name is not recognized. */
+{
+char metricCode = getClusterMetric(metricStr);
+char methodCode = getClusterMethod(methodStr);
+
+if (metricCode == '-' || methodCode == '-')
+    errAbort("Invalid clustering method or metric string.\n");
+
+struct slName *clustered = clusterDataByGene(rdList, settings, methodCode, metricCode);
+
+/* Discard the previous ordering and start from an empty index. */
+hashFree(&settings->x_index);
+settings->x_index = hashNew(0);
+
+/* NOTE(review): the list returned by clusterDataByGene is not freed here -
+ * confirm who owns it. */
+int position = 0;
+struct slName *node = clustered;
+while (node != NULL)
+    {
+    /* Only the first occurrence of a name is given a slot. */
+    if (hashLookup(settings->x_index, node->name) == NULL)
+        {
+        hashAddInt(settings->x_index, node->name, position);
+        position++;
+        }
+    node = node->next;
+    }
+}
+
+
+
+
void drawHeatmap()
{
struct sqlConnection *conn = hAllocConnProfile(localDb, db);
int width = cartUsualInt(cart, hghWidth, DEFAULT_HEATMAP_WIDTH);
int height = cartUsualInt(cart, hghHeight, DEFAULT_HEATMAP_HEIGHT);
-
+char *sampleIds = cartOptionalString(cart, hghSampleIds);
+char *featureIds = cartOptionalString(cart, hghFeatureIds);
int dataset_id = cartUsualInt(cart, hghDatasetId, -1);
+
+char *metric = cartOptionalString(cart, hghClusterMetric);
+char *method = cartOptionalString(cart, hghClusterMethod);
+
struct datasets *da = getDatasetById(conn, dataset_id);
if (!da)
errAbort("No dataset matching id = %d\n", dataset_id);
-char *sampleIds = cartOptionalString(cart, hghSampleIds);
if (!sampleIds)
- errAbort("%s is required\n", hghSampleIds);
+ {
+ sampleIds = getAllIdsInDataset(conn, da, "sample_id");
+ //errAbort("%s is required\n", hghSampleIds);
+ }
-char *featureIds = cartOptionalString(cart, hghFeatureIds);
if (!featureIds)
errAbort("%s is required\n", hghFeatureIds);
struct samples *samples = getSamplesByIds(conn, sampleIds);
if (!samples)
errAbort("No samples matching ids = %s\n", sampleIds);
// To test LOTS of features
-// featureIds = getNumAnalysisFeatureIdsInDataset(conn, da, 1000);
+//featureIds = getNumAnalysisFeatureIdsInDataset(conn, da, 1000);
struct analysisFeatures *afList = getAnalysisFeaturesByIds(conn, featureIds);
if (!afList)
errAbort("No features matching ids = %s\n", featureIds);
@@ -790,8 +894,11 @@
struct rawData *rdList = getRawData(conn, da, samples, afList);
if (!rdList)
errAbort("No data matching input parameters.");
+if (metric && method)
+ clusterRawData(rdList, settings, metric, method);
+
char *filename = heatmapGif(conn, rdList, settings);
struct json *js = newJson();
jsonAddString(js, "image", filename);
if (js)