src/hg/instinct/hgGeneset/hgGenesets.c 1.3

1.3 2010/01/21 23:51:13 jsanborn
added initial drawing code
Index: src/hg/instinct/hgGeneset/hgGenesets.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/hgGeneset/hgGenesets.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 4 -r1.2 -r1.3
--- src/hg/instinct/hgGeneset/hgGenesets.c	19 Jan 2010 23:08:38 -0000	1.2
+++ src/hg/instinct/hgGeneset/hgGenesets.c	21 Jan 2010 23:51:13 -0000	1.3
@@ -34,11 +34,11 @@
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
-  "bioIntUI\n"
+  "hgGenesets\n"
   "usage:\n"
-  "   bioIntUI\n"
+  "   hgGenesets\n"
   );
 }
 
 /****** BEGIN HELPER FUNCTIONS *******/
@@ -237,10 +236,37 @@
 jsonAddString(js, "name", name);
 jsonAddString(js, "source", source);
 }
 
+struct samples *getSamplesByIds(struct sqlConnection *conn, char *sampleIds)
+/* Load SA_TABLE rows whose sample_id is in sampleIds (comma-separated ids, spliced into the SQL as-is). */
+{
+struct dyString *dy = dyStringNew(100);
+dyStringPrintf(dy, "select * from %s where sample_id in (%s);", SA_TABLE, sampleIds);
+struct samples *saList = samplesLoadByQuery(conn, dy->string);
+dyStringFree(&dy);  /* release the query text instead of leaking it */
+return saList;
+}
 
+struct analysisFeatures *getAnalysisFeaturesByIds(struct sqlConnection *conn, char *featureIds)
+/* Load AF_TABLE rows whose id is in featureIds (comma-separated ids, spliced into the SQL as-is). */
+{
+struct dyString *dy = dyStringNew(100);
+dyStringPrintf(dy, "select * from %s where id in (%s);", AF_TABLE, featureIds);
+struct analysisFeatures *afList = analysisFeaturesLoadByQuery(conn, dy->string);
+dyStringFree(&dy);  /* release the query text instead of leaking it */
+return afList;
+}
 
+struct datasets *getDatasetById(struct sqlConnection *conn, int dataset_id)
+/* Query DA_TABLE for rows with id equal to dataset_id and return what the loader yields. */
+{
+char sql[256];
+
+safef(sql, sizeof(sql), "select * from %s where id = %d", DA_TABLE, dataset_id);
+struct datasets *daList = datasetsLoadByQuery(conn, sql);
+return daList;
+}
 /****** END HELPER FUNCTIONS *******/
 
 
 /* get a list of the current analyses */
@@ -318,111 +344,13 @@
 
 hFreeConn(&conn);
 }
 
-struct searchResults {
-    struct searchResults *next;
-    char *name;
-    char *type;
-    double val;
-};
-
-int searchResultsCmp(const void *va, const void *vb)
-/* Compare function to sort array of ints. */
-{
-const struct searchResults *a = *((struct searchResults **)va);
-const struct searchResults *b = *((struct searchResults **)vb);
-double diff = a->val - b->val;
-if (diff < 0)
-    return -1;
-else if (diff > 0)
-    return 1;
-else
-    return 0;
-}   
-
-struct searchResults *searchForFeatures(struct sqlConnection *conn, int cohort_id, 
-					struct datasets *daList, char *feature_name)
-{
-int maxResponse = 5;
-/* Check analysis features */
-char query[256];
-safef(query, sizeof(query), 
-      "select feature_name,type from %s where feature_name like \"%%%s%%\" "
-      "order by length(feature_name);", 
-      AF_TABLE, feature_name);
-
-int count = 0;
-struct searchResults *sp, *spList = NULL;
-struct sqlResult *sr = sqlGetResult(conn, query);
-char **row = NULL;
-while ((row = sqlNextRow(sr)) != NULL)
-    { 
-    char *name = row[0];
-    char *type = row[1];
-    AllocVar(sp);
-    sp->name = cloneString(name);
-    sp->type = cloneString(type);
-    sp->val  = strlen(sp->name);
-    slAddHead(&spList, sp);
-    if (count > maxResponse)
-	break;
-    count++;
-    }
-sqlFreeResult(&sr);
-
-/* Check clinical features */
-struct datasets *da;
-struct dyString *dy = dyStringNew(100);
-dyStringPrintf(dy,
-	       "select DISTINCT %s.name from %s join %s on %s.sample_id = %s.id ",
-	       FE_TABLE, CD_TABLE, SA_TABLE, CD_TABLE, SA_TABLE);
-dyStringPrintf(dy,
-	       "join %s on %s.id = %s.feature_id ",
-	       FE_TABLE, FE_TABLE, CD_TABLE);
-dyStringPrintf(dy,
-	       "where %s.name like \"%%%s%%\" and %s.dataset_id in (",
-	       FE_TABLE, feature_name, SA_TABLE);
-for (da = daList; da; da = da->next)
-    {
-    dyStringPrintf(dy, "%d", da->id);
-    if (da->next)
-	dyStringPrintf(dy, ",");
-    }
-dyStringPrintf(dy, ") order by length(%s.name)", FE_TABLE);     
-char *cquery = dyStringCannibalize(&dy);
-
-count = 0;
-sr = sqlGetResult(conn, cquery);
-while ((row = sqlNextRow(sr)) != NULL)
-    { 
-    char *name = row[0];
-    AllocVar(sp);
-    sp->name = cloneString(name);
-    sp->type = cloneString("clinical");
-    sp->val  = strlen(sp->name);
-    slAddHead(&spList, sp);
-    if (count < maxResponse)
-	break;
-    count++;
-    }
-sqlFreeResult(&sr);
-
-slSort(&spList, searchResultsCmp);
-return spList;
-}
-
 void sendNoMatch(struct json *js)
 {
 return;
 }
 
-void sendAmbiguities(struct json *js, struct searchResults *spList)
-{
-struct searchResults *sp;
-for (sp = spList; sp; sp = sp->next)
-    jsonAddString(js, sp->name, sp->type); 
-}
 
 void sendRawFeatureData(struct sqlConnection *conn, struct json *js, 
 			struct datasets *da, struct samples *saList, 
 			struct analysisFeatures *af)
@@ -648,44 +576,8 @@
 
 hFreeConn(&conn);
 }
 
-void getSuggestions()
-{
-int cohort_id = cartUsualInt(cart, bioIntCohortId, -1);
-
-/* feature source = gene,geneset,clinical... */
-//char *source = cartOptionalString(cart, bioIntSourceName);
-
-char *feature_name = cartOptionalString(cart, bioIntFeatureName);
-if (!feature_name)
-    errAbort("%s or %s must be set for mode=getFeatureData\n", bioIntFeature, bioIntFeatureId);
-
-struct sqlConnection *conn = hAllocConnProfile(localDb, db);
-
-struct datasets *daList = getDatasetsByCohortId(conn, cohort_id);
-if (!daList)
-    errAbort("No datasets matching cohort_id = %d", cohort_id);
-
-struct json *js = newJson();
-if (sameString(feature_name, ""))  // blank was sent, return no match
-    sendNoMatch(js);
-else
-    {    
-    struct searchResults *spList = searchForFeatures(conn, cohort_id, daList, feature_name);
-    int numMatched = slCount(spList);
-    if (numMatched == 0)
-	sendNoMatch(js);
-    else
-	sendAmbiguities(js, spList);
-    }
-
-if (js)
-    hPrintf("%s\n", js->print(js));
-
-hFreeConn(&conn); 
-}
-
 void getClinicalFeatures()
 {
 int cohort_id = cartUsualInt(cart, bioIntCohortId, -1);
 
@@ -791,248 +683,109 @@
 
 hFreeConn(&conn);
 }
 
-void getMostCorrelated()
+struct rawData *getRawData(struct sqlConnection *conn, struct datasets *da, 
+			   struct samples *samples, struct analysisFeatures *afList)
 {
-int takeTop = cartUsualInt(cart, bioIntTakeTop, 5);
-int cohort_id = cartUsualInt(cart, bioIntCohortId, -1);
-char *feature_name = cartOptionalString(cart, bioIntFeatureName);
-
-if (!feature_name || cohort_id == -1)
-    errAbort("%s or %s must be set for mode=getMostCorrelated\n", 
-	     bioIntFeatureName, bioIntCohortId);
-
-struct sqlConnection *conn = hAllocConnProfile(localDb, db);
+struct samples *sa;
+struct analysisFeatures *af;
 
-struct analysisFeatures *af = getAnalysisFeaturesByName(conn, feature_name);
-if (!af)
+struct dyString *dy = dyStringNew(100);
+dyStringPrintf(dy, "select * from %s where sample_id in (",
+	       da->data_table);
+for (sa = samples; sa; sa = sa->next)
     {
-    hFreeConn(&conn);
-    errAbort("Could not find analysisFeature in db");
+    dyStringPrintf(dy, "%d", sa->sample_id);
+    if (sa->next)
+	dyStringPrintf(dy, ",");
     }
-
-struct cohortCorr *cc = getCohortCorrByCohortId(conn, cohort_id);
-if (!cc)
+dyStringPrintf(dy, ") and feature_id in (");
+for (af = afList; af; af = af->next)
     {
-    hFreeConn(&conn);
-    errAbort("No cohort correlation table with cohort_id = %d.\n", cohort_id);
+    dyStringPrintf(dy, "%d", af->id);
+    if (af->next)
+	dyStringPrintf(dy, ",");
     }
-
-struct dyString *dy = dyStringNew(100);
-dyStringPrintf(dy, "select a1.feature_name, a1.type, a2.feature_name, a2.type, %s.val from %s "
-	       "join %s as a1 on %s.feature_id1 = a1.id ", 
-	       cc->result_table, cc->result_table, AF_TABLE, cc->result_table);
-dyStringPrintf(dy, "join %s as a2 on %s.feature_id2 = a2.id ", 
-	       AF_TABLE, cc->result_table);
-dyStringPrintf(dy, "where feature_id1 = %d or feature_id2 = %d order by %s.val DESC", 
-	       af->id, af->id, cc->result_table);
+dyStringPrintf(dy, ");");
 char *query = dyStringCannibalize(&dy);
 
-struct searchResults *sp, *spList = NULL;
+struct rawData *rd, *rdList = NULL;
 struct sqlResult *sr = sqlGetResult(conn, query);
 char **row = NULL;
 while ((row = sqlNextRow(sr)) != NULL)
     {
-    char *name = NULL;
-    char *type = NULL;
-    char *name1 = row[0];
-    char *type1 = row[1];
-    char *name2 = row[2];
-    char *type2 = row[3];
-    double val = atof(row[4]);
-
-    if (sameString(name1, af->feature_name))
-	{
-	name = name2;
-	type = type2;
-	}
-    else if (sameString(name2, af->feature_name))
-	{
-	name = name1;
-	type = type1;
-	}
-    else  // doesn't match either, should *never* get here
-	continue;
-
-    AllocVar(sp);
-    sp->name = cloneString(name);
-    sp->type = cloneString(type);
-    sp->val = val;
-    slAddHead(&spList, sp);
-    }
-slReverse(&spList);
-
-int count;
-struct json *js = newJson();
-struct json *corrs = jsonAddContainer(js, "Top Correlated");
-for (sp = spList, count = 0; sp && (count < takeTop); sp = sp->next, count++)
-    {
-    if (sp->val < 0.0)
-	break;
-    struct json *data = jsonAddContainer(corrs, sp->name);
-    jsonAddString(data, "type", sp->type);
-    jsonAddDouble(data, "val", sp->val);    
-    }
-
-slReverse(&spList);
-corrs = jsonAddContainer(js, "Top Anti-Correlated");
-for (sp = spList, count = 0; sp && (count < takeTop); sp = sp->next, count++)
-    {
-    if (sp->val > 0.0)
-	break;
-    struct json *data = jsonAddContainer(corrs, sp->name);
-    jsonAddString(data, "type", sp->type);    
-    jsonAddDouble(data, "val", sp->val);    
+    rd = AllocA(struct rawData);
+    rd->sample_id  = atoi(row[0]);
+    rd->feature_id = atoi(row[1]);
+    rd->val        = atof(row[2]);
+    rd->conf       = atof(row[3]);
+    slAddHead(&rdList, rd);
     }
+sqlFreeResult(&sr);
 
-if (js)
-    hPrintf("%s", js->print(js));
-
-hFreeConn(&conn);
+return rdList;
 }
 
-void sendContainedFeatures(struct sqlConnection *conn, int cohort_id, 
-			   struct analysisFeatures *af, char *input_table, int takeTop)
+void drawHeatmap()
 {
-if (!input_table)
-    return;
-
-if (!sqlTableExists(conn, input_table))
-    {
-    hFreeConn(&conn);
-    errAbort("Table does not exist, %s.\n", input_table);
-    }
-
-uglyTime(NULL);
-struct dyString *dy = newDyString(100);
-dyStringPrintf(dy, 
-	       "select DISTINCT gene_id from %s where id = %d",
-	       GG_TABLE, af->id);
-char *query = dyStringCannibalize(&dy);
-
-struct slInt *si, *siList = sqlQuickNumList(conn, query);
-
-dy = newDyString(100);
-dyStringPrintf(dy, 
-	       "select feature_name, type, sum(abs(conf)) as s from %s "
-	       "join %s on feature_id=id where feature_id in (",
-	       AF_TABLE, input_table);
-for (si = siList; si; si = si->next)
-    {
-    dyStringPrintf(dy, "%d", si->val);
-    if (si->next)
-	dyStringPrintf(dy, ",");
-    }
-dyStringPrintf(dy, 
-	       ") group by feature_id order by s DESC limit %d;", 
-	       takeTop);
-query = dyStringCannibalize(&dy);
-
-struct slName *fList = NULL, *tList = NULL;
-
-struct sqlResult *sr = sqlGetResult(conn, query);
-char **row = NULL;
-while ((row = sqlNextRow(sr)) != NULL)
-    { 
-    char *name = row[0];
-    char *type = row[1];
-    slNameAddHead(&fList, name);
-    slNameAddHead(&tList, type);
-    }
-slReverse(&fList);
-slReverse(&tList);
-char *feature_names = slNameListToString(fList, ',');
-char *types = slNameListToString(tList, ',');
-
-struct json *js = newJson();
-sendUniqueMatch(conn, js, cohort_id, feature_names, types, NULL);  
-
-if (js)
-    hPrintf("%s", js->print(js));
-}
+struct sqlConnection *conn = hAllocConnProfile(localDb, db);
 
-void expandFeature()
-{
-int takeTop = cartUsualInt(cart, bioIntTakeTop, 5);
-int cohort_id = cartUsualInt(cart, bioIntCohortId, -1);
-char *feature_name = cartOptionalString(cart, bioIntFeatureName);
+int dataset_id   = cartUsualInt(cart, hgh3DatasetId, -1);
+if (dataset_id < 0)
+    errAbort("%s is required\n", hgh3DatasetId);
 
-if (!feature_name)
-    errAbort("%s must be set for mode=getGenesInGeneset\n", bioIntFeatureName);
+char *sampleIds  = cartOptionalString(cart, hgh3SampleIds);
+if (!sampleIds)
+    errAbort("%s is required\n", hgh3SampleIds);
 
-struct sqlConnection *conn = hAllocConnProfile(localDb, db);
+char *featureIds = cartOptionalString(cart, hgh3FeatureIds);
+if (!featureIds)
+    errAbort("%s is required\n", hgh3FeatureIds);
 
-struct analysisFeatures *af = getAnalysisFeaturesByName(conn, feature_name);
-if (!af)
-    {
-    hFreeConn(&conn);
-    errAbort("Could not find analysisFeature in db");
-    }
+struct samples *samples = getSamplesByIds(conn, sampleIds);
+if (!samples)
+    errAbort("No samples matching ids = %s\n", sampleIds);
 
-struct analyses *an, *anList = getAnalysesByCohortId(conn, cohort_id);
-if (!anList)
-    {
-    hFreeConn(&conn);
-    errAbort("No analyses with cohort_id = %d.\n", cohort_id);
-    }
+struct analysisFeatures *afList = getAnalysisFeaturesByIds(conn, featureIds);
+if (!afList)
+    errAbort("No features matching ids = %s\n", featureIds);
 
-char tmpQ[512];
-for (an = anList; an; an = an->next)
-    {
-    safef(tmpQ, sizeof(tmpQ),
-	  "select * from %s where feature_id = %d;",
-	  an->result_table, af->id);
-    if (sqlExists(conn, tmpQ))
-	break;
-    }
+struct datasets *da = getDatasetById(conn, dataset_id);
 
-if (!an)
-    {
-    hFreeConn(&conn);
-    errAbort("No analysis feature with id=%d in any analysis table in cohort with id = %d", 
-	     af->id, cohort_id);
-    }
+struct slName *saList = slNameListFromComma(sampleIds);
+struct slName *feList = slNameListFromComma(featureIds);
 
-struct slName *tables = slNameListFromComma(an->input_tables);
+struct rawData *rdList = da ? getRawData(conn, da, samples, afList) : NULL;
+if (!rdList)  /* also guards da == NULL: getRawData reads da->data_table */
+    errAbort("No data matching input parameters.");
 
-if (slCount(tables) > 1)
-    sendRawFeatures(conn, cohort_id, af);
-else
-    sendContainedFeatures(conn, cohort_id, af, an->input_tables, takeTop);
+struct mapSettings *settings = initMapSettings(saList, feList, 20, 20);
+if (!settings)
+    errAbort("settings = NULL");
 
-hFreeConn(&conn);
+char *filename = heatmapGif(conn, rdList, settings);
+struct json *js = newJson();
+jsonAddString(js, "image", filename);
+if (js)
+    hPrintf("%s\n", js->print(js));
 }
 
 void dispatchRoutines()
 /* Look at command variables in cart and figure out which
  * page to draw. */
 {
 /* retrieve cart variables, handle various modes */
-char *mode = cartOptionalString(cart, bioIntMode);
+char *mode = cartOptionalString(cart, hgh3Mode);
 if (!mode)
-    errAbort("%s is required.", bioIntMode);
+    errAbort("%s is required.", hgh3Mode);
 
-if (sameString(mode, "getAnalyses"))
-    getAnalyses();
-else if (sameString(mode, "getCohorts"))
-    getCohorts();
-else if (sameString(mode, "getFeatureData"))
-    getFeatureData();
-else if (sameString(mode, "getSuggestions"))
-    getSuggestions();
-else if (sameString(mode, "getClinicalData"))
-    getClinicalData();
-else if (sameString(mode, "getClinicalFeatures"))
-    getClinicalFeatures();
-else if (sameString(mode, "getMostCorrelated"))
-    getMostCorrelated();
-else if (sameString(mode, "expandFeature"))
-    expandFeature();
+if (sameString(mode, "drawHeatmap"))
+    drawHeatmap();
 else
     errAbort("Incorrect mode = %s", mode);
 
-cartRemovePrefix(cart, bioIntPrefix);
+cartRemovePrefix(cart, hgh3Prefix);
 
 }
 
 void hghDoUsualHttp()