src/hg/instinct/hgGeneset/hgGenesets.c 1.15
1.15 2010/02/11 23:12:09 jsanborn
update getMatching
Index: src/hg/instinct/hgGeneset/hgGenesets.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/hgGeneset/hgGenesets.c,v
retrieving revision 1.14
retrieving revision 1.15
diff -b -B -U 1000000 -r1.14 -r1.15
--- src/hg/instinct/hgGeneset/hgGenesets.c 1 Feb 2010 14:58:41 -0000 1.14
+++ src/hg/instinct/hgGeneset/hgGenesets.c 11 Feb 2010 23:12:09 -0000 1.15
@@ -1,1447 +1,1454 @@
/* hgGenesets - bioIntUI CGI that returns gene set, cohort, and heatmap data as JSON. */
#include "common.h"
#include "bed.h"
#include "cart.h"
#include "linefile.h"
#include "customTrack.h"
#include "genoLay.h"
#include "hash.h"
#include "hCommon.h"
#include "hdb.h"
#include "hPrint.h"
#include "htmshell.h"
#include "hui.h"
#include "trackLayout.h"
#include "web.h"
#include "microarray.h"
#include "ra.h"
#include "hgStatsLib.h"
#include "featuresLib.h"
#include "json.h"
#include "hgStats.h"
#include "heatmapUtility.h"
#include "hgGenesets.h"
static char const rcsid[] = "$Id$";
/* ---- Global variables. ---- */
struct cart *cart; /* This holds cgi and other variables between clicks. */
struct hash *oldVars; /* Old cart hash. */
char *db = "bioIntDev";
char *localDb = "localDb";
void usage()
/* Explain usage and exit. */
{
errAbort(
"hgGenesets\n"
"usage:\n"
" hgGenesets\n"
);
}
/****** BEGIN HELPER FUNCTIONS *******/
char getClusterMethod(char *method)
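/* Map a clustering method name to its one-character code; '-' if unrecognized. */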
{
if (sameWord(method, "single"))
return 's';
if (sameWord(method, "maximum"))
return 'm';
if (sameWord(method, "average"))
return 'a';
if (sameWord(method, "centroid"))
return 'c';
return '-';
}
char getClusterMetric(char *metric)
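/* Map a distance metric name to its one-character code; '-' if unrecognized. */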
{
if (sameWord(metric, "euclidean"))
return 'e';
if (sameWord(metric, "cityblock"))
return 'b';
if (sameWord(metric, "correlation"))
return 'c';
if (sameWord(metric, "absolute"))
return 'a';
if (sameWord(metric, "uncentered"))
return 'u';
if (sameWord(metric, "absuncentered"))
return 'x';
if (sameWord(metric, "spearmans"))
return 's';
if (sameWord(metric, "kendallstau"))
return 'k';
return '-';
}
struct analyses *getAnalysesById(struct sqlConnection *conn, int analysis_id)
{
char query[256];
safef(query, sizeof(query),
"select * from %s where id = %d;",
AN_TABLE, analysis_id);
return analysesLoadByQuery(conn, query);
}
struct analyses *getAnalysesByCohortId(struct sqlConnection *conn, int cohort_id)
{
/* Currently, don't send back factorGraph tables */
char query[256];
safef(query, sizeof(query),
"select * from %s join %s on %s.id = module_id where cohort_id = %d "
"and not (%s.name = \"factorGraph\")",
AN_TABLE, AM_TABLE, AM_TABLE, cohort_id, AM_TABLE);
return analysesLoadByQuery(conn, query);
}
struct analysisFeatures *getAnalysisFeaturesByName(struct sqlConnection *conn,
char *name)
{
char query[256];
safef(query, sizeof(query),
"select * from %s where feature_name = \"%s\"",
AF_TABLE, name);
return analysisFeaturesLoadByQuery(conn, query);
}
struct analysisFeatures *getAnalysisFeaturesByNameType(struct sqlConnection *conn,
char *name, char *type)
{
char query[256];
safef(query, sizeof(query),
"select * from %s where feature_name = \"%s\" and type = \"%s\"",
AF_TABLE, name, type);
return analysisFeaturesLoadByQuery(conn, query);
}
struct analysisFeatures *getAnalysisFeaturesById(struct sqlConnection *conn,
int id)
{
char query[256];
safef(query, sizeof(query),
"select * from %s where id = %d",
AF_TABLE, id);
return analysisFeaturesLoadByQuery(conn, query);
}
struct features *getFeaturesByName(struct sqlConnection *conn,
char *name)
{
char query[256];
safef(query, sizeof(query),
"select * from %s where name = \"%s\"",
FE_TABLE, name);
return featuresLoadByQuery(conn, query);
}
struct features *getFeaturesById(struct sqlConnection *conn,
int id)
{
char query[256];
safef(query, sizeof(query),
"select * from %s where id = %d",
FE_TABLE, id);
return featuresLoadByQuery(conn, query);
}
struct tissues *getTissuesById(struct sqlConnection *conn,
int id)
{
char query[256];
safef(query, sizeof(query),
"select * from %s where id = %d",
TI_TABLE, id);
return tissuesLoadByQuery(conn, query);
}
struct datasets *getDatasetsByCohortId(struct sqlConnection *conn, int cohort_id)
{
char query[256];
safef(query, sizeof(query),
"select %s.* from %s join %s on %s.id = %s.dataset_id "
"where %s.cohort_id = %d",
DA_TABLE, DA_TABLE, DC_TABLE, DA_TABLE, DC_TABLE, DC_TABLE, cohort_id);
return datasetsLoadByQuery(conn, query);
}
struct cohortCorr *getCohortCorrByCohortId(struct sqlConnection *conn, int cohort_id)
{
char query[256];
safef(query, sizeof(query),
"select * from %s where cohort_id = %d",
CC_TABLE, cohort_id);
return cohortCorrLoadByQuery(conn, query);
}
char *getFieldFromKgXref(struct sqlConnection *conn, char *geneSymbol,
char *field)
{
char query[256];
safef(query, sizeof(query),
"select %s from %s where geneSymbol = \"%s\" ",
field, KX_TABLE, geneSymbol);
return sqlQuickString(conn, query);
}
struct genesets *getGenesetByName(struct sqlConnection *conn, char *name)
{
char query[256];
safef(query, sizeof(query),
"select * from %s where name = \"%s\" ",
GE_TABLE, name);
return genesetsLoadByQuery(conn, query);
}
struct slInt *getGenesInGeneset(struct sqlConnection *conn, int id)
{
char query[256];
safef(query, sizeof(query),
"select DISTINCT gene_id from %s where id = %d",
GG_TABLE, id);
return sqlQuickNumList(conn, query);
}
char *getDataTypeById(struct sqlConnection *conn, int id)
{
char query[256];
safef(query, sizeof(query),
"select name from %s where id = %d ",
DT_TABLE, id);
return sqlQuickString(conn, query);
}
struct samples *getOverlappingSamples(struct sqlConnection *conn,
struct datasets *daList)
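/* Find samples present in every dataset in daList by self-joining the samples table. */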
{
struct dyString *dy = dyStringNew(100);
int count = 1;
dyStringPrintf(dy, "select * from %s as t%d ", SA_TABLE, count);
count++;
struct datasets *da;
for (da = daList->next; da; da = da->next)
{
dyStringPrintf(dy,
"join %s as t%d on t1.id = t%d.id ",
SA_TABLE, count, count);
count++;
}
count = 1;
dyStringPrintf(dy, "where ");
for (da = daList; da; da = da->next)
{
dyStringPrintf(dy, "t%d.dataset_id=%d ", count, da->id);
if (da->next)
dyStringPrintf(dy, "and ");
count++;
}
char *query = dyStringCannibalize(&dy);
return samplesLoadByQuery(conn, query);
}
void setAnalysisFeatureDesc(struct sqlConnection *conn, struct json *js,
struct analysisFeatures *af)
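/* Add a "description" field for the feature, preferring the kgXref description when available. */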
{
char *desc = getFieldFromKgXref(conn, af->feature_name, "description");
if (!desc)
desc = cloneString(af->feature_name);
/* Strip 'isoform xxx' from the description... almost all entries have it */
char *ptr = rStringIn("isoform", desc);
if (ptr)
*ptr = '\0';
jsonAddString(js, "description", desc);
}
void setAnalysisFeatureLink(struct sqlConnection *conn, struct json *js,
struct analysisFeatures *af)
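/* Add "name" and "source" fields, from kgXref for genes or from the geneset table otherwise. */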
{
char *name, *source;
name = getFieldFromKgXref(conn, af->feature_name, "geneSymbol");
if (name)
source = "UCSC";
else
{
struct genesets *gs = getGenesetByName(conn, af->feature_name);
if (!gs)
return;
name = gs->name;
source = gs->source;
}
jsonAddString(js, "name", name);
jsonAddString(js, "source", source);
}
struct samples *getSamplesByIds(struct sqlConnection *conn,
char *sampleIds)
{
struct dyString *dy = dyStringNew(100);
dyStringPrintf(dy, "select * from %s where sample_id in (%s);", SA_TABLE, sampleIds);
char *query = dyStringCannibalize(&dy);
return samplesLoadByQuery(conn, query);
}
struct analysisFeatures *getAnalysisFeaturesByIds(struct sqlConnection *conn,
char *featureIds)
{
struct dyString *dy = dyStringNew(100);
dyStringPrintf(dy, "select * from %s where id in (%s);", AF_TABLE, featureIds);
char *query = dyStringCannibalize(&dy);
return analysisFeaturesLoadByQuery(conn, query);
}
char *getAllIdsInDataset(struct sqlConnection *conn,
struct datasets *da, char *field)
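/* Return a comma-separated list of all distinct values of field in the dataset's data table. */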
{
struct dyString *dy = dyStringNew(100);
dyStringPrintf(dy, "select DISTINCT %s from %s;",
field, da->data_table);
char *query = dyStringCannibalize(&dy);
struct slName *sl, *slList = sqlQuickList(conn, query);
dy = dyStringNew(100);
for (sl = slList; sl; sl = sl->next)
{
dyStringPrintf(dy, "%s", sl->name);
if (sl->next)
dyStringPrintf(dy, ",");
}
char *ids = dyStringCannibalize(&dy);
return ids;
}
char *getNumAnalysisFeatureIdsInDataset(struct sqlConnection *conn,
struct datasets *da,
int numFeatures)
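/* Return a comma-separated list of up to numFeatures distinct feature ids from the dataset's data table. */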
{
struct dyString *dy = dyStringNew(100);
dyStringPrintf(dy, "select DISTINCT feature_id from %s limit %d;",
da->data_table, numFeatures);
char *query = dyStringCannibalize(&dy);
struct slName *sl, *slList = sqlQuickList(conn, query);
dy = dyStringNew(100);
for (sl = slList; sl; sl = sl->next)
{
dyStringPrintf(dy, "%s", sl->name);
if (sl->next)
dyStringPrintf(dy, ",");
}
char *featureIds = dyStringCannibalize(&dy);
return featureIds;
}
struct datasets *getDatasetById(struct sqlConnection *conn, int dataset_id)
{
char query[256];
safef(query, sizeof(query),
"select * from %s where id = %d",
DA_TABLE, dataset_id);
return datasetsLoadByQuery(conn, query);
}
/****** END HELPER FUNCTIONS *******/
/* get a list of the current analyses */
void getAnalyses()
{
char query[256];
safef(query, sizeof(query),
"select * from analyses;");
struct sqlConnection *conn = hAllocConnProfile(localDb, db);
struct analyses *an, *anList = analysesLoadByQuery(conn, query);
struct json *js = newJson();
struct json *analysis, *analyses = jsonAddContainerList(js, "analyses");
analysis = analyses;
for (an = anList; an; an = an->next)
{
jsonAddInt(analysis, "id", an->id);
jsonAddInt(analysis, "cohort_id", an->cohort_id);
jsonAddInt(analysis, "module_id", an->module_id);
jsonAddString(analysis, "result_table", an->result_table);
jsonAddString(analysis, "input_tables", an->input_tables);
if (an->next)
analysis = jsonAddContainerToList(&analyses);
}
if (js)
hPrintf("%s\n", js->print(js));
hFreeConn(&conn);
}
void getCohorts()
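/* Output JSON listing all cohorts along with their tissue and datasets. */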
{
char query[256];
safef(query, sizeof(query),
"select * from %s;", CO_TABLE);
struct sqlConnection *conn = hAllocConnProfile(localDb, db);
struct cohorts *co, *coList = cohortsLoadByQuery(conn, query);
struct json *js = newJson();
struct json *cohort, *cohorts = jsonAddContainerList(js, "cohorts");
cohort = cohorts;
for (co = coList; co; co = co->next)
{
jsonAddInt(cohort, "cohort_id", co->id);
jsonAddString(cohort, "name", co->name);
struct datasets *da, *daList = getDatasetsByCohortId(conn, co->id);
struct tissues *ti = getTissuesById(conn, daList->tissue_id);
if (ti)
    jsonAddString(cohort, "tissue", ti->name);
tissuesFree(&ti);
struct json *dataset, *datasets = jsonAddContainerList(cohort, "datasets");
dataset = datasets;
for (da = daList; da; da = da->next)
{
jsonAddString(dataset, "name", da->name);
jsonAddInt(dataset, "num_samples", da->num_samples);
if (da->next)
dataset = jsonAddContainerToList(&datasets);
}
if (co->next)
cohort = jsonAddContainerToList(&cohorts);
}
if (js)
hPrintf("%s\n", js->print(js));
hFreeConn(&conn);
}
void getSamples()
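/* Output JSON listing the samples belonging to the cohort given in the cart. */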
{
char *cohortId = cartOptionalString(cart, hghCohortId);
if (!cohortId)
errAbort("%s is required\n", hghCohortId);
char query[256];
safef(query, sizeof(query),
"select s.* from %s s,%s dc where dc.dataset_id = s.dataset_id and dc.cohort_id = '%s' group by s.sample_id;", SA_TABLE, DC_TABLE, cohortId);
struct sqlConnection *conn = hAllocConnProfile(localDb, db);
struct samples *sa, *saList = samplesLoadByQuery(conn, query);
struct json *js = newJson();
struct json *sample, *samples = jsonAddContainerList(js, "samples");
sample = samples;
for (sa = saList; sa; sa = sa->next)
{
jsonAddInt(sample, "sample_id", sa->sample_id);
jsonAddString(sample, "sample_name", sa->sample_name);
jsonAddInt(sample, "patient_id", sa->patient_id);
jsonAddString(sample, "patient_name", sa->patient_name);
if (sa->next)
sample = jsonAddContainerToList(&samples);
}
if (js)
hPrintf("%s\n", js->print(js));
hFreeConn(&conn);
}
void sendRawFeatureData(struct sqlConnection *conn, struct json *js,
struct datasets *da, struct samples *saList,
struct analysisFeatures *af)
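/* Add one dataset's values for a feature to the JSON, keyed by sample name. */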
{
char *dataType = getDataTypeById(conn, da->type_id);
struct samples *sa;
struct dyString *dy = dyStringNew(100);
dyStringPrintf(dy,
"select DISTINCT %s.name, %s.conf from %s join %s on %s.sample_id = %s.id "
"where %s.feature_id = %d and %s.id in (",
SA_TABLE, da->data_table, da->data_table, SA_TABLE, da->data_table, SA_TABLE,
da->data_table, af->id, SA_TABLE);
for (sa = saList; sa; sa = sa->next)
{
dyStringPrintf(dy, "%d", sa->id);
if (sa->next)
dyStringPrintf(dy, ",");
}
dyStringPrintf(dy, ");");
char *query = dyStringCannibalize(&dy);
if (!sqlExists(conn, query))
return;
struct json *container = jsonAddContainer(js, dataType);
setAnalysisFeatureDesc(conn, container, af);
setAnalysisFeatureLink(conn, container, af); // e.g. 'hgg_gene' = kgId
struct json *data = jsonAddContainer(container, "data");
struct sqlResult *sr = sqlGetResult(conn, query);
char **row = NULL;
while ((row = sqlNextRow(sr)) != NULL)
{
char *name = row[0];
double val = atof(row[1]);
jsonAddDouble(data, name, val);
}
sqlFreeResult(&sr);
}
void sendRawFeatures(struct sqlConnection *conn,
int cohort_id, struct analysisFeatures *af)
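/* Output values for one feature across all datasets in the cohort. */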
{
struct datasets *da, *daList = getDatasetsByCohortId(conn, cohort_id);
if (!daList)
{
hFreeConn(&conn);
errAbort("No datasets with cohort_id = %d.\n", cohort_id);
}
struct samples *samples = getOverlappingSamples(conn, daList);
struct json *js = newJson();
for (da = daList; da; da = da->next)
sendRawFeatureData(conn, js, da, samples, af);
if (js)
hPrintf("%s", js->print(js));
}
void sendAnalysisFeatureData(struct sqlConnection *conn, struct json *js,
int cohort_id, char *feature_name)
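/* Add analysis result values for a named feature, using the first of the cohort's analyses that has data for it. */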
{
struct analysisFeatures *af = getAnalysisFeaturesByName(conn, feature_name);
if (!af)
{
hFreeConn(&conn);
errAbort("Could not find analysisFeature named in %s in db", feature_name);
}
struct analyses *an, *anList = getAnalysesByCohortId(conn, cohort_id);
if (!anList)
{
hFreeConn(&conn);
errAbort("No analyses with cohort_id = %d.\n", cohort_id);
}
char query[512];
for (an = anList; an; an = an->next)
{
safef(query, sizeof(query),
"select DISTINCT %s.name, %s.conf from %s join %s on %s.sample_id = %s.id "
"where %s.feature_id = %d;",
SA_TABLE, an->result_table, an->result_table, SA_TABLE, an->result_table, SA_TABLE,
an->result_table, af->id);
if (sqlExists(conn, query))
break;
}
setAnalysisFeatureDesc(conn, js, af);
setAnalysisFeatureLink(conn, js, af); // e.g. 'hgg_gene' = kgId
struct json *data = jsonAddContainer(js, "data");
struct sqlResult *sr = sqlGetResult(conn, query);
char **row = NULL;
while ((row = sqlNextRow(sr)) != NULL)
{
char *name = row[0];
double val = atof(row[1]);
jsonAddDouble(data, name, val);
}
sqlFreeResult(&sr);
}
void sendClinicalData(struct sqlConnection *conn, struct json *js,
int cohort_id, char *feature_name, struct datasets *daList)
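/* Add clinical values for a named feature, plus a "codes" table for coded values. */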
{
struct features *fe = getFeaturesByName(conn, feature_name);
if (!fe)
{
hFreeConn(&conn);
errAbort("Could not find clinical feature in db");
}
struct samples *sa, *samples = getOverlappingSamples(conn, daList);
struct dyString *dy = dyStringNew(100);
dyStringPrintf(dy,
"select DISTINCT %s.name, %s.val, %s.code from %s "
"join %s on %s.sample_id = %s.id "
"where %s.feature_id = %d and %s.id in (",
SA_TABLE, CD_TABLE, CD_TABLE, CD_TABLE, SA_TABLE, CD_TABLE, SA_TABLE,
CD_TABLE, fe->id, SA_TABLE);
for (sa = samples; sa; sa = sa->next)
{
dyStringPrintf(dy, "%d", sa->id);
if (sa->next)
dyStringPrintf(dy, ",");
}
dyStringPrintf(dy, ")");
char *query = dyStringCannibalize(&dy);
jsonAddString(js, "description", fe->longLabel);
jsonAddString(js, "name", fe->name);
jsonAddString(js, "source", "N/A");
struct slDouble *sd;
struct hash *codeHash = hashNew(0);
struct json *data = jsonAddContainer(js, "data");
struct sqlResult *sr = sqlGetResult(conn, query);
char **row = NULL;
while ((row = sqlNextRow(sr)) != NULL)
{
char *name = row[0];
double val = atof(row[1]);
char *code = row[2];
if (!sameString(code, "(null)")) // eventually need to fix this.
{
jsonAddString(data, name, code);
if (hashLookup(codeHash, code))
continue;
sd = slDoubleNew(val);
hashAdd(codeHash, code, sd);
}
else
jsonAddDouble(data, name, val);
}
sqlFreeResult(&sr);
if (hashNumEntries(codeHash) == 0)
return;
struct json *codes = jsonAddContainer(js, "codes");
struct hashEl *el, *elList = hashElListHash(codeHash);
for (el = elList; el != NULL; el = el->next)
{
char *name = el->name;
sd = el->val;
jsonAddDouble(codes, name, sd->val);
}
hashElFreeList(&elList);
}
void sendUniqueMatch(struct sqlConnection *conn, struct json *js,
int cohort_id, char *feature_names, char *sources,
struct datasets *daList)
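/* Add data for each requested feature, dispatching on its source (gene, geneset, or clinical). */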
{
struct slName *s, *sList = slNameListFromComma(sources);
struct slName *f, *fList = slNameListFromComma(feature_names);
if (slCount(sList) != slCount(fList))
errAbort("source list length not equal to feature list length\n");
for (s = sList, f = fList; s && f; s = s->next, f = f->next)
{
struct json *container = jsonAddContainer(js, f->name);
if (sameString(s->name, "gene") || sameString(s->name, "geneset"))
sendAnalysisFeatureData(conn, container, cohort_id, f->name);
else if (sameString(s->name, "clinical"))
sendClinicalData(conn, container, cohort_id, f->name, daList);
}
}
void getFeatureData()
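/* Output JSON with data for the requested features in the requested cohort. */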
{
int cohort_id = cartUsualInt(cart, bioIntCohortId, -1);
char *source = cartOptionalString(cart, bioIntSourceName);
char *feature_name = cartOptionalString(cart, bioIntFeatureName);
if (!feature_name || !source)
errAbort("%s and %s must be set for mode=getFeatureData\n", bioIntFeature, bioIntSourceName);
struct sqlConnection *conn = hAllocConnProfile(localDb, db);
struct datasets *daList = getDatasetsByCohortId(conn, cohort_id);
if (!daList)
errAbort("No datasets matching cohort_id = %d", cohort_id);
struct json *js = newJson();
sendUniqueMatch(conn, js, cohort_id, feature_name, source, daList);
if (js)
hPrintf("%s\n", js->print(js));
hFreeConn(&conn);
}
void getClinicalFeatures()
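/* Output JSON listing the clinical features available for the cohort's datasets. */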
{
int cohort_id = cartUsualInt(cart, bioIntCohortId, -1);
struct sqlConnection *conn = hAllocConnProfile(localDb, db);
struct datasets *da, *daList = getDatasetsByCohortId(conn, cohort_id);
if (!daList)
{
hFreeConn(&conn);
errAbort("No datasets matching cohort_id = %d", cohort_id);
}
struct dyString *dy = dyStringNew(100);
dyStringPrintf(dy,
"select DISTINCT %s.* from %s join %s on %s.id = %s.feature_id "
"join %s on %s.sample_id = %s.id where %s.dataset_id in (",
FE_TABLE, FE_TABLE, CD_TABLE, FE_TABLE, CD_TABLE,
SA_TABLE, CD_TABLE, SA_TABLE, SA_TABLE);
for (da = daList; da; da = da->next)
{
dyStringPrintf(dy, "%d", da->id);
if (da->next)
dyStringPrintf(dy, ",");
}
dyStringPrintf(dy, ")");
char *query = dyStringCannibalize(&dy);
struct features *fe, *feList = featuresLoadByQuery(conn, query);
struct json *js = newJson();
struct json *feature, *features = jsonAddContainerList(js, "features");
feature = features;
for (fe = feList; fe; fe = fe->next)
{
jsonAddInt(feature, "id", fe->id);
jsonAddString(feature, "name", fe->name);
jsonAddString(feature, "shortLabel", fe->shortLabel);
jsonAddString(feature, "longLabel", fe->longLabel);
if (fe->next)
feature = jsonAddContainerToList(&features);
}
if (js)
hPrintf("%s\n", js->print(js));
hFreeConn(&conn);
}
void getClinicalData()
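/* Output JSON mapping sample names to values for one clinical feature in the cohort. */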
{
int cohort_id = cartUsualInt(cart, bioIntCohortId, -1);
char *feature_name = cartOptionalString(cart, bioIntFeatureName);
if (!feature_name)
errAbort("%s or %s must be set for mode=getClinicalData\n", bioIntFeature, bioIntFeatureId);
struct sqlConnection *conn = hAllocConnProfile(localDb, db);
struct features *fe = getFeaturesByName(conn, feature_name);
if (!fe)
{
hFreeConn(&conn);
errAbort("Could not find clinical feature in db");
}
struct datasets *da, *daList = getDatasetsByCohortId(conn, cohort_id);
if (!daList)
{
hFreeConn(&conn);
errAbort("No datasets matching cohort_id = %d", cohort_id);
}
struct dyString *dy = dyStringNew(100);
dyStringPrintf(dy,
"select DISTINCT %s.name, %s.val from %s join %s on %s.sample_id = %s.id ",
SA_TABLE, CD_TABLE, CD_TABLE, SA_TABLE, CD_TABLE, SA_TABLE);
dyStringPrintf(dy,
"where %s.feature_id = %d and %s.dataset_id in (",
CD_TABLE, fe->id, SA_TABLE);
for (da = daList; da; da = da->next)
{
dyStringPrintf(dy, "%d", da->id);
if (da->next)
dyStringPrintf(dy, ",");
}
dyStringPrintf(dy, ")");
char *query = dyStringCannibalize(&dy);
struct json *js = newJson();
struct sqlResult *sr = sqlGetResult(conn, query);
char **row = NULL;
while ((row = sqlNextRow(sr)) != NULL)
{
char *name = row[0];
double val = atof(row[1]);
jsonAddDouble(js, name, val);
}
sqlFreeResult(&sr);
if (js)
hPrintf("%s\n", js->print(js));
hFreeConn(&conn);
}
struct rawData *getRawData(struct sqlConnection *conn, struct datasets *da,
struct samples *samples, struct analysisFeatures *afList)
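/* Load raw (sample, feature, val, conf) rows for the given samples and features. */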
{
struct samples *sa;
struct analysisFeatures *af;
struct dyString *dy = dyStringNew(100);
dyStringPrintf(dy, "select * from %s where sample_id in (",
da->data_table);
for (sa = samples; sa; sa = sa->next)
{
dyStringPrintf(dy, "%d", sa->sample_id);
if (sa->next)
dyStringPrintf(dy, ",");
}
dyStringPrintf(dy, ") and feature_id in (");
for (af = afList; af; af = af->next)
{
dyStringPrintf(dy, "%d", af->id);
if (af->next)
dyStringPrintf(dy, ",");
}
dyStringPrintf(dy, ");");
char *query = dyStringCannibalize(&dy);
struct rawData *rd, *rdList = NULL;
struct sqlResult *sr = sqlGetResult(conn, query);
char **row = NULL;
while ((row = sqlNextRow(sr)) != NULL)
{
rd = AllocA(struct rawData);
rd->sample_id = atoi(row[0]);
rd->feature_id = atoi(row[1]);
rd->val = atof(row[2]);
rd->conf = atof(row[3]);
slAddHead(&rdList, rd);
}
sqlFreeResult(&sr);
return rdList;
}
void updateSampleOrder(struct mapSettings *settings, struct slName *slList)
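/* Rebuild the sample (x) index hash from the ordered list and save the order in the cart. */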
{
if (!slList)
return;
struct slName *sl;
hashFree(&settings->x_index);
settings->x_index = hashNew(0);
struct hashEl *el;
int numSamples = 0;
for (sl = slList; sl; sl = sl->next)
{
if ((el = hashLookup(settings->x_index, sl->name)) != NULL)
continue;
hashAddInt(settings->x_index, sl->name, numSamples);
numSamples += 1;
}
char *list = strFromSlNameList(slList);
cartSetString(cart, hghSampleIds, list);
}
void updateFeatureOrder(struct mapSettings *settings, struct slName *slList)
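/* Rebuild the feature (y) index hash from the ordered list and save the order in the cart. */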
{
if (!slList)
return;
struct slName *sl;
hashFree(&settings->y_index);
settings->y_index = hashNew(0);
struct hashEl *el;
int numFeatures = 0;
for (sl = slList; sl; sl = sl->next)
{
if ((el = hashLookup(settings->y_index, sl->name)) != NULL)
continue;
hashAddInt(settings->y_index, sl->name, numFeatures);
numFeatures += 1;
}
char *list = strFromSlNameList(slList);
cartSetString(cart, hghFeatureIds, list);
}
void clusterRawData(struct rawData *rdList, struct mapSettings *settings,
char *metricStr, char *methodStr)
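/* Hierarchically cluster the raw data, then update sample and feature ordering. */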
{
char metric = getClusterMetric(metricStr);
char method = getClusterMethod(methodStr);
if (method == '-' || metric == '-')
errAbort("Invalid clustering method or metric string.\n");
struct slName *geneOrder = NULL, *sampleOrder = NULL;
clusterData(rdList, settings, method, metric, &geneOrder, &sampleOrder);
updateSampleOrder(settings, sampleOrder);
updateFeatureOrder(settings, geneOrder);
}
void jsonSettings(struct json *js, struct mapSettings *settings)
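/* Add image-map regions and per-sample/per-feature pixel ranges to the JSON. */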
{
struct json *c, *map = jsonAddContainer(js, "imageMap");
/* Main heatmap */
c = jsonAddContainer(map, "main");
jsonAddInt(c, "x", settings->hm_x);
jsonAddInt(c, "y", settings->hm_y);
jsonAddInt(c, "width", settings->hm_width);
jsonAddInt(c, "height", settings->hm_height);
/* Thumbnail */
c = jsonAddContainer(map, "thumbnail");
jsonAddInt(c, "x", 0);
jsonAddInt(c, "y", 0);
jsonAddInt(c, "width", settings->hm_x);
jsonAddInt(c, "height", settings->hm_y);
/* Sample Label */
c = jsonAddContainer(map, "sampleLabels");
jsonAddInt(c, "x", settings->hm_x);
jsonAddInt(c, "y", 0);
jsonAddInt(c, "width", settings->hm_width);
jsonAddInt(c, "height", settings->hm_y);
/* Feature Label */
c = jsonAddContainer(map, "featureLabels");
jsonAddInt(c, "x", 0);
jsonAddInt(c, "y", settings->hm_y);
jsonAddInt(c, "width", settings->hm_x);
jsonAddInt(c, "height", settings->hm_height);
struct json *new, *list = jsonAddContainerList(js, "samples");
new = list;
struct hashEl *el;
struct hashCookie cookie = hashFirst(settings->sampleHash);
while ((el = hashNext(&cookie)) != NULL)
{
if (!new)
new = jsonAddContainerToList(&list);
char *name = el->val;
int i = hashIntValDefault(settings->x_index, el->name, -1);
if (i < 0)
continue;
int start = ceil(((double) i) * settings->hm_x_scale) + settings->hm_x;
int stop = ceil(((double) i + 1) * settings->hm_x_scale) + settings->hm_x;
jsonAddInt(new, "id", atoi(el->name));
jsonAddString(new, "name", name);
jsonAddInt(new, "start", start);
jsonAddInt(new, "stop", stop);
new = NULL;
}
list = jsonAddContainerList(js, "features");
new = list;
cookie = hashFirst(settings->featureHash);
while ((el = hashNext(&cookie)) != NULL)
{
if (!new)
new = jsonAddContainerToList(&list);
char *name = el->val;
int i = hashIntValDefault(settings->y_index, el->name, -1);
if (i < 0)
continue;
int start = round(((double) i) * settings->hm_y_scale) + settings->hm_y;
int stop = round(((double) i + 1) * settings->hm_y_scale) + settings->hm_y;
jsonAddInt(new, "id", atoi(el->name));
jsonAddString(new, "name", name);
jsonAddInt(new, "start", start);
jsonAddInt(new, "stop", stop);
new = NULL;
}
}
struct slData {
struct slData *next;
char *name;
double val;
};
int slDataCmp(const void *va, const void *vb)
/* Compare function to sort slData items by val. */
{
const struct slData *a = *((struct slData **)va);
const struct slData *b = *((struct slData **)vb);
double diff = a->val - b->val;
if (diff < 0)
return -1;
else if (diff > 0)
return 1;
else
return 0;
}
void sortByFeature(struct sqlConnection *conn, struct datasets *da,
int feature_id, char *direction, struct samples *samples,
struct mapSettings *settings)
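/* Order the samples by their value for one feature, descending by default. */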
{
struct samples *sa;
struct dyString *dy = dyStringNew(100);
dyStringPrintf(dy, "select sample_id from %s where sample_id in (",
da->data_table);
for (sa = samples; sa; sa = sa->next)
{
dyStringPrintf(dy, "%d", sa->sample_id);
if (sa->next)
dyStringPrintf(dy, ",");
}
dyStringPrintf(dy, ") and feature_id = %d order by val", feature_id);
char *query = dyStringCannibalize(&dy);
struct slName *slList = sqlQuickList(conn, query);
if (!direction || sameString(direction, "DESC"))
slReverse(&slList);
updateSampleOrder(settings, slList);
}
void sortBySample(struct sqlConnection *conn, struct datasets *da,
int sample_id, char *direction, struct analysisFeatures *afList,
struct mapSettings *settings)
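/* Order the features by their value in one sample, descending by default. */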
{
struct analysisFeatures *af;
struct dyString *dy = dyStringNew(100);
dyStringPrintf(dy, "select feature_id from %s where feature_id in (",
da->data_table);
for (af = afList; af; af = af->next)
{
dyStringPrintf(dy, "%d", af->id);
if (af->next)
dyStringPrintf(dy, ",");
}
dyStringPrintf(dy, ") and sample_id = %d order by val", sample_id);
char *query = dyStringCannibalize(&dy);
struct slName *slList = sqlQuickList(conn, query);
if (!direction || sameString(direction, "DESC"))
slReverse(&slList);
updateFeatureOrder(settings, slList);
}
void cartUpdateSettings(struct mapSettings *settings)
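/* Save the heatmap width and height back to the cart. */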
{
cartSetInt(cart, hghWidth, settings->width);
cartSetInt(cart, hghHeight, settings->height);
}
void drawHeatmap()
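/* Generate the heatmap image for the requested dataset and output JSON describing it. */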
{
+int dataset_id = cartUsualInt(cart, hghDatasetId, -1);
+if (dataset_id == -1)
+ errAbort("Cart empty");
struct sqlConnection *conn = hAllocConnProfile(localDb, db);
int width = cartUsualInt(cart, hghWidth, DEFAULT_HEATMAP_WIDTH);
int height = cartUsualInt(cart, hghHeight, DEFAULT_HEATMAP_HEIGHT);
char *sampleIds = cartOptionalString(cart, hghSampleIds);
char *featureIds = cartOptionalString(cart, hghFeatureIds);
-int dataset_id = cartUsualInt(cart, hghDatasetId, -1);
int getFirst = cartUsualInt(cart, hghGetFirst, -1);
/* Clustering */
char *metric = cartOptionalString(cart, hghClusterMetric);
char *method = cartOptionalString(cart, hghClusterMethod);
/* Sorting by sample/gene */
int sortFeatureId = cartUsualInt(cart, hghSortFeatureId, -1);
int sortSampleId = cartUsualInt(cart, hghSortSampleId, -1);
char *sortDir = cartOptionalString(cart, hghSortDir);
struct datasets *da = getDatasetById(conn, dataset_id);
if (!da)
errAbort("No dataset matching id = %d\n", dataset_id);
if (!sampleIds)
{
sampleIds = getAllIdsInDataset(conn, da, "sample_id");
//errAbort("%s is required\n", hghSampleIds);
}
if (!featureIds && getFirst > 0)
{
featureIds = getNumAnalysisFeatureIdsInDataset(conn, da, getFirst);
cartSetString(cart, hghFeatureIds, featureIds);
}
if (!featureIds)
errAbort("%s is required\n", hghFeatureIds);
struct samples *samples = getSamplesByIds(conn, sampleIds);
if (!samples)
errAbort("No samples matching ids = %s\n", sampleIds);
struct analysisFeatures *afList = getAnalysisFeaturesByIds(conn, featureIds);
if (!afList)
errAbort("No features matching ids = %s\n", featureIds);
struct slName *saList = slNameListFromComma(sampleIds);
struct slName *feList = slNameListFromComma(featureIds);
struct mapSettings *settings = initMapSettings(saList, feList, samples, afList,
width, height);
if (!settings)
errAbort("settings = NULL");
struct rawData *rdList = getRawData(conn, da, samples, afList);
if (!rdList)
errAbort("No data matching input parameters.");
if (metric && method)
clusterRawData(rdList, settings, metric, method);
else if (sortFeatureId > 0)
sortByFeature(conn, da, sortFeatureId, sortDir, samples, settings);
else if (sortSampleId > 0)
sortBySample(conn, da, sortSampleId, sortDir, afList, settings);
cartUpdateSettings(settings);
char *filename = heatmapGif(conn, rdList, settings);
struct json *js = newJson();
jsonAddString(js, "image", filename);
jsonSettings(js, settings);
if (js)
hPrintf("%s\n", js->print(js));
cartRemovePrefix(cart, hghSortPrefix);
cartRemovePrefix(cart, hghClusterPrefix);
}
struct searchResults {
struct searchResults *next;
int id;
char *name;
char *type;
double val;
};
int searchResultsCmp(const void *va, const void *vb)
/* Compare function to sort searchResults by val (match name length). */
{
const struct searchResults *a = *((struct searchResults **)va);
const struct searchResults *b = *((struct searchResults **)vb);
double diff = a->val - b->val;
if (diff < 0)
return -1;
else if (diff > 0)
return 1;
else
return 0;
}
struct searchResults *searchForFeatures(struct sqlConnection *conn, char *feature_name)
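/* Return analysis features whose names match the search term, shortest names first. */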
{
int maxResponse = 10;
/* Check analysis features */
char query[256];
safef(query, sizeof(query),
"select id,feature_name,type from %s where feature_name like \"%%%s%%\" "
"order by length(feature_name);",
AF_TABLE, feature_name);
int count = 0;
struct searchResults *sp, *spList = NULL;
struct sqlResult *sr = sqlGetResult(conn, query);
char **row = NULL;
while ((row = sqlNextRow(sr)) != NULL)
{
int id = atoi(row[0]);
char *name = row[1];
char *type = row[2];
AllocVar(sp);
sp->id = id;
sp->name = cloneString(name);
sp->type = cloneString(type);
sp->val = strlen(sp->name);
slAddHead(&spList, sp);
count++;
if (count >= maxResponse)
    break;
}
sqlFreeResult(&sr);
slSort(&spList, searchResultsCmp);
return spList;
}
void sendNoMatch(struct json *js)
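/* Nothing matched; leave the JSON object empty. */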
{
return;
}
void sendAmbiguities(struct json *js, struct searchResults *spList)
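/* Add a "results" list giving the name, id, and type of each matching feature. */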
{
+struct json *match, *matches = jsonAddContainerList(js, "results");
+match = matches;
struct searchResults *sp;
for (sp = spList; sp; sp = sp->next)
{
- struct json *new = jsonAddContainer(js, sp->name);
- jsonAddInt(new, "id", sp->id);
- jsonAddString(new, "type", sp->type);
+ jsonAddString(match, "name", sp->name);
+ jsonAddInt(match, "id", sp->id);
+ jsonAddString(match, "type", sp->type);
+
+ if (sp->next)
+ match = jsonAddContainerToList(&matches);
}
}
void getMatching()
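/* Output JSON search results for the search term in the cart. */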
{
struct sqlConnection *conn = hAllocConnProfile(localDb, db);
char *term = cartOptionalString(cart, hghSearchTerm);
struct json *js = newJson();
if (!term || sameString(term, "")) // missing or blank term, return no match
sendNoMatch(js);
else
{
struct searchResults *spList = searchForFeatures(conn, term);
int numMatched = slCount(spList);
if (numMatched == 0)
sendNoMatch(js);
else
sendAmbiguities(js, spList);
}
if (js)
hPrintf("%s\n", js->print(js));
cartRemovePrefix(cart, hghSearchTerm);
}
void getInfo()
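/* Output JSON listing the gene ids in a geneset, looked up by id or by name and type. */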
{
struct sqlConnection *conn = hAllocConnProfile(localDb, db);
int id = cartUsualInt(cart, hghInfoId, -1);
char *name = cartOptionalString(cart, hghInfoName);
char *type = cartOptionalString(cart, hghInfoType);
if (id == -1 && (!name || !type))
errAbort("Insufficient input for getInfo().\n");
struct analysisFeatures *af;
if (id > 0)
af = getAnalysisFeaturesById(conn, id);
else
af = getAnalysisFeaturesByNameType(conn, name, type);
if (!af)
    errAbort("Could not find analysis feature for getInfo().\n");
if (!sameString(af->type, "geneset"))
errAbort("getInfo only supports retrieving geneset information.\n");
struct json *js = newJson();
struct slInt *geneIds = getGenesInGeneset(conn, af->id);
struct json *new = jsonAddContainer(js, af->feature_name);
jsonAddSlInt(new, "ids", geneIds);
if (js)
hPrintf("%s\n", js->print(js));
cartRemovePrefix(cart, hghInfoPrefix);
}
struct slInt *slIntListFromComma(char *list)
{ /* very dumb/lazy way of doing this */
struct slName *sl, *slList = slNameListFromComma(list);
struct slInt *si, *siList = NULL;
for (sl = slList; sl; sl = sl->next)
{
si = slIntNew(atoi(sl->name));
slAddHead(&siList, si);
}
slReverse(&siList);
return siList;
}
void requestState()
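/* Output the current heatmap state (dataset, size, sample and feature ids) from the cart as JSON. */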
{
int datasetId = cartUsualInt(cart, hghDatasetId, -1);
int width = cartUsualInt(cart, hghWidth, -1);
int height = cartUsualInt(cart, hghHeight, -1);
char *samples = cartOptionalString(cart, hghSampleIds);
char *features = cartOptionalString(cart, hghFeatureIds);
struct json *js = newJson();
if (datasetId >= 0)
jsonAddInt(js, hghDatasetId, datasetId);
if (width >= 0)
jsonAddInt(js, hghWidth, width);
if (height >= 0)
jsonAddInt(js, hghHeight, height);
if (samples)
{
struct slInt *siList = slIntListFromComma(samples);
jsonAddSlInt(js, hghSampleIds, siList);
}
if (features)
{
struct slInt *siList = slIntListFromComma(features);
jsonAddSlInt(js, hghFeatureIds, siList);
}
if (js)
hPrintf("%s\n", js->print(js));
}
void resetState()
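/* Clear all hgGenesets cart variables and output an empty JSON object. */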
{
cartRemovePrefix(cart, hghPrefix);
struct json *js = newJson();
if (js)
hPrintf("%s\n", js->print(js));
}
void uploadData()
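/* Handle a data file upload (currently a stub) and report the result as JSON. */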
{
char *fileContent = NULL;
char *err = NULL;
/* File data from the upload form */
fileContent = cgiOptionalString(hghFileUpload);
if (fileContent == NULL)
{
errAbort("A file is required; set it in %s.", hghFileUpload);
}
else
{
if (isNotEmpty(fileContent))
{
// handle upload in here, set err to something other than null if it fails
}
if (err)
printf("Error: %s\n", err);
else
{
struct json *js = newJson();
jsonAddString(js, "uploadData", "success");
//jsonAddString(js, "table", newCts->dbTableName);
hPrintf("%s", js->print(js));
}
}
cartRemove(cart, "Filename");
cartRemoveLike(cart, "Filedata*");
}
void dispatchRoutines()
/* Look at the mode variable in the cart and dispatch to the
* matching handler. */
{
/* retrieve cart variables, handle various modes */
char *mode = cartOptionalString(cart, hghMode);
if (!mode)
errAbort("%s is required.", hghMode);
if (sameString(mode, "drawHeatmap"))
drawHeatmap();
else if (sameString(mode, "getInfo"))
getInfo();
else if (sameString(mode, "getMatching"))
getMatching();
else if (sameString(mode, "getCohorts"))
getCohorts();
else if (sameString(mode, "getSamples"))
getSamples();
else if (sameString(mode, "requestState"))
requestState();
else if (sameString(mode, "resetState"))
resetState();
else if (sameString(mode, "uploadData"))
uploadData();
else
errAbort("Incorrect mode = %s", mode);
//cartRemovePrefix(cart, hghPrefix);
}
void hghDoUsualHttp()
/* Wrap the dispatcher with code that writes out the
* HTTP header and writes the cart back to the database. */
{
cartWriteCookie(cart, hUserCookie());
printf("Content-Type:application/x-javascript\r\n\r\n");
/* Dispatch other pages, that actually want to write HTML. */
cartWarnCatcher(dispatchRoutines, cart, jsonEarlyWarningHandler);
cartCheckout(&cart);
}
char *excludeVars[] = {"Submit", "submit", NULL};
int main(int argc, char *argv[])
/* Process command line. */
{
htmlPushEarlyHandlers();
cgiSpoof(&argc, argv);
htmlSetStyle(htmlStyleUndecoratedLink);
oldVars = hashNew(12);
cart = cartForSession(hUserCookie(), excludeVars, oldVars);
hghDoUsualHttp();
return 0;
}