src/hg/instinct/hgGeneset/hgGenesets.c 1.1
1.1 2010/01/19 23:05:14 sbenz
Adding new geneset binaries to cvs
Index: src/hg/instinct/hgGeneset/hgGenesets.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/hgGeneset/hgGenesets.c,v
retrieving revision 1.4
retrieving revision 1.1
diff -b -B -U 4 -r1.4 -r1.1
--- src/hg/instinct/hgGeneset/hgGenesets.c 22 Jan 2010 05:11:28 -0000 1.4
+++ src/hg/instinct/hgGeneset/hgGenesets.c 19 Jan 2010 23:05:14 -0000 1.1
@@ -27,18 +27,18 @@
/* ---- Global variables. ---- */
struct cart *cart; /* This holds cgi and other variables between clicks. */
struct hash *oldVars; /* Old cart hash. */
-char *db = "bioIntDev"; /* Passed as the second argument to hAllocConnProfile() by the mode handlers. */
+char *db = "hgHeatmap"; /* Passed as the second argument to hAllocConnProfile() by the mode handlers. */
char *localDb = "localDb"; /* Passed as the first argument to hAllocConnProfile() by the mode handlers. */
void usage()
/* Explain usage and exit: hands the usage text below to errAbort(). */
{
errAbort(
- "hgGenesets\n"
+ "bioIntUI\n" /* NOTE(review): this side prints bioIntUI although the file is hgGenesets.c -- confirm intended. */
"usage:\n"
- " hgGenesets\n"
+ " bioIntUI\n"
);
}
/****** BEGIN HELPER FUNCTIONS *******/
@@ -236,60 +237,10 @@
jsonAddString(js, "name", name);
jsonAddString(js, "source", source);
}
-struct samples *getSamplesByIds(struct sqlConnection *conn,
- char *sampleIds) /* Return what samplesLoadByQuery() loads from SA_TABLE for sample_id in (sampleIds). */
-{
-struct dyString *dy = dyStringNew(100);
-dyStringPrintf(dy, "select * from %s where sample_id in (%s);", SA_TABLE, sampleIds); /* sampleIds is pasted into the SQL as-is (comma-separated list expected) */
-
-char *query = dyStringCannibalize(&dy);
-return samplesLoadByQuery(conn, query);
-}
-
-struct analysisFeatures *getAnalysisFeaturesByIds(struct sqlConnection *conn,
- char *featureIds) /* Return what analysisFeaturesLoadByQuery() loads from AF_TABLE for id in (featureIds). */
-{
-struct dyString *dy = dyStringNew(100);
-dyStringPrintf(dy, "select * from %s where id in (%s);", AF_TABLE, featureIds); /* featureIds is pasted into the SQL as-is (comma-separated list expected) */
-
-char *query = dyStringCannibalize(&dy);
-return analysisFeaturesLoadByQuery(conn, query);
-}
-
-char *getNumAnalysisFeatureIdsInDataset(struct sqlConnection *conn,
- struct datasets *da,
- int numFeatures) /* Return a comma-separated string of up to numFeatures distinct feature_id values read from da->data_table. */
-{
-struct dyString *dy = dyStringNew(100);
-dyStringPrintf(dy, "select DISTINCT feature_id from %s limit %d;",
- da->data_table, numFeatures);
-char *query = dyStringCannibalize(&dy);
-
-struct slName *sl, *slList = sqlQuickList(conn, query);
-
-dy = dyStringNew(100); /* second buffer: accumulates the comma-separated id list */
-for (sl = slList; sl; sl = sl->next)
- {
- dyStringPrintf(dy, "%s", sl->name);
- if (sl->next)
- dyStringPrintf(dy, ","); /* separator between ids, none after the last one */
- }
-char *featureIds = dyStringCannibalize(&dy);
-
-return featureIds;
-}
-struct datasets *getDatasetById(struct sqlConnection *conn, int dataset_id) /* Return what datasetsLoadByQuery() loads from DA_TABLE for id = dataset_id. */
-{
-char query[256];
-safef(query, sizeof(query),
- "select * from %s where id = %d",
- DA_TABLE, dataset_id);
-return datasetsLoadByQuery(conn, query);
-}
/****** END HELPER FUNCTIONS *******/
/* get a list of the current analyses */
@@ -367,13 +318,111 @@
hFreeConn(&conn);
}
+struct searchResults { /* One named, typed, scored hit; used for name suggestions and for correlation results. */
+ struct searchResults *next; /* Next element; lists are built with slAddHead(). */
+ char *name; /* Feature name (filled with cloneString() copies). */
+ char *type; /* Feature type string; "clinical" for clinical features. */
+ double val; /* Score: name length in searchForFeatures(), correlation value in getMostCorrelated(). */
+};
+
+int searchResultsCmp(const void *va, const void *vb)
+/* Compare two searchResults by val, ascending; va and vb point to element pointers (used with slSort). */
+{
+const struct searchResults *a = *((struct searchResults **)va);
+const struct searchResults *b = *((struct searchResults **)vb);
+double diff = a->val - b->val;
+if (diff < 0)
+ return -1;
+else if (diff > 0)
+ return 1;
+else
+ return 0;
+}
+
+struct searchResults *searchForFeatures(struct sqlConnection *conn, int cohort_id,
+ struct datasets *daList, char *feature_name) /* Return up to maxResponse analysis features plus up to maxResponse clinical features (restricted to daList's datasets) whose name contains feature_name, sorted by ascending name length. cohort_id is not used here. */
+{
+int maxResponse = 5; /* cap applied separately to each of the two queries below */
+/* Check analysis features */
+char query[256];
+safef(query, sizeof(query), /* NOTE(review): feature_name is pasted into the SQL unescaped here and in the clinical query below -- escape it if it can come from user input. */
+ "select feature_name,type from %s where feature_name like \"%%%s%%\" "
+ "order by length(feature_name);",
+ AF_TABLE, feature_name);
+
+int count = 0;
+struct searchResults *sp, *spList = NULL;
+struct sqlResult *sr = sqlGetResult(conn, query);
+char **row = NULL;
+while ((row = sqlNextRow(sr)) != NULL)
+ {
+ char *name = row[0];
+ char *type = row[1];
+ AllocVar(sp);
+ sp->name = cloneString(name);
+ sp->type = cloneString(type);
+ sp->val = strlen(sp->name); /* score = name length, so shorter names sort first */
+ slAddHead(&spList, sp);
+ count++;
+ if (count >= maxResponse) /* keep exactly maxResponse rows; the old post-add "count > maxResponse" test kept maxResponse + 2 */
+ break;
+ }
+sqlFreeResult(&sr);
+
+/* Check clinical features */
+struct datasets *da;
+struct dyString *dy = dyStringNew(100);
+dyStringPrintf(dy,
+ "select DISTINCT %s.name from %s join %s on %s.sample_id = %s.id ",
+ FE_TABLE, CD_TABLE, SA_TABLE, CD_TABLE, SA_TABLE);
+dyStringPrintf(dy,
+ "join %s on %s.id = %s.feature_id ",
+ FE_TABLE, FE_TABLE, CD_TABLE);
+dyStringPrintf(dy,
+ "where %s.name like \"%%%s%%\" and %s.dataset_id in (",
+ FE_TABLE, feature_name, SA_TABLE);
+for (da = daList; da; da = da->next) /* an empty daList would produce "in ()"; callers must pass a non-empty list */
+ {
+ dyStringPrintf(dy, "%d", da->id);
+ if (da->next)
+ dyStringPrintf(dy, ",");
+ }
+dyStringPrintf(dy, ") order by length(%s.name)", FE_TABLE);
+char *cquery = dyStringCannibalize(&dy);
+
+count = 0;
+sr = sqlGetResult(conn, cquery);
+while ((row = sqlNextRow(sr)) != NULL)
+ {
+ char *name = row[0];
+ AllocVar(sp);
+ sp->name = cloneString(name);
+ sp->type = cloneString("clinical");
+ sp->val = strlen(sp->name);
+ slAddHead(&spList, sp);
+ count++;
+ if (count >= maxResponse) /* was "count < maxResponse", which was true at once and dropped every clinical match after the first */
+ break;
+ }
+sqlFreeResult(&sr);
+
+slSort(&spList, searchResultsCmp); /* merge both result sets into one list ordered by ascending name length */
+return spList;
+}
+
void sendNoMatch(struct json *js) /* No-match response: adds nothing to js and simply returns. */
{
return;
}
+void sendAmbiguities(struct json *js, struct searchResults *spList) /* Add one string entry to js per element of spList: key = element name, value = element type. */
+{
+struct searchResults *sp;
+for (sp = spList; sp; sp = sp->next)
+ jsonAddString(js, sp->name, sp->type);
+}
void sendRawFeatureData(struct sqlConnection *conn, struct json *js,
struct datasets *da, struct samples *saList,
struct analysisFeatures *af)
@@ -599,8 +648,44 @@
hFreeConn(&conn);
}
+void getSuggestions() /* mode=getSuggestions handler: search for features whose name contains the cart's feature name within the cart's cohort and print the name -> type matches as JSON. */
+{
+int cohort_id = cartUsualInt(cart, bioIntCohortId, -1);
+
+/* feature source = gene,geneset,clinical... */
+//char *source = cartOptionalString(cart, bioIntSourceName);
+
+char *feature_name = cartOptionalString(cart, bioIntFeatureName);
+if (!feature_name)
+ errAbort("%s must be set for mode=getSuggestions\n", bioIntFeatureName); /* message now names this mode and the variable actually checked */
+
+struct sqlConnection *conn = hAllocConnProfile(localDb, db);
+
+struct datasets *daList = getDatasetsByCohortId(conn, cohort_id);
+if (!daList)
+ errAbort("No datasets matching cohort_id = %d", cohort_id);
+
+struct json *js = newJson();
+if (sameString(feature_name, "")) // blank was sent, return no match
+ sendNoMatch(js);
+else
+ {
+ struct searchResults *spList = searchForFeatures(conn, cohort_id, daList, feature_name);
+ int numMatched = slCount(spList);
+ if (numMatched == 0)
+ sendNoMatch(js);
+ else
+ sendAmbiguities(js, spList); /* every hit is reported, keyed by feature name */
+ }
+
+if (js)
+ hPrintf("%s\n", js->print(js));
+
+hFreeConn(&conn);
+}
+
void getClinicalFeatures()
{
int cohort_id = cartUsualInt(cart, bioIntCohortId, -1);
@@ -706,114 +791,249 @@
hFreeConn(&conn);
}
-struct rawData *getRawData(struct sqlConnection *conn, struct datasets *da,
- struct samples *samples, struct analysisFeatures *afList) /* Return the rows of da->data_table whose sample_id is in samples and whose feature_id is in afList. */
+void getMostCorrelated() /* mode=getMostCorrelated handler: print JSON listing up to takeTop features most correlated and up to takeTop most anti-correlated with the cart's feature, read from the cohort's correlation result table. */
{
-struct samples *sa;
-struct analysisFeatures *af;
+int takeTop = cartUsualInt(cart, bioIntTakeTop, 5);
+int cohort_id = cartUsualInt(cart, bioIntCohortId, -1);
+char *feature_name = cartOptionalString(cart, bioIntFeatureName);
-struct dyString *dy = dyStringNew(100);
-dyStringPrintf(dy, "select * from %s where sample_id in (",
- da->data_table);
-for (sa = samples; sa; sa = sa->next)
+if (!feature_name || cohort_id == -1)
+ errAbort("%s or %s must be set for mode=getMostCorrelated\n",
+ bioIntFeatureName, bioIntCohortId);
+
+struct sqlConnection *conn = hAllocConnProfile(localDb, db);
+
+struct analysisFeatures *af = getAnalysisFeaturesByName(conn, feature_name);
+if (!af)
{
- dyStringPrintf(dy, "%d", sa->sample_id);
- if (sa->next)
- dyStringPrintf(dy, ",");
+ hFreeConn(&conn);
+ errAbort("Could not find analysisFeature in db");
}
-dyStringPrintf(dy, ") and feature_id in (");
-for (af = afList; af; af = af->next)
+
+struct cohortCorr *cc = getCohortCorrByCohortId(conn, cohort_id);
+if (!cc)
{
- dyStringPrintf(dy, "%d", af->id);
- if (af->next)
- dyStringPrintf(dy, ",");
+ hFreeConn(&conn);
+ errAbort("No cohort correlation table with cohort_id = %d.\n", cohort_id);
}
-dyStringPrintf(dy, ");");
+
+struct dyString *dy = dyStringNew(100);
+dyStringPrintf(dy, "select a1.feature_name, a1.type, a2.feature_name, a2.type, %s.val from %s "
+ "join %s as a1 on %s.feature_id1 = a1.id ",
+ cc->result_table, cc->result_table, AF_TABLE, cc->result_table);
+dyStringPrintf(dy, "join %s as a2 on %s.feature_id2 = a2.id ",
+ AF_TABLE, cc->result_table);
+dyStringPrintf(dy, "where feature_id1 = %d or feature_id2 = %d order by %s.val DESC",
+ af->id, af->id, cc->result_table);
char *query = dyStringCannibalize(&dy);
-struct rawData *rd, *rdList = NULL;
+struct searchResults *sp, *spList = NULL;
struct sqlResult *sr = sqlGetResult(conn, query);
char **row = NULL;
while ((row = sqlNextRow(sr)) != NULL)
{
- rd = AllocA(struct rawData);
- rd->sample_id = atoi(row[0]);
- rd->feature_id = atoi(row[1]);
- rd->val = atof(row[2]);
- rd->conf = atof(row[3]);
- slAddHead(&rdList, rd);
+ char *name = NULL;
+ char *type = NULL;
+ char *name1 = row[0];
+ char *type1 = row[1];
+ char *name2 = row[2];
+ char *type2 = row[3];
+ double val = atof(row[4]);
+
+ if (sameString(name1, af->feature_name)) /* the queried feature is on side 1, so report the partner on side 2 */
+ {
+ name = name2;
+ type = type2;
+ }
+ else if (sameString(name2, af->feature_name)) /* the queried feature is on side 2, so report the partner on side 1 */
+ {
+ name = name1;
+ type = type1;
+ }
+ else // doesn't match either, should *never* get here
+ continue;
+
+ AllocVar(sp);
+ sp->name = cloneString(name);
+ sp->type = cloneString(type);
+ sp->val = val;
+ slAddHead(&spList, sp);
+ }
+slReverse(&spList); /* Undo slAddHead's reversal so the list follows the query's descending val order. NOTE(review): on this side of the diff sr is never passed to sqlFreeResult(). */
+
+int count;
+struct json *js = newJson();
+struct json *corrs = jsonAddContainer(js, "Top Correlated");
+for (sp = spList, count = 0; sp && (count < takeTop); sp = sp->next, count++)
+ {
+ if (sp->val < 0.0) /* list is descending, so the first negative value ends the correlated section */
+ break;
+ struct json *data = jsonAddContainer(corrs, sp->name);
+ jsonAddString(data, "type", sp->type);
+ jsonAddDouble(data, "val", sp->val);
+ }
+
+slReverse(&spList); /* now ascending val, so the most negative correlations come first */
+corrs = jsonAddContainer(js, "Top Anti-Correlated");
+for (sp = spList, count = 0; sp && (count < takeTop); sp = sp->next, count++)
+ {
+ if (sp->val > 0.0) /* list is ascending, so the first positive value ends the anti-correlated section */
+ break;
+ struct json *data = jsonAddContainer(corrs, sp->name);
+ jsonAddString(data, "type", sp->type);
+ jsonAddDouble(data, "val", sp->val);
}
-sqlFreeResult(&sr);
-return rdList;
+if (js)
+ hPrintf("%s", js->print(js));
+
+hFreeConn(&conn);
}
-void drawHeatmap() /* mode=drawHeatmap handler: build a heatmap image with heatmapGif() for the cart's dataset, samples and features, and print its filename as JSON. */
+void sendContainedFeatures(struct sqlConnection *conn, int cohort_id,
+ struct analysisFeatures *af, char *input_table, int takeTop) /* Look up the gene ids listed in GG_TABLE for af->id, keep the takeTop of them with the largest sum(abs(conf)) in input_table, and print the JSON that sendUniqueMatch() builds for their names and types. Returns silently when input_table is NULL. */
{
-struct sqlConnection *conn = hAllocConnProfile(localDb, db);
+if (!input_table)
+ return;
-int width = cartUsualInt(cart, hghWidth, DEFAULT_HEATMAP_WIDTH);
-int height = cartUsualInt(cart, hghHeight, DEFAULT_HEATMAP_HEIGHT);
+if (!sqlTableExists(conn, input_table))
+ {
+ hFreeConn(&conn);
+ errAbort("Table does not exist, %s.\n", input_table);
+ }
+
+uglyTime(NULL);
+struct dyString *dy = newDyString(100);
+dyStringPrintf(dy,
+ "select DISTINCT gene_id from %s where id = %d",
+ GG_TABLE, af->id);
+char *query = dyStringCannibalize(&dy);
-int dataset_id = cartUsualInt(cart, hghDatasetId, -1);
-struct datasets *da = getDatasetById(conn, dataset_id);
-if (!da)
- errAbort("No dataset matching id = %d\n", dataset_id);
-
-char *sampleIds = cartOptionalString(cart, hghSampleIds);
-if (!sampleIds)
- errAbort("%s is required\n", hghSampleIds);
-
-char *featureIds = cartOptionalString(cart, hghFeatureIds);
-if (!featureIds)
- errAbort("%s is required\n", hghFeatureIds);
-
-struct samples *samples = getSamplesByIds(conn, sampleIds);
-if (!samples)
- errAbort("No samples matching ids = %s\n", sampleIds);
-
-// To test LOTS of features
-// featureIds = getNumAnalysisFeatureIdsInDataset(conn, da, 1000);
-struct analysisFeatures *afList = getAnalysisFeaturesByIds(conn, featureIds);
-if (!afList)
- errAbort("No features matching ids = %s\n", featureIds);
-
-fprintf(stderr, "length of aflist = %d", slCount(afList));
-
-struct slName *saList = slNameListFromComma(sampleIds);
-struct slName *feList = slNameListFromComma(featureIds);
-struct mapSettings *settings = initMapSettings(saList, feList, width, height);
-if (!settings)
- errAbort("settings = NULL");
-
-struct rawData *rdList = getRawData(conn, da, samples, afList);
-if (!rdList)
- errAbort("No data matching input parameters.");
+struct slInt *si, *siList = sqlQuickNumList(conn, query); /* NOTE(review): if this list is empty, the query built below contains "in ()" -- confirm that cannot happen. */
+
+dy = newDyString(100);
+dyStringPrintf(dy,
+ "select feature_name, type, sum(abs(conf)) as s from %s "
+ "join %s on feature_id=id where feature_id in (",
+ AF_TABLE, input_table);
+for (si = siList; si; si = si->next)
+ {
+ dyStringPrintf(dy, "%d", si->val);
+ if (si->next)
+ dyStringPrintf(dy, ",");
+ }
+dyStringPrintf(dy,
+ ") group by feature_id order by s DESC limit %d;",
+ takeTop);
+query = dyStringCannibalize(&dy);
+
+struct slName *fList = NULL, *tList = NULL; /* parallel lists: feature names and their types, in row order */
+
+struct sqlResult *sr = sqlGetResult(conn, query); /* NOTE(review): sr is never passed to sqlFreeResult() in this function. */
+char **row = NULL;
+while ((row = sqlNextRow(sr)) != NULL)
+ {
+ char *name = row[0];
+ char *type = row[1];
+ slNameAddHead(&fList, name);
+ slNameAddHead(&tList, type);
+ }
+slReverse(&fList); /* slNameAddHead reversed the row order; restore it for both lists */
+slReverse(&tList);
+char *feature_names = slNameListToString(fList, ',');
+char *types = slNameListToString(tList, ',');
-char *filename = heatmapGif(conn, rdList, settings);
struct json *js = newJson();
-jsonAddString(js, "image", filename);
+sendUniqueMatch(conn, js, cohort_id, feature_names, types, NULL);
+
if (js)
- hPrintf("%s\n", js->print(js));
+ hPrintf("%s", js->print(js));
+}
+
+void expandFeature() /* mode=expandFeature handler: find the first analysis in the cart's cohort whose result table has the named feature, then send raw features when that analysis lists several input tables, or the top contained features otherwise. */
+{
+int takeTop = cartUsualInt(cart, bioIntTakeTop, 5);
+int cohort_id = cartUsualInt(cart, bioIntCohortId, -1);
+char *feature_name = cartOptionalString(cart, bioIntFeatureName);
+
+if (!feature_name)
+ errAbort("%s must be set for mode=expandFeature\n", bioIntFeatureName); /* message now names the mode string the dispatcher actually routes here */
+
+struct sqlConnection *conn = hAllocConnProfile(localDb, db);
+
+struct analysisFeatures *af = getAnalysisFeaturesByName(conn, feature_name);
+if (!af)
+ {
+ hFreeConn(&conn);
+ errAbort("Could not find analysisFeature in db");
+ }
+
+struct analyses *an, *anList = getAnalysesByCohortId(conn, cohort_id);
+if (!anList)
+ {
+ hFreeConn(&conn);
+ errAbort("No analyses with cohort_id = %d.\n", cohort_id);
+ }
+
+char tmpQ[512];
+for (an = anList; an; an = an->next) /* stop at the first analysis whose result table has a row for this feature */
+ {
+ safef(tmpQ, sizeof(tmpQ),
+ "select * from %s where feature_id = %d;",
+ an->result_table, af->id);
+ if (sqlExists(conn, tmpQ))
+ break;
+ }
+
+if (!an) /* loop ran off the end: no analysis contains the feature */
+ {
+ hFreeConn(&conn);
+ errAbort("No analysis feature with id=%d in any analysis table in cohort with id = %d",
+ af->id, cohort_id);
+ }
+
+struct slName *tables = slNameListFromComma(an->input_tables);
+
+if (slCount(tables) > 1)
+ sendRawFeatures(conn, cohort_id, af);
+else
+ sendContainedFeatures(conn, cohort_id, af, an->input_tables, takeTop); /* single table, so the comma list is the table name itself */
+
+hFreeConn(&conn);
}
void dispatchRoutines()
/* Look at command variables in cart and figure out which
* page to draw. Calls errAbort() when the mode variable is missing or not recognized. */
{
/* retrieve cart variables, handle various modes */
-char *mode = cartOptionalString(cart, hghMode);
+char *mode = cartOptionalString(cart, bioIntMode);
if (!mode)
- errAbort("%s is required.", hghMode);
+ errAbort("%s is required.", bioIntMode);
-if (sameString(mode, "drawHeatmap"))
- drawHeatmap();
+if (sameString(mode, "getAnalyses")) /* each mode string maps to the handler function of the same name */
+ getAnalyses();
+else if (sameString(mode, "getCohorts"))
+ getCohorts();
+else if (sameString(mode, "getFeatureData"))
+ getFeatureData();
+else if (sameString(mode, "getSuggestions"))
+ getSuggestions();
+else if (sameString(mode, "getClinicalData"))
+ getClinicalData();
+else if (sameString(mode, "getClinicalFeatures"))
+ getClinicalFeatures();
+else if (sameString(mode, "getMostCorrelated"))
+ getMostCorrelated();
+else if (sameString(mode, "expandFeature"))
+ expandFeature();
else
errAbort("Incorrect mode = %s", mode);
-cartRemovePrefix(cart, hghPrefix);
+cartRemovePrefix(cart, bioIntPrefix); /* presumably drops this CGI's prefixed variables from the cart after the request -- confirm against cart.h */
+
}
void hghDoUsualHttp()
/* Wrap html page dispatcher with code that writes out