src/hg/instinct/bioInt2/bioController.c 1.5
1.5 2009/03/23 18:19:29 jsanborn
updated
Index: src/hg/instinct/bioInt2/bioController.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/bioController.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -b -B -U 1000000 -r1.4 -r1.5
--- src/hg/instinct/bioInt2/bioController.c 22 Mar 2009 01:07:28 -0000 1.4
+++ src/hg/instinct/bioInt2/bioController.c 23 Mar 2009 18:19:29 -0000 1.5
@@ -1,353 +1,358 @@
/* bioController - controller for the bioIntegrator analysis pipeline. */
#include "common.h"
#include "linefile.h"
#include "hash.h"
#include "options.h"
#include "jksql.h"
#include "hPrint.h"
#include "hdb.h"
#include "dystring.h"
#include "bioIntDb.h"
#include "bioIntDriver.h"
#include "cprob.h"
#include "hgStatsLib.h"
#include "bioController.h"
void usage()
/* Explain usage and exit.  The text describes the single positional
 * argument that main() actually reads (a cohort id); no command-line
 * options are defined in the option table. */
{
errAbort(
    "bioController - controller for bioIntegrator pipeline\n"
    "usage:\n"
    "   bioController cohort_id\n"
    "   cohort_id = integer id of the cohort (cohorts table) to analyze\n"
    );
}
/* Database name that main() hands to bioController(). */
#define BIOINT_DB "bioInt"

/* Command-line option table handed to optionInit(); it holds only the
 * terminating entry, i.e. no options are defined. */
static struct optionSpec options[] =
    {
    {NULL, 0},
    };
void createAnalysisModulesTable(struct sqlConnection *biConn, char *tableName)
/* Issue a CREATE TABLE for the analysis-modules table (columns id, name,
 * type; primary key id) under the name tableName, over connection biConn. */
{
struct dyString *sql = newDyString(1024);

/* The whole statement is assembled with one formatted append. */
dyStringPrintf(sql,
    "CREATE TABLE %s (\n"
    "id int unsigned not null,\n"
    "name varchar(255) not null,\n"
    "type varchar(255) not null,\n"
    "PRIMARY KEY(id)\n"
    ")\n",
    tableName);

sqlUpdate(biConn, sql->string);
dyStringFree(&sql);
}
void createAnalysesTable(struct sqlConnection *biConn, char *tableName)
/* Issue a CREATE TABLE for the analyses table (columns id, cohort_id,
 * module_id, result_table, input_tables; primary key id) under the name
 * tableName, over connection biConn. */
{
struct dyString *sql = newDyString(1024);

/* The whole statement is assembled with one formatted append. */
dyStringPrintf(sql,
    "CREATE TABLE %s (\n"
    "id int unsigned not null,\n"
    "cohort_id int unsigned not null,\n"
    "module_id int unsigned not null,\n"
    "result_table varchar(255) not null,\n"
    "input_tables longblob not null,\n"
    "PRIMARY KEY(id)\n"
    ")\n",
    tableName);

sqlUpdate(biConn, sql->string);
dyStringFree(&sql);
}
void createAnalysisParamsTable(struct sqlConnection *biConn, char *tableName)
/* Issue a CREATE TABLE for the analysis-parameters table (columns
 * analysis_id, name, val; non-unique key on analysis_id) under the name
 * tableName, over connection biConn. */
{
struct dyString *sql = newDyString(1024);

/* The whole statement is assembled with one formatted append. */
dyStringPrintf(sql,
    "CREATE TABLE %s (\n"
    "analysis_id int unsigned not null,\n"
    "name varchar(255) not null,\n"
    "val varchar(255) not null,\n"
    "KEY(analysis_id)\n"
    ")\n",
    tableName);

sqlUpdate(biConn, sql->string);
dyStringFree(&sql);
}
char *getTableName(struct datasets *daList, char *module)
/* Compose a table name of the form "<module>_<data_table>_<data_table>...",
 * appending the data_table of every dataset in daList in list order.
 * Returns NULL when daList is empty; otherwise returns the string obtained
 * from dyStringCannibalize. */
{
if (daList == NULL)
    return NULL;

struct dyString *name = dyStringNew(10);
dyStringPrintf(name, "%s", module);

struct datasets *cur = daList;
while (cur != NULL)
    {
    dyStringPrintf(name, "_%s", cur->data_table);
    cur = cur->next;
    }

return dyStringCannibalize(&name);
}
boolean analysesExists(struct sqlConnection *biConn, struct analyses *an)
{
char query[256];
safef(query, sizeof(query),
"select * from analyses where id = %d "
"and cohort_id = %d "
"and module_id = %d "
"and result_table = \"%s\" "
"and input_tables = \"%s\" ",
an->id, an->cohort_id, an->module_id, an->result_table, an->input_tables);
return sqlExists(biConn, query);
}
void createAnalyses(struct sqlConnection *biConn, int cohort_id, int module_id,
char *result_table, char *input_tables)
{
int id = findIdInTable(biConn, "analyses", "id", "result_table", result_table);
struct analyses *an;
AllocVar(an);
an->id = id;
an->cohort_id = cohort_id;
an->module_id = module_id;
an->result_table = cloneString(result_table);
an->input_tables = cloneString(input_tables);
if (!analysesExists(biConn, an))
analysesSaveToDbEscaped(biConn, an, "analyses", 20);
analysesFree(&an);
}
struct datasets *datasetsInCohort(struct sqlConnection *biConn, int cohort_id)
/* Load the datasets joined to datasetCohort rows whose cohort_id matches,
 * returning whatever list datasetsLoadByQuery produces. */
{
char sql[256];

safef(sql, sizeof(sql),
      "select * from datasets join datasetCohort on datasets.id = datasetCohort.dataset_id "
      "where datasetCohort.cohort_id = %d;",
      cohort_id);

struct datasets *found = datasetsLoadByQuery(biConn, sql);
return found;
}
struct analysisModules *analysisModulesOfType(struct sqlConnection *biConn,
                                              char *type)
/* Load the rows of analysisModules whose type column equals the given type
 * string, returning whatever list analysisModulesLoadByQuery produces. */
{
char sql[128];

safef(sql, sizeof(sql),
      "select * from analysisModules where type = \"%s\" ",
      type);

struct analysisModules *matches = analysisModulesLoadByQuery(biConn, sql);
return matches;
}
void createGeneLevelAnalyses(struct sqlConnection *biConn, int cohort_id)
{
struct datasets *da, *daList = datasetsInCohort(biConn, cohort_id);
struct dyString *dy = dyStringNew(10);
for (da = daList; da; da = da->next)
{
dyStringPrintf(dy, "%s", da->data_table);
if (da->next)
dyStringPrintf(dy, ",");
}
char *input_tables = dyStringCannibalize(&dy);
/* Gene analysis modules */
struct analysisModules *am, *amList = analysisModulesOfType(biConn, "gene");
for (am = amList; am; am = am->next)
{
char *result_table = getTableName(daList, am->name);
createAnalyses(biConn, cohort_id, am->id, result_table, input_tables);
}
}
void createSetLevelAnalyses(struct sqlConnection *biConn, int cohort_id)
/* For every analysis module of type "set", register one analysis per
 * gene-level analysis already recorded for the cohort.  Each new analysis
 * uses the gene-level result table as its input table and is named
 * "<set module name>_<gene-level result table>".  Prints a message and
 * returns when the cohort has no gene-level analyses. */
{
+/* Get all gene-level analyses */
char query[256];
safef(query, sizeof(query),
"select * from analyses "
"join analysisModules on analyses.module_id = analysisModules.id "
"where analyses.cohort_id = %d and analysisModules.type = \"%s\" ",
cohort_id, "gene");
struct analyses *an, *anList = analysesLoadByQuery(biConn, query);
if (!anList)
{
fprintf(stdout, "No set level analysis to perform.\n");
return;
}
/* Set (geneset/pathway) analysis modules */
struct analysisModules *am, *amList = analysisModulesOfType(biConn, "set");
for (am = amList; am; am = am->next)
{ // loop through modules
for (an = anList; an; an = an->next)
{ // loop through gene-level results (one from each gene module)
/* NOTE(review): fixed 128-byte buffer; confirm that module name plus
 * gene-level result table name always fits. */
char result_table[128];
safef(result_table, sizeof(result_table),
"%s_%s", am->name, an->result_table);
createAnalyses(biConn, cohort_id, am->id, result_table, an->result_table);
}
}
/* NOTE(review): anList and amList are not freed in this function. */
}
void biAnalysisAddModule(struct sqlConnection *biConn,
struct biAnalysis *ba, int module_id)
/* Look up the name of the module with the given id in analysisModules and
 * set ba->pipeline and ba->analyze to the matching functions.  A module
 * name that is not recognized leaves both set to NULL.  Returns at once
 * when ba is NULL; calls errAbort when no module has that id. */
{
if (!ba)
return;
char query[128];
safef(query, sizeof(query),
"select name from analysisModules where id = %d",
module_id);
if (!sqlExists(biConn, query))
errAbort("No module with id = %d", module_id);
/* The same query is run again to fetch the name itself. */
char *module = sqlQuickString(biConn, query);
/* Dispatch on module name. */
if (sameString(module, "meta"))
+ {
+ ba->pipeline = geneLevelPipeline;
ba->analyze = metaGene;
+ }
+else if (sameString(module, "metaGeneset"))
+ {
+ ba->pipeline = genesetLevelPipeline;
+ ba->analyze = metaGeneset;
+ }
else
+ {
+ ba->pipeline = NULL;
ba->analyze = NULL;
+ }
}
struct biAnalysis *biAnalysisListForCohort(struct sqlConnection *biConn,
char *db, int cohort_id)
/* Build a list of biAnalysis records, one per row of the analyses table for
 * the given cohort.  Each record gets a copy of db, a copy of the row's
 * result_table as tableName, a new parameters hash, its input tables, and
 * the pipeline/analyze functions chosen by biAnalysisAddModule().  Calls
 * errAbort when the cohort has no analyses.  Returns the list head. */
{
struct biAnalysis *ba, *baList = NULL;
char query[128];
safef(query, sizeof(query),
"select * from analyses where cohort_id = %d",
cohort_id);
struct analyses *an, *anList = analysesLoadByQuery(biConn, query);
if (!anList)
errAbort("No analyses for cohort = %d", cohort_id);
-struct datasets *da, *daList = datasetsInCohort(biConn, cohort_id);
-
for (an = anList; an; an = an->next)
{
AllocVar(ba);
ba->db = cloneString(db);
ba->tableName = cloneString(an->result_table);
ba->parameters = hashNew(0);
-
- struct slName *slList = NULL;
- for (da = daList; da; da = da->next)
- slNameAddHead(&slList, da->data_table);
- ba->inputTables = slList;
/* Input tables now come from the row's own comma-separated input_tables. */
+ ba->inputTables = slNameListFromComma(an->input_tables);
biAnalysisAddModule(biConn, ba, an->module_id);
slAddHead(&baList, ba);
}
/* Records were added at the head, so the list is reversed before return. */
slReverse(&baList);
analysesFreeList(&anList);
-datasetsFreeList(&daList);
return baList;
}
boolean cohortExists(struct sqlConnection *biConn, int cohort_id)
/* Report, via sqlExists, whether the cohorts table has a row with the
 * given id. */
{
char sql[128];

safef(sql, sizeof(sql), "select * from cohorts where id = %d", cohort_id);

boolean found = sqlExists(biConn, sql);
return found;
}
boolean analysisModuleExists(struct sqlConnection *biConn, struct analysisModules *am)
/* Report, via sqlExists, whether analysisModules already has a row whose
 * id, name and type all match those of am. */
{
char sql[128];

safef(sql, sizeof(sql),
      "select * from analysisModules where id = %d "
      "and name = \"%s\" "
      "and type = \"%s\" ",
      am->id, am->name, am->type);

boolean found = sqlExists(biConn, sql);
return found;
}
void storeAnalysisModule(struct sqlConnection *biConn, char *name, char *type)
{
struct analysisModules *am;
AllocVar(am);
am->name = cloneString(name);
am->type = cloneString(type);
am->id = findIdInTable(biConn, "analysisModules", "id", "name", am->name);
if (!analysisModuleExists(biConn, am))
analysisModulesSaveToDb(biConn, am, "analysisModules", 10);
analysisModulesFree(&am);
}
void setupAnalysisModules(struct sqlConnection *biConn)
/* Register the known analysis modules through storeAnalysisModule(): module
 * "meta" with type "gene" and module "metaGeneset" with type "set". */
{
/* Meta-Gene module */
storeAnalysisModule(biConn, "meta", "gene");
/* Set up more modules below, similar to above */
-
+storeAnalysisModule(biConn, "metaGeneset", "set");
}
void prepareDatabase(struct sqlConnection *biConn)
{
if (!sqlTableExists(biConn, "analyses"))
createAnalysesTable(biConn, "analyses");
if (!sqlTableExists(biConn, "analysisParams"))
createAnalysisParamsTable(biConn, "analysisParams");
if (!sqlTableExists(biConn, "analysisModules"))
createAnalysisModulesTable(biConn, "analysisModules");
setupAnalysisModules(biConn);
}
void bioController(char *db, int cohort_id)
{
struct sqlConnection *biConn = hAllocConnProfile("localDb", db);
if (!cohortExists(biConn, cohort_id))
{
hFreeConn(&biConn);
errAbort("Cohort %d does not exist.", cohort_id);
}
prepareDatabase(biConn);
createGeneLevelAnalyses(biConn, cohort_id);
createSetLevelAnalyses(biConn, cohort_id);
struct biAnalysis *baList = biAnalysisListForCohort(biConn, db, cohort_id);
hFreeConn(&biConn);
runAnalysisPipeline(baList);
}
int main(int argc, char *argv[])
/* Process command line.  Expects exactly one positional argument, the
 * cohort id, which must be a whole decimal integer that fits in an int. */
{
optionInit(&argc, argv, options);
if (argc != 2)
    usage();

/* Parse strictly: atoi() would silently turn a non-numeric argument such
 * as "abc" into cohort 0 instead of reporting the mistake. */
char *end = NULL;
long cohort = strtol(argv[1], &end, 10);
if (end == argv[1] || *end != '\0' || cohort != (long)(int)cohort)
    errAbort("Invalid cohort_id '%s': expected an integer.", argv[1]);

bioController(BIOINT_DB, (int)cohort);
return 0;
}