src/hg/instinct/bioInt2/bioController.c 1.4
1.4 2009/03/22 01:07:28 jsanborn
updated
Index: src/hg/instinct/bioInt2/bioController.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/bioController.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -b -B -U 4 -r1.3 -r1.4
--- src/hg/instinct/bioInt2/bioController.c 21 Mar 2009 21:31:54 -0000 1.3
+++ src/hg/instinct/bioInt2/bioController.c 22 Mar 2009 01:07:28 -0000 1.4
@@ -29,16 +29,316 @@
static struct optionSpec options[] = {
{NULL, 0}
};
-void bioController(char *db, char *datasets)
+void createAnalysisModulesTable(struct sqlConnection *biConn, char *tableName)
+/* Issue a CREATE TABLE statement on biConn for a table called tableName
+ * with columns id, name and type, using id as the primary key. */
{
-/* datasets is a comma-separated string, each a different dataset table */
-struct slName *slDatasets = slNameListFromComma(datasets);
+struct dyString *dy = newDyString(1024);
+dyStringPrintf(dy, "CREATE TABLE %s (\n", tableName);
+dyStringPrintf(dy, "id int unsigned not null,\n");
+dyStringPrintf(dy, "name varchar(255) not null,\n");
+dyStringPrintf(dy, "type varchar(255) not null,\n");
+dyStringPrintf(dy, "PRIMARY KEY(id)\n");
+dyStringPrintf(dy, ")\n");
+sqlUpdate(biConn,dy->string);
+dyStringFree(&dy);
+}
+
+void createAnalysesTable(struct sqlConnection *biConn, char *tableName)
+/* Create the analyses table under the name tableName.  Columns are id,
+ * cohort_id, module_id, result_table and input_tables; id is the
+ * primary key. */
+{
+struct dyString *ddl = dyStringNew(1024);
+
+dyStringPrintf(ddl,
+    "CREATE TABLE %s (\n"
+    "id int unsigned not null,\n"
+    "cohort_id int unsigned not null,\n"
+    "module_id int unsigned not null,\n"
+    "result_table varchar(255) not null,\n"
+    "input_tables longblob not null,\n"
+    "PRIMARY KEY(id)\n"
+    ")\n",
+    tableName);
+sqlUpdate(biConn, ddl->string);
+dyStringFree(&ddl);
+}
+
+void createAnalysisParamsTable(struct sqlConnection *biConn, char *tableName)
+/* Create the analysis parameter table under the name tableName.  Each
+ * row holds analysis_id, name and val; analysis_id is a (non-unique)
+ * key. */
+{
+struct dyString *ddl = dyStringNew(1024);
+
+dyStringPrintf(ddl,
+    "CREATE TABLE %s (\n"
+    "analysis_id int unsigned not null,\n"
+    "name varchar(255) not null,\n"
+    "val varchar(255) not null,\n"
+    "KEY(analysis_id)\n"
+    ")\n",
+    tableName);
+sqlUpdate(biConn, ddl->string);
+dyStringFree(&ddl);
+}
+
+char *getTableName(struct datasets *daList, char *module)
+/* Build a table name of the form module_table1_table2..., appending the
+ * data_table of every dataset in daList to module.  Returns NULL when
+ * daList is empty, otherwise the string handed over by
+ * dyStringCannibalize(). */
+{
+struct dyString *name = NULL;
+struct datasets *cur = daList;
+
+if (daList == NULL)
+    return NULL;
+
+name = dyStringNew(10);
+dyStringPrintf(name, "%s", module);
+while (cur != NULL)
+    {
+    dyStringPrintf(name, "_%s", cur->data_table);
+    cur = cur->next;
+    }
+
+return dyStringCannibalize(&name);
+}
+
+
+boolean analysesExists(struct sqlConnection *biConn, struct analyses *an)
+/* Return TRUE if the analyses table already holds a row matching every
+ * field of an (id, cohort_id, module_id, result_table, input_tables). */
+{
+/* The query embeds result_table and input_tables, whose length is not
+ * bounded (input_tables is a comma-joined list of table names), so it is
+ * built in a growable dyString rather than a fixed buffer that safef()
+ * would abort on when it overflows. */
+struct dyString *query = dyStringNew(256);
+dyStringPrintf(query,
+    "select * from analyses where id = %d "
+    "and cohort_id = %d "
+    "and module_id = %d "
+    "and result_table = \"%s\" "
+    "and input_tables = \"%s\" ",
+    an->id, an->cohort_id, an->module_id, an->result_table, an->input_tables);
+
+boolean exists = sqlExists(biConn, query->string);
+dyStringFree(&query);
+return exists;
+}
+
+void createAnalyses(struct sqlConnection *biConn, int cohort_id, int module_id,
+                    char *result_table, char *input_tables)
+/* Record one analysis in the analyses table unless an identical row is
+ * already there.  The row id comes from findIdInTable(), looked up by
+ * result_table. */
+{
+struct analyses *record;
+
+AllocVar(record);
+record->id = findIdInTable(biConn, "analyses", "id", "result_table", result_table);
+record->cohort_id = cohort_id;
+record->module_id = module_id;
+record->result_table = cloneString(result_table);
+record->input_tables = cloneString(input_tables);
+
+boolean alreadyStored = analysesExists(biConn, record);
+if (!alreadyStored)
+    analysesSaveToDbEscaped(biConn, record, "analyses", 20);
+
+analysesFree(&record);
+}
+
+struct datasets *datasetsInCohort(struct sqlConnection *biConn, int cohort_id)
+/* Load the datasets rows linked to cohort_id through the datasetCohort
+ * table and return what datasetsLoadByQuery() produces for them. */
+{
+char sql[256];
+struct datasets *found;
+
+safef(sql, sizeof(sql),
+      "select * from datasets join datasetCohort on datasets.id = datasetCohort.dataset_id "
+      "where datasetCohort.cohort_id = %d;",
+      cohort_id);
+found = datasetsLoadByQuery(biConn, sql);
+return found;
+}
+
+struct analysisModules *analysisModulesOfType(struct sqlConnection *biConn,
+                                              char *type)
+/* Return the analysisModules rows whose type column equals type, as
+ * loaded by analysisModulesLoadByQuery(). */
+{
+char sql[128];
+struct analysisModules *matches;
+
+safef(sql, sizeof(sql),
+      "select * from analysisModules where type = \"%s\" ",
+      type);
+matches = analysisModulesLoadByQuery(biConn, sql);
+return matches;
+}
+
+
+void createGeneLevelAnalyses(struct sqlConnection *biConn, int cohort_id)
+/* Register one analysis per "gene" module for cohort_id.  All datasets of
+ * the cohort serve as input: input_tables is their data_table names joined
+ * by commas, and the result table name comes from getTableName(). */
+{
+struct datasets *da, *daList = datasetsInCohort(biConn, cohort_id);
+
+if (!daList)
+    {
+    /* Without datasets getTableName() returns NULL, and a NULL result
+     * table must not be handed on to createAnalyses(). */
+    fprintf(stdout, "No gene level analysis to perform.\n");
+    return;
+    }
+
+struct dyString *dy = dyStringNew(10);
+for (da = daList; da; da = da->next)
+    {
+    dyStringPrintf(dy, "%s", da->data_table);
+    if (da->next)
+        dyStringPrintf(dy, ",");
+    }
+char *input_tables = dyStringCannibalize(&dy);
+
+/* Gene analysis modules */
+struct analysisModules *am, *amList = analysisModulesOfType(biConn, "gene");
+
+for (am = amList; am; am = am->next)
+    {
+    char *result_table = getTableName(daList, am->name);
+    createAnalyses(biConn, cohort_id, am->id, result_table, input_tables);
+    /* createAnalyses() clones the strings it stores, so this copy is ours. */
+    freeMem(result_table);
+    }
+
+/* Release everything allocated in this function. */
+analysisModulesFreeList(&amList);
+freeMem(input_tables);
+datasetsFreeList(&daList);
+}
+
+void createSetLevelAnalyses(struct sqlConnection *biConn, int cohort_id)
+/* Register set-level analyses for cohort_id: one per combination of a
+ * "set" module and an existing gene-level analysis of the cohort.  The
+ * gene-level result table is the input, and the new result table is named
+ * <set module name>_<gene-level result table>.  Prints a note and returns
+ * when the cohort has no gene-level analyses. */
+{
+char query[256];
+safef(query, sizeof(query),
+      "select * from analyses "
+      "join analysisModules on analyses.module_id = analysisModules.id "
+      "where analyses.cohort_id = %d and analysisModules.type = \"%s\" ",
+      cohort_id, "gene");
+
+struct analyses *an, *anList = analysesLoadByQuery(biConn, query);
+
+if (!anList)
+    {
+    fprintf(stdout, "No set level analysis to perform.\n");
+    return;
+    }
+
+/* Set (geneset/pathway) analysis modules */
+struct analysisModules *am, *amList = analysisModulesOfType(biConn, "set");
+
+for (am = amList; am; am = am->next)
+    { // loop through modules
+    for (an = anList; an; an = an->next)
+        { // loop through gene-level results (one from each gene module)
+        char result_table[128];
+        safef(result_table, sizeof(result_table),
+              "%s_%s", am->name, an->result_table);
+        createAnalyses(biConn, cohort_id, am->id, result_table, an->result_table);
+        }
+    }
+
+/* Both lists were loaded here and are not referenced after the loops. */
+analysisModulesFreeList(&amList);
+analysesFreeList(&anList);
+}
+
+
+void biAnalysisAddModule(struct sqlConnection *biConn,
+                         struct biAnalysis *ba, int module_id)
+/* Set ba->analyze according to the name of the module with id module_id:
+ * metaGene for "meta", NULL for any other name.  Does nothing when ba is
+ * NULL; aborts when no module has that id. */
+{
+if (!ba)
+    return;
+
+char query[128];
+safef(query, sizeof(query),
+      "select name from analysisModules where id = %d",
+      module_id);
+
+/* sqlQuickString() yields NULL when the query has no result, so a single
+ * round trip covers both the existence check and the lookup. */
+char *module = sqlQuickString(biConn, query);
+if (!module)
+    errAbort("No module with id = %d", module_id);
+
+if (sameString(module, "meta"))
+    ba->analyze = metaGene;
+else
+    ba->analyze = NULL;
+
+/* The string from sqlQuickString() belongs to the caller. */
+freeMem(module);
+}
+
+struct biAnalysis *biAnalysisListForCohort(struct sqlConnection *biConn,
+                                           char *db, int cohort_id)
+/* Build one biAnalysis per analyses row of cohort_id, in query order.
+ * Each entry gets db, the row's result table, an empty parameter hash,
+ * the data tables of all datasets in the cohort and the analyze function
+ * chosen by biAnalysisAddModule().  Aborts when the cohort has no
+ * analyses. */
+{
+char sql[128];
+struct biAnalysis *result = NULL;
+struct analyses *row, *rows;
+struct datasets *ds, *dsList;
+
+safef(sql, sizeof(sql),
+      "select * from analyses where cohort_id = %d",
+      cohort_id);
+rows = analysesLoadByQuery(biConn, sql);
+if (rows == NULL)
+    errAbort("No analyses for cohort = %d", cohort_id);
+
+dsList = datasetsInCohort(biConn, cohort_id);
+
+for (row = rows; row != NULL; row = row->next)
+    {
+    struct biAnalysis *entry;
+    struct slName *tables = NULL;
+
+    AllocVar(entry);
+    entry->db = cloneString(db);
+    entry->tableName = cloneString(row->result_table);
+    entry->parameters = hashNew(0);
+
+    /* Names are added at the head of the list, one per dataset. */
+    for (ds = dsList; ds != NULL; ds = ds->next)
+        slNameAddHead(&tables, ds->data_table);
+    entry->inputTables = tables;
+
+    biAnalysisAddModule(biConn, entry, row->module_id);
+
+    slAddHead(&result, entry);
+    }
+slReverse(&result);
+
+analysesFreeList(&rows);
+datasetsFreeList(&dsList);
+
+return result;
+}
+
+boolean cohortExists(struct sqlConnection *biConn, int cohort_id)
+/* Return TRUE if the cohorts table has a row with id cohort_id. */
+{
+char sql[128];
+boolean found;
+
+safef(sql, sizeof(sql), "select * from cohorts where id = %d", cohort_id);
+found = sqlExists(biConn, sql);
+return found;
+}
+
+boolean analysisModuleExists(struct sqlConnection *biConn, struct analysisModules *am)
+/* Return TRUE if analysisModules already holds a row with the id, name
+ * and type of am. */
+{
+/* name and type are varchar(255) columns, so together they can exceed a
+ * small fixed buffer, which safef() would abort on; build the query in a
+ * growable dyString instead. */
+struct dyString *query = dyStringNew(128);
+dyStringPrintf(query,
+    "select * from analysisModules where id = %d "
+    "and name = \"%s\" "
+    "and type = \"%s\" ",
+    am->id, am->name, am->type);
+
+boolean exists = sqlExists(biConn, query->string);
+dyStringFree(&query);
+return exists;
+}
+
+void storeAnalysisModule(struct sqlConnection *biConn, char *name, char *type)
+/* Save a module with the given name and type to analysisModules unless
+ * an identical row exists.  The id comes from findIdInTable(), looked
+ * up by name. */
+{
+struct analysisModules *module;
+
+AllocVar(module);
+module->name = cloneString(name);
+module->type = cloneString(type);
+module->id = findIdInTable(biConn, "analysisModules", "id", "name", module->name);
+
+if (analysisModuleExists(biConn, module))
+    {
+    analysisModulesFree(&module);
+    return;
+    }
+
+analysisModulesSaveToDb(biConn, module, "analysisModules", 10);
+analysisModulesFree(&module);
+}
+
+void setupAnalysisModules(struct sqlConnection *biConn)
+/* Pass every module listed below to storeAnalysisModule(). */
+{
+/* {name, type} of each module; set up more modules by adding rows. */
+static char *modules[][2] = {
+    {"meta", "gene"},    /* Meta-Gene module */
+};
+int count = sizeof(modules) / sizeof(modules[0]);
+int i;
+
+for (i = 0; i < count; i++)
+    storeAnalysisModule(biConn, modules[i][0], modules[i][1]);
+}
+
+void prepareDatabase(struct sqlConnection *biConn)
+/* Create the analyses, analysisParams and analysisModules tables when
+ * they are missing, then store the analysis modules. */
+{
+char *analysesTable = "analyses";
+char *paramsTable = "analysisParams";
+char *modulesTable = "analysisModules";
+
+if (!sqlTableExists(biConn, analysesTable))
+    createAnalysesTable(biConn, analysesTable);
+
+if (!sqlTableExists(biConn, paramsTable))
+    createAnalysisParamsTable(biConn, paramsTable);
+
+if (!sqlTableExists(biConn, modulesTable))
+    createAnalysisModulesTable(biConn, modulesTable);
+
+setupAnalysisModules(biConn);
+}
+
+void bioController(char *db, int cohort_id)
+/* Set up and run the analyses of cohort_id in database db: check that
+ * the cohort exists, prepare the analysis tables, register gene-level and
+ * set-level analyses, build the biAnalysis list and pass it to
+ * runAnalysisPipeline(). */
+{
+struct sqlConnection *biConn = hAllocConnProfile("localDb", db);
+
+if (!cohortExists(biConn, cohort_id))
+ {
+ /* Release the connection before aborting. */
+ hFreeConn(&biConn);
+ errAbort("Cohort %d does not exist.", cohort_id);
+ }
+
+prepareDatabase(biConn);
+
+createGeneLevelAnalyses(biConn, cohort_id);
+
+createSetLevelAnalyses(biConn, cohort_id);
-struct biAnalysis *baList = registerGeneLevelAnalyses(db, slDatasets);
+struct biAnalysis *baList = biAnalysisListForCohort(biConn, db, cohort_id);
+/* The connection is released here, before the pipeline is run. */
+hFreeConn(&biConn);
-runAnalysisPipeline(db, datasets, baList);
+runAnalysisPipeline(baList);
}
int main(int argc, char *argv[])
@@ -47,7 +347,7 @@
optionInit(&argc, argv, options);
if (argc != 2)
usage();
-bioController(BIOINT_DB, argv[1]);
+bioController(BIOINT_DB, atoi(argv[1]));
return 0;
}