src/hg/instinct/bioInt2/bioController.c 1.5

1.5 2009/03/23 18:19:29 jsanborn
updated
Index: src/hg/instinct/bioInt2/bioController.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/bioController.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -b -B -U 1000000 -r1.4 -r1.5
--- src/hg/instinct/bioInt2/bioController.c	22 Mar 2009 01:07:28 -0000	1.4
+++ src/hg/instinct/bioInt2/bioController.c	23 Mar 2009 18:19:29 -0000	1.5
@@ -1,353 +1,358 @@
 /* bioController - controller for the bioIntegrator pipeline. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "jksql.h"
 #include "hPrint.h"
 #include "hdb.h"
 #include "dystring.h"
 #include "bioIntDb.h"
 #include "bioIntDriver.h"
 #include "cprob.h"
 #include "hgStatsLib.h"
 #include "bioController.h"
 
 void usage()
 /* Explain usage and exit.  The program takes exactly one positional
  * argument, the numeric cohort id that main() passes to bioController(). */
 {
 errAbort(
 	 "bioController - controller for bioIntegrator pipeline\n"
 	 "usage:\n"
 	 "   bioController cohort_id\n"
 	 "where:\n"
 	 "   cohort_id = numeric id of a row in the cohorts table\n"
 	 );
 }
 
 /* Name of the database that main() passes to bioController(). */
 #define BIOINT_DB "bioInt"
 
 /* Command line option specifications handed to optionInit() in main().
  * Only the terminating {NULL, 0} entry is present, so no named options
  * are declared. */
 static struct optionSpec options[] = {
     {NULL, 0}
 };                            
 
 void createAnalysisModulesTable(struct sqlConnection *biConn, char *tableName)
 /* Run a CREATE TABLE statement for the analysis modules table under the
  * given name.  Columns: id, name, type; primary key on id. */
 {
 struct dyString *sql = newDyString(1024);
 
 /* Adjacent literals are concatenated by the compiler, so this is the same
  * statement text as appending it piece by piece. */
 dyStringPrintf(sql,
                "CREATE TABLE %s (\n"
                "id int unsigned not null,\n"
                "name varchar(255) not null,\n"
                "type varchar(255) not null,\n"
                "PRIMARY KEY(id)\n"
                ")\n",
                tableName);
 
 sqlUpdate(biConn, sql->string);
 dyStringFree(&sql);
 }
 
 void createAnalysesTable(struct sqlConnection *biConn, char *tableName)
 /* Run a CREATE TABLE statement for the analyses table under the given name.
  * Columns: id, cohort_id, module_id, result_table, input_tables;
  * primary key on id. */
 {
 struct dyString *sql = newDyString(1024);
 
 /* Adjacent literals are concatenated by the compiler, so this is the same
  * statement text as appending it piece by piece. */
 dyStringPrintf(sql,
                "CREATE TABLE %s (\n"
                "id int unsigned not null,\n"
                "cohort_id int unsigned not null,\n"
                "module_id int unsigned not null,\n"
                "result_table varchar(255) not null,\n"
                "input_tables longblob not null,\n"
                "PRIMARY KEY(id)\n"
                ")\n",
                tableName);
 
 sqlUpdate(biConn, sql->string);
 dyStringFree(&sql);
 }
 
 void createAnalysisParamsTable(struct sqlConnection *biConn, char *tableName)
 /* Run a CREATE TABLE statement for the analysis parameters table under the
  * given name.  Columns: analysis_id, name, val; non-unique key on
  * analysis_id. */
 {
 struct dyString *sql = newDyString(1024);
 
 /* Adjacent literals are concatenated by the compiler, so this is the same
  * statement text as appending it piece by piece. */
 dyStringPrintf(sql,
                "CREATE TABLE %s (\n"
                "analysis_id int unsigned not null,\n"
                "name varchar(255) not null,\n"
                "val varchar(255) not null,\n"
                "KEY(analysis_id)\n"
                ")\n",
                tableName);
 
 sqlUpdate(biConn, sql->string);
 dyStringFree(&sql);
 }
 
 char *getTableName(struct datasets *daList, char *module)
 /* Compose a name of the form module_table1_table2... from the module name
  * followed by the data_table of every dataset in daList, in list order.
  * Returns NULL when daList is empty; otherwise returns the string handed
  * back by dyStringCannibalize(). */
 {
 struct datasets *cur;
 struct dyString *name;
 
 if (daList == NULL)
     return NULL;
 
 name = dyStringNew(10);
 dyStringPrintf(name, "%s", module);
 for (cur = daList; cur != NULL; cur = cur->next)
     dyStringPrintf(name, "_%s", cur->data_table);
 
 return dyStringCannibalize(&name);
 }
 
 
 boolean analysesExists(struct sqlConnection *biConn, struct analyses *an)
 {
 char query[256];
 safef(query, sizeof(query),
       "select * from analyses where id = %d "
       "and cohort_id = %d "
       "and module_id = %d "
       "and result_table = \"%s\" "
       "and input_tables = \"%s\" ",
       an->id, an->cohort_id, an->module_id, an->result_table, an->input_tables);
 
 return sqlExists(biConn, query);
 }      
 
 void createAnalyses(struct sqlConnection *biConn, int cohort_id, int module_id, 
 		    char *result_table, char *input_tables)
 {
 int id = findIdInTable(biConn, "analyses", "id", "result_table", result_table);
 
 struct analyses *an;
 AllocVar(an);
 an->id = id;  
 an->cohort_id = cohort_id;
 an->module_id = module_id;
 an->result_table = cloneString(result_table);
 an->input_tables = cloneString(input_tables);
 
 if (!analysesExists(biConn, an))
     analysesSaveToDbEscaped(biConn, an, "analyses", 20);
 
 analysesFree(&an);
 }
 
 struct datasets *datasetsInCohort(struct sqlConnection *biConn, int cohort_id)
 /* Load the datasets joined to the given cohort through the datasetCohort
  * table and return whatever datasetsLoadByQuery() produces for that query. */
 {
 char sql[256];
 struct datasets *found;
 
 safef(sql, sizeof(sql),
       "select * from datasets "
       "join datasetCohort on datasets.id = datasetCohort.dataset_id "
       "where datasetCohort.cohort_id = %d;",
       cohort_id);
 
 found = datasetsLoadByQuery(biConn, sql);
 return found;
 }
 
 struct analysisModules *analysisModulesOfType(struct sqlConnection *biConn, 
                                               char *type)
 /* Load the rows of analysisModules whose type column equals the given
  * string and return whatever analysisModulesLoadByQuery() produces. */
 {
 char sql[128];
 struct analysisModules *matches;
 
 safef(sql, sizeof(sql),
       "select * from analysisModules where type = \"%s\" ",
       type);
 
 matches = analysisModulesLoadByQuery(biConn, sql);
 return matches;
 }
 
 
 void createGeneLevelAnalyses(struct sqlConnection *biConn, int cohort_id)
 /* Register one analysis per module of type "gene" for the cohort.  The
  * input_tables value is the comma-separated data_table list of the cohort's
  * datasets; the result table name comes from getTableName().  Does nothing
  * beyond printing a notice when the cohort has no datasets. */
 {
 struct datasets *da, *daList = datasetsInCohort(biConn, cohort_id);
 
 /* With no datasets getTableName() returns NULL, which must not be passed
  * on to createAnalyses() as a result table name. */
 if (!daList)
     {
     fprintf(stdout, "No datasets in cohort %d, no gene level analysis to perform.\n",
             cohort_id);
     return;
     }
 
 struct dyString *dy = dyStringNew(10);
 for (da = daList; da; da = da->next)
     {
     dyStringPrintf(dy, "%s", da->data_table);
     if (da->next)
         dyStringPrintf(dy, ",");
     }
 char *input_tables = dyStringCannibalize(&dy);
 
 /* Gene analysis modules */
 struct analysisModules *am, *amList = analysisModulesOfType(biConn, "gene");
 
 for (am = amList; am; am = am->next)
     {
     char *result_table = getTableName(daList, am->name);
     createAnalyses(biConn, cohort_id, am->id, result_table, input_tables);
     /* createAnalyses() clones the strings it stores, so ours can go. */
     freeMem(result_table);
     }
 
 /* Release everything this function loaded or built. */
 analysisModulesFreeList(&amList);
 datasetsFreeList(&daList);
 freeMem(input_tables);
 }
 
 void createSetLevelAnalyses(struct sqlConnection *biConn, int cohort_id)
 /* Register one analysis for every pairing of a "set" type module with a
  * gene-level analysis of the cohort.  The result table is named
  * <module name>_<gene-level result table>, and the gene-level result table
  * is recorded as the input table.  Prints a notice and returns when the
  * cohort has no gene-level analyses. */
 {
+/* Get all gene-level analyses */
 char query[256];
 safef(query, sizeof(query),
       "select * from analyses "
       "join analysisModules on analyses.module_id = analysisModules.id "
       "where analyses.cohort_id = %d and analysisModules.type = \"%s\" ",
       cohort_id, "gene");
 
 struct analyses *an, *anList = analysesLoadByQuery(biConn, query);
 
 if (!anList)
     {
     fprintf(stdout, "No set level analysis to perform.\n");
     return;
     }
 
 /* Set (geneset/pathway) analysis modules */
 struct analysisModules *am, *amList = analysisModulesOfType(biConn, "set");
 
 /* NOTE(review): anList and amList are not freed in this function. */
 for (am = amList; am; am = am->next)
     { // loop through modules
     for (an = anList; an; an = an->next)
 	{ // loop through gene-level results (one from each gene module)
 	/* NOTE(review): fixed 128-byte buffer; gene-level result table names
 	 * are concatenated dataset table names -- confirm this is enough. */
 	char result_table[128];
 	safef(result_table, sizeof(result_table),
 	      "%s_%s", am->name, an->result_table);
 	createAnalyses(biConn, cohort_id, am->id, result_table, an->result_table);
 	}
     }
 }
 
 
 void biAnalysisAddModule(struct sqlConnection *biConn, 
 			 struct biAnalysis *ba, int module_id)
 /* Look up the name of the module with the given id in analysisModules and
  * set ba->pipeline and ba->analyze to the functions for that module.
  * Unrecognized module names leave both pointers NULL.  Does nothing when
  * ba is NULL; aborts when no module has that id. */
 {
 if (!ba)
     return;
 
 char query[128];
 safef(query, sizeof(query), 
       "select name from analysisModules where id = %d", 
       module_id);
 
 if (!sqlExists(biConn, query))
     errAbort("No module with id = %d", module_id);
 
 /* NOTE(review): the string returned by sqlQuickString() is not freed
  * here -- confirm whether the caller is expected to release it. */
 char *module = sqlQuickString(biConn, query);
 
 /* Dispatch on module name; names match those stored by
  * setupAnalysisModules(). */
 if (sameString(module, "meta"))
+    {
+    ba->pipeline = geneLevelPipeline;
     ba->analyze = metaGene;
+    }
+else if (sameString(module, "metaGeneset"))
+    {
+    ba->pipeline = genesetLevelPipeline;
+    ba->analyze = metaGeneset;
+    }
 else
+    {
+    ba->pipeline = NULL;
     ba->analyze = NULL;
+    }
 }
 
 struct biAnalysis *biAnalysisListForCohort(struct sqlConnection *biConn, 
 					   char *db, int cohort_id)
 /* Build a list with one biAnalysis per row of the analyses table for the
  * given cohort, in the order the rows were loaded.  Each entry gets its own
  * copy of db and of the row's result_table, an empty parameters hash, and
  * the module functions assigned by biAnalysisAddModule().  Aborts when the
  * cohort has no analyses. */
 {
 struct biAnalysis *ba, *baList = NULL;
 
 char query[128];
 safef(query, sizeof(query), 
       "select * from analyses where cohort_id = %d",
       cohort_id);
 
 struct analyses *an, *anList = analysesLoadByQuery(biConn, query);
 
 if (!anList)
     errAbort("No analyses for cohort = %d", cohort_id);
 
-struct datasets *da, *daList = datasetsInCohort(biConn, cohort_id);
-
 for (an = anList; an; an = an->next)
     {
     AllocVar(ba);
     ba->db = cloneString(db);
     ba->tableName  = cloneString(an->result_table);
     ba->parameters = hashNew(0);
-
-    struct slName *slList = NULL;
-    for (da = daList; da; da = da->next)
-	slNameAddHead(&slList, da->data_table);
-    ba->inputTables = slList;
+    ba->inputTables = slNameListFromComma(an->input_tables);
     
     biAnalysisAddModule(biConn, ba, an->module_id);
 
     slAddHead(&baList, ba);
     }
 /* Entries were pushed on the head, so reverse to restore row order. */
 slReverse(&baList);
 
 analysesFreeList(&anList);
-datasetsFreeList(&daList);
 
 return baList;
 } 
 
 boolean cohortExists(struct sqlConnection *biConn, int cohort_id)
 /* Return the result of sqlExists() for a query selecting the row of the
  * cohorts table with the given id. */
 {
 char sql[128];
 boolean found;
 
 safef(sql, sizeof(sql), "select * from cohorts where id = %d", cohort_id);
 found = sqlExists(biConn, sql);
 
 return found;
 }
 
 boolean analysisModuleExists(struct sqlConnection *biConn, struct analysisModules *am)
 /* Return the result of sqlExists() for a query matching a row of
  * analysisModules on the id, name and type of am. */
 {
 char sql[128];
 boolean found;
 
 safef(sql, sizeof(sql),
       "select * from analysisModules where id = %d "
       "and name = \"%s\" and type = \"%s\" ",
       am->id, am->name, am->type);
 found = sqlExists(biConn, sql);
 
 return found;
 }
 
 void storeAnalysisModule(struct sqlConnection *biConn, char *name, char *type)
 {
 struct analysisModules *am;
 AllocVar(am);
 am->name = cloneString(name);
 am->type = cloneString(type);
 am->id = findIdInTable(biConn, "analysisModules", "id", "name", am->name);
 
 if (!analysisModuleExists(biConn, am))
     analysisModulesSaveToDb(biConn, am, "analysisModules", 10);
 
 analysisModulesFree(&am);
 }
 
 void setupAnalysisModules(struct sqlConnection *biConn)
 /* Store the known analysis modules, each as a (name, type) pair.  The names
  * are the ones biAnalysisAddModule() dispatches on. */
 {
 /* Meta-Gene module */
 storeAnalysisModule(biConn, "meta", "gene");
 
 /* Set up more modules below, similar to above */
-
+storeAnalysisModule(biConn, "metaGeneset", "set");
 }
 
 void prepareDatabase(struct sqlConnection *biConn)
 {
 if (!sqlTableExists(biConn, "analyses"))
     createAnalysesTable(biConn, "analyses");
 
 if (!sqlTableExists(biConn, "analysisParams"))
     createAnalysisParamsTable(biConn, "analysisParams");
 
 if (!sqlTableExists(biConn, "analysisModules"))
     createAnalysisModulesTable(biConn, "analysisModules");
 
 setupAnalysisModules(biConn);
 }
 
 void bioController(char *db, int cohort_id)
 {
 struct sqlConnection *biConn = hAllocConnProfile("localDb", db);
 
 if (!cohortExists(biConn, cohort_id))
     {
     hFreeConn(&biConn);
     errAbort("Cohort %d does not exist.", cohort_id);
     }
 
 prepareDatabase(biConn);
 
 createGeneLevelAnalyses(biConn, cohort_id);
 
 createSetLevelAnalyses(biConn, cohort_id);
 
 struct biAnalysis *baList = biAnalysisListForCohort(biConn, db, cohort_id);
 hFreeConn(&biConn);
     
 runAnalysisPipeline(baList);
 }
 
 
 int main(int argc, char *argv[])
 /* Process command line.  Expects exactly one positional argument, the
  * cohort id, which must be a non-negative decimal integer. */
 {
 optionInit(&argc, argv, options);
 if (argc != 2)
     usage();
 
 /* atoi() turns non-numeric input into 0 without complaint.  Parse strictly
  * so a mistyped argument is reported instead of being run as cohort 0. */
 char *end = NULL;
 long cohort_id = strtol(argv[1], &end, 10);
 if (end == argv[1] || *end != '\0' || cohort_id < 0
     || (long)(int)cohort_id != cohort_id)
     errAbort("Invalid cohort id '%s': expected a non-negative integer.", argv[1]);
 
 bioController(BIOINT_DB, (int)cohort_id);
 return 0;
 }