src/hg/instinct/bioInt2/bioController.c 1.12

1.12 2009/07/23 19:49:33 jsanborn
shortened table name prefixes, oops forgot one
Index: src/hg/instinct/bioInt2/bioController.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/bioController.c,v
retrieving revision 1.11
retrieving revision 1.12
diff -b -B -U 1000000 -r1.11 -r1.12
--- src/hg/instinct/bioInt2/bioController.c	23 Jul 2009 19:48:39 -0000	1.11
+++ src/hg/instinct/bioInt2/bioController.c	23 Jul 2009 19:49:33 -0000	1.12
@@ -1,491 +1,491 @@
 /* bioController - controller for the bioIntegrator pipeline. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "jksql.h"
 #include "hPrint.h"
 #include "hdb.h"
 #include "dystring.h"
 #include "bioIntDb.h"
 #include "bioIntDriver.h"
 #include "cprob.h"
 #include "hgStatsLib.h"
 #include "bioController.h"
 
 void usage()
 /* Report the command-line synopsis through errAbort and exit. */
 {
 static char *synopsis =
     "bioController - controller for bioIntegrator pipeline\n"
     "usage:\n"
     "   bioController cohort_id\n"
     "   -cohort_id = number of cohort in cohorts table";
 /* The text contains no conversion specifiers, so routing it through "%s"
  * emits exactly the same message. */
 errAbort("%s", synopsis);
 }
 
 /* Database name that main() passes to bioController(). */
 #define BIOINT_DB "bioInt"
 
 /* Option table handed to optionInit(); it holds only the terminating entry. */
 static struct optionSpec options[] = {
     {NULL, 0},
 };
 
 
 char *getTableName(struct datasets *daList, char *module)
 /* Build "<module>_<data_table>_<data_table>..." with one suffix per dataset
  * in daList.  Returns NULL when daList is empty, otherwise the string
  * obtained from dyStringCannibalize. */
 {
 struct dyString *name;
 struct datasets *cur;
 if (daList == NULL)
     return NULL;
 name = dyStringNew(10);
 dyStringPrintf(name, "%s", module);
 cur = daList;
 while (cur != NULL)
     {
     dyStringPrintf(name, "_%s", cur->data_table);
     cur = cur->next;
     }
 return dyStringCannibalize(&name);
 }
 
 boolean analysesExists(struct sqlConnection *biConn, struct analyses *an)
 {
 char query[256];
 safef(query, sizeof(query),
       "select * from %s where id = %d "
       "and cohort_id = %d "
       "and module_id = %d "
       "and result_table = \"%s\" "
       "and input_tables = \"%s\" ",
       AN_TABLE, an->id, an->cohort_id, an->module_id, 
       an->result_table, an->input_tables);
 
 return sqlExists(biConn, query);
 }      
 
 boolean analysesWithResultTableExists(struct sqlConnection *biConn, char *result_table)
 {
 char query[256];
 safef(query, sizeof(query),
       "select * from %s where result_table = \"%s\" ",
       AN_TABLE, result_table);
 
 return sqlExists(biConn, query);
 }      
 
 struct hash *analysisParamsHash(struct sqlConnection *biConn, struct analyses *an)
 {
 struct hash *hash = hashNew(0);
 char query[256];
 safef(query, sizeof(query), 
       "select * from %s where analysis_id = %d",
       AP_TABLE, an->id);
 struct analysisParams *ap, *apList = analysisParamsLoadByQuery(biConn, query);
 
 for (ap = apList; ap; ap = ap->next)
     hashAdd(hash, ap->name, ap->val);
 
 return hash;
 }
 
 void storeAnalysisParams(struct sqlConnection *biConn, 
 			 struct analyses *an, struct hash *params)
 {
 struct hashEl *el;
 struct hashCookie cookie = hashFirst(params);
 while ((el = hashNext(&cookie)) != NULL)
     {
     char *name = el->name;
     char *val = el->val;  
 
     struct analysisParams *ap;
     AllocVar(ap);
     ap->analysis_id = an->id;
     ap->name = cloneString(name);
     ap->val = cloneString(val);
 
     analysisParamsSaveToDbEscaped(biConn, ap, AP_TABLE, 50);
 
     analysisParamsFree(&ap);
     }
 }
 
 
 boolean matchingParams(struct sqlConnection *biConn, int analysis_id, struct hash *params)
 {
 char query[256];
 safef(query, sizeof(query), 
       "select * from %s where analysis_id = %d",
       AP_TABLE, analysis_id);
 
 struct analysisParams *ap, *apList = analysisParamsLoadByQuery(biConn, query);
 
 if (!apList) // no analysis params for this id 
     {
     if (hashNumEntries(params) == 0)  
 	return TRUE;     // none here either, a match
     else
 	return FALSE;    // params exist in hash, no match
     }
 
 /* apList not NULL, run through list check that every analysisParam matches 
  * key-value to element in hash */
 struct hashEl *el;
 char *name, *val;
 boolean matching = TRUE;
 for (ap = apList; ap; ap = ap->next)
     {
     el = hashLookup(params, ap->name);
     if (!el)
 	{
 	matching = FALSE;
 	continue;
 	}
 
     val = el->val;
     if (!sameString(val, ap->val))
 	matching = FALSE;
     }
 
 /* need to check reverse direction */
 struct hashCookie cookie = hashFirst(params);
 while ((el = hashNext(&cookie)) != NULL)
     {
     name = el->name;
     val = el->val;
 
     boolean foundMatch = FALSE;
     for (ap = apList; ap; ap = ap->next)
 	if (sameString(ap->name, name) && sameString(ap->val, val))
 	    foundMatch = TRUE;
 
     if (!foundMatch)
 	matching = FALSE;
     }
 
 return matching;
 }
 
 char *createAnalysesResultTableName(struct sqlConnection *biConn, int cohort_id, 
 				    struct analysisModules *am, char *input_tables, 
 				    struct hash *params)
 /* Choose the result table name for an analysis of module am over the
  * comma-separated input_tables.  Returns NULL when input_tables is NULL,
  * otherwise a cloneString() copy of the chosen name.  A candidate name is
  * accepted when no row of AN_TABLE uses it as result_table, or when the
  * analysis found under that name has parameters matching params.  Calls
  * errAbort after 100 rejected candidates.  cohort_id is not used here.
  * NOTE(review): slList and prefix are not freed in this function. */
 {
 if (!input_tables)
     return NULL;
 
 struct slName *sl, *slList = slNameListFromComma(input_tables);
 struct dyString *dy = dyStringNew(10);
 
 /* Create prefix, e.g. "m_table1_table2": a short module code ("m", "mg",
  * "fg", or empty for any other module name) followed by "_<table>" for
  * each input table. */
 char *modulePrefix = "";
-if (sameString(am->name, "metaGeneset"))
+if (sameString(am->name, "meta"))
     modulePrefix = "m";
 else if (sameString(am->name, "metaGeneset"))
     modulePrefix = "mg";
 else if (sameString(am->name, "factorGraph"))
     modulePrefix = "fg";
 
 dyStringPrintf(dy, "%s", modulePrefix);
 for (sl = slList; sl; sl = sl->next)
     dyStringPrintf(dy, "_%s", sl->name);
 char *prefix = dyStringCannibalize(&dy);
 
 /* Initial table name is simply prefix */
 char name[256];
 safef(name, sizeof(name), "%s", prefix);
 
 int count = 0;
 boolean foundName = FALSE;
 while (!foundName && count < 100)
     {
     count++;
     /* Name not used by any analysis yet: take it. */
     if (!analysesWithResultTableExists(biConn, name))
 	{
 	foundName = TRUE;
 	break;
 	}
     
     /* Name is taken; reuse it only if that analysis has the same params.
      * Presumably findIdInTable returns the id of the row whose result_table
      * equals name -- verify against its definition. */
     int id = findIdInTable(biConn, AN_TABLE, "id", "result_table", name);
     if (matchingParams(biConn, id, params))
 	{ 
 	foundName = TRUE;
 	break;
 	}
 
     /* If we got here, params don't match the existing analysis id; 
      * append a number to the prefix and try again. */
     safef(name, sizeof(name), "%s_%d", prefix, count);
     }
 
 if (!foundName)
     errAbort("Could not find a unique table name after 100 attempts\n");
 
 char *result_table = cloneString(name);
 return result_table;
 }
 
 struct analyses *createAnalyses(struct sqlConnection *biConn, int cohort_id,
                                 struct analysisModules *am, char *input_tables,
                                 struct hash *params)
 /* Build the analyses record for running module am on input_tables within
  * cohort_id, and save it together with params if AN_TABLE has no identical
  * row yet.  Returns NULL when input_tables is NULL, otherwise the newly
  * allocated record. */
 {
 if (!input_tables)
     return NULL;
 /* Get unique name or name already in database for exact matching analysis */
 char *result_table = createAnalysesResultTableName(biConn, cohort_id, am, input_tables, params);
 int id = findIdInTable(biConn, AN_TABLE, "id", "result_table", result_table);
 struct analyses *an;
 AllocVar(an);
 an->id = id;
 an->cohort_id = cohort_id;
 an->module_id = am->id;
 /* result_table is already a private cloneString() copy, so the record takes
  * ownership of it; cloning it a second time leaked the first copy. */
 an->result_table = result_table;
 an->input_tables = cloneString(input_tables);
 if (!analysesExists(biConn, an))
     {
     analysesSaveToDbEscaped(biConn, an, AN_TABLE, 20);
     storeAnalysisParams(biConn, an, params);
     }
 return an;
 }
 
 struct datasets *datasetsInCohort(struct sqlConnection *biConn, int cohort_id)
 /* Return the rows of DA_TABLE joined to DC_TABLE on dataset id whose
  * DC_TABLE cohort_id equals cohort_id, as loaded by datasetsLoadByQuery. */
 {
 char sql[256];
 struct datasets *members;
 safef(sql, sizeof(sql),
       "select * from %s join %s on %s.id = %s.dataset_id "
       "where %s.cohort_id = %d;",
       DA_TABLE, DC_TABLE, DA_TABLE, DC_TABLE, DC_TABLE, cohort_id);
 members = datasetsLoadByQuery(biConn, sql);
 return members;
 }
 
 struct analysisModules *analysisModulesMatching(struct sqlConnection *biConn,
                                                 char *field, char *val)
 /* Return the rows of AM_TABLE whose column named by field equals val, as
  * loaded by analysisModulesLoadByQuery. */
 {
 char sql[128];
 struct analysisModules *matches;
 safef(sql, sizeof(sql),
       "select * from %s where %s = \"%s\" ",
       AM_TABLE, field, val);
 matches = analysisModulesLoadByQuery(biConn, sql);
 return matches;
 }
 
 void biAnalysisAddModule(struct sqlConnection *biConn,
                          struct biAnalysis *ba, int module_id)
 /* Look up module_id in AM_TABLE and set ba->pipeline from the module type
  * and ba->analyze from the module name; either is set to NULL when the
  * type or name is not recognized.  Does nothing when ba is NULL and calls
  * errAbort when no such module exists. */
 {
 if (!ba)
     return;
 char query[128];
 safef(query, sizeof(query),
       "select * from %s where id = %d",
       AM_TABLE, module_id);
 struct analysisModules *am = analysisModulesLoadByQuery(biConn, query);
 if (!am)
     errAbort("No module with id = %d", module_id);
 /* Set pipeline by module type (gene, set, ...) */
 if (sameString(am->type, "gene"))
     ba->pipeline = geneLevelPipeline;
 else if (sameString(am->type, "set"))
     ba->pipeline = genesetLevelPipeline;
 else if (sameString(am->type, "pathway"))
     ba->pipeline = pathwayLevelPipeline;
 else
     ba->pipeline = NULL;
 /* Set analysis algorithm by module name */
 if (sameString(am->name, "meta"))
     ba->analyze = metaGene;
 else if (sameString(am->name, "metaGeneset"))
     ba->analyze = metaGeneset;
 else if (sameString(am->name, "factorGraph"))
     ba->analyze = factorGraph;
 else
     ba->analyze = NULL;
 /* ba keeps only function pointers, so the loaded module record can be
  * released; it was previously leaked on every call. */
 analysisModulesFree(&am);
 }
 
 struct biAnalysis *biAnalysisListForCohort(struct sqlConnection *biConn,
                                            char *db, int cohort_id)
 /* Return one biAnalysis per AN_TABLE row of cohort_id, in ascending row id
  * order.  Calls errAbort when the cohort has no analyses. */
 {
 char sql[128];
 struct analyses *rows, *row;
 struct biAnalysis *result = NULL;
 safef(sql, sizeof(sql),
       "select * from %s where cohort_id = %d order by id",
       AN_TABLE, cohort_id);
 rows = analysesLoadByQuery(biConn, sql);
 if (rows == NULL)
     errAbort("No analyses for cohort = %d", cohort_id);
 row = rows;
 while (row != NULL)
     {
     struct biAnalysis *item;
     AllocVar(item);
     item->db = cloneString(db);
     item->tableName = cloneString(row->result_table);
     item->parameters = analysisParamsHash(biConn, row);
     item->inputTables = slNameListFromComma(row->input_tables);
     biAnalysisAddModule(biConn, item, row->module_id);
     /* Items are pushed on the head and the list is reversed once below,
      * which restores query order. */
     slAddHead(&result, item);
     row = row->next;
     }
 slReverse(&result);
 analysesFreeList(&rows);
 return result;
 }
 
 boolean cohortExists(struct sqlConnection *biConn, int cohort_id)
 /* Return TRUE if CO_TABLE has a row whose id equals cohort_id. */
 {
 char sql[128];
 boolean present;
 safef(sql, sizeof(sql),
       "select * from %s where id = %d",
       CO_TABLE, cohort_id);
 present = sqlExists(biConn, sql);
 return present;
 }
 
 boolean analysisModuleExists(struct sqlConnection *biConn, struct analysisModules *am)
 /* Return TRUE if AM_TABLE has a row whose id, name and type all equal the
  * fields of am. */
 {
 char sql[128];
 boolean present;
 safef(sql, sizeof(sql),
       "select * from %s where id = %d "
       "and name = \"%s\" "
       "and type = \"%s\" ",
       AM_TABLE, am->id, am->name, am->type);
 present = sqlExists(biConn, sql);
 return present;
 }
 
 void storeAnalysisModule(struct sqlConnection *biConn, char *name, char *type)
 {
 struct analysisModules *am;
 AllocVar(am);
 am->name = cloneString(name);
 am->type = cloneString(type);
 am->id = findIdInTable(biConn, AM_TABLE, "id", "name", am->name);
 
 if (!analysisModuleExists(biConn, am))
     analysisModulesSaveToDb(biConn, am, AM_TABLE, 10);
 
 analysisModulesFree(&am);
 }
 
 void setupAnalysisModules(struct sqlConnection *biConn)
 /* Store the built-in analysis modules via storeAnalysisModule, in table order. */
 {
 /* {module name, module type}; set up more modules by extending this table. */
 static char *builtin[][2] = {
     {"meta", "gene"},              /* Meta-Gene module */
     {"metaGeneset", "set"},        /* Meta-geneset module */
     {"factorGraph", "pathway"},    /* Pathway Factor Graph module */
 };
 int total = (int)(sizeof(builtin) / sizeof(builtin[0]));
 int i;
 for (i = 0; i < total; i++)
     storeAnalysisModule(biConn, builtin[i][0], builtin[i][1]);
 }
 
 void createStandardPipeline(struct sqlConnection *biConn, int cohort_id)
 /* Create the three standard analyses for cohort_id: "meta" over the cohort's
  * dataset tables, "metaGeneset" over the result table of the meta analysis,
  * and "factorGraph" over the dataset tables again.  Calls errAbort when one
  * of these modules is missing from the module table.
  * Temporary allocations (dataset list, input table string, analyses records)
  * are released before returning; earlier they were leaked. */
 {
 struct datasets *da, *daList = datasetsInCohort(biConn, cohort_id);
 /* Comma-separated list of the cohort's data tables. */
 struct dyString *dy = dyStringNew(10);
 for (da = daList; da; da = da->next)
     {
     dyStringPrintf(dy, "%s", da->data_table);
     if (da->next)
         dyStringPrintf(dy, ",");
     }
 char *input_tables = dyStringCannibalize(&dy);
 datasetsFreeList(&daList);
 struct analysisModules *am;
 /* Gene analysis modules */
 am = analysisModulesMatching(biConn, "name", "meta");
 if (!am)
     errAbort("No analysis module named meta");
 /* Hash values are only read (and copied) by createAnalyses, so string
  * literals are stored directly; cloned values were never freed by hashFree. */
 struct hash *params = hashNew(0);
 hashAdd(params, "fxn", "fishers");
 struct analyses *gene = createAnalyses(biConn, cohort_id, am, input_tables, params);
 analysisModulesFree(&am);
 hashFree(&params);
 /* Set (geneset/pathway) analysis modules */
 am = analysisModulesMatching(biConn, "name", "metaGeneset");
 if (!am)
     errAbort("No analysis module named metaGeneset");
 params = hashNew(0);
 hashAdd(params, "fxn", "fishers");
 /* gene is kept alive until here so its result_table can be used as the
  * input without making (and leaking) a separate copy. */
 struct analyses *set = createAnalyses(biConn, cohort_id, am, gene->result_table, params);
 analysesFree(&set);
 analysesFree(&gene);
 analysisModulesFree(&am);
 hashFree(&params);
 /* Set factor graph analysis modules */
 am = analysisModulesMatching(biConn, "name", "factorGraph");
 if (!am)
     errAbort("No analysis module named factorGraph");
 params = hashNew(0);
 hashAdd(params, "fxn", "factorGraphDAI");
 struct analyses *fg = createAnalyses(biConn, cohort_id, am, input_tables, params);
 analysesFree(&fg);
 analysisModulesFree(&am);
 hashFree(&params);
 freeMem(input_tables);
 }
 
 
 void prepareDatabase(struct sqlConnection *biConn)
 {
 if (!sqlTableExists(biConn, AN_TABLE))
     createAnalysesTable(biConn, AN_TABLE);
 
 if (!sqlTableExists(biConn, AP_TABLE))
     createAnalysisParamsTable(biConn, AP_TABLE);
 
 if (!sqlTableExists(biConn, AM_TABLE))
     createAnalysisModulesTable(biConn, AM_TABLE);
 
 setupAnalysisModules(biConn);
 }
 
 void bioController(char *db, int cohort_id)
 {
 struct sqlConnection *biConn = hAllocConnProfile("localDb", db);
 
 if (!cohortExists(biConn, cohort_id))
     {
     hFreeConn(&biConn);
     errAbort("Cohort %d does not exist.", cohort_id);
     }
 prepareDatabase(biConn);
 
 createStandardPipeline(biConn, cohort_id);
 
 struct biAnalysis *baList = biAnalysisListForCohort(biConn, db, cohort_id);
 hFreeConn(&biConn);
     
 runAnalysisPipeline(baList);
 }
 
 
 int main(int argc, char *argv[])
 /* Process command line: expects exactly one argument, the integer cohort id.
  * The argument is parsed with strtol and rejected unless it is entirely a
  * number that fits in an int; atoi() silently turned bad input into 0. */
 {
 optionInit(&argc, argv, options);
 if (argc != 2)
     usage();
 char *end = NULL;
 long cohort = strtol(argv[1], &end, 10);
 /* end == argv[1]: no digits; *end != 0: trailing junk; last test: out of int range. */
 if (end == argv[1] || *end != '\0' || cohort != (long)(int)cohort)
     errAbort("cohort_id must be an integer, got '%s'", argv[1]);
 bioController(BIOINT_DB, (int)cohort);
 return 0;
 }