src/hg/instinct/bioInt2/bioController.c 1.3

1.3 2009/03/21 21:31:54 jsanborn
moved stuff around
Index: src/hg/instinct/bioInt2/bioController.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/bioController.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 4 -r1.2 -r1.3
--- src/hg/instinct/bioInt2/bioController.c	21 Mar 2009 19:54:10 -0000	1.2
+++ src/hg/instinct/bioInt2/bioController.c	21 Mar 2009 21:31:54 -0000	1.3
@@ -29,425 +29,19 @@
 static struct optionSpec options[] = {
     {NULL, 0}
 }; 
 
-void printSlName(struct slName *slList)
-{
-if (!slList)
-    fprintf(stdout, "nothing in slName list\n");
-
-struct slName *sl;
-
-fprintf(stdout, "num samples = %d\n", slCount(slList));
-for (sl = slList; sl; sl = sl->next)
-    {
-    fprintf(stdout, "%s", sl->name);
-    if (sl->next)
-	fprintf(stdout, ",");
-    }
-fprintf(stdout, "\n");
-}
-
-void slPairDoubleFree(struct slPair **pEl)
-{
-struct slPair *el;
-if ((el = *pEl) == NULL) return;
-
-freeMem(el->name);
-struct slDouble *sdList = el->val;
-slFreeList(&sdList);
-freez(pEl);
-}
-
-void slPairDoubleFreeList(struct slPair **pList)
-{
-struct slPair *el, *next;
-
-for (el = *pList; el != NULL; el = next)
-    {
-    next = el->next;
-    slPairDoubleFree(&el);
-    }
-*pList = NULL;
-} 
-
-void analysisResultFree(struct analysisResult **pEl)
-{
-struct analysisResult *el;
-if ((el = *pEl) == NULL) return;
-
-freeMem(el->sample);
-freeMem(el->feature);
-freez(pEl);
-}
-
-void analysisResultFreeList(struct analysisResult **pList)
-{
-struct analysisResult *el, *next;
-
-for (el = *pList; el != NULL; el = next)
-    {
-    next = el->next;
-    analysisResultFree(&el);
-    }
-*pList = NULL;
-} 
-
-struct slName *getAvailableGenes(char *db, struct biResults *br)
-{
-struct sqlConnection *biConn = hAllocConnProfile("localDb", db);
-
-char query[256];
-safef(query, sizeof(query), "select DISTINCT geneSymbol from kgXref join knownGene on kgXref.kgId = knownGene.name");
-
-struct slName *sl, *slList = sqlQuickList(biConn, query);
-hFreeConn(&biConn);
-
-struct slName *geneList = NULL;
-for (sl = slList; sl; sl = sl->next)
-    {
-    struct slName *probes = br->probesForGene(br, sl->name);
-    int numProbes = slCount(probes);
-    slNameFreeList(&probes);
-    if (numProbes == 0)
-	continue;
-
-    slNameAddHead(&geneList, sl->name);
-    }
-
-return geneList;
-} 
-
-
-struct slPair *geneLevelData(struct biResults *br, struct biData *bdList, char *sample, char *gene) 
-{
-struct biData *bd;
-struct slPair *sp, *spList = NULL;
-for (bd = bdList; bd; bd = bd->next)
-    {
-    char *dataset = bd->name;
-    struct slName *sl, *probes = br->probesForGeneInDataset(br, gene, dataset);
-
-    AllocVar(sp);
-    sp->name = cloneString(bd->type);
-    sp->val = NULL;
-    struct slDouble *sd, *sdList = NULL;
-    for (sl = probes; sl; sl = sl->next)
-	{
-	struct hashEl *el = hashLookup(bd->hash, sl->name);
-	if (!el)
-	    continue;
-	struct slDouble *sdVal = el->val;
-	sd = slDoubleNew(sdVal->val);
-	slAddHead(&sdList, sd);
-	}
-    sp->val = sdList;
-    slAddHead(&spList, sp);
-
-    slNameFreeList(&probes);
-    }
-
-return spList;
-}
-
-struct analysisResult *geneLevelAnalysis(struct biAnalysis *ba, struct biResults *br, 
-					 struct slName *genes)
-{
-fprintf(stdout, "starting gene level analysis\n");
-
-struct slName *gene, *sample, *samples = br->allSamplesInCommon(br);
-
-struct analysisResult *ar, *arList = NULL;
-for (sample = samples; sample; sample = sample->next)
-    {
-    struct biData *bdList = br->dataForSample(br, sample->name);
-    for (gene = genes; gene; gene = gene->next)
-	{
-	struct slPair *spList = geneLevelData(br, bdList, sample->name, gene->name);
-	if (!spList)
-	    continue;
-	ar = ba->analyze(ba, spList, sample->name, gene->name);
-	slPairDoubleFreeList(&spList);
-	if (!ar)
-	    continue;
-	slAddHead(&arList, ar);
-	}
-    fprintf(stdout, ".");    
-    biDataFree(&bdList);    
-    }
-fprintf(stdout, "\n");
-
-return arList;
-}
-
-int findIdInTable(struct sqlConnection *biConn, char *tableName,
-		  char *idField, char *sField, char *name)
-{
-if (sqlTableSize(biConn, tableName) == 0)  /* brand new table, return 0 */
-    return 0;
-
-char query[256];
-safef(query, sizeof(query),
-      "select DISTINCT %s from %s where %s = \"%s\";",
-      idField, tableName, sField, name);
-if (sqlExists(biConn, query))  /* sample name found, use same id */
-    return sqlQuickNum(biConn, query);
-
-/* Else, find maximum sample id and add one to it */
-safef(query, sizeof(query),
-      "select max(%s) from %s;",
-      idField, tableName);
-int maxId = sqlQuickNum(biConn, query);
-return maxId + 1;
-}   
-
-
-void createAnalysisFeaturesTable(struct sqlConnection *biConn, char *tableName)
-{
-struct dyString *dy = newDyString(1024);
-dyStringPrintf(dy, "CREATE TABLE %s (\n", tableName);
-dyStringPrintf(dy, "id int unsigned not null,\n");
-dyStringPrintf(dy, "feature_name varchar(255) not null,\n");
-dyStringPrintf(dy, "KEY(id),\n");
-dyStringPrintf(dy, "KEY(feature_name),\n");
-dyStringPrintf(dy, "KEY(id, feature_name),\n");
-dyStringPrintf(dy, "KEY(feature_name, id)\n");
-dyStringPrintf(dy, ")\n");
-sqlUpdate(biConn,dy->string);
-dyStringFree(&dy);
-} 
-
-void createAnalysisValsTable(struct sqlConnection *biConn, char *tableName)
-{
-struct dyString *dy = newDyString(1024);
-dyStringPrintf(dy, "CREATE TABLE %s (\n", tableName);
-dyStringPrintf(dy, "sample_id int unsigned not null,\n");
-dyStringPrintf(dy, "feature_id int unsigned not null,\n");
-dyStringPrintf(dy, "val float not null,\n");
-dyStringPrintf(dy, "conf float not null,\n");
-dyStringPrintf(dy, "KEY(feature_id, sample_id),\n");
-dyStringPrintf(dy, "KEY(sample_id, feature_id)\n");
-dyStringPrintf(dy, ")\n");
-sqlUpdate(biConn,dy->string);
-dyStringFree(&dy);
-} 
-
-boolean analysisFeatureExists(struct sqlConnection *biConn, struct analysisFeatures *af)
-{
-char query[256];
-safef(query, sizeof(query),
-      "select * from analysisFeatures where id = %d "
-      "and feature_name = \"%s\" ",
-      af->id, af->feature_name);
-
-return sqlExists(biConn, query);
-} 
-
-struct hash *getAnalysisFeaturesHash(struct sqlConnection *biConn)
-{
-fprintf(stdout, "getting analysis features hash.\n");
-
-struct hash *hash = hashNew(0);
-char query[128];
-safef(query, sizeof(query), "select * from analysisFeatures");
-
-struct analysisFeatures *af, *afList = analysisFeaturesLoadByQuery(biConn, query);
-
-for (af = afList; af; af = af->next)
-    hashAddInt(hash, af->feature_name, af->id);
-
-analysisFeaturesFreeList(&afList);
-return hash;
-}  
-
-struct hash *storeAnalysisFeaturesInDb(struct sqlConnection *biConn, struct analysisResult *arList)
-{
-fprintf(stdout, "storing analysis features\n");
-
-char *tableName = "analysisFeatures";
-if (!sqlTableExists(biConn, tableName))
-    createAnalysisFeaturesTable(biConn, tableName);
-
-/* Get existing analysis features */
-struct hash *hash = getAnalysisFeaturesHash(biConn);
-
-struct analysisFeatures *af;
-struct analysisResult *ar;
-for (ar = arList; ar; ar = ar->next)
-    {
-    if (hashLookup(hash, ar->feature))
-	continue;  // already visited, ignore
-
-    int feature_id = findIdInTable(biConn, tableName, "id", "feature_name", ar->feature); 
-
-    AllocVar(af);
-    af->id = feature_id;
-    af->feature_name = cloneString(ar->feature);
-    if (!analysisFeatureExists(biConn, af))
-	analysisFeaturesSaveToDb(biConn, af, tableName, 10);
-
-    hashAddInt(hash, af->feature_name, af->id);
-    analysisFeaturesFree(&af);
-    }
-
-return hash;
-}
-
-struct hash *getSampleIdHash(struct sqlConnection *biConn)
-{
-fprintf(stdout, "getting sample id hash.\n");
-
-struct hash *hash = hashNew(0);
-char query[128];
-safef(query, sizeof(query), "select * from samples");
-
-struct samples *sa, *saList = samplesLoadByQuery(biConn, query);
-
-for (sa = saList; sa; sa = sa->next)
-    hashAddInt(hash, sa->name, sa->id);
-
-samplesFreeList(&saList);
-return hash;
-}
-
-
-void storeAnalysisResultsInDb(struct sqlConnection *biConn, struct biAnalysis *ba, 
-			      struct analysisResult *arList)
-{
-fprintf(stdout, "storing analysis in table %s in %s db\n", 
-	ba->tableName, ba->db);
-
-if (!sqlTableExists(biConn, ba->tableName))
-    createAnalysisValsTable(biConn, ba->tableName);
-
-struct hash *featureIds = storeAnalysisFeaturesInDb(biConn, arList); 
-struct hash *sampleIds = getSampleIdHash(biConn);
-
-struct analysisVals *av = AllocA(struct analysisVals);
-struct analysisResult *ar;
-for (ar = arList; ar; ar = ar->next)
-    {
-    int sample_id = hashIntValDefault(sampleIds, ar->sample, -1);
-    int feature_id = hashIntValDefault(featureIds, ar->feature, -1);
-
-    if (sample_id == -1 || feature_id == -1)
-	continue;
-
-    av->sample_id  = sample_id;
-    av->feature_id = feature_id;
-    av->val        = ar->val;
-    av->conf       = ar->conf;
-
-    analysisValsSaveToDb(biConn, av, ba->tableName, 50);
-    }
-
-analysisValsFree(&av);
-}
-
-boolean analysisExists(struct sqlConnection *biConn, struct biAnalysis *ba)
-{
-if (!sqlTableExists(biConn, ba->tableName))
-    return FALSE;
-
-if (sqlTableSize(biConn, ba->tableName) == 0)
-    return FALSE;
-
-return TRUE;
-}
-
-boolean analysisListExists(char *db, struct biAnalysis *baList)
-{
-struct sqlConnection *biConn = hAllocConnProfile("localDb", db);
-
-boolean exists = TRUE;
-struct biAnalysis *ba;
-for (ba = baList; ba; ba = ba->next)
-    if (!analysisExists(biConn, ba))
-	exists = FALSE;
-
-hFreeConn(&biConn);
-return exists;
-}
-
-
-void geneLevelPipeline(struct biAnalysis *baList, struct biResults *br, struct slName *genes)
-{
-if (!baList)
-    return;
-
-/* This assumes all results go to same db (which is safe for now) */
-struct sqlConnection *biConn = hAllocConnProfile("localDb", baList->db);
-
-struct biAnalysis *ba;
-for (ba = baList; ba; ba = ba->next)
-    {
-    if (analysisExists(biConn, ba))
-	continue;
-   
-    struct analysisResult *arList = geneLevelAnalysis(ba, br, genes);
-
-    fprintf(stdout, "storing in db\n");
-    uglyTime(NULL);
-    storeAnalysisResultsInDb(biConn, ba, arList);
-    uglyTime("done");
-
-    analysisResultFreeList(&arList);
-    }
-
-hFreeConn(&biConn);
-}
-
-struct biResults *retrieveData(char *db, struct slName *datasets, boolean toLogP)
-{
-struct slName *dataset;
-
-struct biQuery *bq, *bqList = NULL;
-for (dataset = datasets; dataset; dataset = dataset->next)
-    {
-    bq = biQueryNew(db, dataset->name);  // Add Dataset 
-    bq->getAllProbes = TRUE;        // Set flag to retrieve all probes
-    biQueryAppend(&bqList, bq);     // Append query to query list 
-    }
-
-/* Get results from all queries in list */
-struct biResults *br = biQueryResults(bqList);
-
-/* Convert to log p */
-if (toLogP)
-    br->toLogP(br);
-
-return br;
-}
-
-void runPipelineOnDatasets(char *db, char *datasets)
+void bioController(char *db, char *datasets)
 {
 /* datasets is a comma-separated string, each a different dataset table */
 struct slName *slDatasets = slNameListFromComma(datasets);
 
 struct biAnalysis *baList = registerGeneLevelAnalyses(db, slDatasets);
 
-if (!analysisListExists(db, baList))
-    {
-    /* Get raw gene/sample data for all overlapping samples in dataset list */
-    uglyTime(NULL);
-    struct biResults *br = retrieveData(db, slDatasets, TRUE);
-    uglyTime("retrieveData");
-    
-    if (!br)
-	errAbort("No gene data!\n");
-    
-    /* Run gene level analyses (meta-gene, pathlet, etc.) */
-    struct slName *genes = getAvailableGenes(db, br);
-    uglyTime("Num available genes = %d", slCount(genes));
-
-    geneLevelPipeline(baList, br, genes);
-    }
-
-/* Run pathway/geneset level analyses */
-
-
+runAnalysisPipeline(db, datasets, baList);
 }
 
+
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
@@ -453,8 +47,7 @@
 optionInit(&argc, argv, options);
 if (argc != 2)
     usage();
 
-runPipelineOnDatasets(BIOINT_DB, argv[1]);
-
+bioController(BIOINT_DB, argv[1]);
 return 0;
 }