src/hg/instinct/bioInt2/bioGeneLevel.c 1.4

1.4 2009/04/04 00:39:22 jsanborn
added cohorts api
Index: src/hg/instinct/bioInt2/bioGeneLevel.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/bioGeneLevel.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -b -B -U 1000000 -r1.3 -r1.4
--- src/hg/instinct/bioInt2/bioGeneLevel.c	23 Mar 2009 18:19:29 -0000	1.3
+++ src/hg/instinct/bioInt2/bioGeneLevel.c	4 Apr 2009 00:39:22 -0000	1.4
@@ -1,207 +1,212 @@
 /* mapProbesToGenes - Will maps probes in BED format to overlapping gene(s). */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "jksql.h"
 #include "hPrint.h"
 #include "hdb.h"  
 #include "dystring.h"
 #include "bioIntDb.h"
 #include "bioIntDriver.h"
 #include "cprob.h"
 #include "hgStatsLib.h"
 #include "bioController.h"
 
 
 /* Gene-level analysis functions */
 
 struct analysisResult *metaGene(struct biAnalysis *ba, void *data, 
 				char *sample, char *gene)
 {
 struct slPair *sp, *spList = data;
 struct slDouble *sd, *sdList = NULL;
 for (sp = spList; sp; sp = sp->next)
     {
     struct slDouble *vals = sp->val;
     if (!vals)
 	continue;
     double val = slDoubleMedian(vals);
     sd = slDoubleNew(val);
     slAddHead(&sdList, sd);
     }
 
 float chi2, metaP;
 if (!fishersMetaSigned(sdList, &chi2, &metaP))
     return NULL;
 
 slFreeList(&sdList);
 struct analysisResult *ar;
 AllocVar(ar);
 ar->sample  = cloneString(sample);
 ar->feature = cloneString(gene);
 ar->val     = metaP;
 ar->conf    = chi2;
 return ar;
 }
 
 
 /* Pipeline Stuff */
 
 void slPairDoubleFree(struct slPair **pEl)
 {
 struct slPair *el;
 if ((el = *pEl) == NULL) return;
 
 freeMem(el->name);
 struct slDouble *sdList = el->val;
 slFreeList(&sdList);
 freez(pEl);
 }
 
 void slPairDoubleFreeList(struct slPair **pList)
 {
 struct slPair *el, *next;
 
 for (el = *pList; el != NULL; el = next)
     {
     next = el->next;
     slPairDoubleFree(&el);
     }
 *pList = NULL;
 }   
 
 struct slName *getAvailableGenes(char *db, struct biResults *br)
 {
 struct sqlConnection *biConn = hAllocConnProfile("localDb", db);
 char query[256];
 safef(query, sizeof(query),
       "select DISTINCT geneSymbol from kgXref join knownGene on kgXref.kgId = knownGene.name");
 
 struct slName *sl, *slList = sqlQuickList(biConn, query);
 
 struct slName *geneList = NULL;
 for (sl = slList; sl; sl = sl->next)
     {
     struct slName *probes = br->probesForGene(br, sl->name);
     int numProbes = slCount(probes);
     slNameFreeList(&probes);
     if (numProbes == 0)
 	continue;
 
     slNameAddHead(&geneList, sl->name);
     }
 slReverse(&geneList);
 
 return geneList;
 } 
 
 struct slPair *geneLevelData(struct biResults *br, struct biData *bdList, 
 			     char *sample, char *gene)
 {
 struct biData *bd;
 struct slPair *sp, *spList = NULL;
 for (bd = bdList; bd; bd = bd->next)
     {
     char *dataset = bd->name;
     struct slName *sl, *probes = br->probesForGeneInDataset(br, gene, dataset);
 
     AllocVar(sp);
     sp->name = cloneString(bd->type);
     sp->val = NULL;
     struct slDouble *sd, *sdList = NULL;
     for (sl = probes; sl; sl = sl->next)
 	{
 	struct hashEl *el = hashLookup(bd->hash, sl->name);
 	if (!el)
 	    continue;
 	struct slDouble *sdVal = el->val;
 	sd = slDoubleNew(sdVal->val);
 	slAddHead(&sdList, sd);
 	}
     sp->val = sdList;
     slAddHead(&spList, sp);
 
     slNameFreeList(&probes);
     }
 
 return spList;
 }   
 
-struct analysisResult *geneLevelAnalysis(struct biAnalysis *ba, struct biResults *br,
+void geneLevelAnalysis(struct sqlConnection *biConn, 
+		       struct biAnalysis *ba, struct biResults *br,
 					 struct slName *genes)
 {
 if (!ba->analyze)
-    return NULL;
+    return;
 
 fprintf(stdout, "starting gene level analysis\n");
 
 struct slName *gene, *sample, *samples = br->allSamplesInCommon(br);
 
 struct analysisResult *ar, *arList = NULL;
 for (sample = samples; sample; sample = sample->next)
     {
     struct biData *bdList = br->dataForSample(br, sample->name);
     for (gene = genes; gene; gene = gene->next)
 	{
 	struct slPair *spList = geneLevelData(br, bdList, sample->name, gene->name);
 	if (!spList)
 	    continue;
 	ar = ba->analyze(ba, spList, sample->name, gene->name);
 	slPairDoubleFreeList(&spList);
 	if (!ar)
 	    continue;
 	slAddHead(&arList, ar);
 	}
+    slReverse(&arList);
+
+    storeAnalysisResultsInDb(biConn, ba, arList);
+    analysisResultFreeList(&arList);
+    arList = NULL;
+
     fprintf(stdout, ".");
     fflush(stdout);
+
     biDataFree(&bdList);
     }
-slReverse(&arList);
 fprintf(stdout, "\n");
-
-return arList;
 }    
 
 struct biResults *retrieveRawData(char *db, struct slName *datasets, boolean toLogP)
 {
 struct slName *dataset;
 
 struct biQuery *bq, *bqList = NULL;
 for (dataset = datasets; dataset; dataset = dataset->next)
     {
     bq = biQueryNew(db, dataset->name);  // Add Dataset
     bq->getAllProbes = TRUE;        // Set flag to retrieve all probes
     biQueryAppend(&bqList, bq);     // Append query to query list
     }
 
 /* Get results from all queries in list */
 struct biResults *br = biQueryResults(bqList);
 
 /* Convert to log p */
 if (toLogP)
     br->toLogP(br);
 
 return br;
 }    
 
 void geneLevelPipeline(struct biAnalysis *ba)
 {
 /* Get raw gene/sample data for all overlapping samples in dataset list */
 struct biResults *br = retrieveRawData(ba->db, ba->inputTables, TRUE);
 
 if (!br)
     return;
 
 struct slName *genes = getAvailableGenes(ba->db, br);
 
-struct analysisResult *arList = geneLevelAnalysis(ba, br, genes);
-
 struct sqlConnection *biConn = hAllocConnProfile("localDb", ba->db);
-storeAnalysisResultsInDb(biConn, ba, arList);
+geneLevelAnalysis(biConn, ba, br, genes);
+
+//storeAnalysisResultsInDb(biConn, ba, arList);
 hFreeConn(&biConn);
 
-analysisResultFreeList(&arList);
+//analysisResultFreeList(&arList);
 biResultsFree(&br);
 slNameFreeList(&genes);
 }