src/hg/instinct/bioInt2/bioGeneLevel.c 1.3

1.3 2009/03/23 18:19:29 jsanborn
updated
Index: src/hg/instinct/bioInt2/bioGeneLevel.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/bioGeneLevel.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 4 -r1.2 -r1.3
--- src/hg/instinct/bioInt2/bioGeneLevel.c	22 Mar 2009 01:07:28 -0000	1.2
+++ src/hg/instinct/bioInt2/bioGeneLevel.c	23 Mar 2009 18:19:29 -0000	1.3
@@ -13,12 +13,14 @@
 #include "hgStatsLib.h"
 #include "bioController.h"
 
 
-struct analysisResult *metaGene(struct biAnalysis *ba, struct slPair *spList, 
+/* Gene-level analysis functions */
+
+struct analysisResult *metaGene(struct biAnalysis *ba, void *data, 
 				char *sample, char *gene)
 {
-struct slPair *sp;
+struct slPair *sp, *spList = data;
 struct slDouble *sd, *sdList = NULL;
 for (sp = spList; sp; sp = sp->next)
     {
     struct slDouble *vals = sp->val;
@@ -41,4 +43,165 @@
 ar->val     = metaP;
 ar->conf    = chi2;
 return ar;
 }
+
+
+/* Pipeline Stuff */
+
+void slPairDoubleFree(struct slPair **pEl)
+{
+struct slPair *el;
+if ((el = *pEl) == NULL) return;
+
+freeMem(el->name);
+struct slDouble *sdList = el->val;
+slFreeList(&sdList);
+freez(pEl);
+}
+
+void slPairDoubleFreeList(struct slPair **pList)
+{
+struct slPair *el, *next;
+
+for (el = *pList; el != NULL; el = next)
+    {
+    next = el->next;
+    slPairDoubleFree(&el);
+    }
+*pList = NULL;
+}   
+
+struct slName *getAvailableGenes(char *db, struct biResults *br)
+{
+struct sqlConnection *biConn = hAllocConnProfile("localDb", db);
+char query[256];
+safef(query, sizeof(query),
+      "select DISTINCT geneSymbol from kgXref join knownGene on kgXref.kgId = knownGene.name");
+
+struct slName *sl, *slList = sqlQuickList(biConn, query);
+
+struct slName *geneList = NULL;
+for (sl = slList; sl; sl = sl->next)
+    {
+    struct slName *probes = br->probesForGene(br, sl->name);
+    int numProbes = slCount(probes);
+    slNameFreeList(&probes);
+    if (numProbes == 0)
+	continue;
+
+    slNameAddHead(&geneList, sl->name);
+    }
+slReverse(&geneList);
+
+return geneList;
+} 
+
+struct slPair *geneLevelData(struct biResults *br, struct biData *bdList, 
+			     char *sample, char *gene)
+{
+struct biData *bd;
+struct slPair *sp, *spList = NULL;
+for (bd = bdList; bd; bd = bd->next)
+    {
+    char *dataset = bd->name;
+    struct slName *sl, *probes = br->probesForGeneInDataset(br, gene, dataset);
+
+    AllocVar(sp);
+    sp->name = cloneString(bd->type);
+    sp->val = NULL;
+    struct slDouble *sd, *sdList = NULL;
+    for (sl = probes; sl; sl = sl->next)
+	{
+	struct hashEl *el = hashLookup(bd->hash, sl->name);
+	if (!el)
+	    continue;
+	struct slDouble *sdVal = el->val;
+	sd = slDoubleNew(sdVal->val);
+	slAddHead(&sdList, sd);
+	}
+    sp->val = sdList;
+    slAddHead(&spList, sp);
+
+    slNameFreeList(&probes);
+    }
+
+return spList;
+}   
+
+struct analysisResult *geneLevelAnalysis(struct biAnalysis *ba, struct biResults *br,
+					 struct slName *genes)
+{
+if (!ba->analyze)
+    return NULL;
+
+fprintf(stdout, "starting gene level analysis\n");
+
+struct slName *gene, *sample, *samples = br->allSamplesInCommon(br);
+
+struct analysisResult *ar, *arList = NULL;
+for (sample = samples; sample; sample = sample->next)
+    {
+    struct biData *bdList = br->dataForSample(br, sample->name);
+    for (gene = genes; gene; gene = gene->next)
+	{
+	struct slPair *spList = geneLevelData(br, bdList, sample->name, gene->name);
+	if (!spList)
+	    continue;
+	ar = ba->analyze(ba, spList, sample->name, gene->name);
+	slPairDoubleFreeList(&spList);
+	if (!ar)
+	    continue;
+	slAddHead(&arList, ar);
+	}
+    fprintf(stdout, ".");
+    fflush(stdout);
+    biDataFree(&bdList);
+    }
+slReverse(&arList);
+fprintf(stdout, "\n");
+
+return arList;
+}    
+
+struct biResults *retrieveRawData(char *db, struct slName *datasets, boolean toLogP)
+{
+struct slName *dataset;
+
+struct biQuery *bq, *bqList = NULL;
+for (dataset = datasets; dataset; dataset = dataset->next)
+    {
+    bq = biQueryNew(db, dataset->name);  // Add Dataset
+    bq->getAllProbes = TRUE;        // Set flag to retrieve all probes
+    biQueryAppend(&bqList, bq);     // Append query to query list
+    }
+
+/* Get results from all queries in list */
+struct biResults *br = biQueryResults(bqList);
+
+/* Convert to log p */
+if (toLogP)
+    br->toLogP(br);
+
+return br;
+}    
+
+void geneLevelPipeline(struct biAnalysis *ba)
+{
+/* Get raw gene/sample data for all overlapping samples in dataset list */
+struct biResults *br = retrieveRawData(ba->db, ba->inputTables, TRUE);
+
+if (!br)
+    return;
+
+struct slName *genes = getAvailableGenes(ba->db, br);
+
+struct analysisResult *arList = geneLevelAnalysis(ba, br, genes);
+
+struct sqlConnection *biConn = hAllocConnProfile("localDb", ba->db);
+storeAnalysisResultsInDb(biConn, ba, arList);
+hFreeConn(&biConn);
+
+analysisResultFreeList(&arList);
+biResultsFree(&br);
+slNameFreeList(&genes);
+}