src/hg/instinct/bioInt2/bioGeneLevel.c 1.4
1.4 2009/04/04 00:39:22 jsanborn
added cohorts api
Index: src/hg/instinct/bioInt2/bioGeneLevel.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/bioGeneLevel.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -b -B -U 1000000 -r1.3 -r1.4
--- src/hg/instinct/bioInt2/bioGeneLevel.c 23 Mar 2009 18:19:29 -0000 1.3
+++ src/hg/instinct/bioInt2/bioGeneLevel.c 4 Apr 2009 00:39:22 -0000 1.4
@@ -1,207 +1,212 @@
/* mapProbesToGenes - Will maps probes in BED format to overlapping gene(s). */
#include "common.h"
#include "linefile.h"
#include "hash.h"
#include "options.h"
#include "jksql.h"
#include "hPrint.h"
#include "hdb.h"
#include "dystring.h"
#include "bioIntDb.h"
#include "bioIntDriver.h"
#include "cprob.h"
#include "hgStatsLib.h"
#include "bioController.h"
/* Gene-level analysis functions */
struct analysisResult *metaGene(struct biAnalysis *ba, void *data,
char *sample, char *gene)
{
struct slPair *sp, *spList = data;
struct slDouble *sd, *sdList = NULL;
for (sp = spList; sp; sp = sp->next)
{
struct slDouble *vals = sp->val;
if (!vals)
continue;
double val = slDoubleMedian(vals);
sd = slDoubleNew(val);
slAddHead(&sdList, sd);
}
float chi2, metaP;
if (!fishersMetaSigned(sdList, &chi2, &metaP))
return NULL;
slFreeList(&sdList);
struct analysisResult *ar;
AllocVar(ar);
ar->sample = cloneString(sample);
ar->feature = cloneString(gene);
ar->val = metaP;
ar->conf = chi2;
return ar;
}
/* Pipeline Stuff */
void slPairDoubleFree(struct slPair **pEl)
{
struct slPair *el;
if ((el = *pEl) == NULL) return;
freeMem(el->name);
struct slDouble *sdList = el->val;
slFreeList(&sdList);
freez(pEl);
}
void slPairDoubleFreeList(struct slPair **pList)
{
struct slPair *el, *next;
for (el = *pList; el != NULL; el = next)
{
next = el->next;
slPairDoubleFree(&el);
}
*pList = NULL;
}
struct slName *getAvailableGenes(char *db, struct biResults *br)
{
struct sqlConnection *biConn = hAllocConnProfile("localDb", db);
char query[256];
safef(query, sizeof(query),
"select DISTINCT geneSymbol from kgXref join knownGene on kgXref.kgId = knownGene.name");
struct slName *sl, *slList = sqlQuickList(biConn, query);
struct slName *geneList = NULL;
for (sl = slList; sl; sl = sl->next)
{
struct slName *probes = br->probesForGene(br, sl->name);
int numProbes = slCount(probes);
slNameFreeList(&probes);
if (numProbes == 0)
continue;
slNameAddHead(&geneList, sl->name);
}
slReverse(&geneList);
return geneList;
}
struct slPair *geneLevelData(struct biResults *br, struct biData *bdList,
char *sample, char *gene)
{
struct biData *bd;
struct slPair *sp, *spList = NULL;
for (bd = bdList; bd; bd = bd->next)
{
char *dataset = bd->name;
struct slName *sl, *probes = br->probesForGeneInDataset(br, gene, dataset);
AllocVar(sp);
sp->name = cloneString(bd->type);
sp->val = NULL;
struct slDouble *sd, *sdList = NULL;
for (sl = probes; sl; sl = sl->next)
{
struct hashEl *el = hashLookup(bd->hash, sl->name);
if (!el)
continue;
struct slDouble *sdVal = el->val;
sd = slDoubleNew(sdVal->val);
slAddHead(&sdList, sd);
}
sp->val = sdList;
slAddHead(&spList, sp);
slNameFreeList(&probes);
}
return spList;
}
-struct analysisResult *geneLevelAnalysis(struct biAnalysis *ba, struct biResults *br,
+void geneLevelAnalysis(struct sqlConnection *biConn,
+ struct biAnalysis *ba, struct biResults *br,
struct slName *genes)
{
if (!ba->analyze)
- return NULL;
+ return;
fprintf(stdout, "starting gene level analysis\n");
struct slName *gene, *sample, *samples = br->allSamplesInCommon(br);
struct analysisResult *ar, *arList = NULL;
for (sample = samples; sample; sample = sample->next)
{
struct biData *bdList = br->dataForSample(br, sample->name);
for (gene = genes; gene; gene = gene->next)
{
struct slPair *spList = geneLevelData(br, bdList, sample->name, gene->name);
if (!spList)
continue;
ar = ba->analyze(ba, spList, sample->name, gene->name);
slPairDoubleFreeList(&spList);
if (!ar)
continue;
slAddHead(&arList, ar);
}
+ slReverse(&arList);
+
+ storeAnalysisResultsInDb(biConn, ba, arList);
+ analysisResultFreeList(&arList);
+ arList = NULL;
+
fprintf(stdout, ".");
fflush(stdout);
+
biDataFree(&bdList);
}
-slReverse(&arList);
fprintf(stdout, "\n");
-
-return arList;
}
struct biResults *retrieveRawData(char *db, struct slName *datasets, boolean toLogP)
{
struct slName *dataset;
struct biQuery *bq, *bqList = NULL;
for (dataset = datasets; dataset; dataset = dataset->next)
{
bq = biQueryNew(db, dataset->name); // Add Dataset
bq->getAllProbes = TRUE; // Set flag to retrieve all probes
biQueryAppend(&bqList, bq); // Append query to query list
}
/* Get results from all queries in list */
struct biResults *br = biQueryResults(bqList);
/* Convert to log p */
if (toLogP)
br->toLogP(br);
return br;
}
void geneLevelPipeline(struct biAnalysis *ba)
{
/* Get raw gene/sample data for all overlapping samples in dataset list */
struct biResults *br = retrieveRawData(ba->db, ba->inputTables, TRUE);
if (!br)
return;
struct slName *genes = getAvailableGenes(ba->db, br);
-struct analysisResult *arList = geneLevelAnalysis(ba, br, genes);
-
struct sqlConnection *biConn = hAllocConnProfile("localDb", ba->db);
-storeAnalysisResultsInDb(biConn, ba, arList);
+geneLevelAnalysis(biConn, ba, br, genes);
+
+//storeAnalysisResultsInDb(biConn, ba, arList);
hFreeConn(&biConn);
-analysisResultFreeList(&arList);
+//analysisResultFreeList(&arList);
biResultsFree(&br);
slNameFreeList(&genes);
}