src/hg/instinct/bioInt2/bioGeneLevel.c 1.6
1.6 2009/05/20 20:34:36 jsanborn
initial commit
Index: src/hg/instinct/bioInt2/bioGeneLevel.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/bioGeneLevel.c,v
retrieving revision 1.5
retrieving revision 1.6
diff -b -B -U 1000000 -r1.5 -r1.6
--- src/hg/instinct/bioInt2/bioGeneLevel.c 27 Apr 2009 06:15:48 -0000 1.5
+++ src/hg/instinct/bioInt2/bioGeneLevel.c 20 May 2009 20:34:36 -0000 1.6
@@ -1,175 +1,175 @@
/* mapProbesToGenes - Will maps probes in BED format to overlapping gene(s). */
#include "common.h"
#include "linefile.h"
#include "hash.h"
#include "options.h"
#include "jksql.h"
#include "hPrint.h"
#include "hdb.h"
#include "dystring.h"
#include "bioIntDb.h"
#include "bioIntDriver.h"
#include "cprob.h"
#include "hgStatsLib.h"
#include "bioController.h"
/* Gene-level analysis functions */
struct analysisVals *metaGene(struct biAnalysis *ba, void *data,
int sample_id, int feature_id)
{
if (!data)
return NULL;
double total = 0.0, count = 0.0;
struct slPair *sp, *spList = data;
struct slDouble *sd, *sdList = NULL;
for (sp = spList; sp; sp = sp->next)
{
struct analysisVals *av = sp->val;
if (!av)
continue;
total += av->val;
count += 1.0;
sd = slDoubleNew(av->conf);
slAddHead(&sdList, sd);
}
float chi2, metaP;
if (!fishersMetaSigned(sdList, &chi2, &metaP))
return NULL;
slFreeList(&sdList);
struct analysisVals *av;
AllocVar(av);
av->sample_id = sample_id;
av->feature_id = feature_id;
av->val = total / count;
av->conf = metaP;
return av;
}
/* Pipeline Stuff */
struct slName *getAvailableGenes(char *db, struct biResults *br)
{
return br->allFeatures(br);
}
struct slPair *geneLevelData(struct biResults *br, struct biData *bdList,
char *sample, char *gene)
{
struct biData *bd;
struct slPair *sp, *spList = NULL;
for (bd = bdList; bd; bd = bd->next)
{
struct hashEl *el = hashLookup(bd->hash, gene);
if (!el)
continue;
AllocVar(sp);
sp->name = cloneString(bd->type);
sp->val = NULL;
struct analysisVals *newAv, *av = el->val;
newAv = cloneAnalysisVals(av);
sp->val = newAv;
slAddHead(&spList, sp);
}
return spList;
}
void geneLevelAnalysis(struct sqlConnection *biConn,
struct biAnalysis *ba, struct biResults *br,
struct slName *genes)
{
if (!ba->analyze)
return;
fprintf(stdout, "starting gene level analysis\n");
struct hash *sampleHash = createIdHash(biConn, SA_TABLE, "name");
struct hash *featureHash = createIdHash(biConn, AF_TABLE, "feature_name");
struct slName *gene, *sample, *samples = br->allSamplesInCommon(br);
int count = 0;
int numSamples = slCount(samples);
struct analysisVals *av, *avList = NULL;
for (sample = samples; sample; sample = sample->next)
{
int sample_id = hashIntValDefault(sampleHash, sample->name, -1);
if (sample_id == -1)
continue;
struct biData *bdList = br->dataForSample(br, sample->name);
for (gene = genes; gene; gene = gene->next)
{
int feature_id = hashIntValDefault(featureHash, gene->name, -1);
if (feature_id == -1)
continue;
struct slPair *spList = geneLevelData(br, bdList, sample->name, gene->name);
if (!spList)
continue;
av = ba->analyze(ba, spList, sample_id, feature_id); //e->name, gene->name);
slPairAnalysisValsFreeList(&spList);
if (!av)
continue;
slAddHead(&avList, av);
}
count++;
- fprintf(stdout, "%d of %d samples\n", count, numSamples);
+ fprintf(stdout, "%d of %d samples, %d av's\n", count, numSamples, slCount(avList));
fflush(stdout);
biDataFree(&bdList);
}
uglyTime(NULL);
storeAnalysisValsInDb(biConn, ba->tableName, avList);
uglyTime("stored ARs");
hashFree(&sampleHash);
hashFree(&featureHash);
}
struct biResults *retrieveRawData(char *db, struct slName *datasets, boolean toLogP)
{
struct slName *dataset;
struct biQuery *bq, *bqList = NULL;
for (dataset = datasets; dataset; dataset = dataset->next)
{
bq = biQueryNew(db, dataset->name); // Add Dataset
bq->getAllFeatures = TRUE; // Set flag to retrieve all data
biQueryAppend(&bqList, bq); // Append query to query list
}
/* Get results from all queries in list */
struct biResults *br = biQueryResults(bqList);
return br;
}
void geneLevelPipeline(struct biAnalysis *ba)
{
/* Get raw gene/sample data for all overlapping samples in dataset list */
struct biResults *br = retrieveRawData(ba->db, ba->inputTables, TRUE);
if (!br)
return;
struct slName *genes = getAvailableGenes(ba->db, br);
struct sqlConnection *biConn = hAllocConnProfile("localDb", ba->db);
geneLevelAnalysis(biConn, ba, br, genes);
hFreeConn(&biConn);
biResultsFree(&br);
slNameFreeList(&genes);
}