src/hg/instinct/bioInt2/bioSetLevel.c 1.3
1.3 2009/05/20 20:34:36 jsanborn
initial commit
Index: src/hg/instinct/bioInt2/bioSetLevel.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/bioSetLevel.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 1000000 -r1.2 -r1.3
--- src/hg/instinct/bioInt2/bioSetLevel.c 27 Apr 2009 06:15:48 -0000 1.2
+++ src/hg/instinct/bioInt2/bioSetLevel.c 20 May 2009 20:34:36 -0000 1.3
@@ -1,261 +1,176 @@
/* bioSetLevel - Geneset-level analysis: combines per-gene analysis values into one value per geneset and sample. */
#include "common.h"
#include "linefile.h"
#include "hash.h"
#include "options.h"
#include "jksql.h"
#include "hPrint.h"
#include "hdb.h"
#include "dystring.h"
#include "bioIntDb.h"
#include "bioIntDriver.h"
#include "cprob.h"
#include "hgStatsLib.h"
#include "bioController.h"
/* Gene-level analysis functions */
struct genesetData
/* Bundle handed to a geneset-level analyze callback through its void *data argument. */
    {
    struct slName *genes;    /* Names of the genes that belong to the geneset. */
    struct hash *data;       /* Looked up by gene name; values are read as struct analysisVals. */
    };
struct analysisVals *metaGeneset(struct biAnalysis *ba, void *data,
                                 int sample_id, int feature_id)
/* Summarize one geneset for one sample from its members' gene-level results.
 *   ba         - analysis descriptor; not used by this function.
 *   data       - must point to a struct genesetData (member gene names plus a
 *                hash from gene name to struct analysisVals); NULL is allowed.
 *   sample_id, feature_id - copied verbatim into the result.
 * Returns a newly allocated analysisVals whose val is the mean of the members'
 * val fields and whose conf is the meta p-value that fishersMetaSigned computes
 * from the members' conf fields.  Returns NULL when data is NULL, when none of
 * the member genes is present in the hash, or when fishersMetaSigned fails.
 * The caller owns the returned struct. */
{
if (!data)
    return NULL;

struct genesetData *gd = data;
struct hash *geneData = gd->data;
double total = 0.0;
double count = 0.0;
struct slName *sl;
struct slDouble *sd, *sdList = NULL;

for (sl = gd->genes; sl; sl = sl->next)
    {
    struct hashEl *el = hashLookup(geneData, sl->name);
    if (!el)
        continue;        /* gene has no value for this sample; skip it */
    struct analysisVals *geneVals = el->val;
    total += geneVals->val;
    count += 1.0;
    /* Keep the node in its own variable: slAddHead is a macro and may
     * evaluate its second argument more than once. */
    sd = slDoubleNew(geneVals->conf);
    slAddHead(&sdList, sd);
    }

/* A non-empty list also guarantees count > 0 for the division below. */
if (!sdList)
    return NULL;

float chi2, metaP;    /* chi2 is a required output of the call but is not used here */
if (!fishersMetaSigned(sdList, &chi2, &metaP))
    {
    slFreeList(&sdList);    /* the temporary list was previously leaked on this path */
    return NULL;
    }
/* The list is only scratch input for the meta test; release it before returning. */
slFreeList(&sdList);

struct analysisVals *av;
AllocVar(av);
av->sample_id = sample_id;
av->feature_id = feature_id;
av->val = total / count;
av->conf = metaP;
return av;
}
/* Pipeline Stuff */
struct slPair *getGenesets(struct sqlConnection *biConn)
/* Read every row of GG_TABLE and group the rows by geneset.
 * Column 0 of each row is used as the geneset id and column 1 as a member
 * gene id.  Returns a list with one slPair per distinct geneset id, in order
 * of first appearance: name is a clone of the id and val is an slName list of
 * that geneset's gene ids in row order.  Returns NULL if the table has no rows.
 * The caller owns the returned list. */
{
char query[256];
safef(query, sizeof(query), "select * from %s", GG_TABLE);
struct sqlResult *sr = sqlGetResult(biConn, query);

/* Maps geneset id -> its slPair so repeated ids append to the same pair. */
struct hash *seen = hashNew(0);
struct slPair *sp, *spList = NULL;
char **row = NULL;

while ((row = sqlNextRow(sr)) != NULL)
    {
    char *gs_id = row[0];      /* geneset id */
    char *gene_id = row[1];    /* id of one member gene */

    struct hashEl *el = hashLookup(seen, gs_id);
    if (el)
        sp = el->val;
    else
        {
        AllocVar(sp);
        sp->name = cloneString(gs_id);
        sp->val = NULL;
        slAddHead(&spList, sp);
        hashAdd(seen, gs_id, sp);
        }

    /* Push on the head and reverse once at the end: appending with slAddTail
     * walks the whole member list for every row. */
    struct slName *member = slNameNew(gene_id);
    slAddHead(&sp->val, member);
    }

sqlFreeResult(&sr);
hashFree(&seen);

/* Restore first-seen order of genesets and row order of members. */
slReverse(&spList);
for (sp = spList; sp; sp = sp->next)
    slReverse(&sp->val);
return spList;
}
struct analysisVals *genesetLevelAnalysis(struct sqlConnection *biConn, struct biAnalysis *ba,
struct slPair *spData, struct slPair *spGenesets)
/* Run ba->analyze once for every (geneset, sample) combination.
 *   biConn     - not used by the live code in this function (only by the
 *                commented-out featureHash line).
 *   ba         - analysis descriptor; its analyze callback does the work.
 *   spData     - one pair per sample; name is parsed with atoi as the sample id.
 *   spGenesets - one pair per geneset; name is parsed with atoi as the feature
 *                id and val is the slName list of member genes.
 * Returns the list of non-NULL results from the callback, or NULL when
 * ba->analyze is not set.  Results are pushed with slAddHead, so the list is in
 * reverse order of generation.  Progress is printed to stdout. */
{
if (!ba->analyze)
return NULL;
//struct hash *featureHash = createIdHash(biConn, AF_TABLE, "feature_name");
fprintf(stdout, "starting geneset analysis.\n");
struct slPair *gs, *sp;
/* One genesetData is allocated here and reused for every callback invocation. */
/* NOTE(review): gd is never freed in this function - looks like a small leak; confirm. */
struct genesetData *gd;
AllocVar(gd);
int count = 0, numGenesets = slCount(spGenesets);
struct analysisVals *av, *avList = NULL;
for (gs = spGenesets; gs; gs = gs->next)
{
- int feature_id = atoi(gs->name); //hashIntValDefault(featureHash, gs->name, -1);
+ int feature_id = atoi(gs->name);
struct slName *members = gs->val;
gd->genes = members;
for (sp = spData; sp; sp = sp->next)
{
/* r1.2 used sp->val directly as the hash; r1.3 reads it from the hash field of a struct typeHash. */
- gd->data = sp->val;
+ struct typeHash *th = sp->val;
+ gd->data = th->hash;
int sample_id = atoi(sp->name);
av = ba->analyze(ba, gd, sample_id, feature_id);
/* A NULL result means the callback produced nothing for this pair; skip it. */
if (!av)
continue;
slAddHead(&avList, av);
}
count++;
fprintf(stdout, "%d of %d genesets\n", count, numGenesets);
fflush(stdout);
/* Clear the borrowed pointers; gd does not own the gene list or the hash. */
gd->genes = NULL;
gd->data = NULL;
}
fprintf(stdout, "\n");
return avList;
}
-
/* Deleted from this file in r1.3.  Frees one slPair whose val is a hash:
 * releases the name, then the hash together with its values (each value is
 * passed to analysisValsFree), then the pair itself.  Does nothing when *pEl
 * is NULL. */
-void slPairHashesFree(struct slPair **pEl)
-{
-struct slPair *el;
-
-if ((el = *pEl) == NULL) return;
-
-freeMem(el->name);
-struct hash *hash = el->val;
-hashFreeWithVals(&hash, analysisValsFree);
-freez(pEl);
-}
-
/* Deleted from this file in r1.3.  Calls slPairHashesFree on every element of
 * the list and then sets *pList to NULL.  The next pointer is saved before each
 * element is freed. */
-void slPairHashesFreeList(struct slPair **pList)
-{
-struct slPair *el, *next;
-
-for (el = *pList; el != NULL; el = next)
- {
- next = el->next;
- slPairHashesFree(&el);
- }
-*pList = NULL;
-}
-
/* Deleted from this file in r1.3.  Frees one slPair whose val is treated as a
 * single allocated string: releases the name, then val with one freeMem call,
 * then the pair itself.  Does nothing when *pEl is NULL. */
-void slPairStringFree(struct slPair **pEl)
-{
-struct slPair *el;
-
-if ((el = *pEl) == NULL) return;
-
-freeMem(el->name);
-char *name = el->val;
-freeMem(name);
-freez(pEl);
-}
-
/* Deleted from this file in r1.3.  Calls slPairStringFree on every element of
 * the list and then sets *pList to NULL.  The next pointer is saved before each
 * element is freed. */
-void slPairStringFreeList(struct slPair **pList)
-{
-struct slPair *el, *next;
-
-for (el = *pList; el != NULL; el = next)
- {
- next = el->next;
- slPairStringFree(&el);
- }
-*pList = NULL;
-}
-
/* Deleted from this file in r1.3 (the pipeline now calls
 * analysisValsSamplesHashesList instead).  Loads every row of the table named by
 * the first element of dataset and groups the rows by sample.  Returns one
 * slPair per distinct value of column 0 (the sample id): name is a clone of
 * that value and val is a hash from column 1 (the feature id) to the
 * analysisVals loaded from the row.  Pairs are pushed with slAddHead and the
 * list is not reversed, so it is in reverse order of first appearance. */
-struct slPair *analysisValsSamplesHashes(struct sqlConnection *biConn,
- struct slName *dataset)
-{
-/* Currently only looks at first dataset in slName list passed in */
-char query[128];
-safef(query, sizeof(query), "select * from %s", dataset->name);
-struct slPair *sp, *spList = NULL;
-
/* Temporary index from sample id to its slPair; freed before returning. */
-struct hash *hash = hashNew(0);
-struct sqlResult *sr = sqlGetResult(biConn, query);
-char **row = NULL;
-while ((row = sqlNextRow(sr)) != NULL)
- {
- char *sample_id = row[0];
- char *feature_id = row[1];
- struct analysisVals *av = analysisValsLoad(row);
-
- struct hashEl *el = hashLookup(hash, sample_id);
- if (!el)
- {
- AllocVar(sp);
- sp->name = cloneString(sample_id);
- sp->val = hashNew(0);
- hashAdd(hash, sample_id, sp);
- slAddHead(&spList, sp);
- }
- else
- sp = el->val;
-
- struct hash *featureHash = sp->val;
- hashAdd(featureHash, feature_id, av);
- }
-
-sqlFreeResult(&sr);
-hashFree(&hash);
-return spList;
-}
-
void genesetLevelPipeline(struct biAnalysis *ba)
/* Run the whole geneset-level analysis for ba: open a connection to ba->db,
 * load the per-sample data from ba->inputTables and the genesets from the
 * database, analyze every geneset against every sample, store the results in
 * ba->tableName, then release everything.  Timing checkpoints are reported via
 * uglyTime. */
{
uglyTime(NULL);
struct sqlConnection *biConn = hAllocConnProfile("localDb", ba->db);
-struct slPair *spData = analysisValsSamplesHashes(biConn, ba->inputTables);
+struct slPair *spData = analysisValsSamplesHashesList(biConn, ba->inputTables);
uglyTime("got sample hashes");
struct slPair *spGenesets = getGenesets(biConn);
fprintf(stderr, "got %d genesets\n", slCount(spGenesets));
struct analysisVals *avList = genesetLevelAnalysis(biConn, ba, spData, spGenesets);
uglyTime("analyzed all genesets");
fprintf(stdout, "storing results...\n");
storeAnalysisValsInDb(biConn, ba->tableName, avList);
/* NOTE(review): this label repeats the previous checkpoint's text although it times the store step - looks like a copy-paste; confirm. */
uglyTime("analyzed all genesets");
/* NOTE(review): hFreeConn is called here and again at the end of the function; the second call is presumably a no-op if hFreeConn NULLs the pointer - confirm. */
hFreeConn(&biConn);
+analysisValsFreeList(&avList);
/* NOTE(review): genesetLevelAnalysis (r1.3) reads each spData val as a struct typeHash, while the r1.2 slPairHashesFree treated val as a plain hash - confirm the current helper matches. */
slPairHashesFreeList(&spData);
/* NOTE(review): getGenesets stores an slName list in each val, but the r1.2 slPairStringFree released val with a single freeMem - nodes after the first may leak; confirm against the current helper. */
slPairStringFreeList(&spGenesets);
hFreeConn(&biConn);
}