src/hg/instinct/bioInt2/bioSetLevel.c 1.1
1.1 2009/04/05 21:08:07 jsanborn
added files, fixed cohorts API
Index: src/hg/instinct/bioInt2/bioSetLevel.c
===================================================================
RCS file: src/hg/instinct/bioInt2/bioSetLevel.c
diff -N src/hg/instinct/bioInt2/bioSetLevel.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ src/hg/instinct/bioInt2/bioSetLevel.c 5 Apr 2009 21:08:07 -0000 1.1
@@ -0,0 +1,251 @@
+/* mapProbesToGenes - Will maps probes in BED format to overlapping gene(s). */
+#include "common.h"
+#include "linefile.h"
+#include "hash.h"
+#include "options.h"
+#include "jksql.h"
+#include "hPrint.h"
+#include "hdb.h"
+#include "dystring.h"
+#include "bioIntDb.h"
+#include "bioIntDriver.h"
+#include "cprob.h"
+#include "hgStatsLib.h"
+#include "bioController.h"
+
+
+/* Gene-level analysis functions */
+struct genesetData {
+ struct slName *genes;
+ struct hash *data;
+};
+
+struct analysisResult *metaGeneset(struct biAnalysis *ba, void *data,
+ char *sample, char *gene)
+{
+if (!data)
+ return NULL;
+
+struct genesetData *gd = data;
+struct slName *sl, *genes = gd->genes;
+struct hash *geneData = gd->data;
+
+struct slDouble *sd, *sdList = NULL;
+for (sl = genes; sl; sl = sl->next)
+ {
+ struct hashEl *el = hashLookup(geneData, sl->name);
+ if (!el)
+ continue;
+ struct analysisResult *ar = el->val;
+
+ sd = slDoubleNew(ar->val);
+ slAddHead(&sdList, sd);
+ }
+
+if (!sdList)
+ return NULL;
+
+float chi2, metaP;
+if (!fishersMetaSigned(sdList, &chi2, &metaP))
+ return NULL;
+
+struct analysisResult *ar;
+AllocVar(ar);
+ar->sample = cloneString(sample);
+ar->feature = cloneString(gene);
+ar->val = metaP;
+ar->conf = chi2;
+return ar;
+}
+
+/* Pipeline Stuff */
+
+struct slPair *getGenesets()
+{
+char query[256];
+safef(query, sizeof(query), "select * from genesets");
+
+struct sqlConnection *conn = hAllocConnProfile("localDb", "pathway");
+struct sqlResult *sr = sqlGetResult(conn, query);
+char **row = NULL;
+
+struct slPair *sp, *spList = NULL;
+while ((row = sqlNextRow(sr)) != NULL)
+ {
+ AllocVar(sp);
+ sp->name = cloneString(row[0]); // name
+ sp->val = cloneString(row[1]); // members
+ slAddHead(&spList, sp);
+ }
+sqlFreeResult(&sr);
+
+slReverse(&spList);
+hFreeConn(&conn);
+
+return spList;
+}
+
+struct analysisResult *genesetLevelAnalysis(struct biAnalysis *ba,
+ struct slPair *spData, struct slPair *spGenesets)
+{
+if (!ba->analyze)
+ return NULL;
+
+fprintf(stdout, "starting geneset analysis.\n");
+
+struct slPair *gs, *sp;
+
+struct genesetData *gd;
+AllocVar(gd);
+
+struct analysisResult *ar, *arList = NULL;
+for (gs = spGenesets; gs; gs = gs->next)
+ {
+ char *members = gs->val;
+
+ struct slName *gList = slNameListFromComma(members);
+ gd->genes = gList;
+ for (sp = spData; sp; sp = sp->next)
+ {
+ gd->data = sp->val;
+ ar = ba->analyze(ba, gd, sp->name, gs->name);
+ if (!ar)
+ continue;
+ slAddHead(&arList, ar);
+ }
+
+ fprintf(stdout, ".");
+ fflush(stdout);
+
+ slNameFreeList(&gList);
+ gd->genes = NULL;
+ gd->data = NULL;
+ }
+
+fprintf(stdout, "\n");
+
+return arList;
+}
+
+
+void slPairHashesFree(struct slPair **pEl)
+{
+struct slPair *el;
+
+if ((el = *pEl) == NULL) return;
+
+freeMem(el->name);
+struct hash *hash = el->val;
+hashFreeWithVals(&hash, analysisResultFree);
+freez(pEl);
+}
+
+void slPairHashesFreeList(struct slPair **pList)
+{
+struct slPair *el, *next;
+
+for (el = *pList; el != NULL; el = next)
+ {
+ next = el->next;
+ slPairHashesFree(&el);
+ }
+*pList = NULL;
+}
+
+void slPairStringFree(struct slPair **pEl)
+{
+struct slPair *el;
+
+if ((el = *pEl) == NULL) return;
+
+freeMem(el->name);
+char *name = el->val;
+freeMem(name);
+freez(pEl);
+}
+
+void slPairStringFreeList(struct slPair **pList)
+{
+struct slPair *el, *next;
+
+for (el = *pList; el != NULL; el = next)
+ {
+ next = el->next;
+ slPairStringFree(&el);
+ }
+*pList = NULL;
+}
+
+struct slPair *analysisValsSamplesHashes(struct sqlConnection *biConn,
+ struct slName *dataset)
+{
+struct hash *sampleHash = createHash(biConn, SA_TABLE, "id", "name");
+struct hash *featureHash = createHash(biConn, AF_TABLE, "id", "feature_name");
+
+/* Currently only looks at first dataset in slName list passed in */
+char query[128];
+safef(query, sizeof(query), "select * from %s", dataset->name);
+struct slPair *sp, *spList = NULL;
+
+struct hash *hash = hashNew(0);
+struct sqlResult *sr = sqlGetResult(biConn, query);
+char **row = NULL;
+while ((row = sqlNextRow(sr)) != NULL)
+ {
+ char *sample_id = row[0];
+ char *feature_id = row[1];
+ char *sample_name = hashMustFindVal(sampleHash, sample_id);
+ char *feature_name = hashMustFindVal(featureHash, feature_id);
+
+ struct analysisResult *ar;
+ AllocVar(ar);
+ ar->sample = cloneString(sample_name);
+ ar->feature = cloneString(feature_name);
+ ar->val = atof(row[2]);
+ ar->conf = atof(row[3]);
+
+ struct hashEl *el = hashLookup(hash, ar->sample);
+ if (!el)
+ {
+ AllocVar(sp);
+ sp->name = cloneString(ar->sample);
+ sp->val = hashNew(0);
+ hashAdd(hash, ar->sample, sp);
+ slAddHead(&spList, sp);
+ }
+ else
+ sp = el->val;
+
+ struct hash *featureHash = sp->val;
+ hashAdd(featureHash, ar->feature, ar);
+ }
+
+sqlFreeResult(&sr);
+hashFree(&hash);
+hashFree(&sampleHash);
+hashFree(&featureHash);
+return spList;
+}
+
+void genesetLevelPipeline(struct biAnalysis *ba)
+{
+uglyTime(NULL);
+struct sqlConnection *biConn = hAllocConnProfile("localDb", ba->db);
+struct slPair *spData = analysisValsSamplesHashes(biConn, ba->inputTables);
+uglyTime("got sample hashes");
+
+struct slPair *spGenesets = getGenesets();
+fprintf(stderr, "got %d genesets\n", slCount(spGenesets));
+
+struct analysisResult *arList = genesetLevelAnalysis(ba, spData, spGenesets);
+uglyTime("analyzed all genesets");
+
+fprintf(stdout, "storing results...\n");
+storeAnalysisResultsInDb(biConn, ba, arList);
+uglyTime("analyzed all genesets");
+hFreeConn(&biConn);
+
+slPairHashesFreeList(&spData);
+slPairStringFreeList(&spGenesets);
+hFreeConn(&biConn);
+}