src/hg/instinct/bioInt2/bioSetLevel.c 1.1

1.1 2009/04/05 21:08:07 jsanborn
added files, fixed cohorts API
Index: src/hg/instinct/bioInt2/bioSetLevel.c
===================================================================
RCS file: src/hg/instinct/bioInt2/bioSetLevel.c
diff -N src/hg/instinct/bioInt2/bioSetLevel.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/hg/instinct/bioInt2/bioSetLevel.c	5 Apr 2009 21:08:07 -0000	1.1
@@ -0,0 +1,251 @@
+/* mapProbesToGenes - Will maps probes in BED format to overlapping gene(s). */
+#include "common.h"
+#include "linefile.h"
+#include "hash.h"
+#include "options.h"
+#include "jksql.h"
+#include "hPrint.h"
+#include "hdb.h"  
+#include "dystring.h"
+#include "bioIntDb.h"
+#include "bioIntDriver.h"
+#include "cprob.h"
+#include "hgStatsLib.h"
+#include "bioController.h"
+
+
+/* Gene-level analysis functions */
+struct genesetData {
+    struct slName *genes;
+    struct hash *data;
+};
+
+struct analysisResult *metaGeneset(struct biAnalysis *ba, void *data,
+				   char *sample, char *gene)
+{ 
+if (!data)
+    return NULL;
+
+struct genesetData *gd = data;
+struct slName *sl, *genes = gd->genes;
+struct hash *geneData = gd->data;
+
+struct slDouble *sd, *sdList = NULL;
+for (sl = genes; sl; sl = sl->next)
+    {
+    struct hashEl *el = hashLookup(geneData, sl->name);
+    if (!el)
+	continue;
+    struct analysisResult *ar = el->val;
+    
+    sd = slDoubleNew(ar->val);
+    slAddHead(&sdList, sd);
+    }
+
+if (!sdList)
+    return NULL;
+
+float chi2, metaP;
+if (!fishersMetaSigned(sdList, &chi2, &metaP))
+    return NULL;  
+
+struct analysisResult *ar;
+AllocVar(ar);
+ar->sample  = cloneString(sample);
+ar->feature = cloneString(gene);
+ar->val     = metaP;
+ar->conf    = chi2;
+return ar;
+}
+
+/* Pipeline Stuff */
+
+struct slPair *getGenesets()
+{
+char query[256];
+safef(query, sizeof(query), "select * from genesets");
+
+struct sqlConnection *conn = hAllocConnProfile("localDb", "pathway");
+struct sqlResult *sr = sqlGetResult(conn, query);
+char **row = NULL;
+
+struct slPair *sp, *spList = NULL;
+while ((row = sqlNextRow(sr)) != NULL)
+    {
+    AllocVar(sp);
+    sp->name = cloneString(row[0]);  // name
+    sp->val = cloneString(row[1]);   // members
+    slAddHead(&spList, sp);
+    }
+sqlFreeResult(&sr);
+
+slReverse(&spList);
+hFreeConn(&conn);
+
+return spList;
+}
+
+struct analysisResult *genesetLevelAnalysis(struct biAnalysis *ba, 
+					    struct slPair *spData, struct slPair *spGenesets)
+{
+if (!ba->analyze)
+    return NULL; 
+
+fprintf(stdout, "starting geneset analysis.\n");
+
+struct slPair *gs, *sp;
+
+struct genesetData *gd;
+AllocVar(gd);
+
+struct analysisResult *ar, *arList = NULL; 
+for (gs = spGenesets; gs; gs = gs->next)
+    {
+    char *members = gs->val;
+
+    struct slName *gList = slNameListFromComma(members);
+    gd->genes = gList;
+    for (sp = spData; sp; sp = sp->next)
+	{
+	gd->data = sp->val;
+	ar = ba->analyze(ba, gd, sp->name, gs->name); 
+	if (!ar)
+	    continue; 
+	slAddHead(&arList, ar);
+	}
+
+    fprintf(stdout, ".");
+    fflush(stdout);
+
+    slNameFreeList(&gList);
+    gd->genes = NULL;
+    gd->data = NULL;
+    }
+
+fprintf(stdout, "\n");
+
+return arList;
+}            
+
+
+void slPairHashesFree(struct slPair **pEl)
+{
+struct slPair *el;
+
+if ((el = *pEl) == NULL) return;
+
+freeMem(el->name);
+struct hash *hash = el->val;
+hashFreeWithVals(&hash, analysisResultFree);
+freez(pEl);
+}
+
+void slPairHashesFreeList(struct slPair **pList)
+{
+struct slPair *el, *next;
+
+for (el = *pList; el != NULL; el = next)
+    {
+    next = el->next;
+    slPairHashesFree(&el);
+    }
+*pList = NULL;
+} 
+
+void slPairStringFree(struct slPair **pEl)
+{
+struct slPair *el;
+
+if ((el = *pEl) == NULL) return;
+
+freeMem(el->name);
+char *name = el->val;
+freeMem(name);
+freez(pEl);
+}
+
+void slPairStringFreeList(struct slPair **pList)
+{
+struct slPair *el, *next;
+
+for (el = *pList; el != NULL; el = next)
+    {
+    next = el->next;
+    slPairStringFree(&el);
+    }
+*pList = NULL;
+} 
+
+struct slPair *analysisValsSamplesHashes(struct sqlConnection *biConn,
+					 struct slName *dataset)
+{
+struct hash *sampleHash = createHash(biConn, SA_TABLE, "id", "name");
+struct hash *featureHash = createHash(biConn, AF_TABLE, "id", "feature_name");
+
+/* Currently only looks at first dataset in slName list passed in */
+char query[128];
+safef(query, sizeof(query), "select * from %s", dataset->name);
+struct slPair *sp, *spList = NULL;
+
+struct hash *hash = hashNew(0);
+struct sqlResult *sr = sqlGetResult(biConn, query);
+char **row = NULL;
+while ((row = sqlNextRow(sr)) != NULL)
+    {
+    char *sample_id = row[0];
+    char *feature_id = row[1];
+    char *sample_name = hashMustFindVal(sampleHash, sample_id);
+    char *feature_name = hashMustFindVal(featureHash, feature_id);
+
+    struct analysisResult *ar;
+    AllocVar(ar);
+    ar->sample = cloneString(sample_name);
+    ar->feature = cloneString(feature_name);
+    ar->val = atof(row[2]);
+    ar->conf = atof(row[3]);
+
+    struct hashEl *el = hashLookup(hash, ar->sample);
+    if (!el)
+	{
+	AllocVar(sp);
+	sp->name = cloneString(ar->sample);
+	sp->val = hashNew(0);
+	hashAdd(hash, ar->sample, sp);
+	slAddHead(&spList, sp);
+	}
+    else
+	sp = el->val;
+                          
+    struct hash *featureHash = sp->val;
+    hashAdd(featureHash, ar->feature, ar);
+    }
+
+sqlFreeResult(&sr);
+hashFree(&hash);
+hashFree(&sampleHash);
+hashFree(&featureHash);
+return spList;
+}  
+
+void genesetLevelPipeline(struct biAnalysis *ba)
+{
+uglyTime(NULL);
+struct sqlConnection *biConn = hAllocConnProfile("localDb", ba->db);
+struct slPair *spData = analysisValsSamplesHashes(biConn, ba->inputTables);
+uglyTime("got sample hashes");
+
+struct slPair *spGenesets = getGenesets();
+fprintf(stderr, "got %d genesets\n", slCount(spGenesets));
+
+struct analysisResult *arList = genesetLevelAnalysis(ba, spData, spGenesets);
+uglyTime("analyzed all genesets");
+
+fprintf(stdout, "storing results...\n");
+storeAnalysisResultsInDb(biConn, ba, arList);
+uglyTime("analyzed all genesets");
+hFreeConn(&biConn);   
+
+slPairHashesFreeList(&spData);
+slPairStringFreeList(&spGenesets);
+hFreeConn(&biConn);
+}