src/hg/instinct/bioInt2/bioSetLevel.c 1.2

1.2 2009/04/27 06:15:48 jsanborn
updated lots of stuff, will break older implementation of database
Index: src/hg/instinct/bioInt2/bioSetLevel.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/bioSetLevel.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -b -B -U 4 -r1.1 -r1.2
--- src/hg/instinct/bioInt2/bioSetLevel.c	5 Apr 2009 21:08:07 -0000	1.1
+++ src/hg/instinct/bioInt2/bioSetLevel.c	27 Apr 2009 06:15:48 -0000	1.2
@@ -19,27 +19,32 @@
     struct slName *genes;
     struct hash *data;
 };
 
-struct analysisResult *metaGeneset(struct biAnalysis *ba, void *data,
-				   char *sample, char *gene)
+struct analysisVals *metaGeneset(struct biAnalysis *ba, void *data,
+				 int sample_id, int feature_id)
 { 
 if (!data)
     return NULL;
 
 struct genesetData *gd = data;
 struct slName *sl, *genes = gd->genes;
 struct hash *geneData = gd->data;
 
+double total = 0.0;
+double count = 0.0;
+
 struct slDouble *sd, *sdList = NULL;
 for (sl = genes; sl; sl = sl->next)
     {
     struct hashEl *el = hashLookup(geneData, sl->name);
     if (!el)
 	continue;
-    struct analysisResult *ar = el->val;
+    struct analysisVals *av = el->val;
     
-    sd = slDoubleNew(ar->val);
+    total += av->val;
+    count += 1.0;
+    sd = slDoubleNew(av->conf);
     slAddHead(&sdList, sd);
     }
 
 if (!sdList)
@@ -48,84 +53,102 @@
 float chi2, metaP;
 if (!fishersMetaSigned(sdList, &chi2, &metaP))
     return NULL;  
 
-struct analysisResult *ar;
-AllocVar(ar);
-ar->sample  = cloneString(sample);
-ar->feature = cloneString(gene);
-ar->val     = metaP;
-ar->conf    = chi2;
-return ar;
+struct analysisVals *av;
+AllocVar(av);
+av->sample_id  = sample_id;
+av->feature_id = feature_id;
+av->val     = total / count;
+av->conf    = metaP;
+return av;
 }
 
 /* Pipeline Stuff */
 
-struct slPair *getGenesets()
+struct slPair *getGenesets(struct sqlConnection *biConn)
 {
 char query[256];
-safef(query, sizeof(query), "select * from genesets");
+safef(query, sizeof(query), "select * from %s", GG_TABLE);
 
-struct sqlConnection *conn = hAllocConnProfile("localDb", "pathway");
-struct sqlResult *sr = sqlGetResult(conn, query);
+struct sqlResult *sr = sqlGetResult(biConn, query);
 char **row = NULL;
 
+struct hash *hash = hashNew(0);
+
 struct slPair *sp, *spList = NULL;
 while ((row = sqlNextRow(sr)) != NULL)
     {
+    char *gs_id = row[0];  // name
+    char *gene_id = row[1];   // members
+  
+    struct hashEl *el = hashLookup(hash, gs_id);
+    if (!el)
+	{
     AllocVar(sp);
-    sp->name = cloneString(row[0]);  // name
-    sp->val = cloneString(row[1]);   // members
+	sp->name = cloneString(gs_id);
+	sp->val = NULL;
     slAddHead(&spList, sp);
+	hashAdd(hash, gs_id, sp);
     }
-sqlFreeResult(&sr);
+    else
+	sp = el->val;
 
+    struct slName *sl = slNameNew(gene_id);
+    slAddTail(&sp->val, sl);
+    }
 slReverse(&spList);
-hFreeConn(&conn);
+
+sqlFreeResult(&sr);
+hashFree(&hash);
 
 return spList;
 }
 
-struct analysisResult *genesetLevelAnalysis(struct biAnalysis *ba, 
+struct analysisVals *genesetLevelAnalysis(struct sqlConnection *biConn, struct biAnalysis *ba, 
 					    struct slPair *spData, struct slPair *spGenesets)
 {
 if (!ba->analyze)
     return NULL; 
 
+//struct hash *featureHash = createIdHash(biConn, AF_TABLE, "feature_name");
+
 fprintf(stdout, "starting geneset analysis.\n");
 
 struct slPair *gs, *sp;
 
 struct genesetData *gd;
 AllocVar(gd);
 
-struct analysisResult *ar, *arList = NULL; 
+int count = 0, numGenesets = slCount(spGenesets);
+
+struct analysisVals *av, *avList = NULL; 
 for (gs = spGenesets; gs; gs = gs->next)
     {
-    char *members = gs->val;
-
-    struct slName *gList = slNameListFromComma(members);
-    gd->genes = gList;
+    int feature_id = atoi(gs->name); //hashIntValDefault(featureHash, gs->name, -1);
+    struct slName *members = gs->val;
+    gd->genes = members;
     for (sp = spData; sp; sp = sp->next)
 	{
 	gd->data = sp->val;
-	ar = ba->analyze(ba, gd, sp->name, gs->name); 
-	if (!ar)
+	int sample_id = atoi(sp->name);
+	av = ba->analyze(ba, gd, sample_id, feature_id);
+	if (!av)
 	    continue; 
-	slAddHead(&arList, ar);
+	slAddHead(&avList, av);
 	}
 
-    fprintf(stdout, ".");
+    count++;
+    fprintf(stdout, "%d of %d genesets\n", count, numGenesets);
     fflush(stdout);
 
-    slNameFreeList(&gList);
     gd->genes = NULL;
     gd->data = NULL;
     }
 
 fprintf(stdout, "\n");
 
-return arList;
+return avList;
 }            
 
 
 void slPairHashesFree(struct slPair **pEl)
@@ -135,9 +158,9 @@
 if ((el = *pEl) == NULL) return;
 
 freeMem(el->name);
 struct hash *hash = el->val;
-hashFreeWithVals(&hash, analysisResultFree);
+hashFreeWithVals(&hash, analysisValsFree);
 freez(pEl);
 }
 
 void slPairHashesFreeList(struct slPair **pList)
@@ -178,11 +201,8 @@
 
 struct slPair *analysisValsSamplesHashes(struct sqlConnection *biConn,
 					 struct slName *dataset)
 {
-struct hash *sampleHash = createHash(biConn, SA_TABLE, "id", "name");
-struct hash *featureHash = createHash(biConn, AF_TABLE, "id", "feature_name");
-
 /* Currently only looks at first dataset in slName list passed in */
 char query[128];
 safef(query, sizeof(query), "select * from %s", dataset->name);
 struct slPair *sp, *spList = NULL;
@@ -193,38 +213,28 @@
 while ((row = sqlNextRow(sr)) != NULL)
     {
     char *sample_id = row[0];
     char *feature_id = row[1];
-    char *sample_name = hashMustFindVal(sampleHash, sample_id);
-    char *feature_name = hashMustFindVal(featureHash, feature_id);
-
-    struct analysisResult *ar;
-    AllocVar(ar);
-    ar->sample = cloneString(sample_name);
-    ar->feature = cloneString(feature_name);
-    ar->val = atof(row[2]);
-    ar->conf = atof(row[3]);
+    struct analysisVals *av = analysisValsLoad(row);
 
-    struct hashEl *el = hashLookup(hash, ar->sample);
+    struct hashEl *el = hashLookup(hash, sample_id);
     if (!el)
 	{
 	AllocVar(sp);
-	sp->name = cloneString(ar->sample);
+	sp->name = cloneString(sample_id);
 	sp->val = hashNew(0);
-	hashAdd(hash, ar->sample, sp);
+	hashAdd(hash, sample_id, sp);
 	slAddHead(&spList, sp);
 	}
     else
 	sp = el->val;
                           
     struct hash *featureHash = sp->val;
-    hashAdd(featureHash, ar->feature, ar);
+    hashAdd(featureHash, feature_id, av);
     }
 
 sqlFreeResult(&sr);
 hashFree(&hash);
-hashFree(&sampleHash);
-hashFree(&featureHash);
 return spList;
 }  
 
 void genesetLevelPipeline(struct biAnalysis *ba)
@@ -233,16 +243,16 @@
 struct sqlConnection *biConn = hAllocConnProfile("localDb", ba->db);
 struct slPair *spData = analysisValsSamplesHashes(biConn, ba->inputTables);
 uglyTime("got sample hashes");
 
-struct slPair *spGenesets = getGenesets();
+struct slPair *spGenesets = getGenesets(biConn);
 fprintf(stderr, "got %d genesets\n", slCount(spGenesets));
 
-struct analysisResult *arList = genesetLevelAnalysis(ba, spData, spGenesets);
+struct analysisVals *avList = genesetLevelAnalysis(biConn, ba, spData, spGenesets);
 uglyTime("analyzed all genesets");
 
 fprintf(stdout, "storing results...\n");
-storeAnalysisResultsInDb(biConn, ba, arList);
+storeAnalysisValsInDb(biConn, ba->tableName, avList);
 uglyTime("analyzed all genesets");
 hFreeConn(&biConn);   
 
 slPairHashesFreeList(&spData);