src/hg/instinct/bioInt2/bioSetLevel.c 1.3

1.3 2009/05/20 20:34:36 jsanborn
initial commit
Index: src/hg/instinct/bioInt2/bioSetLevel.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/bioSetLevel.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 1000000 -r1.2 -r1.3
--- src/hg/instinct/bioInt2/bioSetLevel.c	27 Apr 2009 06:15:48 -0000	1.2
+++ src/hg/instinct/bioInt2/bioSetLevel.c	20 May 2009 20:34:36 -0000	1.3
@@ -1,261 +1,176 @@
 /* bioSetLevel - Geneset-level analysis: combines gene-level analysis values into per-geneset values and stores them in the database. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "jksql.h"
 #include "hPrint.h"
 #include "hdb.h"  
 #include "dystring.h"
 #include "bioIntDb.h"
 #include "bioIntDriver.h"
 #include "cprob.h"
 #include "hgStatsLib.h"
 #include "bioController.h"
 
 
 /* Gene-level analysis functions */
 struct genesetData {
     struct slName *genes;   /* names of the member genes of one geneset */
     struct hash *data;      /* looked up by gene name; values are read as struct analysisVals * */
 };
 
 struct analysisVals *metaGeneset(struct biAnalysis *ba, void *data,
 				 int sample_id, int feature_id)
 /* Combine the gene-level results of one geneset into a single result.
  * data must point to a struct genesetData; ba is not used here.
  * For every member gene found in the hash, val is accumulated for a mean
  * and conf is collected for fishersMetaSigned().  Genes with no entry in
  * the hash are skipped.
  * Returns a newly allocated analysisVals (val = mean of member vals,
  * conf = meta p-value) tagged with sample_id / feature_id, or NULL when
  * data is NULL, no member gene had an entry, or fishersMetaSigned() fails. */
 { 
 if (!data)
     return NULL;
 
 struct genesetData *gd = data;
 struct slName *sl, *genes = gd->genes;
 struct hash *geneData = gd->data;
 
 double total = 0.0;
 double count = 0.0;
 
 struct slDouble *sd, *sdList = NULL;
 for (sl = genes; sl; sl = sl->next)
     {
     struct hashEl *el = hashLookup(geneData, sl->name);
     if (!el)
 	continue;
     struct analysisVals *geneVals = el->val;
 
     total += geneVals->val;
     count += 1.0;
     /* keep the node in a variable: slAddHead is a macro that expands its
      * node argument more than once */
     sd = slDoubleNew(geneVals->conf);
     slAddHead(&sdList, sd);
     }
 
 if (!sdList)
     return NULL;
 
 float chi2, metaP;
 if (!fishersMetaSigned(sdList, &chi2, &metaP))
     {
     slFreeList(&sdList);   /* scratch list must not leak on failure */
     return NULL;  
     }
 slFreeList(&sdList);       /* only needed as input to the meta test */
 
 struct analysisVals *av;
 AllocVar(av);
 av->sample_id  = sample_id;
 av->feature_id = feature_id;
 av->val     = total / count;   /* count >= 1 here: sdList was non-empty */
 av->conf    = metaP;
 return av;
 }
 
 /* Pipeline Stuff */
 
 struct slPair *getGenesets(struct sqlConnection *biConn)
 /* Read every row of GG_TABLE and group the rows by their first column.
  * Returns a list of slPair, one per distinct row[0] value, in order of
  * first appearance: name is a copy of row[0] and val is an slName list of
  * the row[1] values seen for it, in row order. */
 {
 struct slPair *genesetList = NULL;
 char **row = NULL;
 char query[256];
 
 safef(query, sizeof(query), "select * from %s", GG_TABLE);
 struct sqlResult *sr = sqlGetResult(biConn, query);
 
 /* Maps a geneset id to the slPair already created for it. */
 struct hash *seen = hashNew(0);
 
 while ((row = sqlNextRow(sr)) != NULL)
     {
     char *genesetId = row[0];
     char *memberId = row[1];
     struct slPair *geneset;
 
     struct hashEl *hit = hashLookup(seen, genesetId);
     if (hit)
 	geneset = hit->val;
     else
 	{
 	/* First row for this geneset: start a new pair with no members. */
 	AllocVar(geneset);
 	geneset->name = cloneString(genesetId);
 	geneset->val = NULL;
 	slAddHead(&genesetList, geneset);
 	hashAdd(seen, genesetId, geneset);
 	}
 
     struct slName *member = slNameNew(memberId);
     slAddTail(&geneset->val, member);
     }
 
 /* Pairs were pushed on the head, so flip back to first-seen order. */
 slReverse(&genesetList);
 
 sqlFreeResult(&sr);
 hashFree(&seen);
 
 return genesetList;
 }
 
 struct analysisVals *genesetLevelAnalysis(struct sqlConnection *biConn, struct biAnalysis *ba, 
 					  struct slPair *spData, struct slPair *spGenesets)
 /* Run ba->analyze once for every (geneset, sample) combination.
  * spGenesets: one slPair per geneset; name is parsed with atoi() as the
  *             feature id and val is the slName list of member genes.
  * spData:     one slPair per sample; name is parsed with atoi() as the
  *             sample id and val supplies the per-sample gene hash.
  * biConn is not used here.
  * Non-NULL results are pushed on the head of the returned list, so the
  * list is in reverse order of computation.  Returns NULL if ba has no
  * analyze callback.  Progress is written to stdout. */
 {
 if (!ba->analyze)
     return NULL; 
 
 fprintf(stdout, "starting geneset analysis.\n");
 
 struct slPair *gs, *sp;
 
 /* Scratch argument handed to ba->analyze; refilled on each call. */
 struct genesetData *gd;
 AllocVar(gd);
 
 int count = 0, numGenesets = slCount(spGenesets);
 
 struct analysisVals *av, *avList = NULL; 
 for (gs = spGenesets; gs; gs = gs->next)
     {
-    int feature_id = atoi(gs->name); //hashIntValDefault(featureHash, gs->name, -1);
+    int feature_id = atoi(gs->name);
     struct slName *members = gs->val;
     gd->genes = members;
     for (sp = spData; sp; sp = sp->next)
 	{
-	gd->data = sp->val;
+	struct typeHash *th = sp->val;
+	gd->data = th->hash;
 	int sample_id = atoi(sp->name);
 	av = ba->analyze(ba, gd, sample_id, feature_id);
 	if (!av)
 	    continue; 
 	slAddHead(&avList, av);
 	}
 
     count++;
     fprintf(stdout, "%d of %d genesets\n", count, numGenesets);
     fflush(stdout);
 
     /* gd only borrows these pointers; clear them before the next geneset. */
     gd->genes = NULL;
     gd->data = NULL;
     }
 
 fprintf(stdout, "\n");
 
 /* Release the scratch struct itself (it owns none of what it pointed at). */
 freez(&gd);
 
 return avList;
 }            
 
-
-void slPairHashesFree(struct slPair **pEl)  /* Free one slPair element: its name, the hash held in val
- * (hashFreeWithVals is given analysisValsFree for the stored values), then the
- * element itself.  Does nothing when *pEl is NULL. */
-{
-struct slPair *el;
-
-if ((el = *pEl) == NULL) return;
-
-freeMem(el->name);
-struct hash *hash = el->val;
-hashFreeWithVals(&hash, analysisValsFree);
-freez(pEl);
-}
-
-void slPairHashesFreeList(struct slPair **pList)  /* Call slPairHashesFree on every element of *pList, then set *pList to NULL. */
-{
-struct slPair *el, *next;
-
-for (el = *pList; el != NULL; el = next)
-    {
-    next = el->next;  /* read the link before the element is freed */
-    slPairHashesFree(&el);
-    }
-*pList = NULL;
-} 
-
-void slPairStringFree(struct slPair **pEl)  /* Free one slPair element whose val is treated as a single
- * allocated string: frees name, then val, then the element itself.
- * Does nothing when *pEl is NULL. */
-{
-struct slPair *el;
-
-if ((el = *pEl) == NULL) return;
-
-freeMem(el->name);
-char *name = el->val;
-freeMem(name);
-freez(pEl);
-}
-
-void slPairStringFreeList(struct slPair **pList)  /* Call slPairStringFree on every element of *pList, then set *pList to NULL. */
-{
-struct slPair *el, *next;
-
-for (el = *pList; el != NULL; el = next)
-    {
-    next = el->next;  /* read the link before the element is freed */
-    slPairStringFree(&el);
-    }
-*pList = NULL;
-} 
-
-struct slPair *analysisValsSamplesHashes(struct sqlConnection *biConn,
-					 struct slName *dataset)  /* Load every row of the table named by
- * dataset->name and group the rows by row[0].  Returns a list of slPair, one
- * per distinct row[0] value: name is a copy of row[0] and val is a hash that
- * maps row[1] to the analysisVals loaded from that row.  Pairs are pushed on
- * the head of the list and the list is not reversed. */
-{
-/* Currently only looks at first dataset in slName list passed in */
-char query[128];
-safef(query, sizeof(query), "select * from %s", dataset->name);
-struct slPair *sp, *spList = NULL;
-
-struct hash *hash = hashNew(0);  /* row[0] value -> slPair already created for it */
-struct sqlResult *sr = sqlGetResult(biConn, query);
-char **row = NULL;
-while ((row = sqlNextRow(sr)) != NULL)
-    {
-    char *sample_id = row[0];
-    char *feature_id = row[1];
-    struct analysisVals *av = analysisValsLoad(row);
-
-    struct hashEl *el = hashLookup(hash, sample_id);
-    if (!el)
-	{
-	/* first row seen for this sample: create its pair and an empty hash */
-	AllocVar(sp);
-	sp->name = cloneString(sample_id);
-	sp->val = hashNew(0);
-	hashAdd(hash, sample_id, sp);
-	slAddHead(&spList, sp);
-	}
-    else
-	sp = el->val;
-                          
-    struct hash *featureHash = sp->val;
-    hashAdd(featureHash, feature_id, av);
-    }
-
-sqlFreeResult(&sr);
-hashFree(&hash);
-return spList;
-}  
-
 void genesetLevelPipeline(struct biAnalysis *ba)
 /* Run the whole geneset-level pipeline for one analysis:
  *   1. open a connection to ba->db using the "localDb" profile,
  *   2. load the per-sample data from ba->inputTables,
  *   3. load the genesets,
  *   4. run genesetLevelAnalysis() over all of them,
  *   5. store the results in ba->tableName,
  *   6. free everything and release the connection.
  * Progress goes to stdout/stderr; uglyTime() is called after each stage
  * with a label for the stage just finished. */
 {
 uglyTime(NULL);
 struct sqlConnection *biConn = hAllocConnProfile("localDb", ba->db);
-struct slPair *spData = analysisValsSamplesHashes(biConn, ba->inputTables);
+struct slPair *spData = analysisValsSamplesHashesList(biConn, ba->inputTables);
 uglyTime("got sample hashes");
 
 struct slPair *spGenesets = getGenesets(biConn);
 fprintf(stderr, "got %d genesets\n", slCount(spGenesets));
 
 struct analysisVals *avList = genesetLevelAnalysis(biConn, ba, spData, spGenesets);
 uglyTime("analyzed all genesets");
 
 fprintf(stdout, "storing results...\n");
 storeAnalysisValsInDb(biConn, ba->tableName, avList);
 uglyTime("stored results");
 
+analysisValsFreeList(&avList);
 slPairHashesFreeList(&spData);
 /* NOTE(review): getGenesets() stores an slName list in each val; confirm
  * that slPairStringFreeList releases the whole list, not just one node. */
 slPairStringFreeList(&spGenesets);
 /* Release the connection exactly once, after all work is done. */
 hFreeConn(&biConn);
 }