src/hg/instinct/bioInt2/bioGeneLevel.c 1.6

1.6 2009/05/20 20:34:36 jsanborn
initial commit
Index: src/hg/instinct/bioInt2/bioGeneLevel.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/bioGeneLevel.c,v
retrieving revision 1.5
retrieving revision 1.6
diff -b -B -U 1000000 -r1.5 -r1.6
--- src/hg/instinct/bioInt2/bioGeneLevel.c	27 Apr 2009 06:15:48 -0000	1.5
+++ src/hg/instinct/bioInt2/bioGeneLevel.c	20 May 2009 20:34:36 -0000	1.6
@@ -1,175 +1,175 @@
 /* mapProbesToGenes - Will maps probes in BED format to overlapping gene(s). */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "jksql.h"
 #include "hPrint.h"
 #include "hdb.h"  
 #include "dystring.h"
 #include "bioIntDb.h"
 #include "bioIntDriver.h"
 #include "cprob.h"
 #include "hgStatsLib.h"
 #include "bioController.h"
 
 
 /* Gene-level analysis functions */
 
 struct analysisVals *metaGene(struct biAnalysis *ba, void *data, 
 			      int sample_id, int feature_id)
 {
 if (!data)
     return NULL;
 
 double total = 0.0, count = 0.0;
 struct slPair *sp, *spList = data;
 struct slDouble *sd, *sdList = NULL;
 for (sp = spList; sp; sp = sp->next)
     {
     struct analysisVals *av = sp->val;
     if (!av)
 	continue;
 
     total += av->val;
     count += 1.0;
     sd = slDoubleNew(av->conf);
     slAddHead(&sdList, sd);
     }
 
 float chi2, metaP;
 if (!fishersMetaSigned(sdList, &chi2, &metaP))
     return NULL;
 
 slFreeList(&sdList);
 struct analysisVals *av;
 AllocVar(av);
 av->sample_id  = sample_id;
 av->feature_id = feature_id;
 av->val        = total / count;
 av->conf       = metaP;
 return av;
 }
 
 
 /* Pipeline Stuff */
 
 struct slName *getAvailableGenes(char *db, struct biResults *br)
 {
 return br->allFeatures(br);
 } 
 
 struct slPair *geneLevelData(struct biResults *br, struct biData *bdList, 
 			     char *sample, char *gene)
 {
 struct biData *bd;
 struct slPair *sp, *spList = NULL;
 for (bd = bdList; bd; bd = bd->next)
     {
     struct hashEl *el = hashLookup(bd->hash, gene);
     if (!el)
 	continue;
 
     AllocVar(sp);
     sp->name = cloneString(bd->type);
     sp->val = NULL;
 
     struct analysisVals *newAv, *av = el->val;
     newAv = cloneAnalysisVals(av);
     sp->val = newAv;
     slAddHead(&spList, sp);
     }
 
 return spList;
 }   
 
 void geneLevelAnalysis(struct sqlConnection *biConn, 
 		       struct biAnalysis *ba, struct biResults *br,
 		       struct slName *genes)
 {
 if (!ba->analyze)
     return;
 
 fprintf(stdout, "starting gene level analysis\n");
 
 struct hash *sampleHash = createIdHash(biConn, SA_TABLE, "name");
 struct hash *featureHash = createIdHash(biConn, AF_TABLE, "feature_name");
 
 struct slName *gene, *sample, *samples = br->allSamplesInCommon(br);
 
 int count = 0;
 int numSamples = slCount(samples);
 
 struct analysisVals *av, *avList = NULL;
 for (sample = samples; sample; sample = sample->next)
     {
     int sample_id = hashIntValDefault(sampleHash, sample->name, -1);
     if (sample_id == -1)
 	continue;
 
     struct biData *bdList = br->dataForSample(br, sample->name);
     for (gene = genes; gene; gene = gene->next)
 	{
 	int feature_id = hashIntValDefault(featureHash, gene->name, -1);
 	if (feature_id == -1)
 	    continue;
 
 	struct slPair *spList = geneLevelData(br, bdList, sample->name, gene->name);
 	if (!spList)
 	    continue;
 	av = ba->analyze(ba, spList, sample_id, feature_id); //e->name, gene->name);
 	slPairAnalysisValsFreeList(&spList);
 	if (!av)
 	    continue;
 	slAddHead(&avList, av);
 	}
     count++;
-    fprintf(stdout, "%d of %d samples\n", count, numSamples);
+    fprintf(stdout, "%d of %d samples, %d av's\n", count, numSamples, slCount(avList));
     fflush(stdout);
 
     biDataFree(&bdList);
     }
 uglyTime(NULL);    
 storeAnalysisValsInDb(biConn, ba->tableName, avList);
 uglyTime("stored ARs");
 
 hashFree(&sampleHash);
 hashFree(&featureHash);
 }    
 
 struct biResults *retrieveRawData(char *db, struct slName *datasets, boolean toLogP)
 {
 struct slName *dataset;
 
 struct biQuery *bq, *bqList = NULL;
 for (dataset = datasets; dataset; dataset = dataset->next)
     {
     bq = biQueryNew(db, dataset->name);  // Add Dataset
     bq->getAllFeatures = TRUE;           // Set flag to retrieve all data
     biQueryAppend(&bqList, bq);          // Append query to query list
     }
 
 /* Get results from all queries in list */
 struct biResults *br = biQueryResults(bqList);
 
 return br;
 }    
 
 void geneLevelPipeline(struct biAnalysis *ba)
 {
 /* Get raw gene/sample data for all overlapping samples in dataset list */
 struct biResults *br = retrieveRawData(ba->db, ba->inputTables, TRUE);
 
 if (!br)
     return;
 
 struct slName *genes = getAvailableGenes(ba->db, br);
 
 struct sqlConnection *biConn = hAllocConnProfile("localDb", ba->db);
 geneLevelAnalysis(biConn, ba, br, genes);
 
 hFreeConn(&biConn);
 
 biResultsFree(&br);
 slNameFreeList(&genes);
 }