src/hg/instinct/bioInt2/pathwayNullDist.c 1.2

1.2 2009/05/20 20:34:36 jsanborn
initial commit
Index: src/hg/instinct/bioInt2/pathwayNullDist.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/pathwayNullDist.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -b -B -U 4 -r1.1 -r1.2
--- src/hg/instinct/bioInt2/pathwayNullDist.c	14 May 2009 22:34:07 -0000	1.1
+++ src/hg/instinct/bioInt2/pathwayNullDist.c	20 May 2009 20:34:36 -0000	1.2
@@ -296,10 +296,10 @@
 safef(tmpPathway, sizeof(tmpPathway), "%s/tmp_%s_path.tab", tmpDir, tableName);
 
 /* make temporary file containing evidence */
 char tmpEvidence[512];
-safef(tmpEvidence, sizeof(tmpEvidence), "%s/%s_pid_%d_sample_%d_evid.tab", 
-      tmpDir, tableName, pd->id, sample_id);
+safef(tmpEvidence, sizeof(tmpEvidence), "%s/tmp_%s_evid.tab", tmpDir, tableName); 
+
 
 if (!writeEvidenceFile(tmpEvidence, pd))
     errAbort("Problem writing evidence file %s\n", tmpEvidence);
 
@@ -322,11 +322,11 @@
 struct analysisVals *avList = readAnalysisValsFromFile(tmpOutput, pd->entities, sample_id); 
 return avList;
 }
 
-void factorGraphNull(char *tableName, void *data, int sample_id, int feature_id)
+void factorGraphNull(char *tableName, void *data, int sample_id, int feature_id, 
+		     char *nullType)
 { 
-
 struct pathwayData *pd = data;
 
 /* make temporary file containing pathway info */
 char tmpPathway[512];
@@ -334,19 +334,22 @@
       "%s/tmp_%s_path.tab", tmpDir, tableName);
 
 /* make temporary file containing evidence */
 char tmpEvidence[512];
-safef(tmpEvidence, sizeof(tmpEvidence), 
-      "%s/null_%s_pid_%d_iter_%d_evid.tab", 
-      tmpDir, tableName, pd->id, sample_id);
+safef(tmpEvidence, sizeof(tmpEvidence), "%s/tmp_%s_evid.tab", tmpDir, tableName); 
 
 if (!writeNullEvidenceFile(tmpEvidence, pd))
     errAbort("Problem writing evidence file %s\n", tmpEvidence);
 
 /* build command */
 char tmpOutput[128];
-safef(tmpOutput, sizeof(tmpOutput), 
-      "%s/null_%s_pid_%d_iter_%d_output.tab", 
+if (sameString(nullType, "allData"))
+    safef(tmpOutput, sizeof(tmpOutput), 
+	  "%s/null_allData_%s_pid_%d_iter_%d_output.tab", 
+	  tmpDir, tableName, pd->id, sample_id);
+else
+    safef(tmpOutput, sizeof(tmpOutput), 
+	  "%s/null_within_%s_pid_%d_iter_%d_output.tab", 
       tmpDir, tableName, pd->id, sample_id);
 
 char command[512];
 safef(command, sizeof(command), 
@@ -418,9 +421,9 @@
 return sqlQuickList(biConn, query);
 }
 
 
-struct dataTuple *getDataTuples(struct sqlConnection *biConn, struct hash *featureHash, 
+struct dataTuple *getAllDataTuples(struct sqlConnection *biConn, struct hash *featureHash, 
 				struct slPair *spData)
 {
 struct slName *sl, *featureNames = getAnalysisFeatureNames(biConn, AF_TABLE, "gene");
 
@@ -472,17 +475,77 @@
 
 return dtList;
 }
 
+struct dataTuple *getPathwayDataTuples(struct sqlConnection *biConn, struct hash *featureHash, 
+				       struct slPair *spData, struct slPair *spPathway)
+{
+struct pathwayData *pd = spPathway->val;
+struct entities *en, *enList = pd->entities;
+struct slName *featureIds = NULL;
+char idStr[128];
+for (en = enList; en; en = en->next)
+    {
+    int id = hashIntValDefault(featureHash, en->name, -1);
+    if (id == -1)
+	continue;
+    
+    safef(idStr, sizeof(idStr), "%d", id);
+    slNameAddHead(&featureIds, idStr);
+    }
+
+struct slPair *sp;
+struct dataTuple *dt, *dtList = NULL;
+for (sp = spData; sp; sp = sp->next)
+    {
+    struct typeHash *th, *thList = sp->val;
+    if (slCount(thList) < 2) 
+	continue;  // currently only consider samples with more than one type of evidence.
+    
+    struct slName *sl;
+    for (sl = featureIds; sl; sl = sl->next)
+	{
+	AllocVar(dt);
+	dt->cnv = BAD_VAL;
+	dt->exp = BAD_VAL;
+	for (th = thList; th; th = th->next)
+	    {
+	    struct hashEl *el = hashLookup(th->hash, sl->name);
+	    if (!el)
+		continue;
+	    
+	    struct analysisVals *av = el->val;
+	    if (sameString(th->type, "Expression"))
+		dt->exp = av->conf;
+	    else if (sameString(th->type, "CNV"))
+		dt->cnv = av->conf;	    
+	    }
+	if (dt->cnv == BAD_VAL && dt->exp == BAD_VAL)
+	    dataTupleFree(&dt);
+	else
+	    slAddHead(&dtList, dt);
+	}
+    }
+
+return dtList;
+}
+
 /* Pipeline Stuff */
 struct pathwayVals *pathwayNullAnalysis(struct sqlConnection *biConn, char *tableName, 
 					struct slPair *spData, struct slPair *spPathways,
-					int numIters)
+					int numIters, char *nullType)
 {
 fprintf(stdout, "starting null analysis analysis.\n");
 
 struct hash *featureHash = createIdHash(biConn, AF_TABLE, "feature_name");
-struct dataTuple *dtList = getDataTuples(biConn, featureHash, spData);
+
+struct dataTuple *dtList = NULL;
+if (sameString(nullType, "allData"))
+    dtList = getAllDataTuples(biConn, featureHash, spData);
+else if (sameString(nullType, "withinPathway"))
+    dtList = getPathwayDataTuples(biConn, featureHash, spData, spPathways);
+else
+    errAbort("%s not supported", nullType);
 
 int count = 0, numPathways = slCount(spPathways);
 
 struct slPair *pa;
@@ -501,9 +565,9 @@
     
     int i;
     for (i = 0; i < numIters; i++)
 	{
-	factorGraphNull(tableName, pd, i, -1);
+	factorGraphNull(tableName, pd, i, -1, nullType);
 	fprintf(stderr, ".");
 	fflush(stderr);
 	}
     fprintf(stderr, "\n");
@@ -745,15 +809,21 @@
 uglyTime(NULL);
 struct slPair *spPathways = getPathwaysByName(biConn, pathway);
 uglyTime("got pathways");
 
+if (slCount(spPathways) == 0)
+    errAbort("No pathways by name of %s", pathway);
+
 struct slPair *spData = analysisValsSamplesHashesList(biConn, tableNames);
 uglyTime("got sample hashes");   
 
-//pathwayLevelAnalysis(biConn, cohort_name, spData, spPathways);
-//uglyTime("analyzed all genesets");
+pathwayLevelAnalysis(biConn, cohort_name, spData, spPathways);
+uglyTime("analyzed all genesets");
+
+pathwayNullAnalysis(biConn, cohort_name, spData, spPathways, numIters, "withinPathway");
+uglyTime("produced null distribution");
 
-pathwayNullAnalysis(biConn, cohort_name, spData, spPathways, numIters);
+pathwayNullAnalysis(biConn, cohort_name, spData, spPathways, numIters, "allData");
 uglyTime("produced null distribution");
 
 slPairHashesFreeList(&spData);
 slPairPathwayFreeList(&spPathways);