src/hg/instinct/bioInt2/bioPathwayLevel.c 1.5

1.5 2009/09/05 01:12:01 sbenz
Added em to pipeline
Index: src/hg/instinct/bioInt2/bioPathwayLevel.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/bioPathwayLevel.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -b -B -U 4 -r1.4 -r1.5
--- src/hg/instinct/bioInt2/bioPathwayLevel.c	1 Sep 2009 05:18:02 -0000	1.4
+++ src/hg/instinct/bioInt2/bioPathwayLevel.c	5 Sep 2009 01:12:01 -0000	1.5
@@ -235,9 +235,9 @@
 return (gClose && mClose);
 }
 
 // eventually this will be more dynamic, but for now...
-boolean writeConfigFile(char *filename, char *evidPrefix)
+boolean writeConfigFile(char *filename, char *evidPrefix, struct emParams *emParameters)
 {
 
 FILE *f = mustOpen(filename, "w");
 if (!f)
@@ -247,10 +247,36 @@
 fprintf(f, "inference [pathway_match=_pid_48_,method=BP,updates=SEQFIX,tol=1e-9,maxiter=10000,logdomain=0]\n");
 //fprintf(f, "inference [pathway_match=_pid_3_,method=BP,updates=SEQFIX,tol=1e-9,maxiter=10000,logdomain=0]\n");
 //fprintf(f, "inference [pathway_match=_pid_9_,method=BP,updates=SEQFIX,tol=1e-9,maxiter=10000,logdomain=0]\n");
 fprintf(f, "inference [method=JTREE,updates=HUGIN,verbose=1]\n");
-fprintf(f, "evidence [suffix=_genome.tab,node=genome,disc=-0.451727;0.467580,epsilon=0.01,epsilon0=0.2]\n");
-fprintf(f, "evidence [suffix=_mRNA.tab,node=mRNA,disc=-0.1817733;0.1824913,epsilon=0.01,epsilon0=0.2]\n");
+
+// print out the EM parameters, if they exist... otherwise use the default epsilon vals
+struct dyString *genomeParamString = dyStringNew(256);
+struct dyString *mrnaParamString = dyStringNew(256);
+if(emParameters != NULL)
+	{
+	int i,j;
+	dyStringPrintf(genomeParamString, "factorParams=");
+	dyStringPrintf(mrnaParamString, "factorParams=");
+	for(i = 0; i < 3; i++)
+		{
+		for(j = 0; j < 3; j++)
+			{
+			dyStringPrintf(genomeParamString, "%f;",emParameters->genome[i][j]);
+			dyStringPrintf(mrnaParamString, "%f;",emParameters->mRNA[i][j]);
+			}
+		}
+	// trim the last ; off
+	dyStringResize(genomeParamString,dyStringLen(genomeParamString)-1);
+	dyStringResize(mrnaParamString,dyStringLen(mrnaParamString)-1);	
+	}
+else
+	{
+	dyStringPrintf(genomeParamString, "epsilon=0.01,epsilon0=0.2");
+	dyStringPrintf(mrnaParamString, "epsilon=0.01,epsilon0=0.2");
+	}
+fprintf(f, "evidence [suffix=_genome.tab,node=genome,disc=-0.451727;0.467580,%s]\n",genomeParamString->string);
+fprintf(f, "evidence [suffix=_mRNA.tab,node=mRNA,disc=-0.1817733;0.1824913,%s]\n",mrnaParamString->string);
 fprintf(f, "em_step [_mRNA.tab=-obs>,_genome.tab=-obs>]\n");
 fprintf(f, "em [max_iters=0,log_z_tol=0.001]\n");
 
 return carefulCloseWarn(&f);
@@ -279,9 +305,9 @@
 /* make a new tmp file containing the config */
 char tmpConfig[512];
 safef(tmpConfig, sizeof(tmpConfig), "%s/tmp_%s_config.txt", tmpDir, ba->tableName);
 
-if (!writeConfigFile(tmpConfig, tmpEvidence))
+if (!writeConfigFile(tmpConfig, tmpEvidence, pd->emParameters))
     errAbort("Problem writing config file %s\n", tmpConfig);
 
 /* build command */
 char tmpOutput[128];
@@ -338,28 +364,11 @@
 
 	// redesigned to handle all data
 	pd->data = spData;
 	pd->featureIds = featureHash;
+	pd->emParameters = NULL; // null this out for this method
 	struct analysisVals *avList = ba->analyze(ba, pd, -1, feature_id);
 	
-    /* old, single value method
-	struct analysisVals *av, *avList = NULL;
-    for (sp = spData; sp; sp = sp->next)
-	{
-	pd->data = sp->val;
-	if (slCount(pd->data) < 2) 
-	    continue;  // currently only consider samples with more than one type of evidence.
-	pd->featureIds = featureHash;
-	int sample_id = atoi(sp->name);
-	av = ba->analyze(ba, pd, sample_id, -1);
-	if (!av)
-	    break;
-	avList = slCat(avList, av);
-	fprintf(stderr, ".");
-	fflush(stderr);
-	}
-    fprintf(stderr, "\n");*/
-    
     struct pathwayVals *newPvList = convertToPathwayVals(avList, feature_id);
     pvList = slCat(pvList, newPvList);
     count++;
     fprintf(stderr, "%d of %d pathways complete!\n", count, numPathways);