src/hg/instinct/bioInt2/bioPathwayLevel.c 1.5
1.5 2009/09/05 01:12:01 sbenz
Added em to pipeline
Index: src/hg/instinct/bioInt2/bioPathwayLevel.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/bioPathwayLevel.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -b -B -U 4 -r1.4 -r1.5
--- src/hg/instinct/bioInt2/bioPathwayLevel.c 1 Sep 2009 05:18:02 -0000 1.4
+++ src/hg/instinct/bioInt2/bioPathwayLevel.c 5 Sep 2009 01:12:01 -0000 1.5
@@ -235,9 +235,9 @@
return (gClose && mClose);
}
// eventually this will be more dynamic, but for now...
-boolean writeConfigFile(char *filename, char *evidPrefix)
+boolean writeConfigFile(char *filename, char *evidPrefix, struct emParams *emParameters)
{
FILE *f = mustOpen(filename, "w");
if (!f)
@@ -247,10 +247,36 @@
fprintf(f, "inference [pathway_match=_pid_48_,method=BP,updates=SEQFIX,tol=1e-9,maxiter=10000,logdomain=0]\n");
//fprintf(f, "inference [pathway_match=_pid_3_,method=BP,updates=SEQFIX,tol=1e-9,maxiter=10000,logdomain=0]\n");
//fprintf(f, "inference [pathway_match=_pid_9_,method=BP,updates=SEQFIX,tol=1e-9,maxiter=10000,logdomain=0]\n");
fprintf(f, "inference [method=JTREE,updates=HUGIN,verbose=1]\n");
-fprintf(f, "evidence [suffix=_genome.tab,node=genome,disc=-0.451727;0.467580,epsilon=0.01,epsilon0=0.2]\n");
-fprintf(f, "evidence [suffix=_mRNA.tab,node=mRNA,disc=-0.1817733;0.1824913,epsilon=0.01,epsilon0=0.2]\n");
+
+// print out the EM parameters, if they exist... otherwise use the default epsilon vals
+struct dyString *genomeParamString = dyStringNew(256);
+struct dyString *mrnaParamString = dyStringNew(256);
+if(emParameters != NULL)
+ {
+ int i,j;
+ dyStringPrintf(genomeParamString, "factorParams=");
+ dyStringPrintf(mrnaParamString, "factorParams=");
+ for(i = 0; i < 3; i++)
+ {
+ for(j = 0; j < 3; j++)
+ {
+ dyStringPrintf(genomeParamString, "%f;",emParameters->genome[i][j]);
+ dyStringPrintf(mrnaParamString, "%f;",emParameters->mRNA[i][j]);
+ }
+ }
+ // trim the last ; off
+ dyStringResize(genomeParamString,dyStringLen(genomeParamString)-1);
+ dyStringResize(mrnaParamString,dyStringLen(mrnaParamString)-1);
+ }
+else
+ {
+ dyStringPrintf(genomeParamString, "epsilon=0.01,epsilon0=0.2");
+ dyStringPrintf(mrnaParamString, "epsilon=0.01,epsilon0=0.2");
+ }
+fprintf(f, "evidence [suffix=_genome.tab,node=genome,disc=-0.451727;0.467580,%s]\n",genomeParamString->string);
+fprintf(f, "evidence [suffix=_mRNA.tab,node=mRNA,disc=-0.1817733;0.1824913,%s]\n",mrnaParamString->string);
fprintf(f, "em_step [_mRNA.tab=-obs>,_genome.tab=-obs>]\n");
fprintf(f, "em [max_iters=0,log_z_tol=0.001]\n");
return carefulCloseWarn(&f);
@@ -279,9 +305,9 @@
/* make a new tmp file containing the config */
char tmpConfig[512];
safef(tmpConfig, sizeof(tmpConfig), "%s/tmp_%s_config.txt", tmpDir, ba->tableName);
-if (!writeConfigFile(tmpConfig, tmpEvidence))
+if (!writeConfigFile(tmpConfig, tmpEvidence, pd->emParameters))
errAbort("Problem writing config file %s\n", tmpConfig);
/* build command */
char tmpOutput[128];
@@ -338,28 +364,11 @@
// redesigned to handle all data
pd->data = spData;
pd->featureIds = featureHash;
+ pd->emParameters = NULL; // null this out for this method
struct analysisVals *avList = ba->analyze(ba, pd, -1, feature_id);
- /* old, single value method
- struct analysisVals *av, *avList = NULL;
- for (sp = spData; sp; sp = sp->next)
- {
- pd->data = sp->val;
- if (slCount(pd->data) < 2)
- continue; // currently only consider samples with more than one type of evidence.
- pd->featureIds = featureHash;
- int sample_id = atoi(sp->name);
- av = ba->analyze(ba, pd, sample_id, -1);
- if (!av)
- break;
- avList = slCat(avList, av);
- fprintf(stderr, ".");
- fflush(stderr);
- }
- fprintf(stderr, "\n");*/
-
struct pathwayVals *newPvList = convertToPathwayVals(avList, feature_id);
pvList = slCat(pvList, newPvList);
count++;
fprintf(stderr, "%d of %d pathways complete!\n", count, numPathways);