src/hg/instinct/bioInt2/bioPathwayLevel.c 1.3
1.3 2009/08/16 02:19:17 sbenz
Updated pipeline to be compatible with new binary
Index: src/hg/instinct/bioInt2/bioPathwayLevel.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/bioPathwayLevel.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 4 -r1.2 -r1.3
--- src/hg/instinct/bioInt2/bioPathwayLevel.c 20 May 2009 22:42:13 -0000 1.2
+++ src/hg/instinct/bioInt2/bioPathwayLevel.c 16 Aug 2009 02:19:17 -0000 1.3
@@ -53,8 +53,11 @@
struct hash *hash = hashNew(0);
for (en = enList; en; en = en->next)
hashAddInt(hash, en->name, en->id);
+// skip the first
+lineFileSkip(lf,1);
+
char *row[2];
while (lineFileRowTab(lf, row))
{
char *name = row[0];
@@ -134,22 +137,26 @@
struct pathwayData *pd = data;
/* make temporary file containing pathway info */
char tmpPathway[512];
-safef(tmpPathway, sizeof(tmpPathway), "%s/tmp_%s_path.tab", tmpDir, ba->tableName);
+//safef(tmpPathway, sizeof(tmpPathway), "%s/tmp_%s_path.tab", tmpDir, ba->tableName);
+safef(tmpPathway, sizeof(tmpPathway), "%s/tmp_%s_pid_%d_path.tab", tmpDir, ba->tableName, pd->id);
+
if (!writePathwayFile(tmpPathway, pd))
errAbort("Problem writing pathway file %s\n", tmpPathway);
-char command[512];
+/*char command[512];
safef(command, sizeof(command),
"%s/patient_2stage_prep.sh %s", fgDir, tmpPathway);
-
+*/
/* Run command (cross fingers!) */
-int ret = system(command);
-return (ret == 0);
+/*int ret = system(command);
+return (ret == 0);*/
+return TRUE;
}
+// this no longer gets called
boolean cleanupFactorGraph(struct biAnalysis *ba)
{
/* make temporary file containing pathway info */
char tmpPathway[512];
@@ -164,9 +171,9 @@
return (ret == 0);
}
-boolean writeEvidenceFile(char *filename, struct pathwayData *pd)
+boolean writeEvidenceFile_OLD(char *filename, struct pathwayData *pd)
{
if (!pd)
return FALSE;
if (!pd->entities || !pd->links)
@@ -209,8 +216,124 @@
return carefulCloseWarn(&f);
}
+boolean writeEvidenceFile(char *filenamePrefix, struct pathwayData *pd)
+{
+if (!pd)
+ return FALSE;
+if (!pd->entities || !pd->links)
+ return FALSE;
+
+char genomeEvidenceFile[512];
+safef(genomeEvidenceFile, sizeof(genomeEvidenceFile), "%s_genome.tab", filenamePrefix);
+FILE *genomeF = mustOpen(genomeEvidenceFile, "w");
+if (!genomeF)
+ return FALSE;
+
+char mrnaEvidenceFile[512];
+safef(mrnaEvidenceFile, sizeof(mrnaEvidenceFile), "%s_mRNA.tab", filenamePrefix);
+FILE *mrnaF = mustOpen(mrnaEvidenceFile, "w");
+if (!mrnaF)
+ return FALSE;
+
+// write the upper-left node
+fprintf(genomeF,"id");
+fprintf(mrnaF,"id");
+struct entities *en, *enList = pd->entities;
+char idStr[128];
+for (en = enList; en; en = en->next)
+ {
+ int id = hashIntValDefault(pd->featureIds, en->name, -1);
+ if (id == -1)
+ continue;
+ safef(idStr, sizeof(idStr), "%d", id);
+
+ struct typeHash *th;
+ for (th = pd->data; th; th = th->next)
+ {
+ struct hashEl *el = hashLookup(th->hash, idStr);
+ if (!el)
+ continue;
+
+ char *type;
+ if (sameString(th->type, "Expression"))
+ fprintf(mrnaF, "\t%s", en->name);
+ else if (sameString(th->type, "CNV"))
+ fprintf(genomeF, "\t%s", en->name);
+ else
+ continue;
+ }
+ }
+
+ fprintf(genomeF,"\nsample");
+ fprintf(mrnaF,"\nsample");
+ for (en = enList; en; en = en->next)
+ {
+ int id = hashIntValDefault(pd->featureIds, en->name, -1);
+ if (id == -1)
+ continue;
+ safef(idStr, sizeof(idStr), "%d", id);
+
+ struct typeHash *th;
+ for (th = pd->data; th; th = th->next)
+ {
+ struct hashEl *el = hashLookup(th->hash, idStr);
+ if (!el)
+ continue;
+
+ struct analysisVals *av = el->val;
+ double val = av->conf;
+
+ char *type;
+ if (sameString(th->type, "Expression"))
+ fprintf(mrnaF, "\t%f", val);
+ else if (sameString(th->type, "CNV"))
+ fprintf(genomeF, "\t%f", val);
+ else
+ continue;
+ }
+ }
+
+
+fprintf(genomeF,"\n");
+fprintf(mrnaF,"\n");
+boolean gClose = carefulCloseWarn(&genomeF);
+boolean mClose = carefulCloseWarn(&mrnaF);
+
+return (gClose && mClose);
+}
+
+// eventually this will be more dynamic, but for now...
+boolean writeConfigFile(char *filename, char *evidPrefix)
+{
+
+FILE *f = mustOpen(filename, "w");
+if (!f)
+ return FALSE;
+
+/*fprintf(f, "inference [pathway_match=_pid_66_,method=BP,updates=SEQFIX,tol=1e-9,maxiter=10000,logdomain=0]\n");
+fprintf(f, "inference [pathway_match=_pid_48_,method=BP,updates=SEQFIX,tol=1e-9,maxiter=10000,logdomain=0]\n");
+fprintf(f, "inference [pathway_match=_pid_3_,method=BP,updates=SEQFIX,tol=1e-9,maxiter=10000,logdomain=0]\n");
+fprintf(f, "inference [pathway_match=_pid_9_,method=BP,updates=SEQFIX,tol=1e-9,maxiter=10000,logdomain=0]\n");
+fprintf(f, "inference [method=JTREE,updates=HUGIN,verbose=1]\n");
+fprintf(f, "evidence [suffix=_genome.tab,node=genome,disc=-0.451727;0.467580,epsilon=0.01,epsilon0=0.2]\n");
+fprintf(f, "evidence [suffix=_mRNA.tab,node=mRNA,disc=-0.1817733;0.1824913,epsilon=0.01,epsilon0=0.2]\n");
+fprintf(f, "em_step [_mRNA.tab=-obs>,_genome.tab=-obs>]\n");
+fprintf(f, "em [max_iters=0,log_z_tol=0.001]\n");
+*/
+fprintf(f, "inference [pathway_match=_pid_66_,method=BP,updates=SEQFIX,tol=1e-9,maxiter=10000,logdomain=0]\n");
+fprintf(f, "inference [pathway_match=_pid_48_,method=BP,updates=SEQFIX,tol=1e-9,maxiter=10000,logdomain=0]\n");
+fprintf(f, "inference [pathway_match=_pid_3_,method=BP,updates=SEQFIX,tol=1e-9,maxiter=10000,logdomain=0]\n");
+fprintf(f, "inference [pathway_match=_pid_9_,method=BP,updates=SEQFIX,tol=1e-9,maxiter=10000,logdomain=0]\n");
+fprintf(f, "inference [method=JTREE,updates=HUGIN,verbose=1]\n");
+fprintf(f, "evidence [suffix=_genome.tab,node=genome,disc=-0.451727;0.467580,factorParams=0.680434527777778;0.188006489814815;0.131558978703704;0.241894870370370;0.55878737962963;0.199317706481481;0.127232687037037;0.246933030555556;0.625834314814815]\n");
+fprintf(f, "evidence [suffix=_mRNA.tab,node=mRNA,disc=-0.1817733;0.1824913,factorParams=0.6211706;0.250951192592593;0.127878243518519;0.259141574074074;0.488297342592593;0.252561083333333;0.137161049074074;0.213898711111111;0.648940222222222]\n");
+fprintf(f, "em [max_iters=0,log_z_tol=0.001]\n");
+
+return carefulCloseWarn(&f);
+}
+
struct analysisVals *factorGraph(struct biAnalysis *ba, void *data,
int sample_id, int feature_id)
{
if (!data)
@@ -219,25 +342,40 @@
struct pathwayData *pd = data;
/* make temporary file containing pathway info */
char tmpPathway[512];
-safef(tmpPathway, sizeof(tmpPathway), "%s/tmp_%s_path.tab", tmpDir, ba->tableName);
+safef(tmpPathway, sizeof(tmpPathway), "%s/tmp_%s_pid_%d_path.tab", tmpDir, ba->tableName, pd->id);
/* make temporary file containing evidence */
char tmpEvidence[512];
-safef(tmpEvidence, sizeof(tmpEvidence), "%s/tmp_%s_evid.tab", tmpDir, ba->tableName);
+//safef(tmpEvidence, sizeof(tmpEvidence), "%s/tmp_%s_evid.tab", tmpDir, ba->tableName);
+safef(tmpEvidence, sizeof(tmpEvidence), "%s/tmp_%s_evid", tmpDir, ba->tableName);
if (!writeEvidenceFile(tmpEvidence, pd))
errAbort("Problem writing evidence file %s\n", tmpEvidence);
+/* make a new tmp file containing the config */
+char tmpConfig[512];
+safef(tmpConfig, sizeof(tmpConfig), "%s/tmp_%s_config.txt", tmpDir, ba->tableName);
+
+if (!writeConfigFile(tmpConfig, tmpEvidence))
+ errAbort("Problem writing config file %s\n", tmpConfig);
+
+
/* build command */
char tmpOutput[128];
safef(tmpOutput, sizeof(tmpOutput), "%s/tmp.out", tmpDir);
char command[512];
safef(command, sizeof(command),
+ "%s/hgFactorGraph -p %s -c %s -b %s > %s",
+ fgDir, tmpPathway, tmpConfig, tmpEvidence, tmpOutput);
+
+/*safef(command, sizeof(command),
"%s/patient_2stage_exec.sh %s %s \"-d -1.3,1.3\" > %s",
fgDir, tmpPathway, tmpEvidence, tmpOutput);
+*/
+
/* Run command (cross fingers!) */
int ret = system(command);
if (ret)
@@ -257,9 +395,9 @@
{
if (!ba->analyze)
return NULL;
-fprintf(stdout, "starting geneset analysis.\n");
+fprintf(stdout, "starting pathway analysis.\n");
struct hash *featureHash = createIdHash(biConn, AF_TABLE, "feature_name");
struct slPair *pa, *sp;
@@ -297,13 +435,17 @@
struct pathwayVals *newPvList = convertToPathwayVals(avList, feature_id);
pvList = slCat(pvList, newPvList);
count++;
- fprintf(stderr, "%d of %d pathways\n", count, numPathways);
+ fprintf(stderr, "%d of %d pathways complete!\n", count, numPathways);
fflush(stderr);
- if (!cleanupFactorGraph(ba))
+ char tmpPathway[512];
+ safef(tmpPathway, sizeof(tmpPathway), "%s/tmp_%s_pid_%d_path.tab", tmpDir, ba->tableName, pd->id);
+ if( remove( tmpPathway ) != 0 )
fprintf(stderr, "problem with cleanup.\n");
+ /*if (!cleanupFactorGraph(ba))
+ fprintf(stderr, "problem with cleanup.\n");*/
analysisValsFreeList(&avList);
}
fprintf(stdout, "\n");
@@ -519,9 +661,9 @@
struct slPair *spPathways = getPathways(biConn);
uglyTime("got pathways");
struct pathwayVals *pvList = pathwayLevelAnalysis(biConn, ba, spData, spPathways);
-uglyTime("analyzed all genesets");
+uglyTime("analyzed all pathways");
/* store analysis vals in database table */
fprintf(stdout, "storing pathway results...\n");
storePathwayValsInDb(biConn, ba->tableName, pvList);