src/hg/instinct/bioInt2/bioPathwayLevel.c 1.3

1.3 2009/08/16 02:19:17 sbenz
Updated pipeline to be compatible with new binary
Index: src/hg/instinct/bioInt2/bioPathwayLevel.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/bioPathwayLevel.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 4 -r1.2 -r1.3
--- src/hg/instinct/bioInt2/bioPathwayLevel.c	20 May 2009 22:42:13 -0000	1.2
+++ src/hg/instinct/bioInt2/bioPathwayLevel.c	16 Aug 2009 02:19:17 -0000	1.3
@@ -53,8 +53,11 @@
 struct hash *hash = hashNew(0);
 for (en = enList; en; en = en->next)
     hashAddInt(hash, en->name, en->id);
 
+// skip the first
+lineFileSkip(lf,1);
+
 char *row[2];
 while (lineFileRowTab(lf, row))
     {
     char *name = row[0];
@@ -134,22 +137,26 @@
 struct pathwayData *pd = data;
 
 /* make temporary file containing pathway info */
 char tmpPathway[512];
-safef(tmpPathway, sizeof(tmpPathway), "%s/tmp_%s_path.tab", tmpDir, ba->tableName);
+//safef(tmpPathway, sizeof(tmpPathway), "%s/tmp_%s_path.tab", tmpDir, ba->tableName);
+safef(tmpPathway, sizeof(tmpPathway), "%s/tmp_%s_pid_%d_path.tab", tmpDir, ba->tableName, pd->id);
+
 
 if (!writePathwayFile(tmpPathway, pd))
     errAbort("Problem writing pathway file %s\n", tmpPathway);
 
-char command[512];
+/*char command[512];
 safef(command, sizeof(command), 
       "%s/patient_2stage_prep.sh %s", fgDir, tmpPathway);
-
+*/
 /* Run command (cross fingers!) */
-int ret = system(command);
-return (ret == 0);
+/*int ret = system(command);
+return (ret == 0);*/
+return TRUE;
 }
 
+// this no longer gets called
 boolean cleanupFactorGraph(struct biAnalysis *ba)
 {
 /* make temporary file containing pathway info */
 char tmpPathway[512];
@@ -164,9 +171,9 @@
 return (ret == 0);
 }
 
 
-boolean writeEvidenceFile(char *filename, struct pathwayData *pd)
+boolean writeEvidenceFile_OLD(char *filename, struct pathwayData *pd)
 {
 if (!pd)
     return FALSE;
 if (!pd->entities || !pd->links)
@@ -209,8 +216,124 @@
 
 return carefulCloseWarn(&f);
 }
 
+boolean writeEvidenceFile(char *filenamePrefix, struct pathwayData *pd)
+{
+if (!pd)
+    return FALSE;
+if (!pd->entities || !pd->links)
+    return FALSE;
+
+char genomeEvidenceFile[512];
+safef(genomeEvidenceFile, sizeof(genomeEvidenceFile), "%s_genome.tab", filenamePrefix);
+FILE *genomeF = mustOpen(genomeEvidenceFile, "w");
+if (!genomeF)
+    return FALSE;
+
+char mrnaEvidenceFile[512];
+safef(mrnaEvidenceFile, sizeof(mrnaEvidenceFile), "%s_mRNA.tab", filenamePrefix);
+FILE *mrnaF = mustOpen(mrnaEvidenceFile, "w");
+if (!mrnaF)
+    return FALSE;
+
+// write the upper-left node
+fprintf(genomeF,"id");
+fprintf(mrnaF,"id");
+struct entities *en, *enList = pd->entities;
+char idStr[128];
+for (en = enList; en; en = en->next)
+    {
+    int id = hashIntValDefault(pd->featureIds, en->name, -1);
+    if (id == -1)
+	continue;
+    safef(idStr, sizeof(idStr), "%d", id);
+
+    struct typeHash *th;
+    for (th = pd->data; th; th = th->next)
+		{
+		struct hashEl *el = hashLookup(th->hash, idStr);
+		if (!el)
+			continue;
+	
+		char *type;
+		if (sameString(th->type, "Expression"))
+			fprintf(mrnaF, "\t%s", en->name);
+		else if (sameString(th->type, "CNV"))
+			fprintf(genomeF, "\t%s", en->name);
+		else
+			continue;	
+		}
+    }
+
+ 	fprintf(genomeF,"\nsample");
+	fprintf(mrnaF,"\nsample");
+ for (en = enList; en; en = en->next)
+    {
+    int id = hashIntValDefault(pd->featureIds, en->name, -1);
+    if (id == -1)
+	continue;
+    safef(idStr, sizeof(idStr), "%d", id);
+
+    struct typeHash *th;
+	for (th = pd->data; th; th = th->next)
+		{
+		struct hashEl *el = hashLookup(th->hash, idStr);
+		if (!el)
+			continue;
+	
+		struct analysisVals *av = el->val;
+		double val = av->conf;
+	
+		char *type;
+		if (sameString(th->type, "Expression"))
+			fprintf(mrnaF, "\t%f", val);
+		else if (sameString(th->type, "CNV"))
+			fprintf(genomeF, "\t%f", val);
+		else
+			continue;
+		}
+   }
+
+
+fprintf(genomeF,"\n");
+fprintf(mrnaF,"\n");
+boolean gClose = carefulCloseWarn(&genomeF);
+boolean mClose = carefulCloseWarn(&mrnaF);
+
+return (gClose && mClose);
+}
+
+// eventually this will be more dynamic, but for now...
+boolean writeConfigFile(char *filename, char *evidPrefix)
+{
+
+FILE *f = mustOpen(filename, "w");
+if (!f)
+    return FALSE;
+
+/*fprintf(f, "inference [pathway_match=_pid_66_,method=BP,updates=SEQFIX,tol=1e-9,maxiter=10000,logdomain=0]\n");
+fprintf(f, "inference [pathway_match=_pid_48_,method=BP,updates=SEQFIX,tol=1e-9,maxiter=10000,logdomain=0]\n");
+fprintf(f, "inference [pathway_match=_pid_3_,method=BP,updates=SEQFIX,tol=1e-9,maxiter=10000,logdomain=0]\n");
+fprintf(f, "inference [pathway_match=_pid_9_,method=BP,updates=SEQFIX,tol=1e-9,maxiter=10000,logdomain=0]\n");
+fprintf(f, "inference [method=JTREE,updates=HUGIN,verbose=1]\n");
+fprintf(f, "evidence [suffix=_genome.tab,node=genome,disc=-0.451727;0.467580,epsilon=0.01,epsilon0=0.2]\n");
+fprintf(f, "evidence [suffix=_mRNA.tab,node=mRNA,disc=-0.1817733;0.1824913,epsilon=0.01,epsilon0=0.2]\n");
+fprintf(f, "em_step [_mRNA.tab=-obs>,_genome.tab=-obs>]\n");
+fprintf(f, "em [max_iters=0,log_z_tol=0.001]\n");
+*/
+fprintf(f, "inference [pathway_match=_pid_66_,method=BP,updates=SEQFIX,tol=1e-9,maxiter=10000,logdomain=0]\n");
+fprintf(f, "inference [pathway_match=_pid_48_,method=BP,updates=SEQFIX,tol=1e-9,maxiter=10000,logdomain=0]\n");
+fprintf(f, "inference [pathway_match=_pid_3_,method=BP,updates=SEQFIX,tol=1e-9,maxiter=10000,logdomain=0]\n");
+fprintf(f, "inference [pathway_match=_pid_9_,method=BP,updates=SEQFIX,tol=1e-9,maxiter=10000,logdomain=0]\n");
+fprintf(f, "inference [method=JTREE,updates=HUGIN,verbose=1]\n");
+fprintf(f, "evidence [suffix=_genome.tab,node=genome,disc=-0.451727;0.467580,factorParams=0.680434527777778;0.188006489814815;0.131558978703704;0.241894870370370;0.55878737962963;0.199317706481481;0.127232687037037;0.246933030555556;0.625834314814815]\n");
+fprintf(f, "evidence [suffix=_mRNA.tab,node=mRNA,disc=-0.1817733;0.1824913,factorParams=0.6211706;0.250951192592593;0.127878243518519;0.259141574074074;0.488297342592593;0.252561083333333;0.137161049074074;0.213898711111111;0.648940222222222]\n");
+fprintf(f, "em [max_iters=0,log_z_tol=0.001]\n");
+
+return carefulCloseWarn(&f);
+}
+
 struct analysisVals *factorGraph(struct biAnalysis *ba, void *data,
 				 int sample_id, int feature_id)
 { 
 if (!data)
@@ -219,25 +342,40 @@
 struct pathwayData *pd = data;
 
 /* make temporary file containing pathway info */
 char tmpPathway[512];
-safef(tmpPathway, sizeof(tmpPathway), "%s/tmp_%s_path.tab", tmpDir, ba->tableName);
+safef(tmpPathway, sizeof(tmpPathway), "%s/tmp_%s_pid_%d_path.tab", tmpDir, ba->tableName, pd->id);
 
 /* make temporary file containing evidence */
 char tmpEvidence[512];
-safef(tmpEvidence, sizeof(tmpEvidence), "%s/tmp_%s_evid.tab", tmpDir, ba->tableName);
+//safef(tmpEvidence, sizeof(tmpEvidence), "%s/tmp_%s_evid.tab", tmpDir, ba->tableName);
+safef(tmpEvidence, sizeof(tmpEvidence), "%s/tmp_%s_evid", tmpDir, ba->tableName);
 
 if (!writeEvidenceFile(tmpEvidence, pd))
     errAbort("Problem writing evidence file %s\n", tmpEvidence);
 
+/* make a new tmp file containing the config */
+char tmpConfig[512];
+safef(tmpConfig, sizeof(tmpConfig), "%s/tmp_%s_config.txt", tmpDir, ba->tableName);
+
+if (!writeConfigFile(tmpConfig, tmpEvidence))
+    errAbort("Problem writing config file %s\n", tmpConfig);
+
+
 /* build command */
 char tmpOutput[128];
 safef(tmpOutput, sizeof(tmpOutput), "%s/tmp.out", tmpDir);
 
 char command[512];
 safef(command, sizeof(command), 
+      "%s/hgFactorGraph -p %s -c %s -b %s > %s", 
+      fgDir, tmpPathway, tmpConfig, tmpEvidence, tmpOutput);
+
+/*safef(command, sizeof(command), 
       "%s/patient_2stage_exec.sh %s %s \"-d -1.3,1.3\" > %s", 
       fgDir, tmpPathway, tmpEvidence, tmpOutput);
+*/
+
 
 /* Run command (cross fingers!) */
 int ret = system(command);
 if (ret)
@@ -257,9 +395,9 @@
 {
 if (!ba->analyze)
     return NULL; 
 
-fprintf(stdout, "starting geneset analysis.\n");
+fprintf(stdout, "starting pathway analysis.\n");
 
 struct hash *featureHash = createIdHash(biConn, AF_TABLE, "feature_name");
 
 struct slPair *pa, *sp;
@@ -297,13 +435,17 @@
     
     struct pathwayVals *newPvList = convertToPathwayVals(avList, feature_id);
     pvList = slCat(pvList, newPvList);
     count++;
-    fprintf(stderr, "%d of %d pathways\n", count, numPathways);
+    fprintf(stderr, "%d of %d pathways complete!\n", count, numPathways);
     fflush(stderr);
 
-    if (!cleanupFactorGraph(ba))
+	char tmpPathway[512];
+	safef(tmpPathway, sizeof(tmpPathway), "%s/tmp_%s_pid_%d_path.tab", tmpDir, ba->tableName, pd->id);
+	if( remove( tmpPathway ) != 0 )
 	fprintf(stderr, "problem with cleanup.\n");
+    /*if (!cleanupFactorGraph(ba))
+	fprintf(stderr, "problem with cleanup.\n");*/
     analysisValsFreeList(&avList);
     }
 
 fprintf(stdout, "\n");
@@ -519,9 +661,9 @@
 struct slPair *spPathways = getPathways(biConn);
 uglyTime("got pathways");
 
 struct pathwayVals *pvList = pathwayLevelAnalysis(biConn, ba, spData, spPathways);
-uglyTime("analyzed all genesets");
+uglyTime("analyzed all pathways");
 
 /* store analysis vals in database table */
 fprintf(stdout, "storing pathway results...\n");
 storePathwayValsInDb(biConn, ba->tableName, pvList);