src/hg/instinct/bioInt2/pathwayNullDist.c 1.2
1.2 2009/05/20 20:34:36 jsanborn
initial commit
Index: src/hg/instinct/bioInt2/pathwayNullDist.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/pathwayNullDist.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -b -B -U 4 -r1.1 -r1.2
--- src/hg/instinct/bioInt2/pathwayNullDist.c 14 May 2009 22:34:07 -0000 1.1
+++ src/hg/instinct/bioInt2/pathwayNullDist.c 20 May 2009 20:34:36 -0000 1.2
@@ -296,10 +296,10 @@
safef(tmpPathway, sizeof(tmpPathway), "%s/tmp_%s_path.tab", tmpDir, tableName);
/* make temporary file containing evidence */
char tmpEvidence[512];
-safef(tmpEvidence, sizeof(tmpEvidence), "%s/%s_pid_%d_sample_%d_evid.tab",
- tmpDir, tableName, pd->id, sample_id);
+safef(tmpEvidence, sizeof(tmpEvidence), "%s/tmp_%s_evid.tab", tmpDir, tableName);
+
if (!writeEvidenceFile(tmpEvidence, pd))
errAbort("Problem writing evidence file %s\n", tmpEvidence);
@@ -322,11 +322,11 @@
struct analysisVals *avList = readAnalysisValsFromFile(tmpOutput, pd->entities, sample_id);
return avList;
}
-void factorGraphNull(char *tableName, void *data, int sample_id, int feature_id)
+void factorGraphNull(char *tableName, void *data, int sample_id, int feature_id,
+ char *nullType)
{
-
struct pathwayData *pd = data;
/* make temporary file containing pathway info */
char tmpPathway[512];
@@ -334,19 +334,22 @@
"%s/tmp_%s_path.tab", tmpDir, tableName);
/* make temporary file containing evidence */
char tmpEvidence[512];
-safef(tmpEvidence, sizeof(tmpEvidence),
- "%s/null_%s_pid_%d_iter_%d_evid.tab",
- tmpDir, tableName, pd->id, sample_id);
+safef(tmpEvidence, sizeof(tmpEvidence), "%s/tmp_%s_evid.tab", tmpDir, tableName);
if (!writeNullEvidenceFile(tmpEvidence, pd))
errAbort("Problem writing evidence file %s\n", tmpEvidence);
/* build command */
char tmpOutput[128];
-safef(tmpOutput, sizeof(tmpOutput),
- "%s/null_%s_pid_%d_iter_%d_output.tab",
+if (sameString(nullType, "allData"))
+ safef(tmpOutput, sizeof(tmpOutput),
+ "%s/null_allData_%s_pid_%d_iter_%d_output.tab",
+ tmpDir, tableName, pd->id, sample_id);
+else
+ safef(tmpOutput, sizeof(tmpOutput),
+ "%s/null_within_%s_pid_%d_iter_%d_output.tab",
tmpDir, tableName, pd->id, sample_id);
char command[512];
safef(command, sizeof(command),
@@ -418,9 +421,9 @@
return sqlQuickList(biConn, query);
}
-struct dataTuple *getDataTuples(struct sqlConnection *biConn, struct hash *featureHash,
+struct dataTuple *getAllDataTuples(struct sqlConnection *biConn, struct hash *featureHash,
struct slPair *spData)
{
struct slName *sl, *featureNames = getAnalysisFeatureNames(biConn, AF_TABLE, "gene");
@@ -472,17 +475,77 @@
return dtList;
}
+struct dataTuple *getPathwayDataTuples(struct sqlConnection *biConn, struct hash *featureHash,
+ struct slPair *spData, struct slPair *spPathway)
+{
+struct pathwayData *pd = spPathway->val;
+struct entities *en, *enList = pd->entities;
+struct slName *featureIds = NULL;
+char idStr[128];
+for (en = enList; en; en = en->next)
+ {
+ int id = hashIntValDefault(featureHash, en->name, -1);
+ if (id == -1)
+ continue;
+
+ safef(idStr, sizeof(idStr), "%d", id);
+ slNameAddHead(&featureIds, idStr);
+ }
+
+struct slPair *sp;
+struct dataTuple *dt, *dtList = NULL;
+for (sp = spData; sp; sp = sp->next)
+ {
+ struct typeHash *th, *thList = sp->val;
+ if (slCount(thList) < 2)
+ continue; // currently only consider samples with more than one type of evidence.
+
+ struct slName *sl;
+ for (sl = featureIds; sl; sl = sl->next)
+ {
+ AllocVar(dt);
+ dt->cnv = BAD_VAL;
+ dt->exp = BAD_VAL;
+ for (th = thList; th; th = th->next)
+ {
+ struct hashEl *el = hashLookup(th->hash, sl->name);
+ if (!el)
+ continue;
+
+ struct analysisVals *av = el->val;
+ if (sameString(th->type, "Expression"))
+ dt->exp = av->conf;
+ else if (sameString(th->type, "CNV"))
+ dt->cnv = av->conf;
+ }
+ if (dt->cnv == BAD_VAL && dt->exp == BAD_VAL)
+ dataTupleFree(&dt);
+ else
+ slAddHead(&dtList, dt);
+ }
+ }
+
+return dtList;
+}
+
/* Pipeline Stuff */
struct pathwayVals *pathwayNullAnalysis(struct sqlConnection *biConn, char *tableName,
struct slPair *spData, struct slPair *spPathways,
- int numIters)
+ int numIters, char *nullType)
{
fprintf(stdout, "starting null analysis analysis.\n");
struct hash *featureHash = createIdHash(biConn, AF_TABLE, "feature_name");
-struct dataTuple *dtList = getDataTuples(biConn, featureHash, spData);
+
+struct dataTuple *dtList = NULL;
+if (sameString(nullType, "allData"))
+ dtList = getAllDataTuples(biConn, featureHash, spData);
+else if (sameString(nullType, "withinPathway"))
+ dtList = getPathwayDataTuples(biConn, featureHash, spData, spPathways);
+else
+ errAbort("%s not supported", nullType);
int count = 0, numPathways = slCount(spPathways);
struct slPair *pa;
@@ -501,9 +565,9 @@
int i;
for (i = 0; i < numIters; i++)
{
- factorGraphNull(tableName, pd, i, -1);
+ factorGraphNull(tableName, pd, i, -1, nullType);
fprintf(stderr, ".");
fflush(stderr);
}
fprintf(stderr, "\n");
@@ -745,15 +809,21 @@
uglyTime(NULL);
struct slPair *spPathways = getPathwaysByName(biConn, pathway);
uglyTime("got pathways");
+if (slCount(spPathways) == 0)
+ errAbort("No pathways by name of %s", pathway);
+
struct slPair *spData = analysisValsSamplesHashesList(biConn, tableNames);
uglyTime("got sample hashes");
-//pathwayLevelAnalysis(biConn, cohort_name, spData, spPathways);
-//uglyTime("analyzed all genesets");
+pathwayLevelAnalysis(biConn, cohort_name, spData, spPathways);
+uglyTime("analyzed all genesets");
+
+pathwayNullAnalysis(biConn, cohort_name, spData, spPathways, numIters, "withinPathway");
+uglyTime("produced null distribution");
-pathwayNullAnalysis(biConn, cohort_name, spData, spPathways, numIters);
+pathwayNullAnalysis(biConn, cohort_name, spData, spPathways, numIters, "allData");
uglyTime("produced null distribution");
slPairHashesFreeList(&spData);
slPairPathwayFreeList(&spPathways);