src/hg/instinct/bioInt2/grabData.c 1.7
1.7 2009/06/05 20:55:40 sbenz
Fixed issue where clinical data was querying wrong field, also starting to add pathway stuff
Index: src/hg/instinct/bioInt2/grabData.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/grabData.c,v
retrieving revision 1.6
retrieving revision 1.7
diff -b -B -U 1000000 -r1.6 -r1.7
--- src/hg/instinct/bioInt2/grabData.c 20 May 2009 20:34:36 -0000 1.6
+++ src/hg/instinct/bioInt2/grabData.c 5 Jun 2009 20:55:40 -0000 1.7
@@ -1,399 +1,418 @@
/* mapProbesToGenes - Will maps probes in BED format to overlapping gene(s). */
#include "common.h"
#include "linefile.h"
#include "hash.h"
#include "options.h"
#include "jksql.h"
#include "hPrint.h"
#include "hdb.h"
#include "dystring.h"
#include "bioIntDb.h"
#include "bioIntDriver.h"
void usage()
/* Explain usage and exit. */
{
errAbort(
"grabData \n"
"usage:\n"
" grabData [options] table\n"
" -clinical get clinical data corresponding to table\n"
" -transpose samples as rows, features as columns\n"
);
}
char db[128] = "bioInt";
char localDb[128] = "localDb";
boolean getClinical = FALSE;
boolean transpose = FALSE;
static struct optionSpec options[] = {
{"clinical", OPTION_BOOLEAN},
{"transpose", OPTION_BOOLEAN},
{NULL, 0}
};
struct valConf {
struct valConf *next;
double val;
double conf;
};
+boolean isPathwayAnalysisTable(struct sqlConnection *conn, char *tableName)
+{
+char query[256];
+safef(query, sizeof(query),
+ "select * from %s aTable,%s mTable where aTable.result_table = \"%s\" AND aTable.module_id = mTable.id AND mTable.name = \"%s\"",
+ AN_TABLE, AM_TABLE, tableName, "factorGraph");
+
+return sqlExists(conn, query);
+}
+
void spHashListTranspose(struct slPair *spList, struct slName *slList,
struct slPair **retSpList, struct slName **retSlList)
{
// Transpose data in spList. So, sp->name = sample name, sp->hash keyed by feature name
struct slName *sl, *outSlList = NULL;
struct slPair *sp, *outSp, *outSpList = NULL;
struct hash *featureHash = hashNew(0);
struct hash *sampleHash = hashNew(0);
for (sl = slList; sl; sl = sl->next)
{
struct hash *dataHash;
struct hashEl *el = hashLookup(sampleHash, sl->name);
if (!el)
{
AllocVar(outSp);
outSp->name = cloneString(sl->name);
dataHash = hashNew(0);
outSp->val = dataHash;
slAddHead(&outSpList, outSp);
hashAdd(sampleHash, sl->name, outSp);
}
else
outSp = el->val;
for (sp = spList; sp; sp = sp->next)
{
el = hashLookup(featureHash, sp->name);
if (!el)
{
slNameAddHead(&outSlList, sp->name);
hashAddInt(featureHash, sp->name, 1);
}
struct hash *hash = sp->val;
el = hashLookup(hash, sl->name);
if (el)
{
struct slDouble *sd = el->val;
hash = outSp->val;
hashAdd(hash, sp->name, sd);
}
}
}
*retSpList = outSpList;
*retSlList = outSlList;
}
void spHashListPrint(struct slPair *inSpList, struct slName *inSlList, char *tableName)
{
struct slPair *spList = NULL;
struct slName *slList = NULL;
char filename[256];
if (transpose)
{
safef(filename, sizeof(filename), "%s_data_transpose.tab", tableName);
fprintf(stdout, "transposing hash\n");
spHashListTranspose(inSpList, inSlList, &spList, &slList);
fprintf(stdout, "finished transposing\n");
}
else
{
safef(filename, sizeof(filename), "%s_data.tab", tableName);
spList = inSpList;
slList = inSlList;
}
FILE *f = mustOpen(filename, "w");
struct slName *sl;
struct slPair *sp;
// Print header
fprintf(f, "feature_name\t");
for (sl = slList; sl; sl = sl->next)
{
fprintf(f, "%s", sl->name);
if (sl->next)
fprintf(f, "\t");
}
fprintf(f, "\n");
struct hashEl *el;
for (sp = spList; sp; sp = sp->next)
{
fprintf(f, "%s\t", sp->name);
struct hash *hash = sp->val;
for (sl = slList; sl; sl = sl->next)
{
el = hashLookup(hash, sl->name);
if (el)
{
struct valConf *vc = el->val;
fprintf(f, "%f,%f", vc->val, vc->conf);
}
if (sl->next)
fprintf(f, "\t");
}
fprintf(f, "\n");
}
}
struct hash *createHash(struct sqlConnection *biConn,
char *table, char *key_field, char *val_field)
{
struct hash *hash = hashNew(0);
char query[128];
safef(query, sizeof(query), "select %s, %s from %s", key_field, val_field, table);
struct sqlResult *sr = sqlGetResult(biConn, query);
char **row = NULL;
while ((row = sqlNextRow(sr)) != NULL)
{
char *id = row[0];
char *name = cloneString(row[1]);
hashAdd(hash, id, name);
}
return hash;
}
struct datasets *findDataset(struct sqlConnection *conn, char *data_table)
{
char query[256];
safef(query, sizeof(query),
"select * from %s where data_table = \"%s\" ",
DA_TABLE, data_table);
return datasetsLoadByQuery(conn, query);
}
struct samples *getSamples(struct sqlConnection *conn, struct datasets *da)
{
char query[256];
safef(query, sizeof(query),
- "select * from %s where dataset_id = %d order by exp_id",
+ "select * from %s where dataset_id = %d order by id",
SA_TABLE, da->id);
return samplesLoadByQuery(conn, query);
}
void grabClinicalData(struct sqlConnection *conn, char *tableName)
{
struct datasets *da = findDataset(conn, tableName);
if (!da)
errAbort("No dataset by name of %s in db\n", tableName);
struct samples *sa, *saList = getSamples(conn, da);
struct dyString *dy = newDyString(100);
dyStringPrintf(dy,
"select %s.name, %s.name, %s.val from %s join %s on %s.sample_id = %s.id "
"join %s on %s.feature_id = %s.id "
"where %s.id in (",
SA_TABLE, FE_TABLE, CD_TABLE,
CD_TABLE, SA_TABLE, CD_TABLE, SA_TABLE,
FE_TABLE, CD_TABLE, FE_TABLE, SA_TABLE);
for (sa = saList; sa; sa = sa->next)
{
dyStringPrintf(dy, "%d", sa->id);
if (sa->next)
dyStringPrintf(dy, ",");
}
dyStringPrintf(dy, ")");
char *query = dyStringCannibalize(&dy);
struct slPair *sp, *spList = NULL;
struct slName *slList = NULL;
struct hash *samplesHash = hashNew(0);
struct hash *featureData = hashNew(0);
struct sqlResult *sr = sqlGetResult(conn, query);
char **row = NULL;
while ((row = sqlNextRow(sr)) != NULL)
{
char *sample = row[0];
char *feature = row[1];
double val = atof(row[2]);
struct hash *sampleData;
struct hashEl *el = hashLookup(featureData, feature);
if (!el)
{
sampleData = hashNew(0);
hashAdd(featureData, feature, sampleData);
AllocVar(sp);
sp->name = cloneString(feature);
sp->val = sampleData;
slAddHead(&spList, sp);
}
else
sampleData = el->val;
if (!hashLookup(samplesHash, sample))
{
slNameAddHead(&slList, sample);
hashAddInt(samplesHash, sample, 1);
}
struct valConf *vc;
AllocVar(vc);
vc->val = val;
vc->conf = -1.0;
hashAdd(sampleData, sample, vc);
}
sqlFreeResult(&sr);
hashFree(&featureData);
slReverse(&spList);
slReverse(&slList);
char clinTableName[512];
safef(clinTableName, sizeof(clinTableName),
"%s_clinical",
tableName);
spHashListPrint(spList, slList, clinTableName);
}
void medianSpHashList(struct slPair *spList, struct slName *slList)
{
struct hashEl *el;
struct slPair *sp;
struct slName *sl;
for (sp = spList; sp; sp = sp->next)
{
struct hash *hash = sp->val;
for (sl = slList; sl; sl = sl->next)
{
el = hashLookup(hash, sl->name);
struct slDouble *sd, *sdList = el->val;
double val = slDoubleMedian(sdList);
sd = slDoubleNew(val);
hashRemove(hash, sl->name);
slFreeList(&sdList);
hashAdd(hash, sl->name, sd);
}
}
}
void grabAnalysisData(struct sqlConnection *conn, char *tableName)
{
struct hash *sampleIdHash = createHash(conn, SA_TABLE, "id", "name");
-struct hash *featureIdHash = createHash(conn, AF_TABLE, "id", "feature_name");
+boolean isPathwayTable = isPathwayAnalysisTable(conn,tableName);
+struct hash *featureIdHash;
+struct hash *pathwayIdHash;
+if(isPathwayTable)
+{
+ featureIdHash = createHash(conn, EN_TABLE, "entity_id", "entity_name");
+ pathwayIdHash = createHash(conn, EP_TABLE, "pathway_id", "pathway_name");
+}
+else
+ featureIdHash = createHash(conn, AF_TABLE, "id", "feature_name");
char query[256];
safef(query, sizeof(query), "select * from %s", tableName);
struct sqlResult *sr = sqlGetResult(conn, query);
char **row = NULL;
struct hash *samplesHash = hashNew(0);
struct slName *slList = NULL;
struct hash *featureData = hashNew(0);
struct slPair *sp, *spList = NULL;
while ((row = sqlNextRow(sr)) != NULL)
{
char *sample_id = row[0];
char *feature_id = row[1];
double val = atof(row[2]);
double conf = atof(row[3]);
struct hash *sampleData;
struct hashEl *el = hashLookup(featureData, feature_id);
if (!el)
{
sampleData = hashNew(0);
hashAdd(featureData, feature_id, sampleData);
el = hashLookup(featureIdHash, feature_id);
if (el)
{
char *name = el->val;
AllocVar(sp);
sp->name = cloneString(name);
sp->val = sampleData;
slAddHead(&spList, sp);
}
}
else
sampleData = el->val;
el = hashLookup(sampleIdHash, sample_id);
char *name = el->val;
if (!hashLookup(samplesHash, name))
{
slNameAddHead(&slList, name);
hashAddInt(samplesHash, name, 1);
}
struct valConf *vc;
AllocVar(vc);
vc->val = val;
vc->conf = conf;
hashAdd(sampleData, name, vc);
}
sqlFreeResult(&sr);
hashFree(&featureData);
hashFree(&samplesHash);
slReverse(&spList);
slReverse(&slList);
spHashListPrint(spList, slList, tableName);
}
boolean isAnalysisTable(struct sqlConnection *conn, char *tableName)
{
char query[256];
safef(query, sizeof(query),
"select * from %s where result_table = \"%s\" ",
AN_TABLE, tableName);
return sqlExists(conn, query);
}
void grabData(char *tableName)
{
struct sqlConnection *conn = hAllocConnProfile(localDb, db);
if (!sqlTableExists(conn, tableName))
errAbort("%s doesn't exist in %s db.\n", tableName, db);
if (getClinical)
grabClinicalData(conn, tableName);
else
grabAnalysisData(conn, tableName);
}
int main(int argc, char *argv[])
/* Process command line. */
{
optionInit(&argc, argv, options);
if (argc != 2)
usage();
if (optionExists("clinical"))
getClinical = TRUE;
if (optionExists("transpose"))
transpose = TRUE;
grabData(argv[1]);
return 0;
}