src/hg/instinct/bioInt2/grabData.c 1.7

1.7 2009/06/05 20:55:40 sbenz
Fixed issue where clinical data was querying wrong field, also starting to add pathway stuff
Index: src/hg/instinct/bioInt2/grabData.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/grabData.c,v
retrieving revision 1.6
retrieving revision 1.7
diff -b -B -U 1000000 -r1.6 -r1.7
--- src/hg/instinct/bioInt2/grabData.c	20 May 2009 20:34:36 -0000	1.6
+++ src/hg/instinct/bioInt2/grabData.c	5 Jun 2009 20:55:40 -0000	1.7
@@ -1,399 +1,418 @@
 /* grabData - Dump a bioInt analysis result table, or the clinical data for a dataset, to a tab-separated file. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "jksql.h"
 #include "hPrint.h"
 #include "hdb.h"  
 #include "dystring.h"
 #include "bioIntDb.h"
 #include "bioIntDriver.h"
 
 void usage()
 /* Report the command-line synopsis through errAbort. */
 {
 errAbort("grabData \n"
          "usage:\n"
          "   grabData [options] table\n"
          "      -clinical    get clinical data corresponding to table\n"
          "      -transpose   samples as rows, features as columns\n");
 }
 
 /* Both names are handed to hAllocConnProfile() in grabData(): localDb first, db second.
  * db is also the name shown in the "doesn't exist" error message. */
 char db[128] = "bioInt";
 char localDb[128] = "localDb";
 
 /* Command-line switches; main() turns them on for -clinical and -transpose. */
 boolean getClinical = FALSE;
 boolean transpose = FALSE;
 
 /* Option table passed to optionInit(); the last entry is {NULL, 0},
  * presumably the end-of-table marker (confirm against options.h). */
 static struct optionSpec options[] = {
     {"clinical", OPTION_BOOLEAN},
     {"transpose", OPTION_BOOLEAN},
     {NULL, 0}
 }; 
 
 struct valConf {
     struct valConf *next;
     double val;
     double conf;
 };
 
+boolean isPathwayAnalysisTable(struct sqlConnection *conn, char *tableName)
+{ /* Returns the result of sqlExists() on a query that joins AN_TABLE to AM_TABLE
+   * (aTable.module_id = mTable.id) and keeps rows whose result_table is tableName
+   * and whose module name is "factorGraph". */
+char query[256];
+safef(query, sizeof(query), 
+      "select * from %s aTable,%s mTable where aTable.result_table = \"%s\" AND aTable.module_id = mTable.id AND mTable.name = \"%s\"",
+      AN_TABLE, AM_TABLE, tableName, "factorGraph");
+
+return sqlExists(conn, query);
+}
+
 
 void spHashListTranspose(struct slPair *spList, struct slName *slList, 
                          struct slPair **retSpList, struct slName **retSlList)
 /* Transpose the table described by spList and slList.
  *   spList    - input rows: sp->name is the row label, sp->val is a hash of cells
  *               keyed by column label.
  *   slList    - input column labels.
  *   retSpList - receives the output rows, one per entry of slList; each row owns a
  *               new hash keyed by the labels found in spList.
  *   retSlList - receives the output column labels, one per distinct sp->name.
  * Cell values are shared with the input hashes, not copied.  Both output lists
  * follow the order of the input list they were derived from. */
 {
 struct slName *sl, *outSlList = NULL;
 struct slPair *sp, *outSp, *outSpList = NULL;
 
 /* Scratch sets, used only so that no label is emitted twice. */
 struct hash *featureHash = hashNew(0);
 struct hash *sampleHash = hashNew(0);
 
 /* The output column labels depend only on spList, so collect them once
  * instead of rescanning for every sample. */
 for (sp = spList; sp; sp = sp->next)
     {
     if (!hashLookup(featureHash, sp->name))
         {
         slNameAddHead(&outSlList, sp->name);
         hashAddInt(featureHash, sp->name, 1);
         }
     }
 
 for (sl = slList; sl; sl = sl->next)
     {
     struct hashEl *el = hashLookup(sampleHash, sl->name);
     if (!el)
         {
         AllocVar(outSp);
         outSp->name = cloneString(sl->name);
         outSp->val = hashNew(0);
         slAddHead(&outSpList, outSp);
         hashAdd(sampleHash, sl->name, outSp);
         }
     else
         outSp = el->val;
 
     /* Move this column's cell out of every input row into the new row. */
     struct hash *outHash = outSp->val;
     for (sp = spList; sp; sp = sp->next)
         {
         struct hash *inHash = sp->val;
         el = hashLookup(inHash, sl->name);
         if (el)
             hashAdd(outHash, sp->name, el->val);
         }
     }
 
 /* Head insertion built both lists back to front; restore input order. */
 slReverse(&outSpList);
 slReverse(&outSlList);
 
 /* Nothing returned points into the scratch hashes: featureHash holds ints, and
  * the slPairs in sampleHash remain reachable through outSpList. */
 hashFree(&featureHash);
 hashFree(&sampleHash);
 
 *retSpList = outSpList;
 *retSlList = outSlList;
 }
 
 void spHashListPrint(struct slPair *inSpList, struct slName *inSlList, char *tableName)
 /* Write a table to "<tableName>_data.tab", or to "<tableName>_data_transpose.tab"
  * with rows and columns swapped when the global transpose flag is set.
  *   inSpList - rows: sp->name is the row label, sp->val a hash of struct valConf
  *              keyed by column label.
  *   inSlList - column labels, in output order.
  * The first line is a header; each cell is written as "val,conf", and a cell with
  * no entry in the row's hash is left empty. */
 {
 struct slPair *spList = NULL;
 struct slName *slList = NULL;
 
 char filename[256];
 if (transpose)
     {
     safef(filename, sizeof(filename), "%s_data_transpose.tab", tableName);
     fprintf(stdout, "transposing hash\n");
     spHashListTranspose(inSpList, inSlList, &spList, &slList);
     fprintf(stdout, "finished transposing\n");
     }
 else
     {
     safef(filename, sizeof(filename), "%s_data.tab", tableName);
     spList = inSpList;
     slList = inSlList;
     }
 FILE *f = mustOpen(filename, "w");
 
 struct slName *sl;
 struct slPair *sp;
 
 // Print header
 fprintf(f, "feature_name\t");
 for (sl = slList; sl; sl = sl->next)
     {
     fprintf(f, "%s", sl->name);
     if (sl->next)
         fprintf(f, "\t");
     }
 fprintf(f, "\n");
 
 // Print one line per row
 struct hashEl *el;
 for (sp = spList; sp; sp = sp->next)
     {
     fprintf(f, "%s\t", sp->name);
     struct hash *hash = sp->val;
     for (sl = slList; sl; sl = sl->next)
         {
         el = hashLookup(hash, sl->name);
         if (el)
             {
             struct valConf *vc = el->val;
             fprintf(f, "%f,%f", vc->val, vc->conf);
             }
         if (sl->next)
             fprintf(f, "\t");
         }
     fprintf(f, "\n");
     }
 
 /* Close the file opened by mustOpen so the output is flushed and the handle released. */
 carefulClose(&f);
 }
 
 
 struct hash *createHash(struct sqlConnection *biConn,
                         char *table, char *key_field, char *val_field)
 /* Build a lookup hash from two columns of a table.
  *   biConn    - open database connection.
  *   table     - table to read.
  *   key_field - column whose text becomes the hash key.
  *   val_field - column whose text is cloned and stored as the hash value.
  * Returns a new hash; the caller owns it and the cloned value strings. */
 {
 struct hash *hash = hashNew(0);
 char query[256];   /* same size as the other query buffers in this file */
 safef(query, sizeof(query), "select %s, %s from %s", key_field, val_field, table);
 
 struct sqlResult *sr = sqlGetResult(biConn, query);
 char **row = NULL;
 while ((row = sqlNextRow(sr)) != NULL)
     {
     /* row[] is only used inside this iteration, so the value is copied. */
     hashAdd(hash, row[0], cloneString(row[1]));
     }
 
 /* Release the result set, as the other query loops in this file do. */
 sqlFreeResult(&sr);
 
 return hash;
 }   
 
 struct datasets *findDataset(struct sqlConnection *conn, char *data_table)
 /* Load the DA_TABLE rows whose data_table column equals data_table and
  * return whatever datasetsLoadByQuery yields for that query. */
 {
 char sql[256];
 
 safef(sql, sizeof(sql), "select * from %s where data_table = \"%s\" ", DA_TABLE, data_table);
 struct datasets *found = datasetsLoadByQuery(conn, sql);
 return found;
 }
 
 struct samples *getSamples(struct sqlConnection *conn, struct datasets *da)
 /* Load the SA_TABLE rows whose dataset_id equals da->id, via samplesLoadByQuery.
  * This revision changes the sort column from exp_id (removed line) to id (added line). */
 {
 char query[256];
 safef(query, sizeof(query), 
-      "select * from %s where dataset_id = %d order by exp_id", 
+      "select * from %s where dataset_id = %d order by id", 
       SA_TABLE, da->id);
 
 return samplesLoadByQuery(conn, query);
 }
 
 
 
 void grabClinicalData(struct sqlConnection *conn, char *tableName)
 /* Write the clinical values for every sample of the dataset whose data table is
  * tableName.  Output goes through spHashListPrint under the name
  * "<tableName>_clinical": one row per feature, one column per sample, in the
  * order in which each first appears in the query result.
  * Aborts if no dataset or no samples are found. */
 {
 struct datasets *da = findDataset(conn, tableName);
 if (!da)
     errAbort("No dataset by name of %s in db\n", tableName);
 
 struct samples *sa, *saList = getSamples(conn, da);
 /* With no samples the id list below would be empty and the query would end in
  * "in ()", which is not valid SQL; fail with a clear message instead. */
 if (!saList)
     errAbort("No samples found for dataset %s in db\n", tableName);
 
 /* select sample name, feature name, value for the listed sample ids */
 struct dyString *dy = newDyString(100);
 dyStringPrintf(dy, 
                "select %s.name, %s.name, %s.val from %s join %s on %s.sample_id = %s.id "
                "join %s on %s.feature_id = %s.id "
                "where %s.id in (",
                SA_TABLE, FE_TABLE, CD_TABLE,
                CD_TABLE, SA_TABLE, CD_TABLE, SA_TABLE, 
                FE_TABLE, CD_TABLE, FE_TABLE, SA_TABLE);
 
 for (sa = saList; sa; sa = sa->next)
     {
     dyStringPrintf(dy, "%d", sa->id);
     if (sa->next)
         dyStringPrintf(dy, ",");
     }
 dyStringPrintf(dy, ")");
 char *query = dyStringCannibalize(&dy);
 
 struct slPair *sp, *spList = NULL;
 struct slName *slList = NULL;
 struct hash *samplesHash = hashNew(0);   /* sample names already in slList */
 struct hash *featureData = hashNew(0);   /* feature name -> that feature's row hash */
 
 struct sqlResult *sr = sqlGetResult(conn, query);
 char **row = NULL;
 
 while ((row = sqlNextRow(sr)) != NULL)
     {
     char *sample = row[0];
     char *feature = row[1];
     double val = atof(row[2]);
     
     /* Find this feature's row, creating it on first sight. */
     struct hash *sampleData;
     struct hashEl *el = hashLookup(featureData, feature);
     if (!el)
         {
         sampleData = hashNew(0);
         hashAdd(featureData, feature, sampleData);
         AllocVar(sp);
         sp->name = cloneString(feature);
         sp->val = sampleData;
         slAddHead(&spList, sp);
         }
     else
         sampleData = el->val;
 
     if (!hashLookup(samplesHash, sample))
         {
         slNameAddHead(&slList, sample);
         hashAddInt(samplesHash, sample, 1);
         }
     struct valConf *vc;
     AllocVar(vc);
     vc->val = val;
     vc->conf = -1.0;   /* this query selects no confidence column */
     hashAdd(sampleData, sample, vc);
     } 
 
 sqlFreeResult(&sr);
 freez(&query);
 /* The row hashes stay reachable through spList; only the indexes are freed. */
 hashFree(&featureData);
 hashFree(&samplesHash);
 
 /* Head insertion reversed both lists; put them back in first-seen order. */
 slReverse(&spList);
 slReverse(&slList);
 
 char clinTableName[512];
 safef(clinTableName, sizeof(clinTableName), 
       "%s_clinical",
       tableName);
 
 spHashListPrint(spList, slList, clinTableName);
 }
 
 void medianSpHashList(struct slPair *spList, struct slName *slList)
 /* Collapse each cell's list of doubles to its median.
  *   spList - rows: sp->val is a hash keyed by column label whose values are
  *            slDouble lists.
  *   slList - column labels to visit in every row.
  * Each list is freed and replaced in place by a single slDouble holding the
  * median.  Cells absent from a row's hash are skipped. */
 {
 struct slPair *sp;
 struct slName *sl;
 for (sp = spList; sp; sp = sp->next)
     {
     struct hash *hash = sp->val;
     for (sl = slList; sl; sl = sl->next)
         {
         struct hashEl *el = hashLookup(hash, sl->name);
         if (!el)
             continue;   /* this row has no data for this column */
 
         struct slDouble *sdList = el->val;
         double median = slDoubleMedian(sdList);
         slFreeList(&sdList);
 
         /* Reuse the existing hash entry rather than removing and re-adding it. */
         el->val = slDoubleNew(median);
         }
     }
 }
 
 void grabAnalysisData(struct sqlConnection *conn, char *tableName)
 /* Read every row of tableName, group the values by feature and sample, and hand
  * the result to spHashListPrint under the name tableName.  Ids in the table are
  * translated to names through lookup hashes built with createHash. */
 {
 /* id -> name lookup tables */
 struct hash *sampleIdHash = createHash(conn, SA_TABLE, "id", "name");
-struct hash *featureIdHash = createHash(conn, AF_TABLE, "id", "feature_name");
+boolean isPathwayTable = isPathwayAnalysisTable(conn,tableName);
+struct hash *featureIdHash;
+struct hash *pathwayIdHash; /* NOTE(review): assigned below for pathway tables only, never read in this function */
+if(isPathwayTable)
+{
+	featureIdHash = createHash(conn, EN_TABLE, "entity_id", "entity_name");
+	pathwayIdHash = createHash(conn, EP_TABLE, "pathway_id", "pathway_name");
+}
+else
+	featureIdHash = createHash(conn, AF_TABLE, "id", "feature_name");
 
 char query[256];
 safef(query, sizeof(query), "select * from %s", tableName);
 
 struct sqlResult *sr = sqlGetResult(conn, query);
 char **row = NULL;
 
 struct hash *samplesHash = hashNew(0);   /* sample names already in slList */
 struct slName *slList = NULL;            /* sample names, first-seen order after the reverse below */
 struct hash *featureData = hashNew(0);   /* feature id -> that feature's per-sample hash */
 struct slPair *sp, *spList = NULL;       /* one pair per named feature: name -> per-sample hash */
 while ((row = sqlNextRow(sr)) != NULL)
     {
     /* Columns are taken by position from "select *"; the names below are how the
      * code interprets them - TODO confirm against the table schema. */
     char *sample_id = row[0];
     char *feature_id = row[1];
     double val = atof(row[2]);
     double conf = atof(row[3]);
 
     struct hash *sampleData;
     struct hashEl *el = hashLookup(featureData, feature_id);
     if (!el)
 	{
 	sampleData = hashNew(0);
 	hashAdd(featureData, feature_id, sampleData);
 	/* A feature id missing from featureIdHash still gets a data hash, but no
 	 * slPair, so it never reaches the output. */
 	el = hashLookup(featureIdHash, feature_id);
 	if (el)
 	    {
 	    char *name = el->val;
 	    AllocVar(sp);
 	    sp->name = cloneString(name);
 	    sp->val = sampleData;
 	    slAddHead(&spList, sp);
 	    }
 	}
     else
 	sampleData = el->val;
 
     /* NOTE(review): el is dereferenced without a NULL check; a sample_id that is
      * not in sampleIdHash would crash here. */
     el = hashLookup(sampleIdHash, sample_id);
     char *name = el->val;
     if (!hashLookup(samplesHash, name))
 	{
 	slNameAddHead(&slList, name);
 	hashAddInt(samplesHash, name, 1);
 	}
     struct valConf *vc;
     AllocVar(vc);
     vc->val = val;
     vc->conf = conf;
     hashAdd(sampleData, name, vc);
     } 
 
 sqlFreeResult(&sr);
 hashFree(&featureData);
 hashFree(&samplesHash);
 
 /* Both lists were built with head insertion; reverse them into first-seen order. */
 slReverse(&spList);
 slReverse(&slList);
 
 spHashListPrint(spList, slList, tableName);
 }
 
 boolean isAnalysisTable(struct sqlConnection *conn, char *tableName)
 /* Ask sqlExists whether AN_TABLE holds a row whose result_table column
  * equals tableName, and return its answer. */
 {
 char sql[256];
 
 safef(sql, sizeof(sql), "select * from %s where result_table = \"%s\" ", AN_TABLE, tableName);
 boolean found = sqlExists(conn, sql);
 return found;
 }
 
 void grabData(char *tableName)
 /* Connect to the database, check that tableName exists, and dump either its
  * clinical data (global getClinical set) or its analysis data.
  * Aborts if the table does not exist. */
 {
 struct sqlConnection *conn = hAllocConnProfile(localDb, db);
 
 if (!sqlTableExists(conn, tableName))
     errAbort("%s doesn't exist in %s db.\n", tableName, db);
 
 if (getClinical)
     grabClinicalData(conn, tableName);
 else
     grabAnalysisData(conn, tableName);
 
 /* Hand the connection back once the dump is finished. */
 hFreeConn(&conn);
 }
 
 
 int main(int argc, char *argv[])
 /* Parse the options, insist on exactly one table argument, and dump that table. */
 {
 optionInit(&argc, argv, options);
 if (argc != 2)
     usage();
 
 /* Each switch is turned on only when its flag is present on the command line. */
 getClinical = optionExists("clinical") ? TRUE : getClinical;
 transpose = optionExists("transpose") ? TRUE : transpose;
 
 char *tableName = argv[1];
 grabData(tableName);
 
 return 0;
 }