src/hg/instinct/bioInt2/bioLevelI.c 1.4

1.4 2009/03/24 05:21:54 jsanborn
updated
Index: src/hg/instinct/bioInt2/bioLevelI.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/bioLevelI.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -b -B -U 1000000 -r1.3 -r1.4
--- src/hg/instinct/bioInt2/bioLevelI.c	23 Mar 2009 18:19:29 -0000	1.3
+++ src/hg/instinct/bioInt2/bioLevelI.c	24 Mar 2009 05:21:54 -0000	1.4
@@ -1,265 +1,264 @@
 /* mapProbesToGenes - Will map probes in BED format to overlapping gene(s). */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "jksql.h"
 #include "hPrint.h"
 #include "hdb.h"  
 #include "dystring.h"
 #include "bioIntDb.h"
 #include "bioIntDriver.h"
 #include "cprob.h"
 #include "hgStatsLib.h"
 #include "bioController.h"
 
 
 void printSlName(struct slName *slList)
 /* Write the element count of slList to stdout, followed by one line holding
  * the names separated by commas.  A NULL list is announced first, but the
  * count line and the (then empty) name line are still printed. */
 {
 struct slName *node;
 
 if (slList == NULL)
     fprintf(stdout, "nothing in slName list\n");
 
 fprintf(stdout, "num samples = %d\n", slCount(slList));
 
 for (node = slList; node != NULL; node = node->next)
     {
     /* Emit the separator in front of every name except the first. */
     if (node != slList)
 	fprintf(stdout, ",");
     fprintf(stdout, "%s", node->name);
     }
 fprintf(stdout, "\n");
 }
 
 void analysisResultFree(struct analysisResult **pEl)
 /* Release a single analysisResult: its sample and feature strings are handed
  * to freeMem, then the struct itself is released through freez(pEl).
  * Nothing happens when *pEl is NULL. */
 {
 struct analysisResult *result = *pEl;
 
 if (result != NULL)
     {
     freeMem(result->sample);
     freeMem(result->feature);
     freez(pEl);
     }
 }
 
 void analysisResultFreeList(struct analysisResult **pList)
 /* Free every element of the list starting at *pList with
  * analysisResultFree, then set *pList to NULL. */
 {
 struct analysisResult *cur = *pList;
 
 while (cur != NULL)
     {
     /* Remember the successor before the current node is released. */
     struct analysisResult *following = cur->next;
     analysisResultFree(&cur);
     cur = following;
     }
 *pList = NULL;
 }
 
 int findIdInTable(struct sqlConnection *biConn, char *tableName,
 		  char *idField, char *sField, char *name)
 {
 if (sqlTableSize(biConn, tableName) == 0)  /* brand new table, return 0 */
     return 0;
 
 char query[256];
 safef(query, sizeof(query),
       "select DISTINCT %s from %s where %s = \"%s\";",
       idField, tableName, sField, name);
 if (sqlExists(biConn, query))  /* sample name found, use same id */
     return sqlQuickNum(biConn, query);
 
 /* Else, find maximum sample id and add one to it */
 safef(query, sizeof(query),
       "select max(%s) from %s;",
       idField, tableName);
 int maxId = sqlQuickNum(biConn, query);
 return maxId + 1;
 }   
 
 
 void createAnalysisFeaturesTable(struct sqlConnection *biConn, char *tableName)
 /* Issue a CREATE TABLE statement for tableName with columns id and
  * feature_name, indexed on each column and on both column orderings. */
 {
 struct dyString *sql = newDyString(1024);
 
 /* The adjacent literals concatenate into a single statement text. */
 dyStringPrintf(sql,
 	       "CREATE TABLE %s (\n"
 	       "id int unsigned not null,\n"
 	       "feature_name varchar(255) not null,\n"
 	       "KEY(id),\n"
 	       "KEY(feature_name),\n"
 	       "KEY(id, feature_name),\n"
 	       "KEY(feature_name, id)\n"
 	       ")\n",
 	       tableName);
 sqlUpdate(biConn, sql->string);
 dyStringFree(&sql);
 }
 
 void createAnalysisValsTable(struct sqlConnection *biConn, char *tableName)
 /* Issue a CREATE TABLE statement for tableName with columns sample_id,
  * feature_id, val and conf, indexed on both orderings of the two ids. */
 {
 struct dyString *sql = newDyString(1024);
 
 /* The adjacent literals concatenate into a single statement text. */
 dyStringPrintf(sql,
 	       "CREATE TABLE %s (\n"
 	       "sample_id int unsigned not null,\n"
 	       "feature_id int unsigned not null,\n"
 	       "val float not null,\n"
 	       "conf float not null,\n"
 	       "KEY(feature_id, sample_id),\n"
 	       "KEY(sample_id, feature_id)\n"
 	       ")\n",
 	       tableName);
 sqlUpdate(biConn, sql->string);
 dyStringFree(&sql);
 }
 
 boolean analysisFeatureExists(struct sqlConnection *biConn, struct analysisFeatures *af)
 /* Report whether the analysis features table already holds a row whose id
  * and feature_name both match af.  The result is whatever sqlExists returns
  * for that query. */
 {
 char query[256];
 /* NOTE(review): feature_name goes into a fixed 256-byte buffer unescaped;
  * very long names may not fit - confirm how safef handles overflow. */
 safef(query, sizeof(query),
-      "select * from analysisFeatures where id = %d "
+      "select * from %s where id = %d "
       "and feature_name = \"%s\" ",
-      af->id, af->feature_name);
+      AF_TABLE, af->id, af->feature_name);
 
 return sqlExists(biConn, query);
 } 
 
 struct hash *getAnalysisFeaturesHash(struct sqlConnection *biConn)
 /* Load every row of the analysis features table and return a new hash that
  * maps feature_name to its integer id.  The loaded row list is freed before
  * returning; the hash itself is handed to the caller. */
 {
 fprintf(stdout, "getting analysis features hash.\n");
 
 struct hash *hash = hashNew(0);
 char query[128];
-safef(query, sizeof(query), "select * from analysisFeatures");
+safef(query, sizeof(query), "select * from %s", AF_TABLE);
 
 struct analysisFeatures *af, *afList = analysisFeaturesLoadByQuery(biConn, query);
 
 /* Index each loaded feature by name. */
 for (af = afList; af; af = af->next)
     hashAddInt(hash, af->feature_name, af->id);
 
 /* NOTE(review): the list is freed while the hash lives on - this assumes
  * hashAddInt keeps its own copy of the key; confirm. */
 analysisFeaturesFreeList(&afList);
 return hash;
 }  
 
 struct hash *storeAnalysisFeaturesInDb(struct sqlConnection *biConn, struct analysisResult *arList)
 /* Make sure every feature named in arList has a row in the analysis
  * features table, creating the table first if it does not exist.  Returns a
  * hash from feature name to integer id covering the features already in
  * the table plus those seen in arList. */
 {
 fprintf(stdout, "storing analysis features\n");
 
-char *tableName = "analysisFeatures";
-if (!sqlTableExists(biConn, tableName))
-    createAnalysisFeaturesTable(biConn, tableName);
+if (!sqlTableExists(biConn, AF_TABLE))
+    createAnalysisFeaturesTable(biConn, AF_TABLE);
 
 /* Get existing analysis features */
 struct hash *hash = getAnalysisFeaturesHash(biConn);
 
 struct analysisFeatures *af;
 struct analysisResult *ar;
 for (ar = arList; ar; ar = ar->next)
     {
     if (hashLookup(hash, ar->feature))
 	continue;  // already visited, ignore
 
     /* Reuse the id stored for this name if there is one, else take the next free id. */
-    int feature_id = findIdInTable(biConn, tableName, "id", "feature_name", ar->feature); 
+    int feature_id = findIdInTable(biConn, AF_TABLE, "id", "feature_name", ar->feature); 
 
     /* Build a temporary record, used only for the existence check and the save. */
     AllocVar(af);
     af->id = feature_id;
     af->feature_name = cloneString(ar->feature);
     if (!analysisFeatureExists(biConn, af))
-	analysisFeaturesSaveToDb(biConn, af, tableName, 10);
+	analysisFeaturesSaveToDb(biConn, af, AF_TABLE, 10);
 
     /* Record the feature in the hash so later duplicates in arList are skipped. */
     hashAddInt(hash, af->feature_name, af->id);
     analysisFeaturesFree(&af);
     }
 
 return hash;
 }
 
 struct hash *createHash(struct sqlConnection *biConn, 
 			char *table, char *key_field, char *val_field)
 /* Return a new hash built from every row of table: the key_field column is
  * the hash key and a cloned copy of the val_field column is the stored
  * value.  The cloned value strings are not freed here, so the caller owns
  * them along with the hash.  The SQL result is released before returning. */
 {
 struct hash *hash = hashNew(0);
 char query[128];
 safef(query, sizeof(query), "select %s, %s from %s", key_field, val_field, table);
 
 struct sqlResult *sr = sqlGetResult(biConn, query);
 char **row = NULL;
 while ((row = sqlNextRow(sr)) != NULL)
     {
     /* NOTE(review): row[0] is passed straight in as the key - this assumes
      * hashAdd stores its own copy of it; confirm. */
     char *key = row[0];
     char *val = cloneString(row[1]);
     hashAdd(hash, key, val);
     }
 
 /* Release the result set; the original code leaked it. */
 sqlFreeResult(&sr);
 return hash;
 }
 
 
 void storeAnalysisResultsInDb(struct sqlConnection *biConn, struct biAnalysis *ba, 
 			      struct analysisResult *arList)
 /* Save every result in arList as a row (sample_id, feature_id, val, conf)
  * of the table ba->tableName, creating that table first if it does not
  * exist.  Feature ids come from storeAnalysisFeaturesInDb and sample ids
  * from a name -> id hash built over the samples table. */
 {
 fprintf(stdout, "storing analysis in table %s in %s db\n", 
 	ba->tableName, ba->db);
 
 if (!sqlTableExists(biConn, ba->tableName))
     createAnalysisValsTable(biConn, ba->tableName);
 
 /* NOTE(review): neither hash is freed anywhere in this function. */
 struct hash *featureIds = storeAnalysisFeaturesInDb(biConn, arList); 
-struct hash *sampleIds = createHash(biConn, "samples", "name", "id");
+struct hash *sampleIds = createHash(biConn, SA_TABLE, "name", "id");
 
 /* A single analysisVals record is reused for every row that is saved. */
 struct analysisVals *av = AllocA(struct analysisVals);
 struct analysisResult *ar;
 for (ar = arList; ar; ar = ar->next)
     {
     /* NOTE(review): hashMustFindVal presumably aborts on a missing sample,
      * which would make the !sample_id test below unreachable - confirm. */
     char *sample_id = hashMustFindVal(sampleIds, ar->sample);
     int feature_id = hashIntValDefault(featureIds, ar->feature, -1);
 
     if (!sample_id || feature_id == -1)
 	continue;
 
     /* Sample ids are held in the hash as strings, hence the atoi. */
     av->sample_id  = atoi(sample_id);
     av->feature_id = feature_id;
     av->val        = ar->val;
     av->conf       = ar->conf;
 
     analysisValsSaveToDb(biConn, av, ba->tableName, 50);
     }
 
 analysisValsFree(&av);
 }
 
 boolean analysisExists(struct sqlConnection *biConn, struct biAnalysis *ba)
 /* TRUE when the table ba->tableName exists and its size is non-zero,
  * FALSE otherwise. */
 {
 boolean present = FALSE;
 
 /* The size is only queried once the table is known to exist. */
 if (sqlTableExists(biConn, ba->tableName))
     {
     if (sqlTableSize(biConn, ba->tableName) != 0)
 	present = TRUE;
     }
 return present;
 }
 
 boolean analysisListExists(char *db, struct biAnalysis *baList)
 {
 struct sqlConnection *biConn = hAllocConnProfile("localDb", db);
 
 boolean exists = TRUE;
 struct biAnalysis *ba;
 for (ba = baList; ba; ba = ba->next)
     if (!analysisExists(biConn, ba))
 	exists = FALSE;
 
 hFreeConn(&biConn);
 return exists;
 }
 
 void runAnalysisPipeline(struct biAnalysis *baList)
 {
 /* If all analyses already exist, don't do anything */
 if (analysisListExists(baList->db, baList))
     return;
 
 struct biAnalysis *ba;
 for (ba = baList; ba; ba = ba->next)
     {
     char *db = ba->db;
     struct sqlConnection *biConn = hAllocConnProfile("localDb", db); 
     boolean exists = analysisExists(biConn, ba);
     hFreeConn(&biConn);
     if (exists)
 	continue;
 
     ba->pipeline(ba);
     }
 }