src/hg/instinct/bioInt2/populateDb.c 1.4
1.4 2009/03/24 05:21:54 jsanborn
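updated: replace hard-coded bioInt table-name strings with the *_TABLE macros, include bioIntDriver.h, and remove the local DEBUG define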
Index: src/hg/instinct/bioInt2/populateDb.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/populateDb.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -b -B -U 4 -r1.3 -r1.4
--- src/hg/instinct/bioInt2/populateDb.c 24 Mar 2009 03:07:55 -0000 1.3
+++ src/hg/instinct/bioInt2/populateDb.c 24 Mar 2009 05:21:54 -0000 1.4
@@ -11,12 +11,11 @@
#include "microarray.h"
#include "ra.h"
#include "featuresLib.h"
#include "hgHeatmapLib.h"
+#include "bioIntDriver.h"
#include "bioIntDb.h"
-#define DEBUG 1
-
char *hgDb = "hg18";
char *genome = "Human";
void usage()
@@ -159,25 +158,25 @@
char *data_format = "probeVals";
char query[256];
safef(query, sizeof(query),
- "select * from dataTypes where format = \"%s\" and name = \"%s\"",
- data_format, platform);
+ "select * from %s where format = \"%s\" and name = \"%s\"",
+ DT_TABLE, data_format, platform);
return dataTypesLoadByQuery(biConn, query);
}
struct dataTypes *createDataType(struct sqlConnection *biConn, char *type, char *platform)
{
-int nextId = sqlTableSize(biConn, "dataTypes");
+int nextId = sqlTableSize(biConn, DT_TABLE); /* id of the new record is the current size of DT_TABLE; the 'type' argument is not read in this function */
struct dataTypes *dt;
AllocVar(dt);
dt->id = nextId;
dt->format = cloneString("probeVals");
dt->name = cloneString(platform);
/* Save to db */
-dataTypesSaveToDb(biConn, dt, "dataTypes", 100);
+dataTypesSaveToDb(biConn, dt, DT_TABLE, 100); /* writes dt to DT_TABLE; dt is then returned (caller presumably frees it with dataTypesFree -- confirm) */
return dt;
}
void createDataTypesTable(struct sqlConnection *biConn, char *tableName)
@@ -195,12 +194,12 @@
struct dataTypes *setupDataType(struct sqlConnection *biConn,
char *type, char *platform)
{
-if (!sqlTableExists(biConn, "dataTypes"))
+if (!sqlTableExists(biConn, DT_TABLE))
{
fprintf(stderr, "Tables dataTypes doesn't exist, creating...\n");
- createDataTypesTable(biConn, "dataTypes");
+ createDataTypesTable(biConn, DT_TABLE);
}
struct dataTypes *dt = findDataType(biConn, type, platform);
if (!dt)
@@ -213,23 +212,23 @@
struct tissues *findTissue(struct sqlConnection *biConn, char *tissue)
{
char query[256];
safef(query, sizeof(query),
- "select * from tissues where name = \"%s\";",
- tissue);
+ "select * from %s where name = \"%s\";", /* first %s is the table, second %s is the tissue name to match */
+ TI_TABLE, tissue); /* NOTE(review): tissue is inserted into the query unescaped -- confirm callers pass trusted names */
return tissuesLoadByQuery(biConn, query);
}
struct tissues *createTissue(struct sqlConnection *biConn, char *tissue)
{
-int nextId = sqlTableSize(biConn, "tissues");
+int nextId = sqlTableSize(biConn, TI_TABLE); /* id of the new record is the current size of TI_TABLE */
struct tissues *ti;
AllocVar(ti);
ti->id = nextId;
ti->name = cloneString(tissue);
/* Save to db */
-tissuesSaveToDb(biConn, ti, "tissues", 100);
+tissuesSaveToDb(biConn, ti, TI_TABLE, 100); /* writes ti to TI_TABLE; ti is then returned (caller presumably frees it with tissuesFree -- confirm) */
return ti;
}
void createTissuesTable(struct sqlConnection *biConn, char *tableName)
@@ -245,12 +244,12 @@
}
struct tissues *setupTissue(struct sqlConnection *biConn, char *tissue)
{
-if (!sqlTableExists(biConn, "tissues"))
+if (!sqlTableExists(biConn, TI_TABLE))
{
fprintf(stderr, "Tables tissues doesn't exist, creating...\n");
- createTissuesTable(biConn, "tissues");
+ createTissuesTable(biConn, TI_TABLE);
}
struct tissues *ti = findTissue(biConn, tissue);
if (!ti)
@@ -263,10 +262,10 @@
struct datasets *findDataset(struct sqlConnection *biConn, char *name)
{
char query[256];
safef(query, sizeof(query),
- "select * from datasets where data_table = \"%s\";",
- name);
+ "select * from %s where data_table = \"%s\";", /* first %s is the table, second %s is matched against the data_table column */
+ DA_TABLE, name); /* NOTE(review): name is inserted into the query unescaped -- confirm callers pass trusted names */
return datasetsLoadByQuery(biConn, query);
}
@@ -306,9 +305,9 @@
struct dataTypes *dt = setupDataType(biConn, dataType, platform);
struct tissues *ti = setupTissue(biConn, tissue);
-int nextId = sqlTableSize(biConn, "datasets");
+int nextId = sqlTableSize(biConn, DA_TABLE);
struct datasets *da;
AllocVar(da);
da->id = nextId;
@@ -323,9 +322,9 @@
dataTypesFree(&dt);
tissuesFree(&ti);
/* Write datasets */
-datasetsSaveToDbEscaped(biConn, da, "datasets", 100);
+datasetsSaveToDbEscaped(biConn, da, DA_TABLE, 100);
return da;
}
@@ -349,12 +348,12 @@
struct datasets *setupDataset(struct sqlConnection *biConn,
char *tableName, char *tissue, int numSamples)
{
-if (!sqlTableExists(biConn, "datasets"))
+if (!sqlTableExists(biConn, DA_TABLE))
{
fprintf(stderr, "Tables datasets doesn't exist, creating...");
- createDatasetsTable(biConn, "datasets");
+ createDatasetsTable(biConn, DA_TABLE);
}
struct datasets *da = findDataset(biConn, tableName);
if (!da)
@@ -376,38 +375,38 @@
}
int findId(struct sqlConnection *biConn, char *idField, char *sField, char *name)
{
-if (sqlTableSize(biConn, "samples") == 0) /* brand new table, return 0 */
+if (sqlTableSize(biConn, SA_TABLE) == 0) /* brand new table, return 0 */
return 0;
char query[256];
safef(query, sizeof(query),
- "select DISTINCT %s from samples where %s = \"%s\";",
- idField, sField, name);
+ "select DISTINCT %s from %s where %s = \"%s\";",
+ idField, SA_TABLE, sField, name); /* arguments follow placeholder order: id column, table, match column, value */
if (sqlExists(biConn, query)) /* sample name found, use same id */
return sqlQuickNum(biConn, query);
/* Else, find maximum sample id and add one to it */
safef(query, sizeof(query),
- "select max(%s) from samples;",
- idField);
+ "select max(%s) from %s;",
+ idField, SA_TABLE); /* id column first, then table, matching "max(%s) from %s" */
int maxId = sqlQuickNum(biConn, query);
return maxId + 1;
}
boolean sampleExists(struct sqlConnection *biConn, struct samples *sa)
{
char query[256];
safef(query, sizeof(query),
- "select * from samples where id = %d "
+ "select * from %s where id = %d " /* table name is the first placeholder; every field of sa is then matched */
"and name = \"%s\" "
"and patient_id = %d "
"and patient_name = \"%s\" "
"and dataset_id = %d "
"and exp_id = %d "
"and tissue_id = %d ",
- sa->id, sa->name, sa->patient_id, sa->patient_name, sa->dataset_id,
+ SA_TABLE, sa->id, sa->name, sa->patient_id, sa->patient_name, sa->dataset_id, /* SA_TABLE first, then values in the same order as the placeholders above */
sa->exp_id, sa->tissue_id);
return sqlExists(biConn, query);
}
@@ -461,9 +460,9 @@
sa->exp_id = expId;
sa->tissue_id = tissueId;
if (!sampleExists(biConn, sa))
- samplesSaveToDb(biConn, sa, "samples", 100);
+ samplesSaveToDb(biConn, sa, SA_TABLE, 100);
samplesFree(&sa);
}
@@ -493,22 +492,22 @@
struct samples *getSamples(struct sqlConnection *biConn, struct datasets *da)
{
char query[256];
safef(query, sizeof(query),
- "select * from samples where dataset_id = %d order by exp_id;",
- da->id);
+ "select * from %s where dataset_id = %d order by exp_id;", /* rows for one dataset, ordered by exp_id */
+ SA_TABLE, da->id); /* table name first, then the dataset id to filter on */
return samplesLoadByQuery(biConn, query);
}
struct samples *setupSamples(struct sqlConnection *biConn, struct datasets *da,
struct maGrouping *allA)
{
-if (!sqlTableExists(biConn, "samples"))
+if (!sqlTableExists(biConn, SA_TABLE))
{
fprintf(stderr, "Table samples doesn't exist, creating...\n");
- createSamplesTable(biConn, "samples");
+ createSamplesTable(biConn, SA_TABLE);
}
createSamples(biConn, da, allA);
struct samples *saList = getSamples(biConn, da);
@@ -520,49 +519,49 @@
int getFeatureId(struct sqlConnection *biConn, char *name)
{
-if (sqlTableSize(biConn, "features") == 0) /* brand new table, return 0 */
+if (sqlTableSize(biConn, FE_TABLE) == 0) /* brand new table, return 0 */
return 0;
char query[256];
safef(query, sizeof(query),
- "select id from features where name = \"%s\";",
- name);
+ "select id from %s where name = \"%s\";", /* looks up the id of the feature with this name */
+ FE_TABLE, name); /* table name first, then the feature name to match */
if (sqlExists(biConn, query)) /* sample name found, use same id */
return sqlQuickNum(biConn, query);
else
- return sqlTableSize(biConn, "features");
+ return sqlTableSize(biConn, FE_TABLE); /* feature name not found: the current table size is returned as the new id */
}
struct features *getFeature(struct sqlConnection *biConn, char *name)
{
char query[256];
safef(query, sizeof(query),
- "select * from features where name = \"%s\";",
- name);
+ "select * from %s where name = \"%s\";", /* first %s is the table, second %s is the feature name to match */
+ FE_TABLE, name); /* NOTE(review): name is inserted into the query unescaped -- confirm callers pass trusted names */
return featuresLoadByQuery(biConn, query);
}
boolean featureExists(struct sqlConnection *biConn, struct features *fs)
{
char query[256];
safef(query, sizeof(query),
- "select * from features where name = \"%s\";",
- fs->name);
+ "select * from %s where name = \"%s\";", /* existence is decided by name only; other fields of fs are not compared */
+ FE_TABLE, fs->name); /* table name first, then the feature name to match */
return sqlExists(biConn, query);
}
boolean clinicalDataExists(struct sqlConnection *biConn, struct clinicalData *cd)
{
char query[256];
safef(query, sizeof(query),
- "select * from clinicalData where sample_id = %d "
+ "select * from %s where sample_id = %d "
"and feature_id = %d; ",
- cd->sample_id, cd->feature_id);
+ CD_TABLE, cd->sample_id, cd->feature_id);
if (!sqlExists(biConn, query)) /* entry doesn't exist, report */
return FALSE;
@@ -624,18 +623,18 @@
{
if (!saList)
return;
-if (!sqlTableExists(biConn, "features"))
+if (!sqlTableExists(biConn, FE_TABLE))
{
fprintf(stderr, "Table features doesn't exist, creating...\n");
- createFeaturesTable(biConn, "features");
+ createFeaturesTable(biConn, FE_TABLE);
}
-if (!sqlTableExists(biConn, "clinicalData"))
+if (!sqlTableExists(biConn, CD_TABLE))
{
fprintf(stderr, "Table clinicalData doesn't exist, creating...\n");
- createClinicalDataTable(biConn, "clinicalData");
+ createClinicalDataTable(biConn, CD_TABLE);
}
struct hash *settings = getSettings(da->data_table);
@@ -688,9 +687,9 @@
fs->shortLabel = cloneString(shortLabel);
fs->longLabel= cloneString(longLabel);
if (!featureExists(biConn, fs))
- featuresSaveToDbEscaped(biConn, fs, "features", 100);
+ featuresSaveToDbEscaped(biConn, fs, FE_TABLE, 100);
featuresFree(&fs);
fs = getFeature(biConn, name);
if (!fs)
@@ -719,9 +718,9 @@
if (col->cellCoded(col, pdConn))
cd->code = cloneString(col->cellCodedVal(col, id, pdConn));
if (!clinicalDataExists(biConn, cd))
- clinicalDataSaveToDb(biConn, cd, "clinicalData", 100);
+ clinicalDataSaveToDb(biConn, cd, CD_TABLE, 100);
clinicalDataFree(&cd);
slNameFree(&id);
}
@@ -866,15 +865,15 @@
}
void createGeneLookup(struct sqlConnection *biConn)
{
-if (!sqlTableExists(biConn, "geneLookup"))
+if (!sqlTableExists(biConn, GL_TABLE))
{
fprintf(stderr, "geneLookup table doesn't exist in bioInt database, recreating it.\n");
- createGeneLookupTable(biConn, "geneLookup");
+ createGeneLookupTable(biConn, GL_TABLE);
}
-if (sqlTableSize(biConn, "geneLookup") > 0)
+if (sqlTableSize(biConn, GL_TABLE) > 0)
{
fprintf(stderr, "geneLookup table already has data in it, doing nothing.\n");
return;
}
@@ -895,9 +894,9 @@
gl->id = id;
gl->kgId = cloneString(sl->name);
id++;
- geneLookupSaveToDb(biConn, gl, "geneLookup", 100);
+ geneLookupSaveToDb(biConn, gl, GL_TABLE, 100);
geneLookupFree(&gl);
}
slNameFreeList(&slList);
@@ -938,11 +937,11 @@
return NULL;
struct slName *sl;
struct dyString *dy = newDyString(100);
-dyStringPrintf(dy, "select id from geneLookup "
- "join kgXref on geneLookup.kgId = kgXref.kgId "
- "where kgXref.geneSymbol in (");
+dyStringPrintf(dy, "select id from %s "
+ "join kgXref on %s.kgId = kgXref.kgId "
+ "where kgXref.geneSymbol in (", GL_TABLE, GL_TABLE);
for (sl = slList; sl; sl = sl->next)
{
dyStringPrintf(dy, "\"%s\"", sl->name);
if (sl->next)
@@ -1080,43 +1079,43 @@
struct sqlConnection *pdConn = hAllocConnProfile("localDb", "pathway");
if (!pdConn)
errAbort("Could not connect to pathways database.\n");
-if (sqlTableExists(biConn, "pathways") && dropTable)
+if (sqlTableExists(biConn, PA_TABLE) && dropTable)
{
fprintf(stderr, "pathways table already exists, dropping and recreating.\n");
- sqlDropTable(biConn, "pathways");
+ sqlDropTable(biConn, PA_TABLE);
}
-if (!sqlTableExists(biConn, "pathways"))
+if (!sqlTableExists(biConn, PA_TABLE))
{
fprintf(stderr, "Creating pathways table.\n");
- createPathwaysTable(biConn, "pathways");
+ createPathwaysTable(biConn, PA_TABLE);
inputPathways = TRUE;
}
-if (sqlTableExists(biConn, "pathwayGenes") && dropTable)
+if (sqlTableExists(biConn, PG_TABLE) && dropTable)
{
fprintf(stderr, "pathwayGenes table already exists, dropping and recreating.\n");
- sqlDropTable(biConn, "pathwayGenes");
+ sqlDropTable(biConn, PG_TABLE);
}
-if (!sqlTableExists(biConn, "pathwayGenes"))
+if (!sqlTableExists(biConn, PG_TABLE))
{
fprintf(stderr, "Creating pathwayGenes table.\n");
- createPathwayGenesTable(biConn, "pathwayGenes");
+ createPathwayGenesTable(biConn, PG_TABLE);
inputPathwayGenes = TRUE;
}
-if (sqlTableExists(biConn, "pathwayInfo") && dropTable)
+if (sqlTableExists(biConn, PI_TABLE) && dropTable)
{
fprintf(stderr, "pathwayInfo table already exists, dropping and recreating.\n");
- sqlDropTable(biConn, "pathwayInfo");
+ sqlDropTable(biConn, PI_TABLE);
}
-if (!sqlTableExists(biConn, "pathwayInfo"))
+if (!sqlTableExists(biConn, PI_TABLE))
{
fprintf(stderr, "Creeting pathwayInfo table.\n");
- createPathwayInfoTable(biConn, "pathwayInfo");
+ createPathwayInfoTable(biConn, PI_TABLE);
inputPathwayInfo = TRUE;
}
if (!inputPathways && !inputPathwayInfo && !inputPathwayGenes)
@@ -1161,9 +1160,9 @@
AllocVar(ps);
ps->id = id;
ps->name = cloneString(name);
ps->source = cloneString("N/A");
- pathwaysSaveToDb(biConn, ps, "pathways", 100);
+ pathwaysSaveToDb(biConn, ps, PA_TABLE, 100);
pathwaysFree(&ps);
}
if (inputPathwayGenes)
@@ -1173,9 +1172,9 @@
pg->id = id;
for (si = siList; si; si = si->next)
{
pg->gene_id = si->val;
- pathwayGenesSaveToDb(biConn, pg, "pathwayGenes", 100);
+ pathwayGenesSaveToDb(biConn, pg, PG_TABLE, 100);
}
pathwayGenesFree(&pg);
}
@@ -1187,9 +1186,9 @@
struct pathwayInfo *pi;
AllocVar(pi);
pi->id = id;
pi->description = desc;
- pathwayInfoSaveToDbEscaped(biConn, pi, "pathwayInfo", 200);
+ pathwayInfoSaveToDbEscaped(biConn, pi, PI_TABLE, 200);
pathwayInfoFree(&pi);
}
}
id++;