src/hg/instinct/bioInt2/bioLevelI.c 1.3
1.3 2009/03/23 18:19:29 jsanborn
updated
Index: src/hg/instinct/bioInt2/bioLevelI.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/bioLevelI.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 4 -r1.2 -r1.3
--- src/hg/instinct/bioInt2/bioLevelI.c 22 Mar 2009 01:07:28 -0000 1.2
+++ src/hg/instinct/bioInt2/bioLevelI.c 23 Mar 2009 18:19:29 -0000 1.3
@@ -30,31 +30,8 @@
}
fprintf(stdout, "\n");
}
-void slPairDoubleFree(struct slPair **pEl)
-{
-struct slPair *el;
-if ((el = *pEl) == NULL) return;
-
-freeMem(el->name);
-struct slDouble *sdList = el->val;
-slFreeList(&sdList);
-freez(pEl);
-}
-
-void slPairDoubleFreeList(struct slPair **pList)
-{
-struct slPair *el, *next;
-
-for (el = *pList; el != NULL; el = next)
- {
- next = el->next;
- slPairDoubleFree(&el);
- }
-*pList = NULL;
-}
-
void analysisResultFree(struct analysisResult **pEl)
{
struct analysisResult *el;
if ((el = *pEl) == NULL) return;
@@ -75,95 +52,8 @@
}
*pList = NULL;
}
-struct slName *getAvailableGenes(char *db, struct biResults *br)
-{
-struct sqlConnection *biConn = hAllocConnProfile("localDb", db);
-
-char query[256];
-safef(query, sizeof(query), "select DISTINCT geneSymbol from kgXref join knownGene on kgXref.kgId = knownGene.name");
-
-struct slName *sl, *slList = sqlQuickList(biConn, query);
-hFreeConn(&biConn);
-
-struct slName *geneList = NULL;
-for (sl = slList; sl; sl = sl->next)
- {
- struct slName *probes = br->probesForGene(br, sl->name);
- int numProbes = slCount(probes);
- slNameFreeList(&probes);
- if (numProbes == 0)
- continue;
-
- slNameAddHead(&geneList, sl->name);
- }
-
-return geneList;
-}
-
-
-struct slPair *geneLevelData(struct biResults *br, struct biData *bdList, char *sample, char *gene)
-{
-struct biData *bd;
-struct slPair *sp, *spList = NULL;
-for (bd = bdList; bd; bd = bd->next)
- {
- char *dataset = bd->name;
- struct slName *sl, *probes = br->probesForGeneInDataset(br, gene, dataset);
-
- AllocVar(sp);
- sp->name = cloneString(bd->type);
- sp->val = NULL;
- struct slDouble *sd, *sdList = NULL;
- for (sl = probes; sl; sl = sl->next)
- {
- struct hashEl *el = hashLookup(bd->hash, sl->name);
- if (!el)
- continue;
- struct slDouble *sdVal = el->val;
- sd = slDoubleNew(sdVal->val);
- slAddHead(&sdList, sd);
- }
- sp->val = sdList;
- slAddHead(&spList, sp);
-
- slNameFreeList(&probes);
- }
-
-return spList;
-}
-
-struct analysisResult *geneLevelAnalysis(struct biAnalysis *ba, struct biResults *br,
- struct slName *genes)
-{
-fprintf(stdout, "starting gene level analysis\n");
-
-struct slName *gene, *sample, *samples = br->allSamplesInCommon(br);
-
-struct analysisResult *ar, *arList = NULL;
-for (sample = samples; sample; sample = sample->next)
- {
- struct biData *bdList = br->dataForSample(br, sample->name);
- for (gene = genes; gene; gene = gene->next)
- {
- struct slPair *spList = geneLevelData(br, bdList, sample->name, gene->name);
- if (!spList)
- continue;
- ar = ba->analyze(ba, spList, sample->name, gene->name);
- slPairDoubleFreeList(&spList);
- if (!ar)
- continue;
- slAddHead(&arList, ar);
- }
- fprintf(stdout, ".");
- biDataFree(&bdList);
- }
-fprintf(stdout, "\n");
-
-return arList;
-}
-
int findIdInTable(struct sqlConnection *biConn, char *tableName,
char *idField, char *sField, char *name)
{
if (sqlTableSize(biConn, tableName) == 0) /* brand new table, return 0 */
@@ -275,22 +165,28 @@
return hash;
}
-struct hash *getSampleIdHash(struct sqlConnection *biConn)
+struct hash *createHash(struct sqlConnection *biConn,
+ char *table, char *key_field, char *val_field)
+/* Return a new hash keyed by the key_field column of table, where each value
+ * is a cloned copy of the val_field column string from the same row. */
{
-fprintf(stdout, "getting sample id hash.\n");
-
struct hash *hash = hashNew(0);
char query[128];
-safef(query, sizeof(query), "select * from samples");
-
-struct samples *sa, *saList = samplesLoadByQuery(biConn, query);
+safef(query, sizeof(query), "select %s, %s from %s", key_field, val_field, table);
-for (sa = saList; sa; sa = sa->next)
- hashAddInt(hash, sa->name, sa->id);
+struct sqlResult *sr = sqlGetResult(biConn, query);
+char **row = NULL;
+while ((row = sqlNextRow(sr)) != NULL)
+ {
+ char *key = row[0];
+ char *val = cloneString(row[1]);
+ hashAdd(hash, key, val);
+ }
+/* Free the result set now that every row has been consumed. */
+sqlFreeResult(&sr);
-samplesFreeList(&saList);
return hash;
}
@@ -303,21 +195,22 @@
if (!sqlTableExists(biConn, ba->tableName))
createAnalysisValsTable(biConn, ba->tableName);
struct hash *featureIds = storeAnalysisFeaturesInDb(biConn, arList);
-struct hash *sampleIds = getSampleIdHash(biConn);
+struct hash *sampleIds = createHash(biConn, "samples", "name", "id");
struct analysisVals *av = AllocA(struct analysisVals);
struct analysisResult *ar;
for (ar = arList; ar; ar = ar->next)
{
- int sample_id = hashIntValDefault(sampleIds, ar->sample, -1);
+ /* Non-aborting lookup: a sample missing from the hash gives NULL and is skipped below. */
+ char *sample_id = hashFindVal(sampleIds, ar->sample);
int feature_id = hashIntValDefault(featureIds, ar->feature, -1);
- if (sample_id == -1 || feature_id == -1)
+ if (!sample_id || feature_id == -1)
continue;
- av->sample_id = sample_id;
+ av->sample_id = atoi(sample_id);
av->feature_id = feature_id;
av->val = ar->val;
av->conf = ar->conf;
@@ -351,80 +243,27 @@
hFreeConn(&biConn);
return exists;
}
-
-void geneLevelPipeline(struct biAnalysis *baList, struct biResults *br, struct slName *genes)
+void runAnalysisPipeline(struct biAnalysis *baList)
+/* Call ba->pipeline() for each analysis in baList for which analysisExists()
+ * is false.  Returns at once when baList is NULL or analysisListExists() is true. */
{
if (!baList)
+ return;
+/* If all analyses already exist, don't do anything */
+if (analysisListExists(baList->db, baList))
return;
-/* This assumes all results go to same db (which is safe for now) */
-struct sqlConnection *biConn = hAllocConnProfile("localDb", baList->db);
-
struct biAnalysis *ba;
for (ba = baList; ba; ba = ba->next)
{
- if (analysisExists(biConn, ba))
+ char *db = ba->db;
+ struct sqlConnection *biConn = hAllocConnProfile("localDb", db);
+ boolean exists = analysisExists(biConn, ba);
+ hFreeConn(&biConn);
+ if (exists)
continue;
- struct analysisResult *arList = geneLevelAnalysis(ba, br, genes);
-
- fprintf(stdout, "storing in db\n");
- uglyTime(NULL);
- storeAnalysisResultsInDb(biConn, ba, arList);
- uglyTime("done");
-
- analysisResultFreeList(&arList);
+ ba->pipeline(ba);
}
-
-hFreeConn(&biConn);
-}
-
-struct biResults *retrieveData(char *db, struct slName *datasets, boolean toLogP)
-{
-struct slName *dataset;
-
-struct biQuery *bq, *bqList = NULL;
-for (dataset = datasets; dataset; dataset = dataset->next)
- {
- bq = biQueryNew(db, dataset->name); // Add Dataset
- bq->getAllProbes = TRUE; // Set flag to retrieve all probes
- biQueryAppend(&bqList, bq); // Append query to query list
- }
-
-/* Get results from all queries in list */
-struct biResults *br = biQueryResults(bqList);
-
-/* Convert to log p */
-if (toLogP)
- br->toLogP(br);
-
-return br;
-}
-
-void runAnalysisPipeline(struct biAnalysis *baList)
-{
-char *db = baList->db;
-struct slName *slDatasets = baList->inputTables;
-
-if (!analysisListExists(db, baList))
- {
- /* Get raw gene/sample data for all overlapping samples in dataset list */
- uglyTime(NULL);
- struct biResults *br = retrieveData(db, slDatasets, TRUE);
- uglyTime("retrieveData");
-
- if (!br)
- errAbort("No gene data!\n");
-
- /* Run gene level analyses (meta-gene, pathlet, etc.) */
- struct slName *genes = getAvailableGenes(db, br);
- uglyTime("Num available genes = %d", slCount(genes));
-
- geneLevelPipeline(baList, br, genes);
- }
-
-/* Run pathway/geneset level analyses */
-
-
}