src/hg/instinct/bioInt2/bioIntDriver.c 1.4
1.4 2009/05/20 20:34:36 jsanborn
initial commit
Index: src/hg/instinct/bioInt2/bioIntDriver.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/bioIntDriver.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -b -B -U 1000000 -r1.3 -r1.4
--- src/hg/instinct/bioInt2/bioIntDriver.c 27 Apr 2009 06:15:48 -0000 1.3
+++ src/hg/instinct/bioInt2/bioIntDriver.c 20 May 2009 20:34:36 -0000 1.4
@@ -1,1208 +1,1344 @@
/* bioIntDriver.c
* All rights reserved -- J. Zachary Sanborn
*/
#include "common.h"
#include "linefile.h"
#include "hash.h"
#include "options.h"
#include "jksql.h"
#include "hdb.h"
#include "dystring.h"
#include "cprob.h"
#include "hgStatsLib.h"
#include "bioIntDriver.h"
static char *heatMapDbProfile = "localDb";
/* Begin helper functions */
struct hash *createIdHash(struct sqlConnection *biConn, char *tableName, char *fieldName)
/* Build a hash that maps each value of fieldName in tableName to that row's
 * integer id.  Rows whose fieldName is SQL NULL are skipped.  The caller is
 * responsible for freeing the returned hash. */
{
struct hash *hash = hashNew(0);
char query[128];
safef(query, sizeof(query), "select id, %s from %s", fieldName, tableName);
struct sqlResult *sr = sqlGetResult(biConn, query);
char **row = NULL;
while ((row = sqlNextRow(sr)) != NULL)
    {
    if (row[1] == NULL)
        continue;   /* a NULL key cannot be hashed */
    /* The hash stores its own copy of the key, so cloning row[1] first
     * would only leak the clone. */
    hashAddInt(hash, row[1], sqlUnsigned(row[0]));
    }
sqlFreeResult(&sr);
return hash;
}
int addDelimStringToList(struct slName **list, char *str, char sep)
/* Split str on sep and append the pieces, in their original order, to the
 * end of *list.  Returns 0 if str is NULL (list untouched), 1 otherwise. */
{
if (!str)
    return 0;
/* Hand the freshly parsed nodes straight to the list.  Entries already on
 * *list keep their position; the previous add-head-then-reverse approach
 * flipped the order of any pre-existing entries on every call. */
struct slName *parsed = slNameListFromString(str, sep);
*list = slCat(*list, parsed);
return 1;
}
void slNamePrint(struct slName *list)
/* Write every name in list to stdout, each followed by a space, then a
 * newline. */
{
struct slName *cur = list;
while (cur != NULL)
    {
    fprintf(stdout, "%s ", cur->name);
    cur = cur->next;
    }
fprintf(stdout, "\n");
}
struct analysisVals *cloneAnalysisVals(struct analysisVals *av)
{
struct analysisVals *newAv;
AllocVar(newAv);
newAv->sample_id = av->sample_id;
newAv->feature_id = av->feature_id;
newAv->val = av->val;
newAv->conf = av->conf;
return newAv;
}
struct hash *uniqueHashFromSlNameList(void *list)
/* Hash the elements of an slName-shaped list (next pointer first, name
 * second) by name, keeping only the first element seen for each name.
 * Returns NULL for an empty list.
 * -- Adapted from hashFromSlNameList in hash.h */
{
struct slName *node = list;
if (node == NULL)
    return NULL;
struct hash *uniq = hashNew(10);
while (node != NULL)
    {
    if (hashLookup(uniq, node->name) == NULL)
        hashAdd(uniq, node->name, node);
    node = node->next;
    }
return uniq;
}
struct slName *slNameUniqueList(struct slName *list)
{
if (!list)
return NULL;
struct hash *hash = uniqueHashFromSlNameList(list);
/* Iterate through names in hash, saving to new list */
struct slName *slList = NULL;
struct hashCookie cookie = hashFirst(hash);
char *name;
while ((name = hashNextName(&cookie)) != NULL)
slNameAddHead(&slList, name);
slNameFreeList(&list);
freeHash(&hash);
return slList;
}
int analysisValsCmp(const void *va, const void *vb)
/* Compare function for sorting analysisVals: order by feature_id first,
 * then by sample_id.  Returns -1, 0 or 1. */
{
const struct analysisVals *a = *((struct analysisVals **)va);
const struct analysisVals *b = *((struct analysisVals **)vb);
/* Compare directly instead of subtracting: a difference stored in an int
 * can wrap and report the wrong order for ids that are far apart. */
if (a->feature_id < b->feature_id)
    return -1;
if (a->feature_id > b->feature_id)
    return 1;
// feature_id's are the same
if (a->sample_id < b->sample_id)
    return -1;
if (a->sample_id > b->sample_id)
    return 1;
return 0;
}
+void slPairHashesFree(struct slPair **pEl)
+/* Free an slPair whose val is a list of typeHash: the pair's name, every
+ * typeHash node with its type string and its hash of analysisVals, and
+ * finally the pair itself.  Sets *pEl to NULL. */
+{
+struct slPair *el;
+
+if ((el = *pEl) == NULL) return;
+
+freeMem(el->name);
+
+struct typeHash *th, *next;
+for (th = el->val; th != NULL; th = next)
+    {
+    next = th->next;    /* read before the node is released */
+    freeMem(th->type);
+    hashFreeWithVals(&th->hash, analysisValsFree);
+    freeMem(th);        /* the node itself was previously leaked */
+    }
+freez(pEl);
+}
+
+void slPairHashesFreeList(struct slPair **pList)
+/* Free every element of *pList with slPairHashesFree and set *pList to
+ * NULL. */
+{
+struct slPair *cur = *pList;
+
+while (cur != NULL)
+    {
+    struct slPair *rest = cur->next;
+    slPairHashesFree(&cur);
+    cur = rest;
+    }
+*pList = NULL;
+}
+
+void slPairStringFree(struct slPair **pEl)
+/* Free an slPair whose val is a string: the name, the val string and the
+ * pair itself.  Sets *pEl to NULL.  Does nothing if *pEl is NULL. */
+{
+struct slPair *pair = *pEl;
+
+if (pair == NULL)
+    return;
+
+freeMem(pair->name);
+freeMem(pair->val);
+freez(pEl);
+}
+
+void slPairStringFreeList(struct slPair **pList)
+/* Free every element of *pList with slPairStringFree and set *pList to
+ * NULL. */
+{
+struct slPair *cur = *pList;
+
+while (cur != NULL)
+    {
+    struct slPair *rest = cur->next;
+    slPairStringFree(&cur);
+    cur = rest;
+    }
+*pList = NULL;
+}
+
+void analysisValsSamplesHashes(struct sqlConnection *biConn, struct hash *hash,
+                               struct slPair **spList, char *dataset, char *type)
+/* Read every row of table dataset and file its analysisVals under the row's
+ * sample (row[0]).  Each sample gets one slPair, found through hash and
+ * appended to *spList when first seen.  The pair's val is a list of
+ * typeHash; the row is stored in the typeHash matching type, keyed by the
+ * row's feature (row[1]). */
+{
+char query[128];
+safef(query, sizeof(query), "select * from %s", dataset);
+
+struct sqlResult *sr = sqlGetResult(biConn, query);
+char **row;
+for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
+    {
+    char *sampleKey = row[0];
+    char *featureKey = row[1];
+    struct analysisVals *vals = analysisValsLoad(row);
+    struct slPair *pair;
+    struct typeHash *bucket;
+
+    struct hashEl *hit = hashLookup(hash, sampleKey);
+    if (hit != NULL)
+        pair = hit->val;
+    else
+        {
+        /* First time this sample is seen: create its pair with an initial
+         * bucket for the current type. */
+        AllocVar(pair);
+        pair->name = cloneString(sampleKey);
+        AllocVar(bucket);
+        bucket->type = cloneString(type);
+        bucket->hash = hashNew(0);
+        pair->val = bucket;
+        hashAdd(hash, sampleKey, pair);
+        slAddTail(spList, pair);
+        }
+
+    /* Locate the bucket for this type, creating it if needed. */
+    for (bucket = pair->val; bucket != NULL; bucket = bucket->next)
+        if (sameString(type, bucket->type))
+            break;
+    if (bucket == NULL)
+        {
+        struct typeHash *buckets = pair->val;
+        AllocVar(bucket);
+        bucket->type = cloneString(type);
+        bucket->hash = hashNew(0);
+        slAddHead(&buckets, bucket);
+        pair->val = buckets;
+        }
+
+    hashAdd(bucket->hash, featureKey, vals);
+    }
+sqlFreeResult(&sr);
+}
+
+char *getDatasetType(struct sqlConnection *biConn, char *dataset)
+/* Return the result of sqlQuickString for the data-type name joined to the
+ * datasets row whose data_table equals dataset. */
+{
+char sql[256];
+safef(sql, sizeof(sql),
+      "select %s.name from %s join %s on %s.type_id=%s.id "
+      "where data_table=\"%s\"",
+      DT_TABLE, DA_TABLE, DT_TABLE, DA_TABLE, DT_TABLE, dataset);
+
+char *typeName = sqlQuickString(biConn, sql);
+return typeName;
+}
+
+struct slPair *analysisValsSamplesHashesList(struct sqlConnection *biConn,
+                                             struct slName *datasets)
+/* Load every table named in datasets through analysisValsSamplesHashes and
+ * return the combined per-sample slPair list.  A dataset whose type cannot
+ * be looked up is filed under the type "N/A". */
+{
+struct slPair *spList = NULL;
+struct slName *sl;
+struct hash *hash = hashNew(0);
+for (sl = datasets; sl; sl = sl->next)
+    {
+    char *dbType = getDatasetType(biConn, sl->name);
+    char *type = (dbType != NULL) ? dbType : "N/A";
+    fprintf(stderr, "adding type = %s for %s\n", type, sl->name);
+    analysisValsSamplesHashes(biConn, hash, &spList, sl->name, type);
+    /* analysisValsSamplesHashes clones the type string, so the string
+     * returned by the lookup must be released here or it leaks. */
+    freeMem(dbType);
+    }
+
+hashFree(&hash);
+return spList;
+}
+
+
void storeAnalysisValsInDb(struct sqlConnection *biConn, char *tableName,
struct analysisVals *avList)
/* Save every element of avList to tableName, creating the table first when
 * it does not exist.  The list is sorted with analysisValsCmp before it is
 * written. */
{
if (!sqlTableExists(biConn, tableName))
createAnalysisValsTable(biConn, tableName);
-slSort(avList, analysisValsCmp);
+slSort(&avList, analysisValsCmp);
struct analysisVals *av;
for (av = avList; av; av = av->next)
analysisValsSaveToDb(biConn, av, tableName, 50);
}
/* End helper functions */
/* Begin database functions */
void addSample(struct biOmics *bi, struct samples *sa)
{
struct hashEl *el = hashLookup(bi->samplesHash, sa->name);
if (el)
{ /* already in hash, free memory and move on */
samplesFree(&sa);
return;
}
slAddHead(&bi->samples, sa);
hashAdd(bi->samplesHash, sa->name, sa);
}
void loadAllSamples(struct sqlConnection *conn, struct biOmics *bi)
/* Load every sample row belonging to bi's dataset, ordered by id, and add
 * each one to bi with addSample. */
{
char sql[128];
safef(sql, sizeof(sql),
      "select * from %s where dataset_id = %d order by id;",
      SA_TABLE, bi->dataset->id);
struct sqlResult *sr = sqlGetResult(conn, sql);
char **row;
for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
    addSample(bi, samplesLoad(row));
sqlFreeResult(&sr);
}
void loadSamplesInList(struct sqlConnection *conn, struct biOmics *bi,
                       struct slName *sampleList)
/* Load the samples of bi's dataset whose name appears in sampleList and add
 * them to bi with addSample.  Does nothing when sampleList is empty. */
{
if (!sampleList)
    return;
struct slName *sl;
struct datasets *da = bi->dataset;
struct dyString *dy = newDyString(100);
dyStringPrintf(dy, "select * from %s where name in (", SA_TABLE);
for (sl = sampleList; sl; sl = sl->next)
    {
    dyStringPrintf(dy, "'%s'", sl->name);
    if (sl->next)
        dyStringPrintf(dy, ",");
    }
dyStringPrintf(dy, ") and dataset_id = %d order by id;", da->id);
char *query = dyStringCannibalize(&dy);
struct sqlResult *sr = sqlGetResult(conn, query);
char **row = NULL;
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct samples *sa = samplesLoad(row);
    addSample(bi, sa);
    }
sqlFreeResult(&sr);
/* The cannibalized string now belongs to this function; release it. */
freeMem(query);
}
void loadSamplesMatchingFeatureVals(struct sqlConnection *conn, struct biOmics *bi,
                                    struct slName *featureValList)
/* Load the samples of bi's dataset that satisfy the feature-value conditions
 * in featureValList and add them to bi with addSample.  Each entry must be
 * three space-separated words, "name operation val"; malformed entries are
 * reported on stderr and skipped.  All conditions are joined with "and".
 * Does nothing when featureValList is empty. */
{
if (!featureValList)
    return;
//TODO: Inner JOIN!
//select * from clinicalData p1 join clinicalData p2 on p1.sample_id = p2.sample_id and p1.code = 'Positive' and p1.feature_id = 0 and p2.code = 'Positive' and p2.feature_id = 1;
struct datasets *da = bi->dataset;
struct dyString *dy = newDyString(100);
dyStringPrintf(dy,
               "select * from %s "
               "join %s on %s.id = %s.sample_id "
               "join %s on %s.feature_id = %s.id "
               "where %s.dataset_id = %d ",
               SA_TABLE, CD_TABLE, SA_TABLE, CD_TABLE, FE_TABLE,
               CD_TABLE, FE_TABLE, SA_TABLE, da->id);
struct slName *sl, *fv, *slList = NULL;
for (fv = featureValList; fv; fv = fv->next)
    {
    addDelimStringToList(&slList, fv->name, ' ');
    if (slCount(slList) != 3)
        {
        fprintf(stderr, "Improperly formatted feature-value pair: %s\n", fv->name);
        slNameFreeList(&slList);
        slList = NULL;
        continue;
        }
    sl = slList;
    char *name = sl->name;
    sl = sl->next;
    char *operation = sl->name;
    sl = sl->next;
    char *val = sl->name;
    dyStringPrintf(dy, " and %s.name = '%s'", FE_TABLE, name);
    dyStringPrintf(dy, " and %s.val %s %s", CD_TABLE, operation, val);
    slNameFreeList(&slList);
    slList = NULL;
    }
dyStringPrintf(dy, " order by %s.id;", SA_TABLE);
char *query = dyStringCannibalize(&dy);
struct sqlResult *sr = sqlGetResult(conn, query);
char **row = NULL;
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct samples *sa = samplesLoad(row);
    addSample(bi, sa);
    }
sqlFreeResult(&sr);
/* The cannibalized string now belongs to this function; release it. */
freeMem(query);
}
void loadSamplesMatchingFeatureCodes(struct sqlConnection *conn, struct biOmics *bi,
                                     struct slName *featureCodeList)
/* Load the samples of bi's dataset that satisfy any of the feature-code
 * conditions in featureCodeList and add them to bi with addSample.  Each
 * entry must be three space-separated words, "name operation code"; a code
 * of NULL is matched with "is NULL".  Malformed entries are reported on
 * stderr and skipped.  Does nothing when the list is empty or contains no
 * well-formed entry. */
{
if (!featureCodeList)
    return;
//TODO: Inner JOIN!
//select * from clinicalData p1 join clinicalData p2 on p1.sample_id = p2.sample_id and p1.code = 'Positive' and p1.feature_id = 0 and p2.code = 'Positive' and p2.feature_id = 1;
struct datasets *da = bi->dataset;
struct dyString *dy = newDyString(100);
dyStringPrintf(dy,
               "select * from %s "
               "join %s on %s.id = %s.sample_id "
               "join %s on %s.feature_id = %s.id "
               "where %s.dataset_id = %d and (",
               SA_TABLE, CD_TABLE, SA_TABLE, CD_TABLE, FE_TABLE,
               CD_TABLE, FE_TABLE, SA_TABLE, da->id);
struct slName *sl, *fc, *slList = NULL;
int clauseCount = 0;
for (fc = featureCodeList; fc; fc = fc->next)
    {
    addDelimStringToList(&slList, fc->name, ' ');
    if (slCount(slList) != 3)
        {
        fprintf(stderr, "Improperly formatted feature-code pair: %s\n", fc->name);
        slNameFreeList(&slList);
        slList = NULL;
        continue;
        }
    sl = slList;
    char *name = sl->name;
    sl = sl->next;
    char *operation = sl->name;
    sl = sl->next;
    char *code = sl->name;
    /* Emit the separator before each clause after the first, so that a
     * skipped entry can never leave a dangling " or" in the query. */
    if (clauseCount++ > 0)
        dyStringPrintf(dy, " or");
    dyStringPrintf(dy, " (%s.name = '%s'", FE_TABLE, name);
    if (sameString(code, "NULL"))
        dyStringPrintf(dy, " and %s.code is NULL)", CD_TABLE);
    else
        dyStringPrintf(dy, " and %s.code %s '%s')", CD_TABLE, operation, code);
    slNameFreeList(&slList);
    slList = NULL;
    }
dyStringPrintf(dy, ") order by %s.id;", SA_TABLE);
char *query = dyStringCannibalize(&dy);
if (clauseCount == 0)
    {
    /* Every entry was malformed; "and ()" would not be valid SQL. */
    freeMem(query);
    return;
    }
struct sqlResult *sr = sqlGetResult(conn, query);
char **row = NULL;
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct samples *sa = samplesLoad(row);
    addSample(bi, sa);
    }
sqlFreeResult(&sr);
/* The cannibalized string now belongs to this function; release it. */
freeMem(query);
}
void loadSamples(struct sqlConnection *conn, struct biOmics *bi, struct biQuery *bq)
{
if (!bq->sampleList && !bq->featureValList && !bq->featureCodeList)
{
fprintf(stderr, "Loading ALL samples from dataset.\n");
loadAllSamples(conn, bi);
return;
}
/* Load samples from list */
loadSamplesInList(conn, bi, bq->sampleList);
/* Load samples matching feature-value pairs, e.g. B <= 5,C > 6,... */
loadSamplesMatchingFeatureVals(conn, bi, bq->featureValList);
/* Load samples matching feature-code pairs, e.g. ER = Positive */
loadSamplesMatchingFeatureCodes(conn, bi, bq->featureCodeList);
}
void addAnalysisFeature(struct biOmics *bi, struct analysisFeatures *af)
/* Add af to bi's feature list and feature hash (keyed by feature_name), and
 * create an empty slPair data slot for it, hashed by the feature's id written
 * as a decimal string.  If the feature name is already present, af is freed
 * instead. */
{
if (hashLookup(bi->featuresHash, af->feature_name) != NULL)
    {
    analysisFeaturesFree(&af);
    return;
    }
slAddHead(&bi->features, af);
hashAdd(bi->featuresHash, af->feature_name, af);

/* Data slot: named after the feature, looked up by id string. */
struct slPair *slot;
AllocVar(slot);
slot->name = cloneString(af->feature_name);
slot->val = NULL;
slAddHead(&bi->data, slot);

char idKey[128];
safef(idKey, sizeof(idKey), "%d", af->id);
hashAdd(bi->dataHash, idKey, slot);
}
void loadGenesets(struct sqlConnection *conn, struct biOmics *bi, struct slName *genesetList)
/* Placeholder: currently does nothing with its arguments. */
{
return;
}
void loadFeatures(struct sqlConnection *conn, struct biOmics *bi,
                  struct slName *featureList, boolean allData)
/* Load analysis features of type "gene" into bi with addAnalysisFeature.
 * With a featureList only the named features are loaded; with no list,
 * features are loaded only when allData is set. */
{
if (!featureList && !allData)
    return;
struct slName *sl;
struct dyString *dy = newDyString(100);
/* The type condition comes first so the optional name filter can be joined
 * with "and"; previously a second "where" was appended after a ';', which
 * is not valid SQL. */
dyStringPrintf(dy, "select * from %s where type = \"gene\"", AF_TABLE);
if (featureList)
    {
    dyStringPrintf(dy, " and feature_name in (");
    for (sl = featureList; sl; sl = sl->next)
        {
        dyStringPrintf(dy, "'%s'", sl->name);
        if (sl->next)
            dyStringPrintf(dy, ",");
        }
    dyStringPrintf(dy, ")");
    }
char *query = dyStringCannibalize(&dy);
struct sqlResult *sr = sqlGetResult(conn, query);
char **row = NULL;
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct analysisFeatures *af = analysisFeaturesLoad(row);
    addAnalysisFeature(bi, af);
    }
sqlFreeResult(&sr);
/* The cannibalized string now belongs to this function; release it. */
freeMem(query);
}
void loadAnalysisValsData(struct sqlConnection *conn, struct biOmics *bi, boolean allData)
/* Load rows from the dataset's data table, restricted to the features and
 * samples already loaded into bi when those lists are non-empty.  Each row is
 * pushed onto the value list of the slPair registered in bi->dataHash for its
 * feature id; rows with no registered feature are ignored.
 * The allData argument is unused. */
{
struct datasets *da = bi->dataset;
struct dyString *dy = newDyString(100);
dyStringPrintf(dy, "select * from %s", da->data_table);
if (bi->features)
    {
    struct analysisFeatures *af;
    dyStringPrintf(dy, " where feature_id in (");
    for (af = bi->features; af; af = af->next)
        {
        dyStringPrintf(dy, "%d", af->id);
        if (af->next)
            dyStringPrintf(dy, ",");
        }
    dyStringPrintf(dy, ")");
    }
if (bi->samples)
    {
    if (bi->features)
        dyStringPrintf(dy, " and");
    else
        dyStringPrintf(dy, " where");
    struct samples *sa;
    dyStringPrintf(dy, " sample_id in (");
    for (sa = bi->samples; sa; sa = sa->next)
        {
        dyStringPrintf(dy, "%d", sa->id);
        if (sa->next)
            dyStringPrintf(dy, ",");
        }
    dyStringPrintf(dy, ")");
    }
dyStringPrintf(dy, ";");
char *query = dyStringCannibalize(&dy);
struct sqlResult *sr = sqlGetResult(conn, query);
char **row = NULL;
int count = 0;
struct slPair *sp;
struct analysisVals *av;
while ((row = sqlNextRow(sr)) != NULL)
    {
    char *feature_id = row[1];
    struct hashEl *el = hashLookup(bi->dataHash, feature_id);
    if (!el)
        continue;
    sp = el->val;
    av = analysisValsLoad(row);
    slAddHead(&sp->val, av);
    count++;
    }
if (DEBUG)
    fprintf(stdout, "found %d features\n", count);
sqlFreeResult(&sr);
/* The cannibalized string now belongs to this function; release it. */
freeMem(query);
}
struct biData *analysisValsForFeature(struct biOmics *bi, char *feature)
/* Return a new biData named after feature holding a copy of each of the
 * feature's values that belongs to a loaded sample.  The copies are listed in
 * bd->data and hashed in bd->hash by sample name.  Returns NULL if the
 * feature or its data slot is not loaded. */
{
struct hashEl *el = hashLookup(bi->featuresHash, feature);
if (!el)
    return NULL;
struct analysisFeatures *af = el->val;
int id = af->id;
char pStr[128];
safef(pStr, sizeof(pStr), "%d", id);
el = hashLookup(bi->dataHash, pStr);
if (!el)
    return NULL;
struct slPair *sp = el->val;
struct biData *bd = biDataNew(feature);
struct samples *sa;
struct analysisVals *av;
for (av = sp->val; av; av = av->next)
    {
    for (sa = bi->samples; sa; sa = sa->next)
        {
        if (sa->id != av->sample_id)
            continue;
        struct analysisVals *newAv = cloneAnalysisVals(av);
        slAddHead(&bd->data, newAv);
        /* Hash the copy, as analysisValsForSample does, so the biData does
         * not point into storage owned by bi. */
        hashAdd(bd->hash, sa->name, newAv);
        break;
        }
    }
slReverse(&bd->data);
return bd;
}
struct biData *analysisValsForSample(struct biOmics *bi, char *sample)
/* Return a new biData named after sample holding a copy of every loaded
 * value belonging to that sample.  The copies are listed in bd->data and
 * hashed in bd->hash by feature name.  Returns NULL if the sample is not
 * loaded. */
{
struct hashEl *hit = hashLookup(bi->samplesHash, sample);
if (hit == NULL)
    return NULL;
struct samples *found = hit->val;
int sampleId = found->id;
struct biData *result = biDataNew(sample);
struct slPair *slot;
for (slot = bi->data; slot != NULL; slot = slot->next)
    {
    struct analysisVals *cur;
    for (cur = slot->val; cur != NULL; cur = cur->next)
        {
        if (sampleId == cur->sample_id)
            {
            struct analysisVals *copy = cloneAnalysisVals(cur);
            slAddHead(&result->data, copy);
            hashAdd(result->hash, slot->name, copy);
            }
        }
    }
slReverse(&result->data);
return result;
}
struct analysisVals *analysisValsForFeatureSample(struct biOmics *bi,
                                                  char *feature, char *sample)
/* Return the stored (not copied) value for the given feature and sample, or
 * NULL if the feature, its data slot, the sample, or a matching value is not
 * loaded. */
{
struct hashEl *hit = hashLookup(bi->featuresHash, feature);
if (hit == NULL)
    return NULL;
struct analysisFeatures *feat = hit->val;
char idKey[128];
safef(idKey, sizeof(idKey), "%d", feat->id);
hit = hashLookup(bi->dataHash, idKey);
if (hit == NULL)
    return NULL;
struct slPair *slot = hit->val;
hit = hashLookup(bi->samplesHash, sample);
if (hit == NULL)
    return NULL;
struct samples *samp = hit->val;
int sampleId = samp->id;
struct analysisVals *cur = slot->val;
while (cur != NULL && cur->sample_id != sampleId)
    cur = cur->next;
return cur;
}
void setDataType(struct sqlConnection *conn, struct biOmics *bi)
{
struct datasets *da = bi->dataset;
char query[128];
safef(query, sizeof(query),
"select * from %s where id = %d",
DT_TABLE, da->type_id);
struct dataTypes *dt = dataTypesLoadByQuery(conn, query);
if (!dt)
errAbort("Datatype with id = %d not found in database", da->type_id);
bi->type = cloneString(dt->name);
if (sameString(dt->format, "analysisVals"))
{
bi->loadData = loadAnalysisValsData;
bi->freeData = slPairAnalysisValsFreeList;
bi->dataForFeature = analysisValsForFeature;
bi->dataForSample = analysisValsForSample;
bi->dataForFeatureSample = analysisValsForFeatureSample;
}
else
errAbort("Unrecognized datatype");
dataTypesFreeList(&dt);
}
void loadDataset(struct sqlConnection *conn, struct biOmics *bi)
{
if (!bi->name)
return;
/* Make sure only one dataset is loaded (hence 'limit 1') */
char query[256];
safef(query, sizeof(query),
"select * from %s where data_table = '%s' limit 1;",
DA_TABLE, bi->name);
bi->dataset = datasetsLoadByQuery(conn, query);
if (!bi->dataset)
errAbort("No datasets named %s found in database.", bi->name);
/* Set data type */
setDataType(conn, bi);
}
/* End database functions */
struct biOmics *biOmicsMatchDataset(struct biOmics *biList, char *name)
/* Return the first element of biList whose name equals name, or NULL if
 * name is NULL or nothing matches. */
{
struct biOmics *cur = biList;
if (name == NULL)
    return NULL;
while (cur != NULL)
    {
    if (sameString(cur->name, name))
        break;
    cur = cur->next;
    }
return cur;
}
int biOmicsPopulateAll(struct biOmics *bi, struct biQuery *bq)
/* Fill bi with its dataset, the samples selected by bq, all features and the
 * matching data, using a connection opened for bi->db.  Always returns 0. */
{
struct sqlConnection *conn = hAllocConnProfile(heatMapDbProfile, bi->db);

loadDataset(conn, bi);
loadSamples(conn, bi, bq);
loadFeatures(conn, bi, NULL, TRUE);   /* no feature list: take everything */
bi->loadData(conn, bi, TRUE);

hFreeConn(&conn);
return 0;
}
int biOmicsPopulate(struct biOmics *bi, struct biQuery *bq)
/* Fill bi with its dataset and the samples, genesets, features and data
 * selected by bq, using a connection opened for bi->db.  When DEBUG is set,
 * uglyTime is called after each stage.  Always returns 0. */
{
struct sqlConnection *conn = hAllocConnProfile(heatMapDbProfile, bi->db);

if (DEBUG)
    {
    uglyTime(NULL);
    }
loadDataset(conn, bi);
if (DEBUG)
    {
    uglyTime("Load Dataset");
    }
loadSamples(conn, bi, bq);
if (DEBUG)
    {
    uglyTime("Load Samples");
    }
loadGenesets(conn, bi, bq->genesetList);
if (DEBUG)
    {
    uglyTime("Load Pathways");
    }
loadFeatures(conn, bi, bq->featureList, FALSE);
if (DEBUG)
    {
    uglyTime("Load Features");
    }
bi->loadData(conn, bi, FALSE);
if (DEBUG)
    {
    uglyTime("Load Data");
    }

hFreeConn(&conn);
return 0;
}
int biOmicsPopulateRandom(struct biOmics *bi, struct biQuery *bq, int numProbes)
/* Placeholder: loads nothing and always returns 0. */
{
return 0;
}
struct slName *biOmicsGetSamples(struct biOmics *bi)
/* Return a new slName list of bi's sample names, in the same order as
 * bi->samples. */
{
struct slName *names = NULL;
struct samples *cur = bi->samples;
while (cur != NULL)
    {
    slNameAddHead(&names, cur->name);
    cur = cur->next;
    }
slReverse(&names);
return names;
}
struct slName *biOmicsGetFeatures(struct biOmics *bi)
/* Return a new slName list of bi's feature names, in the same order as
 * bi->features. */
{
struct slName *names = NULL;
struct analysisFeatures *cur = bi->features;
while (cur != NULL)
    {
    slNameAddHead(&names, cur->feature_name);
    cur = cur->next;
    }
slReverse(&names);
return names;
}
struct biOmics *newBiOmics(char *db, char *dataset)
/* Allocate a biOmics for the given database and dataset name, with empty
 * lists, fresh hashes and the generic methods installed.  The data-type
 * specific methods start out NULL. */
{
struct biOmics *obj;
AllocVar(obj);

/* Identity */
obj->db = cloneString(db);
obj->name = cloneString(dataset);

/* Lookup tables */
obj->sampleIndices = hashNew(0);
obj->samplesHash = hashNew(0);
obj->featuresHash = hashNew(0);
obj->dataHash = hashNew(0);

/* Lists are filled in by the populate methods */
obj->dataset = NULL;
obj->samples = NULL;
obj->features = NULL;
obj->data = NULL;

/* Generic methods */
obj->populate = biOmicsPopulate;
obj->populateAll = biOmicsPopulateAll;
obj->populateRandom = biOmicsPopulateRandom;
obj->allFeatures = biOmicsGetFeatures;
obj->allSamples = biOmicsGetSamples;

/* Data-type methods, set according to dataType */
obj->loadData = NULL;
obj->freeData = NULL;
obj->dataForFeature = NULL;
obj->dataForSample = NULL;
obj->dataForFeatureSample = NULL;
return obj;
}
void slPairAnalysisValsFree(struct slPair **pEl)
/* Free an slPair whose val is a list of analysisVals: the name, the whole
 * value list and the pair itself.  Sets *pEl to NULL. */
{
struct slPair *el;
if ((el = *pEl) == NULL) return;
freeMem(el->name);
struct analysisVals *avList = el->val;
/* loadAnalysisValsData pushes many values onto val, so the whole list has
 * to be released, not only its first element. */
analysisValsFreeList(&avList);
freez(pEl);
}
void slPairAnalysisValsFreeList(struct slPair **pList)
/* Free every element of *pList with slPairAnalysisValsFree and set *pList
 * to NULL. */
{
struct slPair *cur = *pList;
while (cur != NULL)
    {
    struct slPair *rest = cur->next;
    slPairAnalysisValsFree(&cur);
    cur = rest;
    }
*pList = NULL;
}
void biOmicsFree(struct biOmics **pEl)
/* Free a biOmics together with its strings, hashes, dataset, samples,
 * features and data, then set *pEl to NULL.  Does nothing if *pEl is NULL. */
{
struct biOmics *el;
if ((el = *pEl) == NULL) return;
freeMem(el->db);
freeMem(el->name);
freeMem(el->type);      /* cloned in setDataType */
freeHash(&el->sampleIndices);
datasetsFreeList(&el->dataset);
samplesFreeList(&el->samples);
freeHash(&el->samplesHash);
analysisFeaturesFreeList(&el->features);
freeHash(&el->featuresHash);
/* freeData stays NULL until a dataset's type has been resolved. */
if (el->freeData != NULL)
    el->freeData(&el->data);
freeHash(&el->dataHash);
/* Release the struct itself as well as clearing the caller's pointer. */
freez(pEl);
}
void biOmicsFreeList(struct biOmics **pList)
/* Free every element of *pList with biOmicsFree and set *pList to NULL. */
{
struct biOmics *cur = *pList;
while (cur != NULL)
    {
    struct biOmics *rest = cur->next;
    biOmicsFree(&cur);
    cur = rest;
    }
*pList = NULL;
}
/*** biResults code ***/
struct slName *biResultsGetDatasets(struct biResults *br)
/* Return a new slName list of the dataset names in br.  Names are pushed on
 * the head, so the result is in the reverse order of br->datasets. */
{
struct slName *names = NULL;
struct biOmics *cur = br->datasets;
while (cur != NULL)
    {
    slNameAddHead(&names, cur->name);
    cur = cur->next;
    }
return names;
}
struct slName *biResultsGetFeaturesInDataset(struct biResults *br, char *dataset)
/* Return the feature names of the named dataset in br, or NULL if br holds
 * no such dataset. */
{
struct biOmics *match = biOmicsMatchDataset(br->datasets, dataset);
return (match != NULL) ? match->allFeatures(match) : NULL;
}
struct slName *biResultsGetFeatures(struct biResults *br)
/* Return the distinct feature names found across all datasets in br. */
{
struct slName *all = NULL;
struct biOmics *cur = br->datasets;
while (cur != NULL)
    {
    /* slCat links the per-dataset list onto the end of the running one */
    all = slCat(all, cur->allFeatures(cur));
    cur = cur->next;
    }
return slNameUniqueList(all);
}
struct slName *biResultsGetSamplesInDataset(struct biResults *br, char *dataset)
/* Return the sample names of the named dataset in br, or NULL if br holds
 * no such dataset. */
{
struct biOmics *match = biOmicsMatchDataset(br->datasets, dataset);
return (match != NULL) ? match->allSamples(match) : NULL;
}
struct slName *biResultsGetSamples(struct biResults *br)
/* Return the distinct sample names found across all datasets in br. */
{
struct slName *all = NULL;
struct biOmics *cur = br->datasets;
while (cur != NULL)
    {
    /* slCat links the per-dataset list onto the end of the running one */
    all = slCat(all, cur->allSamples(cur));
    cur = cur->next;
    }
return slNameUniqueList(all);
}
struct slName *getMatching(struct slName *list1, struct slName *list2)
/* Return a new list of the names that occur in both list1 and list2, each
 * at most once.  Returns NULL if either list is empty.  The list sizes are
 * reported on stderr. */
{
if (list1 == NULL || list2 == NULL)
    return NULL;
fprintf(stderr, "matching: count(list1) = %d, count(list2) = %d\n",
        slCount(list1), slCount(list2));
struct slName *common = NULL;
struct slName *left;
for (left = list1; left != NULL; left = left->next)
    {
    /* Look for the first entry of list2 with the same name. */
    struct slName *right = list2;
    while (right != NULL && !sameString(left->name, right->name))
        right = right->next;
    if (right == NULL)
        continue;
    /* Later duplicates in list2 would change nothing, because the name is
     * on the result list after this point either way. */
    if (!slNameInList(common, left->name))
        slNameAddHead(&common, left->name);
    }
return common;
}
struct slName *biResultsGetSamplesInCommon(struct biResults *br)
{
struct biOmics *bi = br->datasets;
/* Start off the list */
struct slName *prevMatched, *matched = bi->allSamples(bi);
bi = bi->next;
for ( ; bi; bi = bi->next)
{
prevMatched = matched;
struct slName *samples = bi->allSamples(bi);
matched = getMatching(prevMatched, samples);
slNameFreeList(&samples);
slNameFreeList(&prevMatched);
}
return matched;
}
struct biData *biResultsDataForFeatureInDataset(struct biResults *br,
                                                char *feature, char *dataset)
/* Return the named dataset's data for feature, or NULL if br holds no such
 * dataset. */
{
struct biOmics *match = biOmicsMatchDataset(br->datasets, dataset);
return (match != NULL) ? match->dataForFeature(match, feature) : NULL;
}
struct biData *biResultsDataForFeature(struct biResults *br, char *feature)
/* Return a list with one biData per dataset in br that has data for feature.
 * The dataset name is appended to each biData's name.  Datasets that return
 * no data for the feature are skipped. */
{
struct biData *bd,*bdList = NULL;
struct biOmics *bi;
for (bi = br->datasets; bi; bi = bi->next)
    {
    bd = bi->dataForFeature(bi, feature);
    if (bd == NULL)
        continue;   /* dataForFeature returns NULL for unknown features */
    biDataAppendName(bd, bi->name);
    slAddHead(&bdList, bd);
    }
return bdList;
}
struct biData *biResultsDataForSampleInDataset(struct biResults *br, char *sample,
                                               char *dataset)
/* Return the named dataset's data for sample, or NULL if br holds no such
 * dataset. */
{
struct biOmics *match = biOmicsMatchDataset(br->datasets, dataset);
return (match != NULL) ? match->dataForSample(match, sample) : NULL;
}
struct biData *biResultsDataForSample(struct biResults *br, char *sample)
/* Return a list with one biData per dataset in br that has data for sample.
 * Each biData gets a copy of the dataset's type, and the dataset name is
 * appended to its name.  Datasets that return no data for the sample are
 * skipped. */
{
struct biData *bd, *bdList = NULL;
struct biOmics *bi;
for (bi = br->datasets; bi; bi = bi->next)
    {
    bd = bi->dataForSample(bi, sample);
    if (bd == NULL)
        continue;   /* dataForSample returns NULL for unknown samples */
    bd->type = cloneString(bi->type);
    biDataAppendName(bd, bi->name);
    slAddHead(&bdList, bd);
    }
return bdList;
}
struct analysisVals *biResultsDataForFeatureSampleInDataset(struct biResults *br,
                                                            char *feature, char *sample,
                                                            char *dataset)
/* Return the named dataset's value for the feature/sample pair, or NULL if
 * br holds no such dataset. */
{
struct biOmics *match = biOmicsMatchDataset(br->datasets, dataset);
return (match != NULL) ? match->dataForFeatureSample(match, feature, sample) : NULL;
}
struct biData *biResultsDataForFeatureSample(struct biResults *br, char *feature, char *sample)
{
char name[128];
safef(name, sizeof(name), "%s-%s", feature, sample);
struct biData *bd = biDataNew(name);
struct biOmics *bi;
for (bi = br->datasets; bi; bi = bi->next)
{
struct analysisVals *av = bi->dataForFeatureSample(bi, feature, sample);
struct analysisVals *newAv = cloneAnalysisVals(av);
slAddHead(&bd->data, newAv);
hashAdd(bd->hash, bi->name, newAv);
}
slReverse(&bd->data);
return bd;
}
struct biResults *biResultsNew(void)
/* Allocate an empty biResults with all of its methods installed. */
{
struct biResults *res;
AllocVar(res);
res->datasets = NULL;

/* Name listings */
res->allDatasets = biResultsGetDatasets;
res->allFeatures = biResultsGetFeatures;
res->allFeaturesInDataset = biResultsGetFeaturesInDataset;
res->allSamples = biResultsGetSamples;
res->allSamplesInCommon = biResultsGetSamplesInCommon;
res->allSamplesInDataset = biResultsGetSamplesInDataset;

/* Data access */
res->dataForFeature = biResultsDataForFeature;
res->dataForFeatureInDataset = biResultsDataForFeatureInDataset;
res->dataForSample = biResultsDataForSample;
res->dataForSampleInDataset = biResultsDataForSampleInDataset;
res->dataForFeatureSample = biResultsDataForFeatureSample;
res->dataForFeatureSampleInDataset = biResultsDataForFeatureSampleInDataset;
return res;
}
void biResultsFree(struct biResults **pEl)
/* Free a biResults and every dataset it holds, then set *pEl to NULL.  Does
 * nothing if *pEl is NULL. */
{
struct biResults *el;
if ((el = *pEl) == NULL) return;
biOmicsFreeList(&el->datasets);
/* Release the struct itself; it was previously leaked. */
freez(pEl);
}
void biResultsAddBiQuery(struct biResults *br, struct biQuery *bq)
/* Create a biOmics for bq, populate it (with all features when
 * bq->getAllFeatures is set) and push it onto br->datasets. */
{
struct biOmics *loaded = newBiOmics(bq->db, bq->dataset);
if (!bq->getAllFeatures)
    loaded->populate(loaded, bq);
else
    loaded->populateAll(loaded, bq);
slAddHead(&br->datasets, loaded);
}
struct biResults *biQueryResults(struct biQuery *bqList)
{
struct biResults *br = biResultsNew();
struct biQuery *bq;
for (bq = bqList; bq; bq = bq->next)
biResultsAddBiQuery(br, bq);
return br;
}
void biResultsAddBiQueryRandom(struct biResults *br, struct biQuery *bq, int numFeatures)
/* Create a biOmics for bq, call its populateRandom method with numFeatures
 * and push it onto br->datasets. */
{
struct biOmics *loaded = newBiOmics(bq->db, bq->dataset);

loaded->populateRandom(loaded, bq, numFeatures);
slAddHead(&br->datasets, loaded);
}
struct biResults *biQueryResultsRandomize(struct biQuery *bqList, int numFeatures)
{
struct biResults *br = biResultsNew();
struct biQuery *bq;
for (bq = bqList; bq; bq = bq->next)
biResultsAddBiQueryRandom(br, bq, numFeatures);
return br;
}
/*** End biResults code ***/
/*** biQuery code ****/
void biQueryAppend(struct biQuery **bqList, struct biQuery *bq)
{
if (!bqList)
return;
slAddHead(bqList, bq);
}
int biQueryAddGenesets(struct biQuery *bq, char *genesets, char sep)
/* Split genesets on sep and add the pieces to bq's geneset list.  Returns
 * the result of addDelimStringToList. */
{
int added = addDelimStringToList(&bq->genesetList, genesets, sep);
return added;
}
int biQueryAddFeatures(struct biQuery *bq, char *features, char sep)
/* Split features on sep and add the pieces to bq's feature list.  Returns
 * the result of addDelimStringToList. */
{
int added = addDelimStringToList(&bq->featureList, features, sep);
return added;
}
int biQueryAddSamples(struct biQuery *bq, char *samples, char sep)
/* Split samples on sep and add the pieces to bq's sample list.  Returns the
 * result of addDelimStringToList. */
{
int added = addDelimStringToList(&bq->sampleList, samples, sep);
return added;
}
int biQueryAddFeatureVals(struct biQuery *bq, char *featureVals, char sep)
/* Split featureVals on sep and add the pieces to bq's feature-value list.
 * Returns the result of addDelimStringToList. */
{
int added = addDelimStringToList(&bq->featureValList, featureVals, sep);
return added;
}
int biQueryAddFeatureCodes(struct biQuery *bq, char *featureCodes, char sep)
/* Split featureCodes on sep and add the pieces to bq's feature-code list.
 * Returns the result of addDelimStringToList. */
{
int added = addDelimStringToList(&bq->featureCodeList, featureCodes, sep);
return added;
}
struct biQuery *biQueryNew(char *db, char *dataset)
/* Allocate a biQuery for the given database and dataset name, with empty
 * selection lists, getAllFeatures off and the add methods installed. */
{
struct biQuery *query;
AllocVar(query);

query->db = cloneString(db);
query->dataset = cloneString(dataset);
query->getAllFeatures = FALSE;

/* Selection lists start out empty */
query->genesetList = NULL;
query->featureList = NULL;
query->sampleList = NULL;
query->featureValList = NULL;
query->featureCodeList = NULL;

/* Methods */
query->addGenesets = biQueryAddGenesets;
query->addSamples = biQueryAddSamples;
query->addFeatures = biQueryAddFeatures;
query->addFeatureVals = biQueryAddFeatureVals;
query->addFeatureCodes = biQueryAddFeatureCodes;
return query;
}
void biQueryFree(struct biQuery **pEl)
/* Free a biQuery together with its strings and selection lists, then set
 * *pEl to NULL.  Does nothing if *pEl is NULL. */
{
struct biQuery *el;
if ((el = *pEl) == NULL) return;
freeMem(el->db);
freeMem(el->dataset);
slNameFreeList(&el->genesetList);
slNameFreeList(&el->featureList);
slNameFreeList(&el->sampleList);
slNameFreeList(&el->featureValList);
slNameFreeList(&el->featureCodeList);
/* Release the struct itself; it was previously leaked. */
freez(pEl);
}
void biQueryFreeList(struct biQuery **pList)
/* Free every element of *pList with biQueryFree and set *pList to NULL. */
{
struct biQuery *cur = *pList;
while (cur != NULL)
    {
    struct biQuery *rest = cur->next;
    biQueryFree(&cur);
    cur = rest;
    }
*pList = NULL;
}
void biDataAppendName(struct biData *bd, char *name)
/* Append name to bd->name, separated by a comma.  If bd has no name yet it
 * simply becomes a copy of name.  A NULL name leaves bd unchanged. */
{
if (!name)
    return;
if (!bd->name)
    {
    bd->name = cloneString(name);
    return;
    }
/* Size the buffer from the two strings (plus ',' and the terminator)
 * rather than using a fixed 128 bytes, which made safef abort on long
 * combined names. */
int size = strlen(bd->name) + strlen(name) + 2;
char *joined = needMem(size);
safef(joined, size, "%s,%s", bd->name, name);
freeMem(bd->name);
bd->name = joined;
}
struct biData *biDataFind(struct biData *bdList, char *name)
/* Return the first element of bdList whose name equals name, or NULL if
 * there is none. */
{
struct biData *cur = bdList;
while (cur != NULL)
    {
    if (sameString(cur->name, name))
        break;
    cur = cur->next;
    }
return cur;
}
struct biData *biDataNew(char *name)
/* Allocate an empty biData with a fresh hash.  The name is copied when one
 * is given and left NULL otherwise. */
{
struct biData *fresh;
AllocVar(fresh);
fresh->name = (name != NULL) ? cloneString(name) : NULL;
fresh->type = NULL;
fresh->data = NULL;
fresh->hash = hashNew(0);
return fresh;
}
void biDataFree(struct biData **pEl)
/* Free a biData together with its name, type, value list and hash, then set
 * *pEl to NULL.  Does nothing if *pEl is NULL. */
{
struct biData *el;
if ((el = *pEl) == NULL) return;
freeMem(el->name);
freeMem(el->type);
analysisValsFreeList(&el->data);
freeHash(&el->hash);
/* Release the struct itself; it was previously leaked. */
freez(pEl);
}
void biDataFreeList(struct biData **pList)
/* Free every element of *pList with biDataFree and set *pList to NULL. */
{
struct biData *cur = *pList;
while (cur != NULL)
    {
    struct biData *rest = cur->next;
    biDataFree(&cur);
    cur = rest;
    }
*pList = NULL;
}
/**** End biQuery Code *****/