src/hg/instinct/bioInt2/bioIntDriver.c 1.3
1.3 2009/04/27 06:15:48 jsanborn
updated lots of stuff, will break older implementation of database
Index: src/hg/instinct/bioInt2/bioIntDriver.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/bioIntDriver.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 4 -r1.2 -r1.3
--- src/hg/instinct/bioInt2/bioIntDriver.c 24 Mar 2009 05:21:54 -0000 1.2
+++ src/hg/instinct/bioInt2/bioIntDriver.c 27 Apr 2009 06:15:48 -0000 1.3
@@ -15,8 +15,27 @@
static char *heatMapDbProfile = "localDb";
/* Begin helper functions */
+struct hash *createIdHash(struct sqlConnection *biConn, char *tableName, char *fieldName)
+/* Build a hash that maps each row's fieldName value to that row's integer id,
+ * using the result of "select id, <fieldName> from <tableName>" on biConn.
+ * Returns the newly created hash. */
+{
+struct hash *hash = hashNew(0);
+char query[128];
+safef(query, sizeof(query), "select id, %s from %s", fieldName, tableName);
+
+struct sqlResult *sr = sqlGetResult(biConn, query);
+char **row = NULL;
+while ((row = sqlNextRow(sr)) != NULL)
+ {
+ unsigned int id = sqlUnsigned(row[0]);
+ /* The hash keeps its own copy of the key, so hand it the row text
+ * directly; cloning the string first leaked one allocation per row. */
+ hashAddInt(hash, row[1], id);
+ }
+sqlFreeResult(&sr);
+
+return hash;
+}
+
int addDelimStringToList(struct slName **list, char *str, char sep)
{
if (!str)
return 0;
@@ -37,8 +56,20 @@
fprintf(stdout, "%s ", sl->name);
fprintf(stdout, "\n");
}
+struct analysisVals *cloneAnalysisVals(struct analysisVals *av)
+/* Return a freshly allocated analysisVals carrying av's sample_id,
+ * feature_id, val and conf. Any other field stays as AllocVar left it. */
+{
+struct analysisVals *copy;
+AllocVar(copy);
+
+copy->conf = av->conf;
+copy->val = av->val;
+copy->feature_id = av->feature_id;
+copy->sample_id = av->sample_id;
+return copy;
+}
+
struct hash *uniqueHashFromSlNameList(void *list)
/* Create a unique hash out of a list of slNames or any kind of list where the */
/* first field is the next pointer and the second is the name.
* -- Adapted from hashFromSlNameList in hash.h */
@@ -71,11 +102,48 @@
slNameFreeList(&list);
freeHash(&hash);
return slList;
}
+
+int analysisValsCmp(const void *va, const void *vb)
+/* Compare two analysisVals for sorting: order by feature_id first, then by
+ * sample_id. va and vb each point at a pointer to an element.
+ * Returns -1, 0 or 1. */
+{
+const struct analysisVals *a = *((struct analysisVals **)va);
+const struct analysisVals *b = *((struct analysisVals **)vb);
+
+/* Compare the ids directly instead of subtracting them: a difference
+ * stored in an int can wrap for far-apart ids and flip the result. */
+if (a->feature_id != b->feature_id)
+ return (a->feature_id < b->feature_id) ? -1 : 1;
+
+/* feature_id's are the same */
+if (a->sample_id != b->sample_id)
+ return (a->sample_id < b->sample_id) ? -1 : 1;
+
+return 0;
+}
+
+void storeAnalysisValsInDb(struct sqlConnection *biConn, char *tableName,
+ struct analysisVals *avList)
+/* Save every element of avList to tableName, creating the table first when
+ * it does not exist. Rows are written in analysisValsCmp order (feature_id,
+ * then sample_id). The list itself is neither re-linked nor freed. */
+{
+if (!sqlTableExists(biConn, tableName))
+ createAnalysisValsTable(biConn, tableName);
+
+int count = slCount(avList);
+if (count == 0)
+ return;
+
+/* slSort() wants the address of the list head. Passing the head element
+ * itself left the first element out of the sort and dereferenced NULL for
+ * an empty list. Sorting an array of element pointers gives the intended
+ * order without disturbing the caller's list. */
+struct analysisVals **sorted;
+AllocArray(sorted, count);
+struct analysisVals *av;
+int i = 0;
+for (av = avList; av; av = av->next)
+ sorted[i++] = av;
+qsort(sorted, count, sizeof(sorted[0]), analysisValsCmp);
+
+for (i = 0; i < count; i++)
+ analysisValsSaveToDb(biConn, sorted[i], tableName, 50);
+freeMem(sorted);
+}
+
/* End helper functions */
+
/* Begin database functions */
void addSample(struct biOmics *bi, struct samples *sa)
{
struct hashEl *el = hashLookup(bi->samplesHash, sa->name);
@@ -274,237 +342,96 @@
/* Load samples matching feature-code pairs, e.g. ER = Positive */
loadSamplesMatchingFeatureCodes(conn, bi, bq->featureCodeList);
}
-void addProbeInfo(struct biOmics *bi, struct probeInfo *pi)
+void addAnalysisFeature(struct biOmics *bi, struct analysisFeatures *af)
+/* Register af with bi: add it to bi->features and bi->featuresHash (keyed by
+ * feature_name), and create an empty slPair in bi->data / bi->dataHash
+ * (keyed by the decimal feature id) for values to be attached to.
+ * If a feature with the same name is already present, af is freed instead. */
{
-struct hashEl *el = hashLookup(bi->probeInfoHash, pi->name);
+struct hashEl *el = hashLookup(bi->featuresHash, af->feature_name);
if (el)
{
- probeInfoFree(&pi);
+ analysisFeaturesFree(&af);
return; // already there;
}
-slAddHead(&bi->probeInfo, pi);
-hashAdd(bi->probeInfoHash, pi->name, pi);
+slAddHead(&bi->features, af);
+hashAdd(bi->featuresHash, af->feature_name, af);
/* Allocate space for slPair data */
-char probe_id[128];
-safef(probe_id, sizeof(probe_id), "%d", pi->id);
+char feature_id[128];
+safef(feature_id, sizeof(feature_id), "%d", af->id);
struct slPair *sp;
AllocVar(sp);
-sp->name = cloneString(pi->name);
+sp->name = cloneString(af->feature_name);
sp->val = NULL;
-slAddHead(&bi->probes, sp);
-hashAdd(bi->probesHash, probe_id, sp);
+slAddHead(&bi->data, sp);
+hashAdd(bi->dataHash, feature_id, sp);
}
-void loadPathways(struct sqlConnection *conn, struct biOmics *bi, struct slName *pathwayList)
+void loadGenesets(struct sqlConnection *conn, struct biOmics *bi, struct slName *genesetList)
+/* Placeholder: currently does nothing with conn, bi or genesetList. */
{
-if (!pathwayList)
- return;
-
-struct slName *sl;
-struct datasets *da = bi->dataset;
-char *pInfo = da->probe_table;
-char *p2g = da->probe_to_gene_table;
-
-if (!pInfo || !p2g)
- return;
-
-struct dyString *dy = newDyString(100);
-dyStringPrintf(dy,
- "select %s.* from %s join %s on %s.probe_id = %s.id "
- "join %s on %s.gene_id = %s.gene_id "
- "join %s on %s.id = %s.id "
- "where %s.name in (",
- pInfo, pInfo, p2g, p2g, pInfo,
- PG_TABLE, PG_TABLE, p2g,
- PA_TABLE, PA_TABLE, PG_TABLE,
- PA_TABLE);
-
-for (sl = pathwayList; sl; sl = sl->next)
- {
- dyStringPrintf(dy, "'%s'", sl->name);
- if (sl->next)
- dyStringPrintf(dy, ",");
- }
-dyStringPrintf(dy, ");");
-char *query = dyStringCannibalize(&dy);
-
-struct sqlResult *sr = sqlGetResult(conn, query);
-char **row = NULL;
-while ((row = sqlNextRow(sr)) != NULL)
- {
- struct probeInfo *pi = probeInfoLoad(row);
- addProbeInfo(bi, pi);
- }
-sqlFreeResult(&sr);
+return;
}
-void loadGenes(struct sqlConnection *conn, struct biOmics *bi, struct slName *geneList)
+void loadFeatures(struct sqlConnection *conn, struct biOmics *bi,
+ struct slName *featureList, boolean allData)
+/* Load rows of AF_TABLE whose type is "gene" into bi via addAnalysisFeature.
+ * When featureList is given, only rows whose feature_name is in the list are
+ * loaded. Does nothing when featureList is NULL and allData is FALSE.
+ * NOTE(review): feature names are pasted into the SQL text unescaped --
+ * confirm that callers only pass trusted names. */
{
-if (!geneList)
+if (!featureList && !allData)
return;
struct slName *sl;
-struct datasets *da = bi->dataset;
-char *pInfo = da->probe_table;
-char *p2g = da->probe_to_gene_table;
-
-if (!pInfo || !p2g)
- return;
struct dyString *dy = newDyString(100);
-dyStringPrintf(dy,
- "select * from %s join %s on %s.probe_id = %s.id "
- "join %s on %s.id = %s.gene_id where kgId in (",
- pInfo, p2g, p2g, pInfo,
- GL_TABLE, GL_TABLE, p2g);
+/* Emit the single WHERE clause up front and AND the optional name filter
+ * onto it; appending a second "where" after "...);" produced invalid SQL
+ * whenever a feature list was supplied. */
+dyStringPrintf(dy, "select * from %s where type = \"gene\"", AF_TABLE);
-for (sl = geneList; sl; sl = sl->next)
+if (featureList)
{
- dyStringPrintf(dy, "'%s'", sl->name);
- if (sl->next)
- dyStringPrintf(dy, ",");
- }
-dyStringPrintf(dy, ");");
-char *query = dyStringCannibalize(&dy);
-
-struct sqlResult *sr = sqlGetResult(conn, query);
-char **row = NULL;
-while ((row = sqlNextRow(sr)) != NULL)
- {
- struct probeInfo *pi = probeInfoLoad(row);
- addProbeInfo(bi, pi);
- }
-sqlFreeResult(&sr);
-}
-
-void loadProbeInfo(struct sqlConnection *conn, struct biOmics *bi,
- struct slName *probeList, boolean allData)
-{
-if (!probeList && !allData)
- return;
-
-struct datasets *da = bi->dataset;
-
-struct slName *sl;
-struct dyString *dy = newDyString(100);
-dyStringPrintf(dy, "select * from %s ", da->probe_table);
-
-if (probeList)
- {
- dyStringPrintf(dy, "where name in (");
- for (sl = probeList; sl; sl = sl->next)
+ dyStringPrintf(dy, " and feature_name in (");
+ for (sl = featureList; sl; sl = sl->next)
{
dyStringPrintf(dy, "'%s'", sl->name);
if (sl->next)
dyStringPrintf(dy, ",");
}
- dyStringPrintf(dy, ");");
+ dyStringPrintf(dy, ")");
}
char *query = dyStringCannibalize(&dy);
struct sqlResult *sr = sqlGetResult(conn, query);
char **row = NULL;
while ((row = sqlNextRow(sr)) != NULL)
{
- struct probeInfo *pi = probeInfoLoad(row);
- addProbeInfo(bi, pi);
+ struct analysisFeatures *af = analysisFeaturesLoad(row);
+ addAnalysisFeature(bi, af);
}
sqlFreeResult(&sr);
+/* The cannibalized string now belongs to this function; release it. */
+freeMem(query);
}
-void loadProbeInfoRandom(struct sqlConnection *conn, struct biOmics *bi, int numProbes)
-{
-if (numProbes <= 0)
- return;
-
-struct datasets *da = bi->dataset;
-
-char q[256];
-safef(q, sizeof(q), "select max(id) from %s;", da->probe_table);
-int maxProbes = sqlQuickNum(conn, q);
-
-if (numProbes >= maxProbes) // what's the point?!?
- errAbort("Number of random probes greater than probes in table!.");
-
-int i = 0, total = 0;
-char rStr[128];
-struct hash *hash = hashNew(0);
-while (i < numProbes && total < maxProbes * 2)
- {
- total++;
- int r = random() % maxProbes;
- safef(rStr, sizeof(rStr), "%d", r);
- if (hashLookup(hash, rStr))
- continue;
- hashAddInt(hash, rStr, 1);
- i++;
- }
-struct slInt *si, *siList = NULL;
-struct hashCookie cookie = hashFirst(hash);
-char *name;
-while ((name = hashNextName(&cookie)) != NULL)
- {
- si = slIntNew(atoi(name));
- slAddHead(&siList, si);
- }
-slSort(&siList, slIntCmp);
-hashFree(&hash);
-
-if (slCount(siList) != numProbes)
- errAbort("Random list not correct length %d != %d.", slCount(siList), numProbes);
-
-struct dyString *dy = newDyString(100);
-dyStringPrintf(dy, "select * from %s where id in (", da->probe_table);
-for (si = siList; si; si = si->next)
- {
- dyStringPrintf(dy, "%d", si->val);
- if (si->next)
- dyStringPrintf(dy, ",");
- }
-dyStringPrintf(dy, ");");
-
-char *query = dyStringCannibalize(&dy);
-slFreeList(&siList);
-
-struct sqlResult *sr = sqlGetResult(conn, query);
-char **row = NULL;
-while ((row = sqlNextRow(sr)) != NULL)
- {
- struct probeInfo *pi = probeInfoLoad(row);
- addProbeInfo(bi, pi);
- }
-sqlFreeResult(&sr);
-}
-
-void loadProbeSampleValData(struct sqlConnection *conn, struct biOmics *bi, boolean allData)
+void loadAnalysisValsData(struct sqlConnection *conn, struct biOmics *bi, boolean allData)
{ /* boolean allData is unused in this function */
struct datasets *da = bi->dataset;
struct dyString *dy = newDyString(100);
dyStringPrintf(dy, "select * from %s", da->data_table);
-if (bi->probeInfo)
+if (bi->features)
{
- struct probeInfo *pi;
- dyStringPrintf(dy, " where probe_id in (");
- for (pi = bi->probeInfo; pi; pi = pi->next)
+ struct analysisFeatures *af;
+ dyStringPrintf(dy, " where feature_id in (");
+ for (af = bi->features; af; af = af->next)
{
- dyStringPrintf(dy, "%d", pi->id);
- if (pi->next)
+ dyStringPrintf(dy, "%d", af->id);
+ if (af->next)
dyStringPrintf(dy, ",");
}
dyStringPrintf(dy, ")");
}
if (bi->samples)
{
- if (bi->probeInfo)
+ if (bi->features)
dyStringPrintf(dy, " and");
else
dyStringPrintf(dy, " where");
@@ -525,390 +452,121 @@
char **row = NULL;
int count = 0;
struct slPair *sp;
-struct probeSampleVal *pv;
+struct analysisVals *av;
while ((row = sqlNextRow(sr)) != NULL)
{
- char *probe_id = row[0];
- struct hashEl *el = hashLookup(bi->probesHash, probe_id);
+ char *feature_id = row[1];
+ struct hashEl *el = hashLookup(bi->dataHash, feature_id);
if (!el)
continue;
sp = el->val;
- pv = probeSampleValLoad(row);
- slAddHead(&sp->val, pv);
+ av = analysisValsLoad(row);
+ slAddHead(&sp->val, av);
count++;
}
if (DEBUG)
- fprintf(stdout, "found %d probes\n", count);
+ fprintf(stdout, "found %d features\n", count);
sqlFreeResult(&sr);
}
-void loadProbeValsData(struct sqlConnection *conn, struct biOmics *bi, boolean allData)
+struct biData *analysisValsForFeature(struct biOmics *bi, char *feature)
+/* Return a new biData named after feature. For every value stored for that
+ * feature whose sample_id matches a sample in bi->samples, a clone is added
+ * to bd->data and indexed in bd->hash under the sample's name.
+ * Returns NULL when the feature is unknown or has no data slot in bi. */
{
-if (!bi->probeInfo && !allData)
- return;
-struct datasets *da = bi->dataset;
-
-struct probeInfo *pi;
-struct dyString *dy = newDyString(100);
-dyStringPrintf(dy, "select * from %s ", da->data_table);
-
-if (bi->probeInfo && !allData)
- {
- dyStringPrintf(dy, "where probe_id in (");
- for (pi = bi->probeInfo; pi; pi = pi->next)
- {
- dyStringPrintf(dy, "%d", pi->id);
- if (pi->next)
- dyStringPrintf(dy, ",");
- }
- dyStringPrintf(dy, ")");
- }
-char *query = dyStringCannibalize(&dy);
-
-struct sqlResult *sr = sqlGetResult(conn, query);
-char **row = NULL;
-
-int count = 0;
-struct slPair *sp;
-struct probeVals *pv;
-while ((row = sqlNextRow(sr)) != NULL)
- {
- char *probe_id = row[0];
- struct hashEl *el = hashLookup(bi->probesHash, probe_id);
- if (!el)
- continue;
- sp = el->val;
- pv = probeValsLoad(row);
- count += pv->sample_count;
- slAddHead(&sp->val, pv);
- }
-if (DEBUG)
- fprintf(stdout, "found %d probes\n", count);
-
-sqlFreeResult(&sr);
-}
-
-struct biData *probeValsForProbe(struct biOmics *bi, char *probe)
-{
-struct hashEl *el = hashLookup(bi->probeInfoHash, probe);
+struct hashEl *el = hashLookup(bi->featuresHash, feature);
if (!el)
return NULL;
-struct probeInfo *pi = el->val;
-int probeId = pi->id;
+struct analysisFeatures *af = el->val;
+int id = af->id;
char pStr[128];
-safef(pStr, sizeof(pStr), "%d", probeId);
-el = hashLookup(bi->probesHash, pStr);
+safef(pStr, sizeof(pStr), "%d", id);
+el = hashLookup(bi->dataHash, pStr);
if (!el)
return NULL;
struct slPair *sp = el->val;
-struct biData *bd = biDataNew(probe);
+struct biData *bd = biDataNew(feature);
struct samples *sa;
-struct probeVals *pv;
-struct slDouble *sd;
-for (pv = sp->val; pv; pv = pv->next)
+struct analysisVals *av;
+for (av = sp->val; av; av = av->next)
{
for (sa = bi->samples; sa; sa = sa->next)
{
- sd = slDoubleNew(pv->sample_data[sa->exp_id]);
- slAddHead(&bd->data, sd);
- hashAdd(bd->hash, sa->name, sd);
- }
- }
-slReverse(&bd->data);
-return bd;
-}
-
-struct biData *probeValsForSample(struct biOmics *bi, char *sample)
-{
-struct hashEl *el = hashLookup(bi->samplesHash, sample);
-if (!el)
- return NULL;
-struct samples *sa = el->val;
-int expId = sa->exp_id;
-
-struct biData *bd = biDataNew(NULL);
-struct slDouble *sd;
-struct slPair *sp;
-for (sp = bi->probes; sp; sp = sp->next)
- {
- struct probeVals *pv;
- for (pv = sp->val; pv; pv = pv->next)
- {
- if (expId >= pv->sample_count)
- errAbort("expId is greater than bd->vals[] array length.");
- sd = slDoubleNew(pv->sample_data[expId]);
- slAddHead(&bd->data, sd);
- hashAdd(bd->hash, sp->name, sd);
- }
- }
-slReverse(&bd->data);
-return bd;
-}
-
-double probeValsForProbeSample(struct biOmics *bi,
- char *probe, char *sample)
-{
-struct hashEl *el = hashLookup(bi->probeInfoHash, probe);
-if (!el)
- return DBL_NULL;
-struct probeInfo *pi = el->val;
-int probeId = pi->id;
-
-char pStr[128];
-safef(pStr, sizeof(pStr), "%d", probeId);
-el = hashLookup(bi->probesHash, pStr);
-if (!el)
- return DBL_NULL;
-struct slPair *sp = el->val;
-
-el = hashLookup(bi->samplesHash, sample);
-if (!el)
- return DBL_NULL;
-struct samples *sa = el->val;
-int expId = sa->exp_id;
-
-struct probeVals *pv = sp->val;
-return pv->sample_data[expId];
-}
-
-boolean calcMedianMAD(struct biOmics *bi, double *median, double *mad)
-{
-int count = 0;
-
-struct samples *sa;
-struct slPair *sp;
-struct probeVals *pv;
-struct slDouble *sd, *sdList = NULL;
-for (sp = bi->probes; sp; sp = sp->next)
- {
- pv = sp->val;
- for (sa = bi->samples; sa; sa = sa->next)
- {
- sd = slDoubleNew(pv->sample_data[sa->exp_id]);
- slAddHead(&sdList, sd);
- count++;
- }
- }
-if (!sdList)
- return FALSE;
-
-if (count <= 1)
- {
- slFreeList(&sdList);
- return FALSE;
- }
-double medianTmp = slDoubleMedian(sdList);
-
-/* Manually calculate approx std dev according to 99.7% cut-off (3 sigma) */
-slSort(sdList, slDoubleCmp);
-int low = round(count * (0.0015));
-int high = round(count * (1.0 - 0.0015));
-
-sd = slElementFromIx(sdList, low);
-double lowVal = sd->val;
-
-sd = slElementFromIx(sdList, high);
-double highVal = sd->val;
-
-*median = medianTmp;
-*mad = max(fabs(lowVal - medianTmp), fabs(highVal - medianTmp))/3.0;
-
-slFreeList(&sdList);
-return TRUE;
-}
-
-///* Calculate median absolute deviation for a more
-// * robust measure than standard deviation */
-//for (sd = sdList; sd; sd = sd->next)
-// sd->val = fabs(sd->val - medianTmp);
-
-//*median = medianTmp;
-//*mad = slDoubleMedian(sdList);
-//slFreeList(&sdList);
-//return TRUE;
-//}
-
-
-void probeValsConvertToLogP(struct biOmics *bi)
-{ /* tranforms all data to -log(p-value) based on normal z-score transform */
-double median, mad;
-if (!calcMedianMAD(bi, &median, &mad))
- return;
-
-/* To estimate std deviation from mad, use std = 1.43 * mad
- * Brideau et al. J Biomol Screen, 2003 */
-double std = mad * 1.43;
-
-fprintf(stderr, "median = %f, mad = %f, std = %f\n", median, mad, std);
-if (std == 0.0)
- return;
-
-double maxLogP = 88.0; /* max, in case p-value comes back zero */
-struct samples *sa;
-struct slPair *sp;
-struct probeVals *pv;
-double z, p;
-double val;
-for (sp = bi->probes; sp; sp = sp->next)
- {
- pv = sp->val;
- for (sa = bi->samples; sa; sa = sa->next)
- {
- z = (pv->sample_data[sa->exp_id] - median)/std;
- p = ndtr(-1.0*fabs(z));
- if (p > 0)
- val = min(-log(p)/log(10.0), maxLogP);
- else
- val = maxLogP;
-
- if (z < 0.0)
- val = -1.0*val; // signed log(p-value)
- pv->sample_data[sa->exp_id] = val;
- }
- }
-}
-
-void probeValsConvertToZscores(struct biOmics *bi)
-{ /* transforms all data to z-scores, median-centered */
-double median, mad;
-if (!calcMedianMAD(bi, &median, &mad))
- return;
-
-/* To estimate std deviation from mad, use std = 1.43 * mad
- * Brideau et al. J Biomol Screen, 2003 */
-double std = mad * 1.43;
-fprintf(stderr, "median = %f, mad = %f, std = %f\n", median, mad, std);
-if (std == 0.0)
- return;
-
-struct samples *sa;
-struct slPair *sp;
-struct probeVals *pv;
-double z;
-for (sp = bi->probes; sp; sp = sp->next)
- {
- pv = sp->val;
- for (sa = bi->samples; sa; sa = sa->next)
- {
- z = (pv->sample_data[sa->exp_id] - median)/std;
- pv->sample_data[sa->exp_id] = z;
- }
- }
-}
-
-void probeSampleValConvertToLogP(struct biOmics *bi)
-{
-return;
-}
-
-void probeSampleValConvertToZscores(struct biOmics *bi)
-{
-return;
-}
-
-struct biData *probeSampleValForProbe(struct biOmics *bi, char *probe)
-{
-struct hashEl *el = hashLookup(bi->probeInfoHash, probe);
-if (!el)
- return NULL;
-struct probeInfo *pi = el->val;
-int probeId = pi->id;
-
-char pStr[128];
-safef(pStr, sizeof(pStr), "%d", probeId);
-el = hashLookup(bi->probesHash, pStr);
-if (!el)
- return NULL;
-struct slPair *sp = el->val;
-
-struct biData *bd = biDataNew(probe);
-
-struct samples *sa;
-struct probeSampleVal *pv;
-struct slDouble *sd;
-for (pv = sp->val; pv; pv = pv->next)
- {
- for (sa = bi->samples; sa; sa = sa->next)
- {
- if (!sa->exp_id == pv->sample_id)
+ if (sa->id != av->sample_id)
continue;
- sd = slDoubleNew(pv->val);
- slAddHead(&bd->data, sd);
- hashAdd(bd->hash, sa->name, sd);
+ struct analysisVals *newAv = cloneAnalysisVals(av);
+ slAddHead(&bd->data, newAv);
+ /* Index the clone that bd->data holds, not bi's own element, so a
+ * hash lookup and the data list refer to the same object. */
+ hashAdd(bd->hash, sa->name, newAv);
break;
}
}
slReverse(&bd->data);
return bd;
}
-struct biData *probeSampleValForSample(struct biOmics *bi, char *sample)
+struct biData *analysisValsForSample(struct biOmics *bi, char *sample)
+/* Return a new biData named after sample. Every value in bi->data whose
+ * sample_id equals the sample's id is cloned into bd->data and indexed in
+ * bd->hash under the name of the slPair (feature) it came from.
+ * Returns NULL when sample is not in bi->samplesHash. */
{
struct hashEl *el = hashLookup(bi->samplesHash, sample);
if (!el)
return NULL;
struct samples *sa = el->val;
-int expId = sa->exp_id;
+int id = sa->id;
struct biData *bd = biDataNew(sample);
-struct slDouble *sd;
struct slPair *sp;
-for (sp = bi->probes; sp; sp = sp->next)
+for (sp = bi->data; sp; sp = sp->next)
{
- struct probeSampleVal *pv;
- for (pv = sp->val; pv; pv = pv->next)
+ struct analysisVals *av;
+ for (av = sp->val; av; av = av->next)
{
- if (expId != pv->sample_id)
+ if (id != av->sample_id)
continue;
- sd = slDoubleNew(pv->val);
- slAddHead(&bd->data, sd);
- hashAdd(bd->hash, sp->name, sd);
+ struct analysisVals *newAv = cloneAnalysisVals(av);
+ slAddHead(&bd->data, newAv);
+ hashAdd(bd->hash, sp->name, newAv);
}
}
slReverse(&bd->data);
return bd;
}
-double probeSampleValForProbeSample(struct biOmics *bi,
- char *probe, char *sample)
+struct analysisVals *analysisValsForFeatureSample(struct biOmics *bi,
+ char *feature, char *sample)
+/* Return the stored analysisVals for the given feature and sample, or NULL
+ * when the feature, its data slot, the sample, or a matching value is not
+ * found. The result points into bi's own data; it is not a copy. */
{
-struct hashEl *el = hashLookup(bi->probeInfoHash, probe);
+struct hashEl *el = hashLookup(bi->featuresHash, feature);
if (!el)
- return DBL_NULL;
-struct probeInfo *pi = el->val;
-int probeId = pi->id;
+ return NULL;
+struct analysisFeatures *af = el->val;
+int id = af->id;
char pStr[128];
-safef(pStr, sizeof(pStr), "%d", probeId);
-el = hashLookup(bi->probesHash, pStr);
+safef(pStr, sizeof(pStr), "%d", id);
+el = hashLookup(bi->dataHash, pStr);
if (!el)
- return DBL_NULL;
-
+ return NULL;
struct slPair *sp = el->val;
+
el = hashLookup(bi->samplesHash, sample);
if (!el)
- return DBL_NULL;
+ return NULL;
struct samples *sa = el->val;
-int expId = sa->exp_id;
+id = sa->id;
-struct probeSampleVal *pv = sp->val;
-for (pv = sp->val; pv; pv = pv->next)
+struct analysisVals *av = sp->val;
+for (av = sp->val; av; av = av->next)
{
- if (expId != pv->sample_id)
+ if (id != av->sample_id)
continue;
- return pv->val;
+ return av;
}
-return DBL_NULL;
+return NULL;
}
void setDataType(struct sqlConnection *conn, struct biOmics *bi)
{
@@ -923,27 +581,15 @@
if (!dt)
errAbort("Datatype with id = %d not found in database", da->type_id);
bi->type = cloneString(dt->name);
-if (sameString(dt->format, "probeVals"))
+if (sameString(dt->format, "analysisVals"))
{
- bi->loadData = loadProbeValsData;
- bi->freeData = slPairValsFreeList;
- bi->dataForProbe = probeValsForProbe;
- bi->dataForSample = probeValsForSample;
- bi->dataForProbeSample = probeValsForProbeSample;
- bi->toZscores = probeValsConvertToZscores;
- bi->toLogP = probeValsConvertToLogP;
- }
-else if (sameString(dt->format, "probeSampleVal"))
- {
- bi->loadData = loadProbeSampleValData;
- bi->freeData = slPairSampleValFreeList;
- bi->dataForProbe = probeSampleValForProbe;
- bi->dataForSample = probeSampleValForSample;
- bi->dataForProbeSample = probeSampleValForProbeSample;
- bi->toZscores = probeSampleValConvertToZscores;
- bi->toLogP = probeSampleValConvertToLogP;
+ bi->loadData = loadAnalysisValsData;
+ bi->freeData = slPairAnalysisValsFreeList;
+ bi->dataForFeature = analysisValsForFeature;
+ bi->dataForSample = analysisValsForSample;
+ bi->dataForFeatureSample = analysisValsForFeatureSample;
}
else
errAbort("Unrecognized datatype");
@@ -990,9 +636,9 @@
loadDataset(biConn, bi);
loadSamples(biConn, bi, bq);
-loadProbeInfo(biConn, bi, NULL, TRUE);
+loadFeatures(biConn, bi, NULL, TRUE);
bi->loadData(biConn, bi, TRUE);
hFreeConn(&biConn);
@@ -1013,51 +659,26 @@
loadSamples(biConn, bi, bq);
if (DEBUG)
uglyTime("Load Samples");
-loadPathways(biConn, bi, bq->pathwayList);
+loadGenesets(biConn, bi, bq->genesetList);
if (DEBUG)
uglyTime("Load Pathways");
-loadGenes(biConn, bi, bq->geneList);
+loadFeatures(biConn, bi, bq->featureList, FALSE);
if (DEBUG)
- uglyTime("Load Genes");
-
-loadProbeInfo(biConn, bi, bq->probeList, FALSE);
-if (DEBUG)
- uglyTime("Load Probe Info");
+ uglyTime("Load Features");
bi->loadData(biConn, bi, FALSE);
if (DEBUG)
- uglyTime("Load Probe");
+ uglyTime("Load Data");
hFreeConn(&biConn);
return 0;
}
int biOmicsPopulateRandom(struct biOmics *bi, struct biQuery *bq, int numProbes)
+/* Placeholder: loads nothing into bi and always returns 0. */
{
-struct sqlConnection *biConn = hAllocConnProfile(heatMapDbProfile, bi->db);
-
-if (DEBUG)
- uglyTime(NULL);
-loadDataset(biConn, bi);
-if (DEBUG)
- uglyTime("Load Dataset");
-
-loadSamples(biConn, bi, bq);
-if (DEBUG)
- uglyTime("Load Samples");
-
-loadProbeInfoRandom(biConn, bi, numProbes);
-if (DEBUG)
- uglyTime("Load Random Probe Info");
-
-bi->loadData(biConn, bi, FALSE);
-if (DEBUG)
- uglyTime("Load Probe");
-
-hFreeConn(&biConn);
return 0;
}
struct slName *biOmicsGetSamples(struct biOmics *bi)
@@ -1071,149 +692,73 @@
slReverse(&slList);
return slList;
}
-struct slName *biOmicsGetProbes(struct biOmics *bi)
+struct slName *biOmicsGetFeatures(struct biOmics *bi)
+/* Return a new slName list holding the feature_name of every entry in
+ * bi->features, in the same order as that list. */
{
struct slName *slList = NULL;
-struct probeInfo *pi;
-for (pi = bi->probeInfo; pi; pi = pi->next)
- slNameAddHead(&slList, pi->name);
+struct analysisFeatures *af;
+for (af = bi->features; af; af = af->next)
+ slNameAddHead(&slList, af->feature_name);
slReverse(&slList);
return slList;
}
-void populateAliases(struct biOmics *bi)
-{
-struct sqlConnection *biConn = hAllocConnProfile(heatMapDbProfile, bi->db);
-struct datasets *da = bi->dataset;
-
-fprintf(stderr, "loading gene aliases...\n");
-if (DEBUG)
- uglyTime(NULL);
-char query[1024];
-safef(query, sizeof(query),
- "select DISTINCT kgXref.geneSymbol, %s.name from %s "
- "join %s on %s.id = %s.probe_id "
- "join %s on %s.id = %s.gene_id "
- "join kgXref on kgXref.kgId = %s.kgId;",
- da->probe_table, da->probe_table,
- da->probe_to_gene_table, da->probe_table, da->probe_to_gene_table,
- GL_TABLE, GL_TABLE, da->probe_to_gene_table,
- GL_TABLE);
-
-struct sqlResult *sr = sqlGetResult(biConn, query);
-char **row = NULL;
-while ((row = sqlNextRow(sr)) != NULL)
- {
- char *gene = row[0];
- char *probe = cloneString(row[1]);
- hashAdd(bi->geneAliases, gene, probe);
- }
-sqlFreeResult(&sr);
-
-if (DEBUG)
- uglyTime("loaded gene aliases");
-hFreeConn(&biConn);
-}
-
-struct slName *biOmicsGetProbesForGene(struct biOmics *bi, char *gene)
-{
-if (hashNumEntries(bi->geneAliases) == 0)
- populateAliases(bi);
-
-struct hashEl *el;
-struct slName *slList = NULL;
-for(el = hashLookup(bi->geneAliases, gene); el != NULL; el = hashLookupNext(el))
- slNameAddHead(&slList, (char *) el->val);
-
-return slList;
-}
-
-
-
struct biOmics *newBiOmics(char *db, char *dataset)
+/* Allocate a biOmics for dataset in db with empty lists and fresh hashes.
+ * The populate/all* methods are wired here; the data-type callbacks start
+ * out NULL. */
{
struct biOmics *bi;
AllocVar(bi);
bi->db = cloneString(db);
bi->name = cloneString(dataset);
bi->sampleIndices = hashNew(0);
-bi->geneAliases = hashNew(0);
bi->dataset = NULL;
bi->samples = NULL;
bi->samplesHash = hashNew(0);
-bi->probeInfo = NULL;
-bi->probeInfoHash = hashNew(0);
-bi->probes = NULL;
-bi->probesHash = hashNew(0);
+bi->features = NULL;
+bi->featuresHash = hashNew(0);
+bi->data = NULL;
+bi->dataHash = hashNew(0);
/* Methods */
bi->populate = biOmicsPopulate;
bi->populateAll = biOmicsPopulateAll;
bi->populateRandom = biOmicsPopulateRandom;
-bi->allProbes = biOmicsGetProbes;
+bi->allFeatures = biOmicsGetFeatures;
bi->allSamples = biOmicsGetSamples;
-bi->probesForGene = biOmicsGetProbesForGene;
/* These are set according to dataType */
bi->loadData = NULL;
bi->freeData = NULL;
-bi->dataForProbe = NULL;
+bi->dataForFeature = NULL;
bi->dataForSample = NULL;
-bi->dataForProbeSample = NULL;
-bi->toZscores = NULL;
+bi->dataForFeatureSample = NULL;
return bi;
}
-void slPairSampleValFree(struct slPair **pEl)
-{
-struct slPair *el;
-
-if ((el = *pEl) == NULL) return;
-
-freeMem(el->name);
-struct probeSampleVal *pv = el->val;
-probeSampleValFree(&pv);
-freez(pEl);
-}
-
-void slPairSampleValFreeList(struct slPair **pList)
-{
-struct slPair *el, *next;
-
-for (el = *pList; el != NULL; el = next)
- {
- next = el->next;
- slPairSampleValFree(&el);
- }
-*pList = NULL;
-}
-
-void slPairValsFree(struct slPair **pEl)
+void slPairAnalysisValsFree(struct slPair **pEl)
+/* Free one slPair whose val is a list of analysisVals: the name, every
+ * element of the value list, and the pair itself. Sets *pEl to NULL.
+ * Does nothing when *pEl is already NULL. */
{
struct slPair *el;
if ((el = *pEl) == NULL) return;
freeMem(el->name);
-struct probeVals *pv = el->val;
-probeValsFreeList(&pv);
+struct analysisVals *av = el->val;
+/* val is built up as a list (one element per loaded row), so free the
+ * whole list; freeing only the head leaked every later element. */
+analysisValsFreeList(&av);
freez(pEl);
}
-void slPairValsFreeList(struct slPair **pList)
+void slPairAnalysisValsFreeList(struct slPair **pList)
+/* Free every slPair in *pList with slPairAnalysisValsFree, then set *pList
+ * to NULL. */
{
struct slPair *el, *next;
for (el = *pList; el != NULL; el = next)
{
next = el->next;
- slPairValsFree(&el);
+ slPairAnalysisValsFree(&el);
}
*pList = NULL;
}
@@ -1226,19 +771,18 @@
freeMem(el->db);
freeMem(el->name);
freeHash(&el->sampleIndices);
-freeHash(&el->geneAliases);
datasetsFreeList(&el->dataset);
samplesFreeList(&el->samples);
freeHash(&el->samplesHash);
-probeInfoFreeList(&el->probeInfo);
-freeHash(&el->probeInfoHash);
+analysisFeaturesFreeList(&el->features);
+freeHash(&el->featuresHash);
-el->freeData(&el->probes);
-freeHash(&el->probesHash);
+el->freeData(&el->data);
+freeHash(&el->dataHash);
*pEl = NULL;
}
@@ -1264,25 +809,25 @@
return slList;
}
-struct slName *biResultsGetProbesInDataset(struct biResults *br, char *dataset)
+struct slName *biResultsGetFeaturesInDataset(struct biResults *br, char *dataset)
+/* Return the feature names of the dataset matched by name in br, or NULL
+ * when no dataset matches. */
{
struct biOmics *bi = biOmicsMatchDataset(br->datasets, dataset);
if (!bi)
return NULL;
-return bi->allProbes(bi);
+return bi->allFeatures(bi);
}
-struct slName *biResultsGetProbes(struct biResults *br)
+struct slName *biResultsGetFeatures(struct biResults *br)
+/* Concatenate the feature-name lists of every dataset in br and return the
+ * result of passing that list through slNameUniqueList(). */
{
struct slName *slList = NULL;
struct biOmics *bi;
for (bi = br->datasets; bi; bi = bi->next)
{
- struct slName *probes = bi->allProbes(bi);
- slList = slCat(slList, probes);
+ struct slName *features = bi->allFeatures(bi);
+ slList = slCat(slList, features);
}
return slNameUniqueList(slList);
}
@@ -1353,46 +898,25 @@
return matched;
}
-
-struct slName *biResultsProbesForGeneInDataset(struct biResults *br, char *gene, char *dataset)
+struct biData *biResultsDataForFeatureInDataset(struct biResults *br,
+ char *feature, char *dataset)
+/* Return the dataset's dataForFeature() result for feature, or NULL when no
+ * dataset in br matches the given name. */
{
struct biOmics *bi = biOmicsMatchDataset(br->datasets, dataset);
if (!bi)
return NULL;
-return bi->probesForGene(bi, gene);
+return bi->dataForFeature(bi, feature);
}
-struct slName *biResultsProbesForGene(struct biResults *br, char *gene)
-{
-struct slName *slList = NULL;
-struct biOmics *bi;
-for (bi = br->datasets; bi; bi = bi->next)
- {
- struct slName *probes = bi->probesForGene(bi, gene);
- slList = slCat(slList, probes);
- }
-return slNameUniqueList(slList);
-}
-
-struct biData *biResultsDataForProbeInDataset(struct biResults *br, char *probe, char *dataset)
-{
-struct biOmics *bi = biOmicsMatchDataset(br->datasets, dataset);
-if (!bi)
- return NULL;
-
-return bi->dataForProbe(bi, probe);
-}
-
-struct biData *biResultsDataForProbe(struct biResults *br, char *probe)
+struct biData *biResultsDataForFeature(struct biResults *br, char *feature)
{
struct biData *bd,*bdList = NULL;
struct biOmics *bi;
for (bi = br->datasets; bi; bi = bi->next)
{
- bd = bi->dataForProbe(bi, probe);
+ bd = bi->dataForFeature(bi, feature);
biDataAppendName(bd, bi->name);
slAddHead(&bdList, bd);
}
return bdList;
@@ -1421,53 +945,37 @@
}
return bdList;
}
-double biResultsDataForProbeSampleInDataset(struct biResults *br,
- char *probe, char *sample, char *dataset)
+struct analysisVals *biResultsDataForFeatureSampleInDataset(struct biResults *br,
+ char *feature, char *sample,
+ char *dataset)
+/* Return the dataset's dataForFeatureSample() result for the feature/sample
+ * pair, or NULL when no dataset in br matches the given name. */
{
struct biOmics *bi = biOmicsMatchDataset(br->datasets, dataset);
if (!bi)
- return DBL_NULL;
+ return NULL;
-return bi->dataForProbeSample(bi, probe, sample);
+return bi->dataForFeatureSample(bi, feature, sample);
}
-struct biData *biResultsDataForProbeSample(struct biResults *br, char *probe, char *sample)
+struct biData *biResultsDataForFeatureSample(struct biResults *br, char *feature, char *sample)
+/* Return a new biData named "<feature>-<sample>" holding, for each dataset
+ * in br that has a value for the pair, a clone of that value; bd->hash
+ * indexes the clones by dataset name. Datasets without a value for the
+ * pair contribute nothing. */
{
char name[128];
-safef(name, sizeof(name), "%s-%s", probe, sample);
-struct slDouble *sd;
+safef(name, sizeof(name), "%s-%s", feature, sample);
struct biData *bd = biDataNew(name);
struct biOmics *bi;
for (bi = br->datasets; bi; bi = bi->next)
{
- double val = bi->dataForProbeSample(bi, probe, sample);
- sd = slDoubleNew(val);
- slAddHead(&bd->data, sd);
- hashAdd(bd->hash, bi->name, sd);
+ struct analysisVals *av = bi->dataForFeatureSample(bi, feature, sample);
+ /* The lookup returns NULL when the feature, sample or value is absent;
+ * cloning NULL would dereference it, so skip such datasets. */
+ if (av == NULL)
+ continue;
+ struct analysisVals *newAv = cloneAnalysisVals(av);
+ slAddHead(&bd->data, newAv);
+ hashAdd(bd->hash, bi->name, newAv);
}
slReverse(&bd->data);
return bd;
}
-void biResultsConvertToZscores(struct biResults *br)
-{
-struct biOmics *bi;
-for (bi = br->datasets; bi; bi = bi->next)
- bi->toZscores(bi);
-}
-
-void biResultsConvertToLogP(struct biResults *br)
-{
-struct biOmics *bi;
-for (bi = br->datasets; bi; bi = bi->next)
- bi->toLogP(bi);
-}
-
-
-
struct biResults *biResultsNew(void)
{
struct biResults *br;
AllocVar(br);
@@ -1476,29 +984,23 @@
/* Methods */
br->allDatasets = biResultsGetDatasets;
-br->allProbes = biResultsGetProbes;
-br->allProbesInDataset = biResultsGetProbesInDataset;
+br->allFeatures = biResultsGetFeatures;
+br->allFeaturesInDataset = biResultsGetFeaturesInDataset;
br->allSamples = biResultsGetSamples;
br->allSamplesInCommon = biResultsGetSamplesInCommon;
br->allSamplesInDataset = biResultsGetSamplesInDataset;
-br->probesForGene = biResultsProbesForGene;
-br->probesForGeneInDataset = biResultsProbesForGeneInDataset;
-
-br->dataForProbe = biResultsDataForProbe;
-br->dataForProbeInDataset = biResultsDataForProbeInDataset;
+br->dataForFeature = biResultsDataForFeature;
+br->dataForFeatureInDataset = biResultsDataForFeatureInDataset;
br->dataForSample = biResultsDataForSample;
br->dataForSampleInDataset = biResultsDataForSampleInDataset;
-br->dataForProbeSample = biResultsDataForProbeSample;
-br->dataForProbeSampleInDataset = biResultsDataForProbeSampleInDataset;
-
-br->toZscores = biResultsConvertToZscores;
-br->toLogP = biResultsConvertToLogP;
+br->dataForFeatureSample = biResultsDataForFeatureSample;
+br->dataForFeatureSampleInDataset = biResultsDataForFeatureSampleInDataset;
return br;
}
@@ -1514,9 +1016,9 @@
void biResultsAddBiQuery(struct biResults *br, struct biQuery *bq)
{
struct biOmics *bi = newBiOmics(bq->db, bq->dataset);
-if (bq->getAllProbes)
+if (bq->getAllFeatures)
bi->populateAll(bi, bq);
else
bi->populate(bi, bq);
@@ -1532,22 +1034,22 @@
return br;
}
-void biResultsAddBiQueryRandom(struct biResults *br, struct biQuery *bq, int numProbes)
+void biResultsAddBiQueryRandom(struct biResults *br, struct biQuery *bq, int numFeatures)
+/* Create a biOmics for bq's db/dataset, run its populateRandom method with
+ * numFeatures, and add it to the head of br->datasets. */
{
struct biOmics *bi = newBiOmics(bq->db, bq->dataset);
-bi->populateRandom(bi, bq, numProbes);
+bi->populateRandom(bi, bq, numFeatures);
slAddHead(&br->datasets, bi);
}
-struct biResults *biQueryResultsRandomize(struct biQuery *bqList, int numProbes)
+struct biResults *biQueryResultsRandomize(struct biQuery *bqList, int numFeatures)
+/* Return a new biResults with one randomly populated dataset added per
+ * query in bqList (via biResultsAddBiQueryRandom). */
{
struct biResults *br = biResultsNew();
struct biQuery *bq;
for (bq = bqList; bq; bq = bq->next)
- biResultsAddBiQueryRandom(br, bq, numProbes);
+ biResultsAddBiQueryRandom(br, bq, numFeatures);
return br;
}
@@ -1563,21 +1065,16 @@
return;
slAddHead(bqList, bq);
}
-int biQueryAddPathways(struct biQuery *bq, char *pathways, char sep)
-{
-return addDelimStringToList(&bq->pathwayList, pathways, sep);
-}
-
-int biQueryAddProbes(struct biQuery *bq, char *probes, char sep)
+int biQueryAddGenesets(struct biQuery *bq, char *genesets, char sep)
+/* Add the sep-delimited names in genesets to bq->genesetList; returns
+ * whatever addDelimStringToList() returns. */
{
-return addDelimStringToList(&bq->probeList, probes, sep);
+return addDelimStringToList(&bq->genesetList, genesets, sep);
}
-int biQueryAddGenes(struct biQuery *bq, char *genes, char sep)
+int biQueryAddFeatures(struct biQuery *bq, char *features, char sep)
+/* Add the sep-delimited names in features to bq->featureList; returns
+ * whatever addDelimStringToList() returns. */
{
-return addDelimStringToList(&bq->geneList, genes, sep);
+return addDelimStringToList(&bq->featureList, features, sep);
}
int biQueryAddSamples(struct biQuery *bq, char *samples, char sep)
{
@@ -1599,21 +1096,19 @@
struct biQuery *bq;
AllocVar(bq);
bq->db = cloneString(db);
bq->dataset = cloneString(dataset);
-bq->getAllProbes = FALSE;
+bq->getAllFeatures = FALSE;
-bq->pathwayList = NULL;
-bq->probeList = NULL;
-bq->geneList = NULL;
+bq->genesetList = NULL;
+bq->featureList = NULL;
bq->sampleList = NULL;
bq->featureValList = NULL;
bq->featureCodeList = NULL;
-bq->addPathways = biQueryAddPathways;
-bq->addProbes = biQueryAddProbes;
+bq->addGenesets = biQueryAddGenesets;
bq->addSamples = biQueryAddSamples;
-bq->addGenes = biQueryAddGenes;
+bq->addFeatures = biQueryAddFeatures;
bq->addFeatureVals = biQueryAddFeatureVals;
bq->addFeatureCodes = biQueryAddFeatureCodes;
return bq;
@@ -1626,11 +1121,10 @@
freeMem(el->db);
freeMem(el->dataset);
-slNameFreeList(&el->pathwayList);
-slNameFreeList(&el->probeList);
-slNameFreeList(&el->geneList);
+slNameFreeList(&el->genesetList);
+slNameFreeList(&el->featureList);
slNameFreeList(&el->sampleList);
slNameFreeList(&el->featureValList);
slNameFreeList(&el->featureCodeList);
}
@@ -1694,9 +1188,9 @@
if ((el = *pEl) == NULL) return;
freeMem(el->name);
freeMem(el->type);
-slFreeList(&el->data);
+analysisValsFreeList(&el->data);
freeHash(&el->hash);
}
void biDataFreeList(struct biData **pList)