src/hg/instinct/bioInt2/bioIntDriver.c 1.1
1.1 2009/03/20 06:06:31 jsanborn
initial commit
Index: src/hg/instinct/bioInt2/bioIntDriver.c
===================================================================
RCS file: src/hg/instinct/bioInt2/bioIntDriver.c
diff -N src/hg/instinct/bioInt2/bioIntDriver.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ src/hg/instinct/bioInt2/bioIntDriver.c 20 Mar 2009 06:06:31 -0000 1.1
@@ -0,0 +1,1706 @@
+/* bioIntDriver.c
+ * All rights reserved -- J. Zachary Sanborn
+ */
+#include "common.h"
+#include "linefile.h"
+#include "hash.h"
+#include "options.h"
+#include "jksql.h"
+#include "hdb.h"
+#include "dystring.h"
+#include "cprob.h"
+#include "hgStatsLib.h"
+#include "bioIntDriver.h"
+
+static char *heatMapDbProfile = "localDb";
+
+/* Begin helper functions */
+
+int addDelimStringToList(struct slName **list, char *str, char sep)
+/* Split str on sep and append the resulting names, in order, to the end of
+ * *list.  Returns 0 if str is NULL (list left untouched), 1 otherwise. */
+{
+if (str == NULL)
+    return 0;
+
+/* Hand the freshly split names over to *list with slCat.  Head-inserting
+ * copies and then reversing the whole list would also flip the order of any
+ * names that were already in *list before the call. */
+struct slName *pieces = slNameListFromString(str, sep);
+*list = slCat(*list, pieces);
+return 1;
+}
+
+void slNamePrint(struct slName *list)
+/* Write every name in list to stdout on one line, each name followed by a
+ * single space, then end the line with a newline. */
+{
+struct slName *cur = list;
+while (cur != NULL)
+    {
+    fprintf(stdout, "%s ", cur->name);
+    cur = cur->next;
+    }
+fprintf(stdout, "\n");
+}
+
+struct hash *uniqueHashFromSlNameList(void *list)
+/* Build a hash keyed by name from a list of slNames, or from any list whose
+ * first field is the next pointer and whose second field is the name.  Only
+ * the first item seen for each name is stored; the item itself is the hash
+ * value.  Returns NULL for an empty list.
+ * -- Adapted from hashFromSlNameList in hash.h */
+{
+struct slName *cur = list;
+if (cur == NULL)
+    return NULL;
+
+struct hash *uniq = hashNew(10);
+while (cur != NULL)
+    {
+    if (hashLookup(uniq, cur->name) == NULL)
+        hashAdd(uniq, cur->name, cur);
+    cur = cur->next;
+    }
+return uniq;
+}
+
+struct slName *slNameUniqueList(struct slName *list)
+/* Return a list holding each distinct name of list exactly once, in whatever
+ * order the hash iterator yields them.  The input list is freed here, so the
+ * caller must not use it afterwards.  Returns NULL for an empty list. */
+{
+if (list == NULL)
+    return NULL;
+
+struct hash *uniq = uniqueHashFromSlNameList(list);
+
+/* Walk the hash keys and collect them into the result list. */
+struct slName *result = NULL;
+struct hashCookie it = hashFirst(uniq);
+char *key = hashNextName(&it);
+while (key != NULL)
+    {
+    slNameAddHead(&result, key);
+    key = hashNextName(&it);
+    }
+
+slNameFreeList(&list);
+freeHash(&uniq);
+return result;
+}
+/* End helper functions */
+
+
+/* Begin database functions */
+void addSample(struct biOmics *bi, struct samples *sa)
+/* Register sa with bi: put it at the head of bi->samples and index it by name
+ * in bi->samplesHash.  If a sample of that name is already indexed, sa is
+ * freed instead and bi is left unchanged. */
+{
+if (hashLookup(bi->samplesHash, sa->name) == NULL)
+    {
+    slAddHead(&bi->samples, sa);
+    hashAdd(bi->samplesHash, sa->name, sa);
+    }
+else
+    {
+    /* duplicate name: drop the newcomer */
+    samplesFree(&sa);
+    }
+}
+
+void loadAllSamples(struct sqlConnection *conn, struct biOmics *bi)
+/* Query the samples table for every row whose dataset_id equals the id of
+ * bi->dataset, ordered by id, and pass each loaded row to addSample(). */
+{
+char sql[128];
+safef(sql, sizeof(sql),
+      "select * from samples where dataset_id = %d order by id;",
+      bi->dataset->id);
+
+struct sqlResult *res = sqlGetResult(conn, sql);
+char **fields;
+for (fields = sqlNextRow(res); fields != NULL; fields = sqlNextRow(res))
+    addSample(bi, samplesLoad(fields));
+sqlFreeResult(&res);
+}
+
+void loadSamplesInList(struct sqlConnection *conn, struct biOmics *bi,
+                       struct slName *sampleList)
+/* Load the samples of bi->dataset whose names appear in sampleList and add
+ * each one to bi via addSample().  Does nothing when sampleList is empty.
+ * NOTE: names are pasted into the SQL text unescaped, so sampleList has to
+ * come from a trusted source. */
+{
+if (!sampleList)
+    return;
+
+struct slName *sl;
+
+struct datasets *da = bi->dataset;
+struct dyString *dy = newDyString(100);
+dyStringPrintf(dy, "select * from samples where name in (");
+for (sl = sampleList; sl; sl = sl->next)
+    {
+    dyStringPrintf(dy, "'%s'", sl->name);
+    if (sl->next)
+        dyStringPrintf(dy, ",");
+    }
+dyStringPrintf(dy, ") and dataset_id = %d order by id;", da->id);
+char *query = dyStringCannibalize(&dy);
+
+struct sqlResult *sr = sqlGetResult(conn, query);
+char **row = NULL;
+while ((row = sqlNextRow(sr)) != NULL)
+    {
+    struct samples *sa = samplesLoad(row);
+    addSample(bi, sa);
+    }
+sqlFreeResult(&sr);
+
+/* The cannibalized string belongs to us and must be released. */
+freeMem(query);
+}
+
+void loadSamplesMatchingFeatureVals(struct sqlConnection *conn, struct biOmics *bi,
+                                    struct slName *featureValList)
+/* Load the samples of bi->dataset that satisfy the feature-value tests in
+ * featureValList and add them to bi via addSample().  Each list entry is a
+ * space-separated triple "<feature> <operator> <value>", e.g. "B <= 5".
+ * Entries that do not split into exactly three words are reported on stderr
+ * and skipped.  Does nothing when featureValList is empty.
+ * All tests are ANDed onto the same joined row (see the TODO below).
+ * NOTE: feature name, operator and value are pasted into the SQL text
+ * unescaped, so the list has to come from a trusted source. */
+{
+if (!featureValList)
+    return;
+
+//TODO: Inner JOIN!
+//select * from clinicalData p1 join clinicalData p2 on p1.sample_id = p2.sample_id and p1.code = 'Positive' and p1.feature_id = 0 and p2.code = 'Positive' and p2.feature_id = 1;
+
+struct datasets *da = bi->dataset;
+struct dyString *dy = newDyString(100);
+dyStringPrintf(dy,
+               "select * from samples "
+               "join clinicalData on samples.id = clinicalData.sample_id "
+               "join features on clinicalData.feature_id = features.id "
+               "where samples.dataset_id = %d ",
+               da->id);
+
+struct slName *sl, *fv, *slList = NULL;
+for (fv = featureValList; fv; fv = fv->next)
+    {
+    addDelimStringToList(&slList, fv->name, ' ');
+    if (slCount(slList) != 3)
+        {
+        fprintf(stderr, "Improperly formatted feature-value pair: %s\n", fv->name);
+        slNameFreeList(&slList);
+        slList = NULL;
+        continue;
+        }
+    sl = slList;
+    char *name = sl->name;
+    sl = sl->next;
+    char *operation = sl->name;
+    sl = sl->next;
+    char *val = sl->name;
+
+    dyStringPrintf(dy, " and features.name = '%s'", name);
+    dyStringPrintf(dy, " and clinicalData.val %s %s", operation, val);
+
+    slNameFreeList(&slList);
+    slList = NULL;
+    }
+
+dyStringPrintf(dy, " order by samples.id;");
+char *query = dyStringCannibalize(&dy);
+
+struct sqlResult *sr = sqlGetResult(conn, query);
+char **row = NULL;
+while ((row = sqlNextRow(sr)) != NULL)
+    {
+    struct samples *sa = samplesLoad(row);
+    addSample(bi, sa);
+    }
+sqlFreeResult(&sr);
+
+/* The cannibalized string belongs to us and must be released. */
+freeMem(query);
+}
+
+void loadSamplesMatchingFeatureCodes(struct sqlConnection *conn, struct biOmics *bi,
+                                     struct slName *featureCodeList)
+/* Load the samples of bi->dataset that satisfy at least one of the
+ * feature-code tests in featureCodeList and add them to bi via addSample().
+ * Each list entry is a space-separated triple "<feature> <operator> <code>",
+ * e.g. "ER = Positive".  A code of "NULL" is turned into an "is NULL" test
+ * and its operator is ignored.  Entries that do not split into exactly three
+ * words are reported on stderr and skipped; if no entry is usable, nothing is
+ * queried.  Does nothing when featureCodeList is empty.
+ * NOTE: feature name, operator and code are pasted into the SQL text
+ * unescaped, so the list has to come from a trusted source. */
+{
+if (!featureCodeList)
+    return;
+
+//TODO: Inner JOIN!
+//select * from clinicalData p1 join clinicalData p2 on p1.sample_id = p2.sample_id and p1.code = 'Positive' and p1.feature_id = 0 and p2.code = 'Positive' and p2.feature_id = 1;
+
+
+struct datasets *da = bi->dataset;
+struct dyString *dy = newDyString(100);
+dyStringPrintf(dy,
+               "select * from samples "
+               "join clinicalData on samples.id = clinicalData.sample_id "
+               "join features on clinicalData.feature_id = features.id "
+               "where samples.dataset_id = %d and (",
+               da->id);
+
+int clauseCount = 0;
+struct slName *sl, *fc, *slList = NULL;
+for (fc = featureCodeList; fc; fc = fc->next)
+    {
+    addDelimStringToList(&slList, fc->name, ' ');
+    if (slCount(slList) != 3)
+        {
+        fprintf(stderr, "Improperly formatted feature-code pair: %s\n", fc->name);
+        slNameFreeList(&slList);
+        slList = NULL;
+        continue;
+        }
+    sl = slList;
+    char *name = sl->name;
+    sl = sl->next;
+    char *operation = sl->name;
+    sl = sl->next;
+    char *code = sl->name;
+
+    /* Emit the separator in front of every clause after the first, so that
+     * skipped entries can never leave a dangling " or" in the query. */
+    if (clauseCount > 0)
+        dyStringPrintf(dy, " or");
+    dyStringPrintf(dy, " (features.name = '%s'", name);
+    if (sameString(code, "NULL"))
+        dyStringPrintf(dy, " and clinicalData.code is NULL)");
+    else
+        dyStringPrintf(dy, " and clinicalData.code %s '%s')", operation, code);
+    clauseCount++;
+
+    slNameFreeList(&slList);
+    slList = NULL;
+    }
+
+if (clauseCount == 0)
+    {
+    /* Every entry was malformed; "and ()" would be invalid SQL. */
+    freeDyString(&dy);
+    return;
+    }
+
+dyStringPrintf(dy, ") order by samples.id;");
+char *query = dyStringCannibalize(&dy);
+
+struct sqlResult *sr = sqlGetResult(conn, query);
+char **row = NULL;
+while ((row = sqlNextRow(sr)) != NULL)
+    {
+    struct samples *sa = samplesLoad(row);
+    addSample(bi, sa);
+    }
+sqlFreeResult(&sr);
+
+/* The cannibalized string belongs to us and must be released. */
+freeMem(query);
+}
+
+void loadSamples(struct sqlConnection *conn, struct biOmics *bi, struct biQuery *bq)
+/* Fill bi with samples according to bq.  With no sample list, no feature-value
+ * list and no feature-code list in bq, every sample of the dataset is loaded.
+ * Otherwise the three restricted loaders run in turn; each one is a no-op for
+ * an empty list. */
+{
+if (bq->sampleList || bq->featureValList || bq->featureCodeList)
+    {
+    /* Samples named explicitly */
+    loadSamplesInList(conn, bi, bq->sampleList);
+
+    /* Samples matching feature-value pairs, e.g. B <= 5,C > 6,... */
+    loadSamplesMatchingFeatureVals(conn, bi, bq->featureValList);
+
+    /* Samples matching feature-code pairs, e.g. ER = Positive */
+    loadSamplesMatchingFeatureCodes(conn, bi, bq->featureCodeList);
+    }
+else
+    {
+    fprintf(stderr, "Loading ALL samples from dataset.\n");
+    loadAllSamples(conn, bi);
+    }
+}
+
+void addProbeInfo(struct biOmics *bi, struct probeInfo *pi)
+/* Register pi with bi.  A probe whose name is already in bi->probeInfoHash is
+ * freed and ignored.  Otherwise pi goes on bi->probeInfo and into
+ * bi->probeInfoHash (keyed by name), and an slPair named after the probe is
+ * put on bi->probes and into bi->probesHash, keyed by the probe's numeric id
+ * written as a decimal string.  The pair's val starts out NULL. */
+{
+if (hashLookup(bi->probeInfoHash, pi->name) != NULL)
+    {
+    /* already known */
+    probeInfoFree(&pi);
+    return;
+    }
+
+slAddHead(&bi->probeInfo, pi);
+hashAdd(bi->probeInfoHash, pi->name, pi);
+
+/* Slot that will hold this probe's data records */
+struct slPair *slot;
+AllocVar(slot);
+slot->name = cloneString(pi->name);
+slot->val = NULL;
+slAddHead(&bi->probes, slot);
+
+char idKey[128];
+safef(idKey, sizeof(idKey), "%d", pi->id);
+hashAdd(bi->probesHash, idKey, slot);
+}
+
+void loadPathways(struct sqlConnection *conn, struct biOmics *bi, struct slName *pathwayList)
+/* Load probe info for every probe linked, through the dataset's probe-to-gene
+ * table and pathwayGenes, to one of the pathways named in pathwayList, and
+ * add each to bi via addProbeInfo().  Does nothing when pathwayList is empty
+ * or the dataset lacks a probe table or probe-to-gene table.
+ * NOTE: pathway names are pasted into the SQL text unescaped, so the list
+ * has to come from a trusted source. */
+{
+if (!pathwayList)
+    return;
+
+struct slName *sl;
+struct datasets *da = bi->dataset;
+char *pInfo = da->probe_table;
+char *p2g = da->probe_to_gene_table;
+
+if (!pInfo || !p2g)
+    return;
+
+struct dyString *dy = newDyString(100);
+dyStringPrintf(dy,
+               "select %s.* from %s join %s on %s.probe_id = %s.id "
+               "join pathwayGenes on pathwayGenes.gene_id = %s.gene_id "
+               "join pathways on pathways.id = pathwayGenes.id "
+               "where pathways.name in (",
+               pInfo, pInfo, p2g, p2g, pInfo, p2g);
+
+for (sl = pathwayList; sl; sl = sl->next)
+    {
+    dyStringPrintf(dy, "'%s'", sl->name);
+    if (sl->next)
+        dyStringPrintf(dy, ",");
+    }
+dyStringPrintf(dy, ");");
+char *query = dyStringCannibalize(&dy);
+
+struct sqlResult *sr = sqlGetResult(conn, query);
+char **row = NULL;
+while ((row = sqlNextRow(sr)) != NULL)
+    {
+    struct probeInfo *pi = probeInfoLoad(row);
+    addProbeInfo(bi, pi);
+    }
+sqlFreeResult(&sr);
+
+/* The cannibalized string belongs to us and must be released. */
+freeMem(query);
+}
+
+void loadGenes(struct sqlConnection *conn, struct biOmics *bi, struct slName *geneList)
+/* Load probe info for every probe linked, through the dataset's probe-to-gene
+ * table and geneLookup, to one of the kgIds in geneList, and add each to bi
+ * via addProbeInfo().  Does nothing when geneList is empty or the dataset
+ * lacks a probe table or probe-to-gene table.
+ * NOTE: gene ids are pasted into the SQL text unescaped, so the list has to
+ * come from a trusted source. */
+{
+if (!geneList)
+    return;
+
+struct slName *sl;
+struct datasets *da = bi->dataset;
+char *pInfo = da->probe_table;
+char *p2g = da->probe_to_gene_table;
+
+if (!pInfo || !p2g)
+    return;
+
+struct dyString *dy = newDyString(100);
+dyStringPrintf(dy,
+               "select * from %s join %s on %s.probe_id = %s.id "
+               "join geneLookup on geneLookup.id = %s.gene_id where kgId in (",
+               pInfo, p2g, p2g, pInfo, p2g);
+
+for (sl = geneList; sl; sl = sl->next)
+    {
+    dyStringPrintf(dy, "'%s'", sl->name);
+    if (sl->next)
+        dyStringPrintf(dy, ",");
+    }
+dyStringPrintf(dy, ");");
+char *query = dyStringCannibalize(&dy);
+
+struct sqlResult *sr = sqlGetResult(conn, query);
+char **row = NULL;
+while ((row = sqlNextRow(sr)) != NULL)
+    {
+    struct probeInfo *pi = probeInfoLoad(row);
+    addProbeInfo(bi, pi);
+    }
+sqlFreeResult(&sr);
+
+/* The cannibalized string belongs to us and must be released. */
+freeMem(query);
+}
+
+void loadProbeInfo(struct sqlConnection *conn, struct biOmics *bi,
+                   struct slName *probeList, boolean allData)
+/* Load rows from the dataset's probe table and add each to bi via
+ * addProbeInfo().  With a probeList only the probes named in it are loaded;
+ * with no probeList and allData set, the whole table is loaded; with neither,
+ * nothing happens.
+ * NOTE: probe names are pasted into the SQL text unescaped, so the list has
+ * to come from a trusted source. */
+{
+if (!probeList && !allData)
+    return;
+
+struct datasets *da = bi->dataset;
+
+struct slName *sl;
+struct dyString *dy = newDyString(100);
+dyStringPrintf(dy, "select * from %s ", da->probe_table);
+
+if (probeList)
+    {
+    dyStringPrintf(dy, "where name in (");
+    for (sl = probeList; sl; sl = sl->next)
+        {
+        dyStringPrintf(dy, "'%s'", sl->name);
+        if (sl->next)
+            dyStringPrintf(dy, ",");
+        }
+    dyStringPrintf(dy, ");");
+    }
+
+char *query = dyStringCannibalize(&dy);
+
+struct sqlResult *sr = sqlGetResult(conn, query);
+char **row = NULL;
+while ((row = sqlNextRow(sr)) != NULL)
+    {
+    struct probeInfo *pi = probeInfoLoad(row);
+    addProbeInfo(bi, pi);
+    }
+sqlFreeResult(&sr);
+
+/* The cannibalized string belongs to us and must be released. */
+freeMem(query);
+}
+
+
+void loadProbeInfoRandom(struct sqlConnection *conn, struct biOmics *bi, int numProbes)
+/* Pick numProbes distinct random ids and load the matching rows of the
+ * dataset's probe table into bi via addProbeInfo().  Does nothing when
+ * numProbes <= 0.  Aborts when numProbes is not smaller than max(id) of the
+ * table, or when numProbes distinct ids could not be drawn within
+ * 2 * max(id) attempts.  Ids are drawn from 0 .. max(id)-1; a drawn id with
+ * no row in the table simply contributes nothing. */
+{
+if (numProbes <= 0)
+    return;
+
+struct datasets *da = bi->dataset;
+
+char q[256];
+safef(q, sizeof(q), "select max(id) from %s;", da->probe_table);
+int maxProbes = sqlQuickNum(conn, q);
+
+if (numProbes >= maxProbes)  // what's the point?!?
+    errAbort("Number of random probes greater than probes in table!.");
+
+/* Draw ids until enough distinct ones are collected; the hash keyed by the
+ * decimal id string is what detects repeats. */
+int i = 0, total = 0;
+char rStr[128];
+struct hash *hash = hashNew(0);
+while (i < numProbes && total < maxProbes * 2)
+    {
+    total++;
+    int r = random() % maxProbes;
+    safef(rStr, sizeof(rStr), "%d", r);
+    if (hashLookup(hash, rStr))
+        continue;
+    hashAddInt(hash, rStr, 1);
+    i++;
+    }
+struct slInt *si, *siList = NULL;
+struct hashCookie cookie = hashFirst(hash);
+char *name;
+while ((name = hashNextName(&cookie)) != NULL)
+    {
+    si = slIntNew(atoi(name));
+    slAddHead(&siList, si);
+    }
+slSort(&siList, slIntCmp);
+hashFree(&hash);
+
+if (slCount(siList) != numProbes)
+    errAbort("Random list not correct length %d != %d.", slCount(siList), numProbes);
+
+struct dyString *dy = newDyString(100);
+dyStringPrintf(dy, "select * from %s where id in (", da->probe_table);
+for (si = siList; si; si = si->next)
+    {
+    dyStringPrintf(dy, "%d", si->val);
+    if (si->next)
+        dyStringPrintf(dy, ",");
+    }
+dyStringPrintf(dy, ");");
+
+char *query = dyStringCannibalize(&dy);
+slFreeList(&siList);
+
+struct sqlResult *sr = sqlGetResult(conn, query);
+char **row = NULL;
+while ((row = sqlNextRow(sr)) != NULL)
+    {
+    struct probeInfo *pi = probeInfoLoad(row);
+    addProbeInfo(bi, pi);
+    }
+sqlFreeResult(&sr);
+
+/* The cannibalized string belongs to us and must be released. */
+freeMem(query);
+}
+
+void loadProbeSampleValData(struct sqlConnection *conn, struct biOmics *bi, boolean allData)
+/* Load probeSampleVal rows from the dataset's data table, restricted to the
+ * probes in bi->probeInfo and the samples in bi->samples when those lists are
+ * non-empty.  Each row is attached to the slPair registered in
+ * bi->probesHash under the row's first column (the probe id); rows for
+ * unregistered probes are skipped.
+ * boolean allData is unused in this function. */
+{
+struct datasets *da = bi->dataset;
+
+struct dyString *dy = newDyString(100);
+dyStringPrintf(dy, "select * from %s", da->data_table);
+
+if (bi->probeInfo)
+    {
+    struct probeInfo *pi;
+    dyStringPrintf(dy, " where probe_id in (");
+    for (pi = bi->probeInfo; pi; pi = pi->next)
+        {
+        dyStringPrintf(dy, "%d", pi->id);
+        if (pi->next)
+            dyStringPrintf(dy, ",");
+        }
+    dyStringPrintf(dy, ")");
+    }
+
+if (bi->samples)
+    {
+    /* Second restriction joins the first with "and", otherwise opens the
+     * where clause itself. */
+    if (bi->probeInfo)
+        dyStringPrintf(dy, " and");
+    else
+        dyStringPrintf(dy, " where");
+
+    struct samples *sa;
+    dyStringPrintf(dy, " sample_id in (");
+    for (sa = bi->samples; sa; sa = sa->next)
+        {
+        dyStringPrintf(dy, "%d", sa->id);
+        if (sa->next)
+            dyStringPrintf(dy, ",");
+        }
+    dyStringPrintf(dy, ")");
+    }
+dyStringPrintf(dy, ";");
+
+char *query = dyStringCannibalize(&dy);
+struct sqlResult *sr = sqlGetResult(conn, query);
+char **row = NULL;
+
+int count = 0;
+struct slPair *sp;
+struct probeSampleVal *pv;
+while ((row = sqlNextRow(sr)) != NULL)
+    {
+    char *probe_id = row[0];
+    struct hashEl *el = hashLookup(bi->probesHash, probe_id);
+    if (!el)
+        continue;
+    sp = el->val;
+    pv = probeSampleValLoad(row);
+    slAddHead(&sp->val, pv);
+    count++;
+    }
+if (DEBUG)
+    fprintf(stdout, "found %d probes\n", count);
+
+sqlFreeResult(&sr);
+
+/* The cannibalized string belongs to us and must be released. */
+freeMem(query);
+}
+
+void loadProbeValsData(struct sqlConnection *conn, struct biOmics *bi, boolean allData)
+/* Load probeVals rows from the dataset's data table.  With allData set the
+ * whole table is read; otherwise only rows for the probes in bi->probeInfo
+ * are read, and nothing happens if that list is empty.  Each row is attached
+ * to the slPair registered in bi->probesHash under the row's first column
+ * (the probe id); rows for unregistered probes are skipped. */
+{
+if (!bi->probeInfo && !allData)
+    return;
+struct datasets *da = bi->dataset;
+
+struct probeInfo *pi;
+struct dyString *dy = newDyString(100);
+dyStringPrintf(dy, "select * from %s ", da->data_table);
+
+if (bi->probeInfo && !allData)
+    {
+    dyStringPrintf(dy, "where probe_id in (");
+    for (pi = bi->probeInfo; pi; pi = pi->next)
+        {
+        dyStringPrintf(dy, "%d", pi->id);
+        if (pi->next)
+            dyStringPrintf(dy, ",");
+        }
+    dyStringPrintf(dy, ")");
+    }
+char *query = dyStringCannibalize(&dy);
+
+struct sqlResult *sr = sqlGetResult(conn, query);
+char **row = NULL;
+
+int count = 0;
+struct slPair *sp;
+struct probeVals *pv;
+while ((row = sqlNextRow(sr)) != NULL)
+    {
+    char *probe_id = row[0];
+    struct hashEl *el = hashLookup(bi->probesHash, probe_id);
+    if (!el)
+        continue;
+    sp = el->val;
+    pv = probeValsLoad(row);
+    count += pv->sample_count;
+    slAddHead(&sp->val, pv);
+    }
+if (DEBUG)
+    fprintf(stdout, "found %d probes\n", count);
+
+sqlFreeResult(&sr);
+
+/* The cannibalized string belongs to us and must be released. */
+freeMem(query);
+}
+
+struct biData *probeValsForProbe(struct biOmics *bi, char *probe)
+/* Collect the values of the named probe for every sample in bi, in
+ * bi->samples order, from each probeVals record attached to the probe.  The
+ * values are also hashed by sample name.  Returns NULL if the probe is not
+ * registered in bi.  Aborts if a sample's exp_id falls outside a record's
+ * sample_data array, as probeValsForSample() does. */
+{
+struct hashEl *el = hashLookup(bi->probeInfoHash, probe);
+if (!el)
+    return NULL;
+struct probeInfo *pi = el->val;
+int probeId = pi->id;
+
+/* bi->probesHash is keyed by the probe id written as a decimal string. */
+char pStr[128];
+safef(pStr, sizeof(pStr), "%d", probeId);
+el = hashLookup(bi->probesHash, pStr);
+if (!el)
+    return NULL;
+struct slPair *sp = el->val;
+
+struct biData *bd = biDataNew(probe);
+
+struct samples *sa;
+struct probeVals *pv;
+struct slDouble *sd;
+for (pv = sp->val; pv; pv = pv->next)
+    {
+    for (sa = bi->samples; sa; sa = sa->next)
+        {
+        /* Never index past the end of the record's value array. */
+        if (sa->exp_id < 0 || sa->exp_id >= pv->sample_count)
+            errAbort("exp_id of sample %s is outside the data array of probe %s.",
+                     sa->name, probe);
+        sd = slDoubleNew(pv->sample_data[sa->exp_id]);
+        slAddHead(&bd->data, sd);
+        hashAdd(bd->hash, sa->name, sd);
+        }
+    }
+slReverse(&bd->data);
+return bd;
+}
+
+struct biData *probeValsForSample(struct biOmics *bi, char *sample)
+/* Collect, for the named sample, one value from every probeVals record of
+ * every probe in bi.  Values are listed in bi->probes order and also hashed
+ * by probe name.  Returns NULL if the sample is unknown to bi; aborts if the
+ * sample's exp_id is not below a record's sample_count. */
+{
+struct hashEl *hit = hashLookup(bi->samplesHash, sample);
+if (hit == NULL)
+    return NULL;
+struct samples *found = hit->val;
+int col = found->exp_id;
+
+struct biData *out = biDataNew(NULL);
+struct slPair *probe;
+for (probe = bi->probes; probe != NULL; probe = probe->next)
+    {
+    struct probeVals *rec = probe->val;
+    while (rec != NULL)
+        {
+        if (col >= rec->sample_count)
+            errAbort("expId is greater than bd->vals[] array length.");
+        struct slDouble *cell = slDoubleNew(rec->sample_data[col]);
+        slAddHead(&out->data, cell);
+        hashAdd(out->hash, probe->name, cell);
+        rec = rec->next;
+        }
+    }
+slReverse(&out->data);
+return out;
+}
+
+double probeValsForProbeSample(struct biOmics *bi,
+                               char *probe, char *sample)
+/* Return the value of the named probe for the named sample, taken from the
+ * first probeVals record attached to the probe.  Returns DBL_NULL if the
+ * probe or the sample is unknown to bi, or if no data record has been loaded
+ * for the probe.  Aborts if the sample's exp_id falls outside the record's
+ * sample_data array, as probeValsForSample() does. */
+{
+struct hashEl *el = hashLookup(bi->probeInfoHash, probe);
+if (!el)
+    return DBL_NULL;
+struct probeInfo *pi = el->val;
+int probeId = pi->id;
+
+/* bi->probesHash is keyed by the probe id written as a decimal string. */
+char pStr[128];
+safef(pStr, sizeof(pStr), "%d", probeId);
+el = hashLookup(bi->probesHash, pStr);
+if (!el)
+    return DBL_NULL;
+struct slPair *sp = el->val;
+
+el = hashLookup(bi->samplesHash, sample);
+if (!el)
+    return DBL_NULL;
+struct samples *sa = el->val;
+int expId = sa->exp_id;
+
+/* A registered probe keeps val == NULL until a data row is loaded for it. */
+struct probeVals *pv = sp->val;
+if (!pv)
+    return DBL_NULL;
+if (expId < 0 || expId >= pv->sample_count)
+    errAbort("exp_id of sample %s is outside the data array of probe %s.",
+             sample, probe);
+return pv->sample_data[expId];
+}
+
+boolean calcMedianMAD(struct biOmics *bi, double *median, double *mad)
+/* Compute the median of all probe/sample values in bi together with a spread
+ * estimate, and store them in *median and *mad.  Only the first probeVals
+ * record of each probe is used; probes with no loaded record are skipped.
+ * Despite its name, *mad is not a median absolute deviation: it is one third
+ * of the larger distance from the median to the values found at the 0.15%
+ * and 99.85% positions of the sorted data (a 3-sigma style cut-off).
+ * Returns FALSE, leaving the outputs untouched, when fewer than two values
+ * are available; TRUE otherwise. */
+{
+int count = 0;
+
+struct samples *sa;
+struct slPair *sp;
+struct probeVals *pv;
+struct slDouble *sd, *sdList = NULL;
+for (sp = bi->probes; sp; sp = sp->next)
+    {
+    pv = sp->val;
+    if (!pv)
+        continue;   /* probe registered but no data row loaded for it */
+    for (sa = bi->samples; sa; sa = sa->next)
+        {
+        sd = slDoubleNew(pv->sample_data[sa->exp_id]);
+        slAddHead(&sdList, sd);
+        count++;
+        }
+    }
+
+if (count <= 1)
+    {
+    slFreeList(&sdList);
+    return FALSE;
+    }
+double medianTmp = slDoubleMedian(sdList);
+
+/* Manually calculate approx std dev according to 99.7% cut-off (3 sigma).
+ * slSort needs the address of the list head so it can store the new head. */
+slSort(&sdList, slDoubleCmp);
+int low = round(count * (0.0015));
+int high = round(count * (1.0 - 0.0015));
+
+/* For small counts the upper position rounds up to count itself, which is
+ * one past the last element; keep both positions inside the list. */
+if (high > count - 1)
+    high = count - 1;
+if (low > high)
+    low = high;
+
+sd = slElementFromIx(sdList, low);
+double lowVal = sd->val;
+
+sd = slElementFromIx(sdList, high);
+double highVal = sd->val;
+
+*median = medianTmp;
+*mad = max(fabs(lowVal - medianTmp), fabs(highVal - medianTmp))/3.0;
+
+slFreeList(&sdList);
+return TRUE;
+}
+
+///* Calculate median absolute deviation for a more
+// * robust measure than standard deviation */
+//for (sd = sdList; sd; sd = sd->next)
+// sd->val = fabs(sd->val - medianTmp);
+
+//*median = medianTmp;
+//*mad = slDoubleMedian(sdList);
+//slFreeList(&sdList);
+//return TRUE;
+//}
+
+
+void probeValsConvertToLogP(struct biOmics *bi)
+/* Transform all data in place to a signed -log10(p-value), based on a normal
+ * z-score transform around the median.  The magnitude is capped at 88 and the
+ * sign follows the sign of the z-score.  Only the first probeVals record of
+ * each probe is converted; probes with no loaded record are skipped.  Nothing
+ * is changed when calcMedianMAD() fails or the estimated deviation is zero. */
+{
+double median, mad;
+if (!calcMedianMAD(bi, &median, &mad))
+    return;
+
+/* To estimate std deviation from mad, use std = 1.43 * mad
+ * Brideau et al. J Biomol Screen, 2003 */
+double std = mad * 1.43;
+
+fprintf(stderr, "median = %f, mad = %f, std = %f\n", median, mad, std);
+if (std == 0.0)
+    return;
+
+double maxLogP = 88.0;  /* max, in case p-value comes back zero */
+struct samples *sa;
+struct slPair *sp;
+struct probeVals *pv;
+double z, p;
+double val;
+for (sp = bi->probes; sp; sp = sp->next)
+    {
+    pv = sp->val;
+    if (!pv)
+        continue;   /* probe registered but no data row loaded for it */
+    for (sa = bi->samples; sa; sa = sa->next)
+        {
+        z = (pv->sample_data[sa->exp_id] - median)/std;
+        p = ndtr(-1.0*fabs(z));
+        if (p > 0)
+            val = min(-log(p)/log(10.0), maxLogP);
+        else
+            val = maxLogP;
+
+        if (z < 0.0)
+            val = -1.0*val;  // signed log(p-value)
+        pv->sample_data[sa->exp_id] = val;
+        }
+    }
+}
+
+void probeValsConvertToZscores(struct biOmics *bi)
+/* Transform all data in place to median-centered z-scores.  Only the first
+ * probeVals record of each probe is converted; probes with no loaded record
+ * are skipped.  Nothing is changed when calcMedianMAD() fails or the
+ * estimated deviation is zero. */
+{
+double median, mad;
+if (!calcMedianMAD(bi, &median, &mad))
+    return;
+
+/* To estimate std deviation from mad, use std = 1.43 * mad
+ * Brideau et al. J Biomol Screen, 2003 */
+double std = mad * 1.43;
+fprintf(stderr, "median = %f, mad = %f, std = %f\n", median, mad, std);
+if (std == 0.0)
+    return;
+
+struct samples *sa;
+struct slPair *sp;
+struct probeVals *pv;
+double z;
+for (sp = bi->probes; sp; sp = sp->next)
+    {
+    pv = sp->val;
+    if (!pv)
+        continue;   /* probe registered but no data row loaded for it */
+    for (sa = bi->samples; sa; sa = sa->next)
+        {
+        z = (pv->sample_data[sa->exp_id] - median)/std;
+        pv->sample_data[sa->exp_id] = z;
+        }
+    }
+}
+
+void probeSampleValConvertToLogP(struct biOmics *bi)
+/* Placeholder for the probeSampleVal format: no conversion is performed and
+ * bi is left untouched. */
+{
+}
+
+void probeSampleValConvertToZscores(struct biOmics *bi)
+/* Placeholder for the probeSampleVal format: no conversion is performed and
+ * bi is left untouched. */
+{
+}
+
+struct biData *probeSampleValForProbe(struct biOmics *bi, char *probe)
+/* Collect the values of the named probe across samples.  For each
+ * probeSampleVal record attached to the probe, the first sample in
+ * bi->samples whose exp_id equals the record's sample_id receives the value;
+ * records with no matching sample are dropped.  Values are listed in record
+ * order and also hashed by sample name.  Returns NULL if the probe is not
+ * registered in bi. */
+{
+struct hashEl *el = hashLookup(bi->probeInfoHash, probe);
+if (!el)
+    return NULL;
+struct probeInfo *pi = el->val;
+int probeId = pi->id;
+
+/* bi->probesHash is keyed by the probe id written as a decimal string. */
+char pStr[128];
+safef(pStr, sizeof(pStr), "%d", probeId);
+el = hashLookup(bi->probesHash, pStr);
+if (!el)
+    return NULL;
+struct slPair *sp = el->val;
+
+struct biData *bd = biDataNew(probe);
+
+struct samples *sa;
+struct probeSampleVal *pv;
+struct slDouble *sd;
+for (pv = sp->val; pv; pv = pv->next)
+    {
+    for (sa = bi->samples; sa; sa = sa->next)
+        {
+        /* Written as "!a == b" this would negate exp_id before comparing;
+         * the intended test is plain inequality. */
+        if (sa->exp_id != pv->sample_id)
+            continue;
+
+        sd = slDoubleNew(pv->val);
+        slAddHead(&bd->data, sd);
+        hashAdd(bd->hash, sa->name, sd);
+        break;
+        }
+    }
+slReverse(&bd->data);
+return bd;
+}
+
+struct biData *probeSampleValForSample(struct biOmics *bi, char *sample)
+/* Collect, for the named sample, the value of every probeSampleVal record in
+ * bi whose sample_id equals the sample's exp_id.  Values are listed in
+ * bi->probes order and also hashed by probe name.  Returns NULL if the sample
+ * is unknown to bi. */
+{
+struct hashEl *hit = hashLookup(bi->samplesHash, sample);
+if (hit == NULL)
+    return NULL;
+struct samples *found = hit->val;
+int wanted = found->exp_id;
+
+struct biData *out = biDataNew(sample);
+
+struct slPair *probe;
+for (probe = bi->probes; probe != NULL; probe = probe->next)
+    {
+    struct probeSampleVal *rec = probe->val;
+    while (rec != NULL)
+        {
+        if (rec->sample_id == wanted)
+            {
+            struct slDouble *cell = slDoubleNew(rec->val);
+            slAddHead(&out->data, cell);
+            hashAdd(out->hash, probe->name, cell);
+            }
+        rec = rec->next;
+        }
+    }
+
+slReverse(&out->data);
+return out;
+}
+
+double probeSampleValForProbeSample(struct biOmics *bi,
+                                    char *probe, char *sample)
+/* Return the value of the first probeSampleVal record of the named probe
+ * whose sample_id equals the named sample's exp_id.  Returns DBL_NULL if the
+ * probe or sample is unknown to bi or no such record exists. */
+{
+struct hashEl *hit = hashLookup(bi->probeInfoHash, probe);
+if (hit == NULL)
+    return DBL_NULL;
+struct probeInfo *info = hit->val;
+
+/* bi->probesHash is keyed by the probe id written as a decimal string. */
+char idKey[128];
+safef(idKey, sizeof(idKey), "%d", info->id);
+hit = hashLookup(bi->probesHash, idKey);
+if (hit == NULL)
+    return DBL_NULL;
+struct slPair *slot = hit->val;
+
+hit = hashLookup(bi->samplesHash, sample);
+if (hit == NULL)
+    return DBL_NULL;
+struct samples *found = hit->val;
+int wanted = found->exp_id;
+
+struct probeSampleVal *rec = slot->val;
+while (rec != NULL)
+    {
+    if (rec->sample_id == wanted)
+        return rec->val;
+    rec = rec->next;
+    }
+
+return DBL_NULL;
+}
+
+void setDataType(struct sqlConnection *conn, struct biOmics *bi)
+/* Look up the dataTypes row for the type_id of bi->dataset, copy its name
+ * into bi->type and install the set of data handlers matching its format
+ * ("probeVals" or "probeSampleVal").  Aborts if the row is missing or the
+ * format is neither of the two. */
+{
+int typeId = bi->dataset->type_id;
+
+char sql[128];
+safef(sql, sizeof(sql),
+      "select * from dataTypes where id = %d",
+      typeId);
+
+struct dataTypes *dt = dataTypesLoadByQuery(conn, sql);
+if (dt == NULL)
+    errAbort("Datatype with id = %d not found in database", typeId);
+
+bi->type = cloneString(dt->name);
+
+if (sameString(dt->format, "probeSampleVal"))
+    {
+    bi->toLogP             = probeSampleValConvertToLogP;
+    bi->toZscores          = probeSampleValConvertToZscores;
+    bi->dataForProbeSample = probeSampleValForProbeSample;
+    bi->dataForSample      = probeSampleValForSample;
+    bi->dataForProbe       = probeSampleValForProbe;
+    bi->freeData           = slPairSampleValFreeList;
+    bi->loadData           = loadProbeSampleValData;
+    }
+else if (sameString(dt->format, "probeVals"))
+    {
+    bi->toLogP             = probeValsConvertToLogP;
+    bi->toZscores          = probeValsConvertToZscores;
+    bi->dataForProbeSample = probeValsForProbeSample;
+    bi->dataForSample      = probeValsForSample;
+    bi->dataForProbe       = probeValsForProbe;
+    bi->freeData           = slPairValsFreeList;
+    bi->loadData           = loadProbeValsData;
+    }
+else
+    errAbort("Unrecognized datatype");
+
+dataTypesFreeList(&dt);
+}
+
+void loadDataset(struct sqlConnection *conn, struct biOmics *bi)
+/* Load into bi->dataset the datasets row whose data_table equals bi->name
+ * and then set up bi's data type.  Does nothing when bi->name is NULL;
+ * aborts when no such row exists. */
+{
+if (bi->name == NULL)
+    return;
+
+/* 'limit 1' guarantees that at most one dataset is loaded */
+char sql[256];
+safef(sql, sizeof(sql),
+      "select * from datasets where data_table = '%s' limit 1;",
+      bi->name);
+
+bi->dataset = datasetsLoadByQuery(conn, sql);
+if (bi->dataset == NULL)
+    errAbort("No datasets named %s found in database.", bi->name);
+
+/* Handlers depend on the dataset's type */
+setDataType(conn, bi);
+}
+
+/* End database functions */
+
+struct biOmics *biOmicsMatchDataset(struct biOmics *biList, char *name)
+/* Return the first element of biList whose name equals name, or NULL when
+ * name is NULL or no element matches. */
+{
+if (name == NULL)
+    return NULL;
+
+struct biOmics *cur = biList;
+while (cur != NULL && !sameString(cur->name, name))
+    cur = cur->next;
+return cur;
+}
+
+int biOmicsPopulateAll(struct biOmics *bi, struct biQuery *bq)
+/* Populate bi with its dataset record, the samples selected by bq, all probe
+ * info and all data, using a connection opened on bi->db for the duration of
+ * the call.  Always returns 0. */
+{
+struct sqlConnection *conn = hAllocConnProfile(heatMapDbProfile, bi->db);
+
+loadDataset(conn, bi);
+loadSamples(conn, bi, bq);
+loadProbeInfo(conn, bi, NULL, TRUE);   /* no probe list: take every probe */
+bi->loadData(conn, bi, TRUE);
+
+hFreeConn(&conn);
+return 0;
+}
+
+int biOmicsPopulate(struct biOmics *bi, struct biQuery *bq)
+/* Populate bi with its dataset record, then the samples, pathway probes, gene
+ * probes and named probes selected by bq, and finally the data for them.
+ * Uses a connection opened on bi->db for the duration of the call and, when
+ * DEBUG is set, reports timing after each step.  Always returns 0. */
+{
+struct sqlConnection *conn = hAllocConnProfile(heatMapDbProfile, bi->db);
+
+if (DEBUG)
+    uglyTime(NULL);
+
+loadDataset(conn, bi);
+if (DEBUG)
+    uglyTime("Load Dataset");
+
+loadSamples(conn, bi, bq);
+if (DEBUG)
+    uglyTime("Load Samples");
+
+loadPathways(conn, bi, bq->pathwayList);
+if (DEBUG)
+    uglyTime("Load Pathways");
+
+loadGenes(conn, bi, bq->geneList);
+if (DEBUG)
+    uglyTime("Load Genes");
+
+loadProbeInfo(conn, bi, bq->probeList, FALSE);
+if (DEBUG)
+    uglyTime("Load Probe Info");
+
+bi->loadData(conn, bi, FALSE);
+if (DEBUG)
+    uglyTime("Load Probe");
+
+hFreeConn(&conn);
+return 0;
+}
+
+int biOmicsPopulateRandom(struct biOmics *bi, struct biQuery *bq, int numProbes)
+/* Populate bi with its dataset record, the samples selected by bq, numProbes
+ * randomly chosen probes and the data for them.  Uses a connection opened on
+ * bi->db for the duration of the call and, when DEBUG is set, reports timing
+ * after each step.  Always returns 0. */
+{
+struct sqlConnection *conn = hAllocConnProfile(heatMapDbProfile, bi->db);
+
+if (DEBUG)
+    uglyTime(NULL);
+
+loadDataset(conn, bi);
+if (DEBUG)
+    uglyTime("Load Dataset");
+
+loadSamples(conn, bi, bq);
+if (DEBUG)
+    uglyTime("Load Samples");
+
+loadProbeInfoRandom(conn, bi, numProbes);
+if (DEBUG)
+    uglyTime("Load Random Probe Info");
+
+bi->loadData(conn, bi, FALSE);
+if (DEBUG)
+    uglyTime("Load Probe");
+
+hFreeConn(&conn);
+return 0;
+}
+
+struct slName *biOmicsGetSamples(struct biOmics *bi)
+/* Return the names of all samples in bi as an slName list, in the same order
+ * as bi->samples. */
+{
+struct slName *names = NULL;
+struct samples *cur = bi->samples;
+while (cur != NULL)
+    {
+    slNameAddHead(&names, cur->name);
+    cur = cur->next;
+    }
+
+/* head insertion built the list backwards */
+slReverse(&names);
+return names;
+}
+
+struct slName *biOmicsGetProbes(struct biOmics *bi)
+/* Return the names of all probes in bi as an slName list, in the same order
+ * as bi->probeInfo. */
+{
+struct slName *names = NULL;
+struct probeInfo *cur = bi->probeInfo;
+while (cur != NULL)
+    {
+    slNameAddHead(&names, cur->name);
+    cur = cur->next;
+    }
+
+/* head insertion built the list backwards */
+slReverse(&names);
+return names;
+}
+
+void populateAliases(struct biOmics *bi)
+/* Fill bi->geneAliases with one entry per distinct (gene symbol, probe name)
+ * pair found by joining the dataset's probe table, its probe-to-gene table,
+ * geneLookup and kgXref.  The key is the gene symbol and the value a cloned
+ * probe name, so one gene can have several entries.  Opens its own
+ * connection on bi->db. */
+{
+struct sqlConnection *conn = hAllocConnProfile(heatMapDbProfile, bi->db);
+char *probeTbl = bi->dataset->probe_table;
+char *p2gTbl = bi->dataset->probe_to_gene_table;
+
+fprintf(stderr, "loading gene aliases...\n");
+if (DEBUG)
+    uglyTime(NULL);
+
+char sql[1024];
+safef(sql, sizeof(sql),
+      "select DISTINCT kgXref.geneSymbol, %s.name from %s "
+      "join %s on %s.id = %s.probe_id "
+      "join geneLookup on geneLookup.id = %s.gene_id "
+      "join kgXref on kgXref.kgId = geneLookup.kgId;",
+      probeTbl, probeTbl, p2gTbl,
+      probeTbl, p2gTbl, p2gTbl);
+
+struct sqlResult *res = sqlGetResult(conn, sql);
+char **fields;
+for (fields = sqlNextRow(res); fields != NULL; fields = sqlNextRow(res))
+    {
+    /* column 0 is the gene symbol, column 1 the probe name */
+    char *probeCopy = cloneString(fields[1]);
+    hashAdd(bi->geneAliases, fields[0], probeCopy);
+    }
+sqlFreeResult(&res);
+
+if (DEBUG)
+    uglyTime("loaded gene aliases");
+hFreeConn(&conn);
+}
+
+struct slName *biOmicsGetProbesForGene(struct biOmics *bi, char *gene)
+/* Return the names of all probes recorded for gene in bi->geneAliases as an
+ * slName list, or NULL if there are none.  The alias table is loaded first
+ * if it is still empty. */
+{
+if (hashNumEntries(bi->geneAliases) == 0)
+    populateAliases(bi);
+
+struct slName *probes = NULL;
+struct hashEl *hit = hashLookup(bi->geneAliases, gene);
+while (hit != NULL)
+    {
+    slNameAddHead(&probes, (char *) hit->val);
+    hit = hashLookupNext(hit);
+    }
+
+return probes;
+}
+
+
+
+struct biOmics *newBiOmics(char *db, char *dataset)
+/* Allocate a biOmics for the given database and dataset name.  Both strings
+ * are cloned, all lists start empty with fresh hashes, the generic methods
+ * are installed, and the data-type specific handlers are left NULL until the
+ * data type is known. */
+{
+struct biOmics *fresh;
+AllocVar(fresh);
+
+fresh->name = cloneString(dataset);
+fresh->db = cloneString(db);
+
+/* Lookup tables */
+fresh->geneAliases = hashNew(0);
+fresh->sampleIndices = hashNew(0);
+
+/* Dataset contents and their indexes */
+fresh->dataset = NULL;
+fresh->samples = NULL;
+fresh->probeInfo = NULL;
+fresh->probes = NULL;
+fresh->samplesHash = hashNew(0);
+fresh->probeInfoHash = hashNew(0);
+fresh->probesHash = hashNew(0);
+
+/* Methods */
+fresh->populate = biOmicsPopulate;
+fresh->populateAll = biOmicsPopulateAll;
+fresh->populateRandom = biOmicsPopulateRandom;
+fresh->allSamples = biOmicsGetSamples;
+fresh->allProbes = biOmicsGetProbes;
+fresh->probesForGene = biOmicsGetProbesForGene;
+
+/* These are set according to dataType */
+fresh->loadData = NULL;
+fresh->freeData = NULL;
+fresh->dataForProbe = NULL;
+fresh->dataForSample = NULL;
+fresh->dataForProbeSample = NULL;
+fresh->toZscores = NULL;
+
+return fresh;
+}
+
+
+void slPairSampleValFree(struct slPair **pEl)
+/* Free one slPair whose val is a list of probeSampleVal records: the name,
+ * every record on the list and the pair itself.  Sets *pEl to NULL.  A NULL
+ * *pEl is ignored. */
+{
+struct slPair *el;
+
+if ((el = *pEl) == NULL) return;
+
+freeMem(el->name);
+/* val holds one record per loaded row, so the whole list has to go, the
+ * same way slPairValsFree() releases its probeVals list. */
+struct probeSampleVal *pv = el->val;
+probeSampleValFreeList(&pv);
+freez(pEl);
+}
+
+void slPairSampleValFreeList(struct slPair **pList)
+/* Free every slPair on *pList with slPairSampleValFree() and set *pList to
+ * NULL. */
+{
+struct slPair *cur = *pList;
+while (cur != NULL)
+    {
+    struct slPair *following = cur->next;   /* saved before cur is freed */
+    slPairSampleValFree(&cur);
+    cur = following;
+    }
+*pList = NULL;
+}
+
+void slPairValsFree(struct slPair **pEl)
+/* Free one slPair whose val is a list of probeVals records: the name, the
+ * record list and the pair itself.  Sets *pEl to NULL.  A NULL *pEl is
+ * ignored. */
+{
+struct slPair *pair = *pEl;
+if (pair == NULL)
+    return;
+
+struct probeVals *records = pair->val;
+freeMem(pair->name);
+probeValsFreeList(&records);
+freez(pEl);
+}
+
+void slPairValsFreeList(struct slPair **pList)
+/* Free every slPair on *pList with slPairValsFree() and set *pList to
+ * NULL. */
+{
+struct slPair *cur = *pList;
+while (cur != NULL)
+    {
+    struct slPair *following = cur->next;   /* saved before cur is freed */
+    slPairValsFree(&cur);
+    cur = following;
+    }
+*pList = NULL;
+}
+
+void biOmicsFree(struct biOmics **pEl)
+/* Free a biOmics made by newBiOmics() together with everything it owns:
+ * strings, hashes, dataset record, samples, probe info, probe data and the
+ * structure itself.  Sets *pEl to NULL.  A NULL *pEl is ignored. */
+{
+struct biOmics *el;
+
+if ((el = *pEl) == NULL) return;
+
+freeMem(el->db);
+freeMem(el->name);
+freeMem(el->type);      /* cloned in setDataType() */
+
+freeHash(&el->sampleIndices);
+/* Alias values are probe names cloned in populateAliases(), so they are
+ * released together with the hash. */
+freeHashAndVals(&el->geneAliases);
+
+datasetsFreeList(&el->dataset);
+samplesFreeList(&el->samples);
+freeHash(&el->samplesHash);
+
+probeInfoFreeList(&el->probeInfo);
+freeHash(&el->probeInfoHash);
+
+/* freeData stays NULL until setDataType() has run, e.g. when the object is
+ * released before it was ever populated. */
+if (el->freeData != NULL)
+    el->freeData(&el->probes);
+freeHash(&el->probesHash);
+
+/* Release the structure itself and clear the caller's pointer. */
+freez(pEl);
+}
+
+void biOmicsFreeList(struct biOmics **pList)
+/* Call biOmicsFree() on every element of *pList and set *pList to NULL. */
+{
+struct biOmics *cur = *pList;
+while (cur != NULL)
+    {
+    struct biOmics *following = cur->next;   /* read before cur is released */
+    biOmicsFree(&cur);
+    cur = following;
+    }
+*pList = NULL;
+}
+
+/*** biResults code ***/
+struct slName *biResultsGetDatasets(struct biResults *br)
+/* Return the names of all datasets in br as an slName list.  Names are
+ * head-inserted, so the result is in reverse order of br->datasets. */
+{
+struct slName *names = NULL;
+struct biOmics *cur = br->datasets;
+while (cur != NULL)
+    {
+    slNameAddHead(&names, cur->name);
+    cur = cur->next;
+    }
+
+return names;
+}
+
+struct slName *biResultsGetProbesInDataset(struct biResults *br, char *dataset)
+/* Return the probe names of the named dataset in br, or NULL if br has no
+ * dataset of that name. */
+{
+struct biOmics *match = biOmicsMatchDataset(br->datasets, dataset);
+return (match != NULL) ? match->allProbes(match) : NULL;
+}
+
+struct slName *biResultsGetProbes(struct biResults *br)
+/* Return the distinct probe names found across all datasets in br.  Returns
+ * NULL when no dataset contributes a probe. */
+{
+struct slName *combined = NULL;
+struct biOmics *cur = br->datasets;
+while (cur != NULL)
+    {
+    combined = slCat(combined, cur->allProbes(cur));
+    cur = cur->next;
+    }
+return slNameUniqueList(combined);
+}
+
+struct slName *biResultsGetSamplesInDataset(struct biResults *br, char *dataset)
+/* Return the sample names of the named dataset in br, or NULL if br has no
+ * dataset of that name. */
+{
+struct biOmics *match = biOmicsMatchDataset(br->datasets, dataset);
+return (match != NULL) ? match->allSamples(match) : NULL;
+}
+
+struct slName *biResultsGetSamples(struct biResults *br)
+/* Return the distinct sample names found across all datasets in br.  Returns
+ * NULL when no dataset contributes a sample. */
+{
+struct slName *combined = NULL;
+struct biOmics *cur = br->datasets;
+while (cur != NULL)
+    {
+    combined = slCat(combined, cur->allSamples(cur));
+    cur = cur->next;
+    }
+
+return slNameUniqueList(combined);
+}
+
+
+struct slName *getMatching(struct slName *list1, struct slName *list2)
+/* Return a new list of the names that occur in both list1 and list2, each
+ * listed once.  Names are head-inserted while walking list1, so they come
+ * out in reverse order of their first occurrence there.  Returns NULL if
+ * either list is empty or nothing matches.  Neither input is modified. */
+{
+if (!list1 || !list2)
+    return NULL;
+
+fprintf(stderr, "matching: count(list1) = %d, count(list2) = %d\n",
+        slCount(list1), slCount(list2));
+
+/* Index list2 by name once, so that each name of list1 costs one hash
+ * lookup instead of a scan over the whole of list2. */
+struct hash *inList2 = hashNew(0);
+struct slName *sl2;
+for (sl2 = list2; sl2; sl2 = sl2->next)
+    {
+    if (!hashLookup(inList2, sl2->name))
+        hashAdd(inList2, sl2->name, NULL);
+    }
+
+struct slName *sl1, *matched = NULL;
+for (sl1 = list1; sl1; sl1 = sl1->next)
+    {
+    if (!hashLookup(inList2, sl1->name))
+        continue;
+    if (!slNameInList(matched, sl1->name))
+        slNameAddHead(&matched, sl1->name);
+    }
+
+freeHash(&inList2);
+return matched;
+}
+
+struct slName *biResultsGetSamplesInCommon(struct biResults *br)
+/* Return the sample names shared by every dataset in br.  With a single
+ * dataset this is simply its sample list.  Returns NULL when br holds no
+ * datasets or the datasets have no sample in common. */
+{
+struct biOmics *bi = br->datasets;
+if (!bi)
+    return NULL;    /* nothing to intersect */
+
+/* Start off the list */
+struct slName *prevMatched, *matched = bi->allSamples(bi);
+
+bi = bi->next;
+for ( ; bi; bi = bi->next)
+    {
+    prevMatched = matched;
+
+    /* Narrow the running intersection by the next dataset's samples; both
+     * inputs are then no longer needed. */
+    struct slName *samples = bi->allSamples(bi);
+    matched = getMatching(prevMatched, samples);
+
+    slNameFreeList(&samples);
+    slNameFreeList(&prevMatched);
+    }
+
+return matched;
+}
+
+
+struct slName *biResultsProbesForGeneInDataset(struct biResults *br, char *gene, char *dataset)
+/* Return the probes recorded for gene in the named dataset of br, or NULL if
+ * br has no dataset of that name. */
+{
+struct biOmics *match = biOmicsMatchDataset(br->datasets, dataset);
+return (match != NULL) ? match->probesForGene(match, gene) : NULL;
+}
+
+struct slName *biResultsProbesForGene(struct biResults *br, char *gene)
+/* Collect the probes every dataset in br reports for gene, join the lists
+ * end to end, and return the result of slNameUniqueList() on the joined
+ * list. */
+{
+struct slName *combined = NULL;
+struct biOmics *dataset = br->datasets;
+while (dataset != NULL)
+    {
+    struct slName *fromDataset = dataset->probesForGene(dataset, gene);
+    combined = slCat(combined, fromDataset);
+    dataset = dataset->next;
+    }
+return slNameUniqueList(combined);
+}
+
+struct biData *biResultsDataForProbeInDataset(struct biResults *br, char *probe, char *dataset)
+/* Return the data that the named dataset holds for probe, or NULL when
+ * biOmicsMatchDataset() finds no dataset of that name. */
+{
+struct biOmics *match = biOmicsMatchDataset(br->datasets, dataset);
+if (match != NULL)
+    return match->dataForProbe(match, probe);
+return NULL;
+}
+
+struct biData *biResultsDataForProbe(struct biResults *br, char *probe)
+/* Ask every dataset in br for its data on probe, tag each result with the
+ * dataset name through biDataAppendName(), and return the results as one
+ * list.  Results are pushed with slAddHead, so the list runs in the
+ * reverse of dataset order.  A dataset whose lookup returns NULL
+ * contributes nothing. */
+{
+struct biData *bd, *bdList = NULL;
+struct biOmics *bi;
+for (bi = br->datasets; bi; bi = bi->next)
+    {
+    bd = bi->dataForProbe(bi, probe);
+    if (bd == NULL)
+        continue;   /* nothing to tag or link; avoids a NULL dereference */
+    biDataAppendName(bd, bi->name);
+    slAddHead(&bdList, bd);
+    }
+return bdList;
+}
+
+struct biData *biResultsDataForSampleInDataset(struct biResults *br, char *sample,
+                                               char *dataset)
+/* Return the data that the named dataset holds for sample, or NULL when
+ * biOmicsMatchDataset() finds no dataset of that name. */
+{
+struct biOmics *match = biOmicsMatchDataset(br->datasets, dataset);
+return (match != NULL) ? match->dataForSample(match, sample) : NULL;
+}
+
+struct biData *biResultsDataForSample(struct biResults *br, char *sample)
+/* Ask every dataset in br for its data on sample.  Each result gets a
+ * copy of the dataset's type string and is tagged with the dataset name
+ * through biDataAppendName().  Results are pushed with slAddHead, so the
+ * returned list runs in the reverse of dataset order.  A dataset whose
+ * lookup returns NULL contributes nothing. */
+{
+struct biData *bd, *bdList = NULL;
+struct biOmics *bi;
+for (bi = br->datasets; bi; bi = bi->next)
+    {
+    bd = bi->dataForSample(bi, sample);
+    if (bd == NULL)
+        continue;   /* nothing to tag or link; avoids a NULL dereference */
+    bd->type = cloneString(bi->type);
+    biDataAppendName(bd, bi->name);
+    slAddHead(&bdList, bd);
+    }
+return bdList;
+}
+
+double biResultsDataForProbeSampleInDataset(struct biResults *br,
+                                            char *probe, char *sample, char *dataset)
+/* Return the value that the named dataset holds for the probe/sample
+ * pair, or DBL_NULL when biOmicsMatchDataset() finds no dataset of that
+ * name. */
+{
+struct biOmics *match = biOmicsMatchDataset(br->datasets, dataset);
+if (match != NULL)
+    return match->dataForProbeSample(match, probe, sample);
+return DBL_NULL;
+}
+
+
+struct biData *biResultsDataForProbeSample(struct biResults *br, char *probe, char *sample)
+/* Return a new biData named "<probe>-<sample>" holding one value per
+ * dataset in br, in dataset order.  Each value is also stored in the
+ * biData's hash under the dataset name. */
+{
+/* Build the label in a dyString so its length is not capped by the
+ * 128-byte stack buffer used previously. */
+struct dyString *label = dyStringNew(0);
+dyStringPrintf(label, "%s-%s", probe, sample);
+struct biData *bd = biDataNew(label->string);   /* biDataNew clones the name */
+dyStringFree(&label);
+
+struct slDouble *sd;
+struct biOmics *bi;
+for (bi = br->datasets; bi; bi = bi->next)
+    {
+    double val = bi->dataForProbeSample(bi, probe, sample);
+    sd = slDoubleNew(val);
+    slAddHead(&bd->data, sd);
+    hashAdd(bd->hash, bi->name, sd);
+    }
+slReverse(&bd->data);   /* head insertion above built the list backwards */
+return bd;
+}
+
+void biResultsConvertToZscores(struct biResults *br)
+/* Invoke the toZscores method of every dataset in br. */
+{
+struct biOmics *dataset = br->datasets;
+while (dataset != NULL)
+    {
+    dataset->toZscores(dataset);
+    dataset = dataset->next;
+    }
+}
+
+void biResultsConvertToLogP(struct biResults *br)
+/* Invoke the toLogP method of every dataset in br. */
+{
+struct biOmics *dataset = br->datasets;
+while (dataset != NULL)
+    {
+    dataset->toLogP(dataset);
+    dataset = dataset->next;
+    }
+}
+
+
+
+struct biResults *biResultsNew(void)
+/* Allocate a biResults with an empty dataset list and bind each of its
+ * method pointers to the matching biResults* function. */
+{
+struct biResults *results;
+AllocVar(results);
+results->datasets = NULL;
+
+/* Methods that span every dataset */
+results->allDatasets = biResultsGetDatasets;
+results->allProbes = biResultsGetProbes;
+results->allSamples = biResultsGetSamples;
+results->allSamplesInCommon = biResultsGetSamplesInCommon;
+results->probesForGene = biResultsProbesForGene;
+results->dataForProbe = biResultsDataForProbe;
+results->dataForSample = biResultsDataForSample;
+results->dataForProbeSample = biResultsDataForProbeSample;
+
+/* Methods restricted to one named dataset */
+results->allProbesInDataset = biResultsGetProbesInDataset;
+results->allSamplesInDataset = biResultsGetSamplesInDataset;
+results->probesForGeneInDataset = biResultsProbesForGeneInDataset;
+results->dataForProbeInDataset = biResultsDataForProbeInDataset;
+results->dataForSampleInDataset = biResultsDataForSampleInDataset;
+results->dataForProbeSampleInDataset = biResultsDataForProbeSampleInDataset;
+
+/* In-place conversions */
+results->toZscores = biResultsConvertToZscores;
+results->toLogP = biResultsConvertToLogP;
+
+return results;
+}
+
+void biResultsFree(struct biResults **pEl)
+/* Free a biResults: its dataset list and the struct itself, then set
+ * *pEl to NULL.  Does nothing when *pEl is already NULL. */
+{
+struct biResults *el;
+
+if ((el = *pEl) == NULL) return;
+biOmicsFreeList(&el->datasets);
+freeMem(el);    /* the struct itself was previously left allocated */
+*pEl = NULL;    /* do not leave the caller holding a freed pointer */
+}
+
+
+void biResultsAddBiQuery(struct biResults *br, struct biQuery *bq)
+/* Create a biOmics for bq's db and dataset, fill it with populateAll()
+ * when bq->getAllProbes is set and with populate() otherwise, then push
+ * it onto the head of br->datasets. */
+{
+struct biOmics *omics = newBiOmics(bq->db, bq->dataset);
+
+if (!bq->getAllProbes)
+    omics->populate(omics, bq);
+else
+    omics->populateAll(omics, bq);
+
+slAddHead(&br->datasets, omics);
+}
+
+struct biResults *biQueryResults(struct biQuery *bqList)
+/* Build a new biResults and run biResultsAddBiQuery() on it once for
+ * every query in bqList. */
+{
+struct biResults *results = biResultsNew();
+struct biQuery *query = bqList;
+while (query != NULL)
+    {
+    biResultsAddBiQuery(results, query);
+    query = query->next;
+    }
+return results;
+}
+
+void biResultsAddBiQueryRandom(struct biResults *br, struct biQuery *bq, int numProbes)
+/* Create a biOmics for bq's db and dataset, fill it through its
+ * populateRandom() method with numProbes passed along, then push it onto
+ * the head of br->datasets. */
+{
+struct biOmics *omics = newBiOmics(bq->db, bq->dataset);
+
+omics->populateRandom(omics, bq, numProbes);
+slAddHead(&br->datasets, omics);
+}
+
+struct biResults *biQueryResultsRandomize(struct biQuery *bqList, int numProbes)
+/* Build a new biResults and run biResultsAddBiQueryRandom() on it once
+ * for every query in bqList, passing numProbes through. */
+{
+struct biResults *results = biResultsNew();
+struct biQuery *query = bqList;
+while (query != NULL)
+    {
+    biResultsAddBiQueryRandom(results, query, numProbes);
+    query = query->next;
+    }
+return results;
+}
+
+/*** End biResults code ***/
+
+
+/*** biQuery code ****/
+
+
+void biQueryAppend(struct biQuery **bqList, struct biQuery *bq)
+/* Push bq onto the head of *bqList.  Does nothing when bqList is NULL.
+ * Note that despite the name the query goes to the front of the list. */
+{
+if (bqList != NULL)
+    {
+    slAddHead(bqList, bq);
+    }
+}
+
+int biQueryAddPathways(struct biQuery *bq, char *pathways, char sep)
+/* Split pathways on sep and add the pieces to bq->pathwayList through
+ * addDelimStringToList().  Returns 0 when pathways is NULL, 1 otherwise. */
+{
+struct slName **target = &bq->pathwayList;
+return addDelimStringToList(target, pathways, sep);
+}
+
+int biQueryAddProbes(struct biQuery *bq, char *probes, char sep)
+/* Split probes on sep and add the pieces to bq->probeList through
+ * addDelimStringToList().  Returns 0 when probes is NULL, 1 otherwise. */
+{
+struct slName **target = &bq->probeList;
+return addDelimStringToList(target, probes, sep);
+}
+
+int biQueryAddGenes(struct biQuery *bq, char *genes, char sep)
+/* Split genes on sep and add the pieces to bq->geneList through
+ * addDelimStringToList().  Returns 0 when genes is NULL, 1 otherwise. */
+{
+struct slName **target = &bq->geneList;
+return addDelimStringToList(target, genes, sep);
+}
+
+int biQueryAddSamples(struct biQuery *bq, char *samples, char sep)
+/* Split samples on sep and add the pieces to bq->sampleList through
+ * addDelimStringToList().  Returns 0 when samples is NULL, 1 otherwise. */
+{
+struct slName **target = &bq->sampleList;
+return addDelimStringToList(target, samples, sep);
+}
+
+int biQueryAddFeatureVals(struct biQuery *bq, char *featureVals, char sep)
+/* Split featureVals on sep and add the pieces to bq->featureValList
+ * through addDelimStringToList().  Returns 0 when featureVals is NULL,
+ * 1 otherwise. */
+{
+struct slName **target = &bq->featureValList;
+return addDelimStringToList(target, featureVals, sep);
+}
+
+int biQueryAddFeatureCodes(struct biQuery *bq, char *featureCodes, char sep)
+/* Split featureCodes on sep and add the pieces to bq->featureCodeList
+ * through addDelimStringToList().  Returns 0 when featureCodes is NULL,
+ * 1 otherwise. */
+{
+struct slName **target = &bq->featureCodeList;
+return addDelimStringToList(target, featureCodes, sep);
+}
+
+struct biQuery *biQueryNew(char *db, char *dataset)
+/* Allocate a biQuery holding copies of db and dataset, with getAllProbes
+ * off, every filter list empty, and each add* method pointer bound to
+ * the matching biQueryAdd* function. */
+{
+struct biQuery *query;
+AllocVar(query);
+
+/* Identity of the query; both strings are private copies */
+query->db = cloneString(db);
+query->dataset = cloneString(dataset);
+query->getAllProbes = FALSE;
+
+/* Methods */
+query->addPathways = biQueryAddPathways;
+query->addProbes = biQueryAddProbes;
+query->addGenes = biQueryAddGenes;
+query->addSamples = biQueryAddSamples;
+query->addFeatureVals = biQueryAddFeatureVals;
+query->addFeatureCodes = biQueryAddFeatureCodes;
+
+/* Filter lists all start out empty */
+query->pathwayList = NULL;
+query->probeList = NULL;
+query->geneList = NULL;
+query->sampleList = NULL;
+query->featureValList = NULL;
+query->featureCodeList = NULL;
+
+return query;
+}
+
+void biQueryFree(struct biQuery **pEl)
+/* Free a biQuery: its db and dataset strings, all of its filter lists,
+ * and the struct itself, then set *pEl to NULL.  Does nothing when *pEl
+ * is already NULL. */
+{
+struct biQuery *el;
+if ((el = *pEl) == NULL) return;
+
+freeMem(el->db);
+freeMem(el->dataset);
+
+slNameFreeList(&el->pathwayList);
+slNameFreeList(&el->probeList);
+slNameFreeList(&el->geneList);
+slNameFreeList(&el->sampleList);
+slNameFreeList(&el->featureValList);
+slNameFreeList(&el->featureCodeList);
+
+freeMem(el);    /* the struct itself was previously left allocated */
+*pEl = NULL;    /* do not leave the caller holding a freed pointer */
+}
+
+void biQueryFreeList(struct biQuery **pList)
+/* Call biQueryFree() on every element of *pList, then set *pList to
+ * NULL. */
+{
+struct biQuery *cur = *pList;
+while (cur != NULL)
+    {
+    struct biQuery *following = cur->next;   /* read before cur is released */
+    biQueryFree(&cur);
+    cur = following;
+    }
+*pList = NULL;
+}
+
+void biDataAppendName(struct biData *bd, char *name)
+/* Give bd a copy of name if it has no name yet; otherwise replace its
+ * name with "<old name>,<name>".  The joined name is built in a dyString
+ * so its length is not capped by the 128-byte stack buffer used
+ * previously. */
+{
+if (!bd->name)
+    {
+    bd->name = cloneString(name);
+    return;
+    }
+
+struct dyString *joined = dyStringNew(0);
+dyStringPrintf(joined, "%s,%s", bd->name, name);
+freeMem(bd->name);      /* old name is no longer needed once joined */
+bd->name = cloneString(joined->string);
+dyStringFree(&joined);
+}
+
+struct biData *biDataFind(struct biData *bdList, char *name)
+/* Return the first element of bdList whose name equals name, or NULL if
+ * there is none.  A NULL name never matches, and elements that have no
+ * name (biDataNew() accepts a NULL name) are skipped rather than
+ * compared. */
+{
+if (name == NULL)
+    return NULL;
+
+struct biData *bd;
+for (bd = bdList; bd; bd = bd->next)
+    {
+    if (bd->name != NULL && sameString(bd->name, name))
+        return bd;
+    }
+return NULL;
+}
+
+struct biData *biDataNew(char *name)
+/* Allocate a biData with no type, no data and a fresh hash.  When name
+ * is non-NULL the new element gets its own copy of it; otherwise the
+ * element is left unnamed. */
+{
+struct biData *fresh;
+AllocVar(fresh);
+fresh->name = (name != NULL) ? cloneString(name) : NULL;
+fresh->type = NULL;
+fresh->data = NULL;
+fresh->hash = hashNew(0);
+
+return fresh;
+}
+
+void biDataFree(struct biData **pEl)
+/* Free a biData: its name and type strings, its data list, its hash and
+ * the struct itself, then set *pEl to NULL.  Does nothing when *pEl is
+ * already NULL. */
+{
+struct biData *el;
+
+if ((el = *pEl) == NULL) return;
+
+freeMem(el->name);
+freeMem(el->type);
+slFreeList(&el->data);
+freeHash(&el->hash);
+
+freeMem(el);    /* the struct itself was previously left allocated */
+*pEl = NULL;    /* do not leave the caller holding a freed pointer */
+}
+
+void biDataFreeList(struct biData **pList)
+/* Call biDataFree() on every element of *pList, then set *pList to
+ * NULL. */
+{
+struct biData *cur = *pList;
+while (cur != NULL)
+    {
+    struct biData *following = cur->next;   /* read before cur is released */
+    biDataFree(&cur);
+    cur = following;
+    }
+*pList = NULL;
+}
+
+/**** End biQuery and biData code *****/