src/hg/instinct/bioInt2/bioIntDriver.c 1.1

1.1 2009/03/20 06:06:31 jsanborn
initial commit
Index: src/hg/instinct/bioInt2/bioIntDriver.c
===================================================================
RCS file: src/hg/instinct/bioInt2/bioIntDriver.c
diff -N src/hg/instinct/bioInt2/bioIntDriver.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/hg/instinct/bioInt2/bioIntDriver.c	20 Mar 2009 06:06:31 -0000	1.1
@@ -0,0 +1,1706 @@
+/* bioIntDriver.c 
+ * All rights reserved -- J. Zachary Sanborn 
+ */
+#include "common.h"
+#include "linefile.h"
+#include "hash.h"
+#include "options.h"
+#include "jksql.h"
+#include "hdb.h"
+#include "dystring.h"
+#include "cprob.h"
+#include "hgStatsLib.h"
+#include "bioIntDriver.h"
+
+static char *heatMapDbProfile = "localDb";  /* profile name handed to hAllocConnProfile() */
+
+/* Begin helper functions */
+
+int addDelimStringToList(struct slName **list, char *str, char sep)
+/* Split str on sep and append the pieces, in order, to the end of *list.
+ * Returns 0 (leaving *list untouched) when str is NULL, 1 otherwise.
+ * The earlier version pushed the pieces on the head and then reversed the
+ * whole list, which also reversed any names that were already on *list. */
+{
+if (str == NULL)
+    return 0;
+
+/* slCat hands the freshly parsed nodes over to *list, so nothing needs to
+ * be cloned or freed here. */
+struct slName *parsed = slNameListFromString(str, sep);
+*list = slCat(*list, parsed);
+return 1;
+}
+
+void slNamePrint(struct slName *list)
+/* Write every name in list to stdout, each followed by one space, then end
+ * the line with a newline. */
+{
+struct slName *cur = list;
+while (cur != NULL)
+    {
+    printf("%s ", cur->name);
+    cur = cur->next;
+    }
+fputc('\n', stdout);
+}
+
+struct hash *uniqueHashFromSlNameList(void *list)
+/* Build a hash keyed by name from a list of slNames, or from any list whose
+ * first field is the next pointer and whose second is the name.  Only the
+ * first element seen for a given name is stored as that name's value.
+ * Returns NULL for an empty list.
+ * -- Adapted from hashFromSlNameList in hash.h */
+{
+struct slName *cur = list;
+if (cur == NULL)
+    return NULL;
+
+struct hash *uniq = hashNew(10);
+while (cur != NULL)
+    {
+    if (hashLookup(uniq, cur->name) == NULL)
+        hashAdd(uniq, cur->name, cur);
+    cur = cur->next;
+    }
+return uniq;
+}
+
+struct slName *slNameUniqueList(struct slName *list)
+/* Return a new list holding each distinct name of list exactly once.  The
+ * input list is freed.  Names come out in the order the hash iterator yields
+ * them, not in their original order.  Returns NULL for an empty list. */
+{
+if (list == NULL)
+    return NULL;
+
+struct hash *seen = uniqueHashFromSlNameList(list);
+struct slName *uniq = NULL;
+struct hashCookie it = hashFirst(seen);
+char *key;
+for (key = hashNextName(&it); key != NULL; key = hashNextName(&it))
+    slNameAddHead(&uniq, key);
+
+/* Input list and scratch hash are no longer needed once uniq is built. */
+slNameFreeList(&list);
+freeHash(&seen);
+return uniq;
+}
+/* End helper functions */
+
+
+/* Begin database functions */
+void addSample(struct biOmics *bi, struct samples *sa)
+/* Register sa with bi on both the samples list and samplesHash.  If a sample
+ * with the same name is already hashed, sa is freed instead. */
+{
+if (hashLookup(bi->samplesHash, sa->name) != NULL)
+    {
+    /* duplicate name: discard the incoming record */
+    samplesFree(&sa);
+    return;
+    }
+hashAdd(bi->samplesHash, sa->name, sa);
+slAddHead(&bi->samples, sa);
+}
+
+void loadAllSamples(struct sqlConnection *conn, struct biOmics *bi)
+/* Load every row of the samples table that belongs to bi's dataset, in id
+ * order, and add each one to bi through addSample. */
+{
+char query[128];
+safef(query, sizeof(query),
+      "select * from samples where dataset_id = %d order by id;",
+      bi->dataset->id);
+
+struct sqlResult *sr = sqlGetResult(conn, query);
+char **row;
+for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
+    addSample(bi, samplesLoad(row));
+sqlFreeResult(&sr);
+}
+
+void loadSamplesInList(struct sqlConnection *conn, struct biOmics *bi, 
+                       struct slName *sampleList)
+/* Load the samples of bi's dataset whose names appear in sampleList, in id
+ * order, adding each to bi through addSample.  Does nothing when sampleList
+ * is empty.
+ * NOTE(review): names are spliced into the SQL text unescaped; confirm the
+ * list never carries untrusted input. */
+{
+if (!sampleList)
+    return;
+
+struct slName *sl;
+struct datasets *da = bi->dataset;
+struct dyString *dy = newDyString(100);
+dyStringPrintf(dy, "select * from samples where name in (");
+for (sl = sampleList; sl; sl = sl->next)
+    {
+    dyStringPrintf(dy, "'%s'", sl->name);
+    if (sl->next)
+        dyStringPrintf(dy, ",");
+    }
+dyStringPrintf(dy, ") and dataset_id = %d order by id;", da->id);
+char *query = dyStringCannibalize(&dy);
+
+struct sqlResult *sr = sqlGetResult(conn, query);
+char **row = NULL;
+while ((row = sqlNextRow(sr)) != NULL)
+    {
+    struct samples *sa = samplesLoad(row);
+    addSample(bi, sa);
+    }
+sqlFreeResult(&sr);
+
+/* The cannibalized string is ours to release; it used to be leaked. */
+freeMem(query);
+}
+
+void loadSamplesMatchingFeatureVals(struct sqlConnection *conn, struct biOmics *bi, 
+                                    struct slName *featureValList)
+/* Load samples of bi's dataset whose clinical data satisfy the conditions in
+ * featureValList, adding each to bi through addSample.  Every list entry must
+ * be three space-separated words, "<feature> <operator> <value>"; malformed
+ * entries are reported on stderr and skipped.  All conditions are AND'ed onto
+ * one joined row (see the TODO below).  Does nothing when the list is empty.
+ * NOTE(review): the words are spliced into the SQL text unescaped. */
+{
+if (!featureValList)
+    return;
+
+//TODO: Inner JOIN!
+//select * from clinicalData p1 join clinicalData p2 on p1.sample_id = p2.sample_id and p1.code = 'Positive' and p1.feature_id = 0 and p2.code = 'Positive' and p2.feature_id = 1;
+
+struct datasets *da = bi->dataset;
+struct dyString *dy = newDyString(100);
+dyStringPrintf(dy, 
+               "select * from samples "
+               "join clinicalData on samples.id = clinicalData.sample_id "
+               "join features on clinicalData.feature_id = features.id "
+               "where samples.dataset_id = %d ", 
+               da->id);
+
+struct slName *fv;
+for (fv = featureValList; fv; fv = fv->next)
+    {
+    struct slName *parts = NULL;
+    addDelimStringToList(&parts, fv->name, ' ');
+    if (slCount(parts) != 3)
+        {
+        fprintf(stderr, "Improperly formatted feature-value pair: %s\n", fv->name);
+        slNameFreeList(&parts);
+        continue;
+        }
+    char *name = parts->name;
+    char *operation = parts->next->name;
+    char *val = parts->next->next->name;
+
+    dyStringPrintf(dy, " and features.name = '%s'", name);
+    dyStringPrintf(dy, " and clinicalData.val %s %s", operation, val);
+
+    slNameFreeList(&parts);
+    }
+
+dyStringPrintf(dy, " order by samples.id;");
+char *query = dyStringCannibalize(&dy);
+
+struct sqlResult *sr = sqlGetResult(conn, query);
+char **row = NULL;
+while ((row = sqlNextRow(sr)) != NULL)
+    {
+    struct samples *sa = samplesLoad(row);
+    addSample(bi, sa);
+    }
+sqlFreeResult(&sr);
+
+/* The cannibalized string is ours to release; it used to be leaked. */
+freeMem(query);
+}
+
+void loadSamplesMatchingFeatureCodes(struct sqlConnection *conn, struct biOmics *bi, 
+                                     struct slName *featureCodeList)
+/* Load samples of bi's dataset matching any of the feature-code conditions
+ * in featureCodeList, adding each to bi through addSample.  Every list entry
+ * must be three space-separated words, "<feature> <operator> <code>"; a code
+ * of NULL is turned into an "is NULL" test.  Malformed entries are reported
+ * on stderr and skipped.  The valid conditions are OR'ed together.  Does
+ * nothing when the list is empty or holds no valid entry.
+ * NOTE(review): the words are spliced into the SQL text unescaped. */
+{
+if (!featureCodeList)
+    return;
+
+//TODO: Inner JOIN!
+//select * from clinicalData p1 join clinicalData p2 on p1.sample_id = p2.sample_id and p1.code = 'Positive' and p1.feature_id = 0 and p2.code = 'Positive' and p2.feature_id = 1;
+
+struct datasets *da = bi->dataset;
+struct dyString *dy = newDyString(100);
+dyStringPrintf(dy, 
+               "select * from samples "
+               "join clinicalData on samples.id = clinicalData.sample_id "
+               "join features on clinicalData.feature_id = features.id "
+               "where samples.dataset_id = %d and (", 
+               da->id);
+
+int clauses = 0;
+struct slName *fc;
+for (fc = featureCodeList; fc; fc = fc->next)
+    {
+    struct slName *parts = NULL;
+    addDelimStringToList(&parts, fc->name, ' ');
+    if (slCount(parts) != 3)
+        {
+        fprintf(stderr, "Improperly formatted feature-code pair: %s\n", fc->name);
+        slNameFreeList(&parts);
+        continue;
+        }
+    char *name = parts->name;
+    char *operation = parts->next->name;
+    char *code = parts->next->next->name;
+
+    /* Emit the connective only between two valid clauses, so a skipped
+     * entry can no longer leave a dangling " or" in the query. */
+    if (clauses > 0)
+        dyStringPrintf(dy, " or");
+    clauses++;
+
+    dyStringPrintf(dy, " (features.name = '%s'", name);
+    if (sameString(code, "NULL"))
+        dyStringPrintf(dy, " and clinicalData.code is NULL)");
+    else
+        dyStringPrintf(dy, " and clinicalData.code %s '%s')", operation, code);
+    slNameFreeList(&parts);
+    }
+
+if (clauses == 0)
+    {
+    /* Nothing usable: "and ()" would be rejected by the SQL parser. */
+    freeDyString(&dy);
+    return;
+    }
+
+dyStringPrintf(dy, ") order by samples.id;");
+char *query = dyStringCannibalize(&dy);
+
+struct sqlResult *sr = sqlGetResult(conn, query);
+char **row = NULL;
+while ((row = sqlNextRow(sr)) != NULL)
+    {
+    struct samples *sa = samplesLoad(row);
+    addSample(bi, sa);
+    }
+sqlFreeResult(&sr);
+
+/* The cannibalized string is ours to release; it used to be leaked. */
+freeMem(query);
+}
+
+void loadSamples(struct sqlConnection *conn, struct biOmics *bi, struct biQuery *bq)
+/* Load samples for bi as requested by bq.  When bq has a sample list or any
+ * feature filter, the three selective loaders are run in turn (each ignores
+ * an empty list).  Otherwise every sample of the dataset is loaded. */
+{
+if (bq->sampleList || bq->featureValList || bq->featureCodeList)
+    {
+    /* explicit sample names */
+    loadSamplesInList(conn, bi, bq->sampleList);
+
+    /* feature-value conditions, e.g. B <= 5,C > 6,... */
+    loadSamplesMatchingFeatureVals(conn, bi, bq->featureValList);
+
+    /* feature-code conditions, e.g. ER = Positive */
+    loadSamplesMatchingFeatureCodes(conn, bi, bq->featureCodeList);
+    return;
+    }
+
+fprintf(stderr, "Loading ALL samples from dataset.\n");
+loadAllSamples(conn, bi);
+}
+
+void addProbeInfo(struct biOmics *bi, struct probeInfo *pi)
+/* Register pi with bi.  If a probe of the same name is already known, pi is
+ * freed.  Otherwise pi is put on the probeInfo list and hash, and an slPair
+ * named after the probe (val NULL) is put on bi->probes and hashed in
+ * probesHash under the probe's numeric id written as a string. */
+{
+if (hashLookup(bi->probeInfoHash, pi->name) != NULL)
+    {
+    probeInfoFree(&pi);    /* already there */
+    return;
+    }
+
+hashAdd(bi->probeInfoHash, pi->name, pi);
+slAddHead(&bi->probeInfo, pi);
+
+/* Holder for this probe's data; val starts out empty. */
+struct slPair *holder;
+AllocVar(holder);
+holder->name = cloneString(pi->name);
+holder->val = NULL;
+slAddHead(&bi->probes, holder);
+
+char idKey[128];
+safef(idKey, sizeof(idKey), "%d", pi->id);
+hashAdd(bi->probesHash, idKey, holder);
+}
+
+void loadPathways(struct sqlConnection *conn, struct biOmics *bi, struct slName *pathwayList)
+/* Load probe info for the probes that the dataset's probe-to-gene table and
+ * pathwayGenes connect to a pathway named in pathwayList, adding each to bi
+ * through addProbeInfo.  Does nothing if pathwayList is empty or the dataset
+ * has no probe table or probe-to-gene table.
+ * NOTE(review): the join uses pathways.id = pathwayGenes.id; confirm that is
+ * the intended key column.  Names are spliced into the SQL text unescaped. */
+{
+if (!pathwayList)
+    return;
+
+struct slName *sl;
+struct datasets *da = bi->dataset;
+char *pInfo = da->probe_table;
+char *p2g = da->probe_to_gene_table;
+
+if (!pInfo || !p2g)
+    return;
+
+struct dyString *dy = newDyString(100);
+dyStringPrintf(dy, 
+               "select %s.* from %s join %s on %s.probe_id = %s.id "
+               "join pathwayGenes on pathwayGenes.gene_id = %s.gene_id "
+               "join pathways on pathways.id = pathwayGenes.id "
+               "where pathways.name in (",
+               pInfo, pInfo, p2g, p2g, pInfo, p2g);
+
+for (sl = pathwayList; sl; sl = sl->next)
+    {
+    dyStringPrintf(dy, "'%s'", sl->name);
+    if (sl->next)
+        dyStringPrintf(dy, ",");
+    }
+dyStringPrintf(dy, ");");
+char *query = dyStringCannibalize(&dy);
+
+struct sqlResult *sr = sqlGetResult(conn, query);
+char **row = NULL;
+while ((row = sqlNextRow(sr)) != NULL)
+    {
+    struct probeInfo *pi = probeInfoLoad(row);
+    addProbeInfo(bi, pi);
+    }
+sqlFreeResult(&sr);
+
+/* The cannibalized string is ours to release; it used to be leaked. */
+freeMem(query);
+}
+
+void loadGenes(struct sqlConnection *conn, struct biOmics *bi, struct slName *geneList)
+/* Load probe info for the probes that the dataset's probe-to-gene table and
+ * geneLookup connect to a kgId named in geneList, adding each to bi through
+ * addProbeInfo.  Does nothing if geneList is empty or the dataset has no
+ * probe table or probe-to-gene table.
+ * NOTE(review): the query selects "*" from the join and relies on the probe
+ * table's columns coming first in each row.  Names are spliced into the SQL
+ * text unescaped. */
+{
+if (!geneList)
+    return;
+
+struct slName *sl;
+struct datasets *da = bi->dataset;
+char *pInfo = da->probe_table;
+char *p2g = da->probe_to_gene_table;
+
+if (!pInfo || !p2g)
+    return;
+
+struct dyString *dy = newDyString(100);
+dyStringPrintf(dy, 
+               "select * from %s join %s on %s.probe_id = %s.id "
+               "join geneLookup on geneLookup.id = %s.gene_id where kgId in (", 
+               pInfo, p2g, p2g, pInfo, p2g);
+
+for (sl = geneList; sl; sl = sl->next)
+    {
+    dyStringPrintf(dy, "'%s'", sl->name);
+    if (sl->next)
+        dyStringPrintf(dy, ",");
+    }
+dyStringPrintf(dy, ");");
+char *query = dyStringCannibalize(&dy);
+
+struct sqlResult *sr = sqlGetResult(conn, query);
+char **row = NULL;
+while ((row = sqlNextRow(sr)) != NULL)
+    {
+    struct probeInfo *pi = probeInfoLoad(row);
+    addProbeInfo(bi, pi);
+    }
+sqlFreeResult(&sr);
+
+/* The cannibalized string is ours to release; it used to be leaked. */
+freeMem(query);
+}
+
+void loadProbeInfo(struct sqlConnection *conn, struct biOmics *bi, 
+                   struct slName *probeList, boolean allData)
+/* Load rows of the dataset's probe table into bi through addProbeInfo.  With
+ * a probeList only the probes named in it are selected; with an empty list
+ * and allData set, the whole table is selected.  Does nothing when the list
+ * is empty and allData is FALSE.
+ * NOTE(review): names are spliced into the SQL text unescaped. */
+{
+if (!probeList && !allData)
+    return;
+
+struct datasets *da = bi->dataset;
+
+struct slName *sl;
+struct dyString *dy = newDyString(100);
+dyStringPrintf(dy, "select * from %s ", da->probe_table);
+
+if (probeList)
+    {
+    dyStringPrintf(dy, "where name in (");
+    for (sl = probeList; sl; sl = sl->next)
+        {
+        dyStringPrintf(dy, "'%s'", sl->name);
+        if (sl->next)
+            dyStringPrintf(dy, ",");
+        }
+    dyStringPrintf(dy, ");");
+    }
+
+char *query = dyStringCannibalize(&dy);
+
+struct sqlResult *sr = sqlGetResult(conn, query);
+char **row = NULL;
+while ((row = sqlNextRow(sr)) != NULL)
+    {
+    struct probeInfo *pi = probeInfoLoad(row);
+    addProbeInfo(bi, pi);
+    }
+sqlFreeResult(&sr);
+
+/* The cannibalized string is ours to release; it used to be leaked. */
+freeMem(query);
+}
+
+
+void loadProbeInfoRandom(struct sqlConnection *conn, struct biOmics *bi, int numProbes)
+/* Pick numProbes distinct random ids in [0, max(id)) for the dataset's probe
+ * table and load the rows carrying those ids into bi through addProbeInfo.
+ * Does nothing when numProbes <= 0.  Aborts when numProbes is not below
+ * max(id), or when numProbes distinct ids could not be drawn within
+ * 2 * max(id) attempts.  Drawn ids that have no row simply load nothing. */
+{
+if (numProbes <= 0)
+    return;
+
+struct datasets *da = bi->dataset;
+
+char q[256];
+safef(q, sizeof(q), "select max(id) from %s;", da->probe_table);
+int maxProbes = sqlQuickNum(conn, q);
+
+if (numProbes >= maxProbes) // what's the point?!?
+    errAbort("Number of random probes greater than probes in table!.");
+
+/* Draw ids, using a hash keyed by the id's text to reject repeats. */
+int i = 0, total = 0;
+char rStr[128];
+struct hash *hash = hashNew(0);
+while (i < numProbes && total < maxProbes * 2)
+    {
+    total++;
+    int r = random() % maxProbes;
+    safef(rStr, sizeof(rStr), "%d", r);
+    if (hashLookup(hash, rStr))
+        continue;
+    hashAddInt(hash, rStr, 1);
+    i++;
+    }
+
+/* Turn the drawn ids into a sorted integer list. */
+struct slInt *si, *siList = NULL;
+struct hashCookie cookie = hashFirst(hash);
+char *name;
+while ((name = hashNextName(&cookie)) != NULL)
+    {
+    si = slIntNew(atoi(name));
+    slAddHead(&siList, si);
+    }
+slSort(&siList, slIntCmp);
+hashFree(&hash);
+
+if (slCount(siList) != numProbes)
+    errAbort("Random list not correct length %d != %d.", slCount(siList), numProbes);
+
+struct dyString *dy = newDyString(100);
+dyStringPrintf(dy, "select * from %s where id in (", da->probe_table);
+for (si = siList; si; si = si->next)
+    {
+    dyStringPrintf(dy, "%d", si->val);
+    if (si->next)
+        dyStringPrintf(dy, ",");
+    }
+dyStringPrintf(dy, ");");
+
+char *query = dyStringCannibalize(&dy);
+slFreeList(&siList);
+
+struct sqlResult *sr = sqlGetResult(conn, query);
+char **row = NULL;
+while ((row = sqlNextRow(sr)) != NULL)
+    {
+    struct probeInfo *pi = probeInfoLoad(row);
+    addProbeInfo(bi, pi);
+    }
+sqlFreeResult(&sr);
+
+/* The cannibalized string is ours to release; it used to be leaked. */
+freeMem(query);
+}
+
+void loadProbeSampleValData(struct sqlConnection *conn, struct biOmics *bi, boolean allData)
+/* Load rows of the dataset's data table as probeSampleVal records and push
+ * each on the val list of the matching slPair in bi->probesHash.  The query
+ * is restricted to the probe ids in bi->probeInfo and the sample ids in
+ * bi->samples when those lists are non-empty.  Rows whose first column (used
+ * as the probesHash key) is not hashed are skipped.
+ * The allData parameter is unused in this function. */
+{
+struct datasets *da = bi->dataset;
+
+struct dyString *dy = newDyString(100);
+dyStringPrintf(dy, "select * from %s", da->data_table);
+
+if (bi->probeInfo)
+    {
+    struct probeInfo *pi;
+    dyStringPrintf(dy, " where probe_id in (");
+    for (pi = bi->probeInfo; pi; pi = pi->next)
+        {
+        dyStringPrintf(dy, "%d", pi->id);
+        if (pi->next)
+            dyStringPrintf(dy, ",");
+        }
+    dyStringPrintf(dy, ")");
+    }
+
+if (bi->samples)
+    {
+    /* second restriction joins with "and" only if the first was written */
+    if (bi->probeInfo)
+        dyStringPrintf(dy, " and");
+    else 
+        dyStringPrintf(dy, " where");
+
+    struct samples *sa;
+    dyStringPrintf(dy, " sample_id in (");
+    for (sa = bi->samples; sa; sa = sa->next)
+        {
+        dyStringPrintf(dy, "%d", sa->id);
+        if (sa->next)
+            dyStringPrintf(dy, ",");
+        }
+    dyStringPrintf(dy, ")");
+    }
+dyStringPrintf(dy, ";");
+
+char *query = dyStringCannibalize(&dy);
+struct sqlResult *sr = sqlGetResult(conn, query);
+char **row = NULL;
+
+int count = 0;
+struct slPair *sp;
+struct probeSampleVal *pv;
+while ((row = sqlNextRow(sr)) != NULL)
+    {
+    char *probe_id = row[0];
+    struct hashEl *el = hashLookup(bi->probesHash, probe_id);
+    if (!el)
+        continue;
+    sp = el->val;
+    pv = probeSampleValLoad(row);
+    slAddHead(&sp->val, pv);
+    count++;
+    }
+if (DEBUG)
+    fprintf(stdout, "found %d probes\n", count);
+
+sqlFreeResult(&sr); 
+
+/* The cannibalized string is ours to release; it used to be leaked. */
+freeMem(query);
+}
+
+void loadProbeValsData(struct sqlConnection *conn, struct biOmics *bi, boolean allData)
+/* Load rows of the dataset's data table as probeVals records and push each
+ * on the val list of the matching slPair in bi->probesHash.  Unless allData
+ * is set, the query is restricted to the probe ids in bi->probeInfo.  Does
+ * nothing when bi->probeInfo is empty and allData is FALSE.  Rows whose
+ * first column (used as the probesHash key) is not hashed are skipped. */
+{
+if (!bi->probeInfo && !allData)
+    return;
+struct datasets *da = bi->dataset;
+
+struct probeInfo *pi;
+struct dyString *dy = newDyString(100);
+dyStringPrintf(dy, "select * from %s ", da->data_table);
+
+if (bi->probeInfo && !allData)
+    {
+    dyStringPrintf(dy, "where probe_id in (");
+    for (pi = bi->probeInfo; pi; pi = pi->next)
+        {
+        dyStringPrintf(dy, "%d", pi->id);
+        if (pi->next)
+            dyStringPrintf(dy, ",");
+        }
+    dyStringPrintf(dy, ")");
+    }
+char *query = dyStringCannibalize(&dy);
+
+struct sqlResult *sr = sqlGetResult(conn, query);
+char **row = NULL;
+
+int count = 0;
+struct slPair *sp;
+struct probeVals *pv;
+while ((row = sqlNextRow(sr)) != NULL)
+    {
+    char *probe_id = row[0];
+    struct hashEl *el = hashLookup(bi->probesHash, probe_id);
+    if (!el)
+        continue;
+    sp = el->val;
+    pv = probeValsLoad(row);
+    count += pv->sample_count;
+    slAddHead(&sp->val, pv);
+    }
+if (DEBUG)
+    fprintf(stdout, "found %d probes\n", count);
+
+sqlFreeResult(&sr); 
+
+/* The cannibalized string is ours to release; it used to be leaked. */
+freeMem(query);
+}
+
+struct biData *probeValsForProbe(struct biOmics *bi, char *probe)
+/* Collect the values of the named probe for every sample in bi->samples,
+ * once per probeVals record attached to the probe.  Values are listed in
+ * sample order and hashed by sample name.  Returns NULL if the probe is
+ * unknown.  Aborts if a sample's exp_id falls outside a record's
+ * sample_data array, matching the check made in probeValsForSample. */
+{
+struct hashEl *el = hashLookup(bi->probeInfoHash, probe);
+if (!el)
+    return NULL;
+struct probeInfo *pi = el->val;
+int probeId = pi->id;
+
+/* probesHash is keyed by the probe id written as a string */
+char pStr[128];
+safef(pStr, sizeof(pStr), "%d", probeId);
+el = hashLookup(bi->probesHash, pStr);
+if (!el)
+    return NULL;
+struct slPair *sp = el->val;
+
+struct biData *bd = biDataNew(probe);
+
+struct samples *sa;
+struct probeVals *pv;
+struct slDouble *sd;
+for (pv = sp->val; pv; pv = pv->next)
+    {
+    for (sa = bi->samples; sa; sa = sa->next)
+        {
+        /* guard the array read; an unchecked exp_id read out of bounds */
+        if (sa->exp_id < 0 || sa->exp_id >= pv->sample_count)
+            errAbort("exp_id %d of sample %s is outside sample_data[] (length %d).",
+                     sa->exp_id, sa->name, pv->sample_count);
+        sd = slDoubleNew(pv->sample_data[sa->exp_id]);
+        slAddHead(&bd->data, sd);
+        hashAdd(bd->hash, sa->name, sd);
+        }
+    }
+slReverse(&bd->data);
+return bd;
+}
+
+struct biData *probeValsForSample(struct biOmics *bi, char *sample)
+/* Collect, for the named sample, one value from every probeVals record of
+ * every probe in bi->probes.  Values are listed in probe order and hashed by
+ * probe name.  Returns NULL if the sample is unknown; aborts if the sample's
+ * exp_id is not below a record's sample_count. */
+{
+struct hashEl *hit = hashLookup(bi->samplesHash, sample);
+if (hit == NULL)
+    return NULL;
+struct samples *found = hit->val;
+int column = found->exp_id;
+
+struct biData *result = biDataNew(NULL);
+struct slPair *probe = bi->probes;
+while (probe != NULL)
+    {
+    struct probeVals *rec = probe->val;
+    while (rec != NULL)
+        {
+        if (column >= rec->sample_count)
+            errAbort("expId is greater than bd->vals[] array length.");
+        struct slDouble *cell = slDoubleNew(rec->sample_data[column]);
+        slAddHead(&result->data, cell);
+        hashAdd(result->hash, probe->name, cell);
+        rec = rec->next;
+        }
+    probe = probe->next;
+    }
+slReverse(&result->data);
+return result;
+}
+
+double probeValsForProbeSample(struct biOmics *bi, 
+                               char *probe, char *sample)
+/* Return the value of the named probe in the named sample, read from the
+ * first probeVals record attached to the probe.  Returns DBL_NULL when the
+ * probe or sample is unknown, when no data is attached to the probe, or when
+ * the sample's exp_id falls outside the record's sample_data array. */
+{
+struct hashEl *el = hashLookup(bi->probeInfoHash, probe);
+if (!el)
+    return DBL_NULL;
+struct probeInfo *pi = el->val;
+int probeId = pi->id;
+
+/* probesHash is keyed by the probe id written as a string */
+char pStr[128];
+safef(pStr, sizeof(pStr), "%d", probeId);
+el = hashLookup(bi->probesHash, pStr);
+if (!el)
+    return DBL_NULL;
+struct slPair *sp = el->val;
+
+el = hashLookup(bi->samplesHash, sample);
+if (!el)
+    return DBL_NULL;
+struct samples *sa = el->val;
+int expId = sa->exp_id;
+
+/* val stays NULL until data is loaded for the probe; do not dereference it */
+struct probeVals *pv = sp->val;
+if (pv == NULL)
+    return DBL_NULL;
+if (expId < 0 || expId >= pv->sample_count)
+    return DBL_NULL;
+return pv->sample_data[expId];
+}
+
+boolean calcMedianMAD(struct biOmics *bi, double *median, double *mad)
+/* Pool the values of every probe in bi->probes (first probeVals record of
+ * each) over every sample in bi->samples, then store the pooled median in
+ * *median and a spread estimate in *mad.  Despite the name, the spread is
+ * not a median absolute deviation: it is one third of the larger distance
+ * from the median to the values at the 0.15% and 99.85% positions of the
+ * sorted pool.  Returns FALSE, leaving the outputs untouched, when fewer
+ * than two values are available; TRUE otherwise. */
+{
+int count = 0;
+
+struct samples *sa;
+struct slPair *sp;
+struct slDouble *sd, *sdList = NULL;
+for (sp = bi->probes; sp; sp = sp->next)
+    {    
+    struct probeVals *pv = sp->val; 
+    if (pv == NULL)
+        continue;    /* probe registered but no data attached to it */
+    for (sa = bi->samples; sa; sa = sa->next)
+        {
+        sd = slDoubleNew(pv->sample_data[sa->exp_id]);
+        slAddHead(&sdList, sd);
+        count++;
+        }
+    }
+
+if (count <= 1)
+    {
+    slFreeList(&sdList);
+    return FALSE;
+    }
+double medianTmp = slDoubleMedian(sdList);
+
+/* Manually calculate approx std dev according to 99.7% cut-off (3 sigma).
+ * slSort takes the ADDRESS of the list head; passing the head itself left
+ * the first element out of the sort. */
+slSort(&sdList, slDoubleCmp);
+int low = round(count * (0.0015));
+int high = round(count * (1.0 - 0.0015));
+
+/* For small pools the upper position rounds up to count, one past the last
+ * element; clamp it so slElementFromIx cannot return NULL. */
+if (high > count - 1)
+    high = count - 1;
+
+sd = slElementFromIx(sdList, low);
+double lowVal = sd->val;
+
+sd = slElementFromIx(sdList, high);
+double highVal = sd->val;
+
+*median = medianTmp;
+*mad = max(fabs(lowVal - medianTmp), fabs(highVal - medianTmp))/3.0;
+
+slFreeList(&sdList);
+return TRUE;
+}
+
+///* Calculate median absolute deviation for a more 
+// * robust measure than standard deviation */
+//for (sd = sdList; sd; sd = sd->next) 
+//   sd->val = fabs(sd->val - medianTmp);
+
+//*median = medianTmp;
+//*mad = slDoubleMedian(sdList);
+//slFreeList(&sdList);
+//return TRUE;
+//}
+
+
+void probeValsConvertToLogP(struct biOmics *bi)
+/* Transform, in place, the value of every probe (first probeVals record)
+ * for every sample into a signed -log10(p-value).  The p-value comes from
+ * the normal distribution function applied to the negated absolute z-score,
+ * where z is taken against the median and spread from calcMedianMAD.  The
+ * magnitude is capped at 88 and the sign follows z.  Leaves the data
+ * untouched when calcMedianMAD fails or the spread is zero. */
+{
+double center, spread;
+if (!calcMedianMAD(bi, &center, &spread))
+    return;
+
+/* To estimate std deviation from mad, use std = 1.43 * mad 
+ *    Brideau et al. J Biomol Screen, 2003 */
+double sigma = spread * 1.43;
+
+fprintf(stderr, "median = %f, mad = %f, std = %f\n", center, spread, sigma);
+if (sigma == 0.0)
+    return;
+
+double maxLogP = 88.0;  /* cap, also used when the p-value comes back zero */
+struct slPair *probe;
+for (probe = bi->probes; probe != NULL; probe = probe->next)
+    {
+    struct probeVals *rec = probe->val;
+    struct samples *sample;
+    for (sample = bi->samples; sample != NULL; sample = sample->next)
+        {
+        double z = (rec->sample_data[sample->exp_id] - center)/sigma;
+        double p = ndtr(-1.0*fabs(z));
+
+        double logP = maxLogP;
+        if (p > 0)
+            logP = min(-log(p)/log(10.0), maxLogP);
+
+        /* signed log(p-value): negative when below the median */
+        rec->sample_data[sample->exp_id] = (z < 0.0) ? -1.0*logP : logP;
+        }
+    }
+}
+
+void probeValsConvertToZscores(struct biOmics *bi)
+/* Transform, in place, the value of every probe (first probeVals record)
+ * for every sample into a z-score centered on the median from
+ * calcMedianMAD.  Leaves the data untouched when calcMedianMAD fails or the
+ * spread is zero. */
+{
+double center, spread;
+if (!calcMedianMAD(bi, &center, &spread))
+    return;
+
+/* To estimate std deviation from mad, use std = 1.43 * mad 
+ *    Brideau et al. J Biomol Screen, 2003 */
+double sigma = spread * 1.43;
+fprintf(stderr, "median = %f, mad = %f, std = %f\n", center, spread, sigma);
+if (sigma == 0.0)
+    return;
+
+struct slPair *probe;
+for (probe = bi->probes; probe != NULL; probe = probe->next)
+    {
+    struct probeVals *rec = probe->val;
+    struct samples *sample;
+    for (sample = bi->samples; sample != NULL; sample = sample->next)
+        {
+        double z = (rec->sample_data[sample->exp_id] - center)/sigma;
+        rec->sample_data[sample->exp_id] = z;
+        }
+    }
+}
+
+void probeSampleValConvertToLogP(struct biOmics *bi)
+/* Placeholder: no log-p conversion is done for probeSampleVal data. */
+{
+}
+
+void probeSampleValConvertToZscores(struct biOmics *bi)
+/* Placeholder: no z-score conversion is done for probeSampleVal data. */
+{
+}
+
+struct biData *probeSampleValForProbe(struct biOmics *bi, char *probe)
+/* Collect the values of the named probe: for each probeSampleVal record
+ * attached to the probe, find the first sample in bi->samples whose exp_id
+ * equals the record's sample_id and store the record's value under that
+ * sample's name.  Records with no matching sample are dropped.  Returns NULL
+ * if the probe is unknown.
+ * The match used to read "!sa->exp_id == pv->sample_id", which negates
+ * exp_id before comparing; it now tests for inequality as the sibling
+ * probeSampleValForSample does. */
+{
+struct hashEl *el = hashLookup(bi->probeInfoHash, probe);
+if (!el)
+    return NULL;
+struct probeInfo *pi = el->val;
+int probeId = pi->id;
+
+/* probesHash is keyed by the probe id written as a string */
+char pStr[128];
+safef(pStr, sizeof(pStr), "%d", probeId);
+el = hashLookup(bi->probesHash, pStr);
+if (!el)
+    return NULL;
+struct slPair *sp = el->val;
+
+struct biData *bd = biDataNew(probe);
+
+struct samples *sa;
+struct probeSampleVal *pv;
+struct slDouble *sd;
+for (pv = sp->val; pv; pv = pv->next)
+    {
+    for (sa = bi->samples; sa; sa = sa->next)
+        {
+        if (sa->exp_id != pv->sample_id)
+            continue;
+
+        sd = slDoubleNew(pv->val);
+        slAddHead(&bd->data, sd);
+        hashAdd(bd->hash, sa->name, sd);
+        break;
+        }
+    }
+slReverse(&bd->data);
+return bd;
+}
+
+struct biData *probeSampleValForSample(struct biOmics *bi, char *sample)
+/* Collect, for the named sample, the value of every probeSampleVal record
+ * in bi->probes whose sample_id equals the sample's exp_id.  Values are
+ * listed in probe order and hashed by probe name.  Returns NULL if the
+ * sample is unknown. */
+{
+struct hashEl *hit = hashLookup(bi->samplesHash, sample);
+if (hit == NULL)
+    return NULL;
+struct samples *found = hit->val;
+int wanted = found->exp_id;
+
+struct biData *result = biDataNew(sample);
+
+struct slPair *probe = bi->probes;
+while (probe != NULL)
+    {
+    struct probeSampleVal *rec = probe->val;
+    while (rec != NULL)
+        {
+        if (rec->sample_id == wanted)
+            {
+            struct slDouble *cell = slDoubleNew(rec->val);
+            slAddHead(&result->data, cell);
+            hashAdd(result->hash, probe->name, cell);
+            }
+        rec = rec->next;
+        }
+    probe = probe->next;
+    }
+
+slReverse(&result->data);
+return result;
+}
+
+double probeSampleValForProbeSample(struct biOmics *bi, 
+                                    char *probe, char *sample)
+/* Return the value of the named probe in the named sample: the val of the
+ * first probeSampleVal record attached to the probe whose sample_id equals
+ * the sample's exp_id.  Returns DBL_NULL when the probe or sample is
+ * unknown or no record matches. */
+{
+struct hashEl *hit = hashLookup(bi->probeInfoHash, probe);
+if (hit == NULL)
+    return DBL_NULL;
+struct probeInfo *info = hit->val;
+
+/* probesHash is keyed by the probe id written as a string */
+char idKey[128];
+safef(idKey, sizeof(idKey), "%d", info->id);
+hit = hashLookup(bi->probesHash, idKey);
+if (hit == NULL)
+    return DBL_NULL;
+struct slPair *holder = hit->val;
+
+hit = hashLookup(bi->samplesHash, sample);
+if (hit == NULL)
+    return DBL_NULL;
+struct samples *found = hit->val;
+int wanted = found->exp_id;
+
+struct probeSampleVal *rec = holder->val;
+while (rec != NULL)
+    {
+    if (rec->sample_id == wanted)
+        return rec->val;
+    rec = rec->next;
+    }
+
+return DBL_NULL;
+}
+
+void setDataType(struct sqlConnection *conn, struct biOmics *bi)
+/* Look up the dataTypes row for the type_id of bi's dataset, store a copy
+ * of its name in bi->type, and install the data methods that match its
+ * format ("probeVals" or "probeSampleVal").  Aborts when no row has that id
+ * or when the format is neither of the two. */
+{
+struct datasets *da = bi->dataset;
+
+char query[128];
+safef(query, sizeof(query),
+      "select * from dataTypes where id = %d",
+      da->type_id);
+
+struct dataTypes *dt = dataTypesLoadByQuery(conn, query);
+if (dt == NULL)
+    errAbort("Datatype with id = %d not found in database", da->type_id);
+
+bi->type = cloneString(dt->name);
+
+if (sameString(dt->format, "probeSampleVal"))
+    {
+    /* one record per (probe, sample) pair */
+    bi->loadData           = loadProbeSampleValData;
+    bi->freeData           = slPairSampleValFreeList;
+    bi->dataForProbe       = probeSampleValForProbe;
+    bi->dataForSample      = probeSampleValForSample;
+    bi->dataForProbeSample = probeSampleValForProbeSample;
+    bi->toZscores          = probeSampleValConvertToZscores;
+    bi->toLogP             = probeSampleValConvertToLogP;
+    }
+else if (sameString(dt->format, "probeVals"))
+    {
+    /* one record per probe, carrying an array of sample values */
+    bi->loadData           = loadProbeValsData;
+    bi->freeData           = slPairValsFreeList;
+    bi->dataForProbe       = probeValsForProbe;
+    bi->dataForSample      = probeValsForSample;
+    bi->dataForProbeSample = probeValsForProbeSample;
+    bi->toZscores          = probeValsConvertToZscores;
+    bi->toLogP             = probeValsConvertToLogP;
+    }
+else
+    errAbort("Unrecognized datatype");
+
+dataTypesFreeList(&dt);
+}
+
+void loadDataset(struct sqlConnection *conn, struct biOmics *bi)
+/* Load into bi->dataset the datasets row whose data_table equals bi->name,
+ * then set up the type-specific methods through setDataType.  Does nothing
+ * when bi->name is NULL; aborts when no such row exists. */
+{
+if (bi->name == NULL)
+    return;
+
+/* 'limit 1' guarantees that at most one dataset gets loaded */
+char query[256];
+safef(query, sizeof(query),
+      "select * from datasets where data_table = '%s' limit 1;",
+      bi->name);
+
+bi->dataset = datasetsLoadByQuery(conn, query);
+if (bi->dataset == NULL)
+    errAbort("No datasets named %s found in database.", bi->name);
+
+setDataType(conn, bi);
+}
+
+/* End database functions */
+
+struct biOmics *biOmicsMatchDataset(struct biOmics *biList, char *name)
+/* Return the first element of biList whose name equals name, or NULL when
+ * name is NULL or nothing matches. */
+{
+if (name == NULL)
+    return NULL;
+
+struct biOmics *cur = biList;
+while (cur != NULL && !sameString(cur->name, name))
+    cur = cur->next;
+return cur;
+}
+
+int biOmicsPopulateAll(struct biOmics *bi, struct biQuery *bq)
+/* Fill bi from the database: the dataset record, the samples selected by
+ * bq, the info of every probe, and all of the data.  Always returns 0. */
+{
+struct sqlConnection *conn = hAllocConnProfile(heatMapDbProfile, bi->db);
+
+loadDataset(conn, bi);
+loadSamples(conn, bi, bq);
+loadProbeInfo(conn, bi, NULL, TRUE);    /* no probe list: take them all */
+bi->loadData(conn, bi, TRUE);
+
+hFreeConn(&conn);
+return 0;
+}
+
+int biOmicsPopulate(struct biOmics *bi, struct biQuery *bq)
+/* Fill bi from the database according to bq: the dataset record, the
+ * selected samples, the probes reached through bq's pathway, gene and probe
+ * lists, and then the data.  When DEBUG is set each stage is timed with
+ * uglyTime.  Always returns 0. */
+{
+struct sqlConnection *conn = hAllocConnProfile(heatMapDbProfile, bi->db);
+
+if (DEBUG)
+    uglyTime(NULL);
+
+loadDataset(conn, bi);
+if (DEBUG)
+    uglyTime("Load Dataset");
+
+loadSamples(conn, bi, bq);
+if (DEBUG)
+    uglyTime("Load Samples");
+
+/* probes can arrive by three routes; each loader ignores an empty list */
+loadPathways(conn, bi, bq->pathwayList);
+if (DEBUG)
+    uglyTime("Load Pathways");
+
+loadGenes(conn, bi, bq->geneList);
+if (DEBUG)
+    uglyTime("Load Genes");
+
+loadProbeInfo(conn, bi, bq->probeList, FALSE);
+if (DEBUG)
+    uglyTime("Load Probe Info");
+
+bi->loadData(conn, bi, FALSE);
+if (DEBUG)
+    uglyTime("Load Probe");
+
+hFreeConn(&conn);
+return 0;
+}
+
+int biOmicsPopulateRandom(struct biOmics *bi, struct biQuery *bq, int numProbes)
+/* Fill bi from the database with the dataset record, the samples selected
+ * by bq, numProbes randomly chosen probes, and their data.  When DEBUG is
+ * set each stage is timed with uglyTime.  Always returns 0. */
+{
+struct sqlConnection *conn = hAllocConnProfile(heatMapDbProfile, bi->db);
+
+if (DEBUG)
+    uglyTime(NULL);
+
+loadDataset(conn, bi);
+if (DEBUG)
+    uglyTime("Load Dataset");
+
+loadSamples(conn, bi, bq);
+if (DEBUG)
+    uglyTime("Load Samples");
+
+loadProbeInfoRandom(conn, bi, numProbes);
+if (DEBUG)
+    uglyTime("Load Random Probe Info");
+
+bi->loadData(conn, bi, FALSE);
+if (DEBUG)
+    uglyTime("Load Probe");
+
+hFreeConn(&conn);
+return 0;
+}
+
+struct slName *biOmicsGetSamples(struct biOmics *bi)
+/* Return a new slName list of the sample names in bi->samples, in the same
+ * order as that list.  Returns NULL when there are no samples. */
+{
+struct slName *names = NULL;
+struct samples *cur = bi->samples;
+while (cur != NULL)
+    {
+    slNameAddHead(&names, cur->name);
+    cur = cur->next;
+    }
+/* head insertion built the list backwards */
+slReverse(&names);
+return names;
+}
+
+struct slName *biOmicsGetProbes(struct biOmics *bi)
+/* Return a new slName list of the probe names in bi->probeInfo, in the same
+ * order as that list.  Returns NULL when there are no probes. */
+{
+struct slName *names = NULL;
+struct probeInfo *cur = bi->probeInfo;
+while (cur != NULL)
+    {
+    slNameAddHead(&names, cur->name);
+    cur = cur->next;
+    }
+/* head insertion built the list backwards */
+slReverse(&names);
+return names;
+}
+
+void populateAliases(struct biOmics *bi)
+/* Fill bi->geneAliases from the database: for every distinct pair of gene
+ * symbol (kgXref) and probe name reached through the dataset's probe and
+ * probe-to-gene tables, add the cloned probe name to the hash under the
+ * gene symbol.  Opens and releases its own connection. */
+{
+struct sqlConnection *conn = hAllocConnProfile(heatMapDbProfile, bi->db);
+char *probeTable = bi->dataset->probe_table;
+char *p2gTable = bi->dataset->probe_to_gene_table;
+
+fprintf(stderr, "loading gene aliases...\n");
+if (DEBUG)
+    uglyTime(NULL);
+
+char query[1024];
+safef(query, sizeof(query),
+      "select DISTINCT kgXref.geneSymbol, %s.name from %s "
+      "join %s on %s.id = %s.probe_id "
+      "join geneLookup on geneLookup.id = %s.gene_id "
+      "join kgXref on kgXref.kgId = geneLookup.kgId;",
+      probeTable, probeTable, p2gTable,
+      probeTable, p2gTable, p2gTable);
+
+struct sqlResult *sr = sqlGetResult(conn, query);
+char **row;
+for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
+    {
+    /* column 0 is the gene symbol, column 1 the probe name */
+    hashAdd(bi->geneAliases, row[0], cloneString(row[1]));
+    }
+sqlFreeResult(&sr);
+
+if (DEBUG)
+    uglyTime("loaded gene aliases");
+hFreeConn(&conn);
+}
+
+struct slName *biOmicsGetProbesForGene(struct biOmics *bi, char *gene)
+/* Return a new slName list of the probe names stored under gene in
+ * bi->geneAliases.  The alias hash is filled through populateAliases first
+ * whenever it is empty.  Returns NULL when the gene has no probes. */
+{
+if (hashNumEntries(bi->geneAliases) == 0)
+    populateAliases(bi);
+
+struct slName *probes = NULL;
+struct hashEl *hit = hashLookup(bi->geneAliases, gene);
+while (hit != NULL)
+    {
+    slNameAddHead(&probes, (char *) hit->val);
+    hit = hashLookupNext(hit);
+    }
+return probes;
+}
+
+
+
+struct biOmics *newBiOmics(char *db, char *dataset)
+/* Allocate a biOmics for the given database and dataset name.  The names
+ * are copied, the hashes are created empty, the lists start out NULL, and
+ * the generic methods are installed.  The data-type specific methods stay
+ * NULL until the data type is known. */
+{
+struct biOmics *bi;
+AllocVar(bi);
+
+/* identity */
+bi->db   = cloneString(db);
+bi->name = cloneString(dataset);
+
+/* lists, filled in by the populate methods */
+bi->dataset   = NULL;
+bi->samples   = NULL;
+bi->probeInfo = NULL;
+bi->probes    = NULL;
+
+/* lookup tables */
+bi->sampleIndices = hashNew(0);
+bi->geneAliases   = hashNew(0);
+bi->samplesHash   = hashNew(0);
+bi->probeInfoHash = hashNew(0);
+bi->probesHash    = hashNew(0);
+
+/* methods common to every data type */
+bi->populate       = biOmicsPopulate;
+bi->populateAll    = biOmicsPopulateAll;
+bi->populateRandom = biOmicsPopulateRandom;
+bi->allProbes      = biOmicsGetProbes;
+bi->allSamples     = biOmicsGetSamples;
+bi->probesForGene  = biOmicsGetProbesForGene;
+
+/* methods set later according to dataType */
+bi->loadData           = NULL;
+bi->freeData           = NULL;
+bi->dataForProbe       = NULL;
+bi->dataForSample      = NULL;
+bi->dataForProbeSample = NULL;
+bi->toZscores          = NULL;
+
+return bi;
+}
+
+
+void slPairSampleValFree(struct slPair **pEl)
+/* Free one slPair whose val is a list of probeSampleVal records, together
+ * with its name and the whole record list, then set *pEl to NULL.  Does
+ * nothing when *pEl is already NULL.
+ * The list is built one record per row by loadProbeSampleValData, so it is
+ * released with the list variant of the free routine; freeing a single
+ * element left the rest of the list behind. */
+{
+struct slPair *el;
+
+if ((el = *pEl) == NULL) return;
+
+freeMem(el->name);
+struct probeSampleVal *pv = el->val;
+probeSampleValFreeList(&pv);
+freez(pEl);
+}
+
+void slPairSampleValFreeList(struct slPair **pList)
+/* Free every slPair on *pList through slPairSampleValFree, then set *pList
+ * to NULL. */
+{
+struct slPair *cur = *pList;
+while (cur != NULL)
+    {
+    /* grab the link first; the element is gone after the free */
+    struct slPair *rest = cur->next;
+    slPairSampleValFree(&cur);
+    cur = rest;
+    }
+*pList = NULL;
+}
+
+void slPairValsFree(struct slPair **pEl)
+/* Free one slPair whose val is a list of probeVals records, together with
+ * its name and the whole record list, then set *pEl to NULL.  Does nothing
+ * when *pEl is already NULL. */
+{
+struct slPair *pair = *pEl;
+if (pair == NULL)
+    return;
+
+struct probeVals *records = pair->val;
+probeValsFreeList(&records);
+freeMem(pair->name);
+freez(pEl);
+}
+
+void slPairValsFreeList(struct slPair **pList)
+/* Free every slPair on *pList through slPairValsFree, then set *pList to
+ * NULL. */
+{
+struct slPair *cur = *pList;
+while (cur != NULL)
+    {
+    /* grab the link first; the element is gone after the free */
+    struct slPair *rest = cur->next;
+    slPairValsFree(&cur);
+    cur = rest;
+    }
+*pList = NULL;
+}
+
+void biOmicsFree(struct biOmics **pEl)
+/* Free a biOmics and what it owns, then set *pEl to NULL.  Does nothing
+ * when *pEl is already NULL.
+ * Compared with the earlier version this also releases the struct itself,
+ * the type string cloned by setDataType, and the probe names cloned into
+ * geneAliases by populateAliases, and it tolerates a biOmics whose data
+ * type was never set (freeData still NULL).
+ * NOTE(review): freeHashAndVals assumes every geneAliases value is a
+ * separately allocated string; confirm no other code stores anything else. */
+{
+struct biOmics *el;
+
+if ((el = *pEl) == NULL) return;
+
+freeMem(el->db);
+freeMem(el->name);
+freeMem(el->type);
+
+freeHash(&el->sampleIndices);
+freeHashAndVals(&el->geneAliases);
+
+datasetsFreeList(&el->dataset);
+samplesFreeList(&el->samples);
+freeHash(&el->samplesHash);
+
+probeInfoFreeList(&el->probeInfo);
+freeHash(&el->probeInfoHash);
+
+/* freeData is installed by setDataType; it is NULL before that */
+if (el->freeData != NULL)
+    el->freeData(&el->probes);
+freeHash(&el->probesHash);
+
+freez(pEl);
+}
+
+void biOmicsFreeList(struct biOmics **pList)
+/* Free every biOmics on *pList through biOmicsFree, then set *pList to
+ * NULL. */
+{
+struct biOmics *cur = *pList;
+while (cur != NULL)
+    {
+    /* grab the link before the element is handed to the free routine */
+    struct biOmics *rest = cur->next;
+    biOmicsFree(&cur);
+    cur = rest;
+    }
+*pList = NULL;
+}
+
+/*** biResults code ***/
+struct slName *biResultsGetDatasets(struct biResults *br)
+/* Return a new slName list of the names of the datasets in br.  Names are
+ * pushed on the head, so the list comes out in reverse dataset order. */
+{
+struct slName *names = NULL;
+struct biOmics *cur = br->datasets;
+while (cur != NULL)
+    {
+    slNameAddHead(&names, cur->name);
+    cur = cur->next;
+    }
+return names;
+}
+
+struct slName *biResultsGetProbesInDataset(struct biResults *br, char *dataset)
+/* Return the probe names of the dataset in br called dataset, as produced
+ * by that dataset's allProbes method, or NULL when br has no such dataset. */
+{
+struct biOmics *match = biOmicsMatchDataset(br->datasets, dataset);
+return (match != NULL) ? match->allProbes(match) : NULL;
+}
+
+struct slName *biResultsGetProbes(struct biResults *br)
+/* Return the probe names of all datasets in br with duplicates removed by
+ * slNameUniqueList.  Returns NULL when no dataset has probes. */
+{
+struct slName *all = NULL;
+struct biOmics *cur = br->datasets;
+while (cur != NULL)
+    {
+    all = slCat(all, cur->allProbes(cur));
+    cur = cur->next;
+    }
+return slNameUniqueList(all);
+}
+
+struct slName *biResultsGetSamplesInDataset(struct biResults *br, char *dataset)
+/* Return the sample names of the dataset in br called dataset, as produced
+ * by that dataset's allSamples method, or NULL when br has no such dataset. */
+{
+struct biOmics *match = biOmicsMatchDataset(br->datasets, dataset);
+return (match != NULL) ? match->allSamples(match) : NULL;
+}
+
+struct slName *biResultsGetSamples(struct biResults *br)
+/* Concatenate the allSamples result of every dataset in br and return that
+ * list passed through slNameUniqueList.
+ * NOTE(review): confirm whether slNameUniqueList releases its input list. */
+{
+struct slName *combined = NULL;
+struct biOmics *ds = br->datasets;
+while (ds != NULL)
+    {
+    combined = slCat(combined, ds->allSamples(ds));
+    ds = ds->next;
+    }
+return slNameUniqueList(combined);
+}
+
+
+struct slName *getMatching(struct slName *list1, struct slName *list2)
+/* Build a list of the names of list1 that also occur (sameString) in list2.
+ * A name is only added when slNameInList does not already find it in the
+ * result.  Returns NULL when either input list is NULL.  The element counts
+ * of both lists are written to stderr before matching. */
+{
+if (list1 == NULL || list2 == NULL)
+    return NULL;
+
+int count1 = slCount(list1);
+int count2 = slCount(list2);
+fprintf(stderr, "matching: count(list1) = %d, count(list2) = %d\n",
+        count1, count2);
+
+struct slName *common = NULL;
+struct slName *a = list1;
+while (a != NULL)
+    {
+    struct slName *b;
+    for (b = list2; b != NULL; b = b->next)
+        {
+        if (!sameString(a->name, b->name))
+            continue;
+        if (slNameInList(common, a->name))
+            continue;
+        slNameAddHead(&common, a->name);
+        }
+    a = a->next;
+    }
+
+return common;
+}
+
+struct slName *biResultsGetSamplesInCommon(struct biResults *br)
+/* Return the sample names shared by all datasets in br.  The list starts as
+ * the allSamples result of the first dataset and is narrowed with
+ * getMatching against each later dataset's samples; the intermediate lists
+ * are freed along the way.  Returns NULL when br holds no datasets. */
+{
+struct biOmics *bi = br->datasets;
+if (bi == NULL)
+    return NULL;    /* no datasets: nothing to intersect, nothing to dereference */
+
+/* Start off the list */
+struct slName *matched = bi->allSamples(bi);
+
+for (bi = bi->next; bi != NULL; bi = bi->next)
+    {
+    struct slName *prevMatched = matched;
+    struct slName *samples = bi->allSamples(bi);
+
+    matched = getMatching(prevMatched, samples);
+
+    slNameFreeList(&samples);
+    slNameFreeList(&prevMatched);
+    }
+
+return matched;
+}
+
+
+struct slName *biResultsProbesForGeneInDataset(struct biResults *br, char *gene, char *dataset)
+/* Return the result of the probesForGene method, for the given gene, of the
+ * dataset that biOmicsMatchDataset finds, or NULL if none is found. */
+{
+struct biOmics *match = biOmicsMatchDataset(br->datasets, dataset);
+if (match == NULL)
+    return NULL;
+
+struct slName *probes = match->probesForGene(match, gene);
+return probes;
+}
+
+struct slName *biResultsProbesForGene(struct biResults *br, char *gene)
+/* Concatenate the probesForGene result of every dataset in br for the given
+ * gene and return that list passed through slNameUniqueList.
+ * NOTE(review): confirm whether slNameUniqueList releases its input list. */
+{
+struct slName *combined = NULL;
+struct biOmics *ds = br->datasets;
+while (ds != NULL)
+    {
+    combined = slCat(combined, ds->probesForGene(ds, gene));
+    ds = ds->next;
+    }
+return slNameUniqueList(combined);
+}
+
+struct biData *biResultsDataForProbeInDataset(struct biResults *br, char *probe, char *dataset)
+/* Return the result of the dataForProbe method, for the given probe, of the
+ * dataset that biOmicsMatchDataset finds, or NULL if none is found. */
+{
+struct biOmics *match = biOmicsMatchDataset(br->datasets, dataset);
+if (match == NULL)
+    return NULL;
+
+struct biData *data = match->dataForProbe(match, probe);
+return data;
+}
+
+struct biData *biResultsDataForProbe(struct biResults *br, char *probe)
+/* Gather the data for one probe from every dataset in br.  Each biData
+ * returned by a dataset's dataForProbe method gets the dataset name appended
+ * to its name (biDataAppendName) and is pushed onto the head of the result
+ * list.  Datasets whose dataForProbe yields NULL are skipped.
+ * NOTE(review): presumably NULL means the probe is absent from that
+ * dataset -- confirm against the dataForProbe implementations. */
+{
+struct biData *bd,*bdList = NULL;
+struct biOmics *bi;
+for (bi = br->datasets; bi; bi = bi->next)
+    {
+    bd = bi->dataForProbe(bi, probe);
+    if (bd == NULL)
+        continue;   /* nothing to name or link; avoids a NULL dereference */
+    biDataAppendName(bd, bi->name);
+    slAddHead(&bdList, bd);
+    }
+return bdList;
+}
+
+struct biData *biResultsDataForSampleInDataset(struct biResults *br, char *sample,
+						 char *dataset)
+/* Return the result of the dataForSample method, for the given sample, of
+ * the dataset that biOmicsMatchDataset finds, or NULL if none is found. */
+{
+struct biOmics *match = biOmicsMatchDataset(br->datasets, dataset);
+if (match == NULL)
+    return NULL;
+
+struct biData *data = match->dataForSample(match, sample);
+return data;
+}
+
+struct biData *biResultsDataForSample(struct biResults *br, char *sample)
+/* Gather the data for one sample from every dataset in br.  Each biData
+ * returned by a dataset's dataForSample method receives a copy of the
+ * dataset's type, gets the dataset name appended to its name
+ * (biDataAppendName) and is pushed onto the head of the result list.
+ * Datasets whose dataForSample yields NULL are skipped.
+ * NOTE(review): presumably NULL means the sample is absent from that
+ * dataset -- confirm against the dataForSample implementations. */
+{
+struct biData *bd, *bdList = NULL;
+struct biOmics *bi;
+for (bi = br->datasets; bi; bi = bi->next)
+    {
+    bd = bi->dataForSample(bi, sample);
+    if (bd == NULL)
+        continue;   /* nothing to tag or link; avoids a NULL dereference */
+    bd->type = cloneString(bi->type);
+    biDataAppendName(bd, bi->name);
+    slAddHead(&bdList, bd);
+    }
+return bdList;
+}
+
+double biResultsDataForProbeSampleInDataset(struct biResults *br,
+					    char *probe, char *sample, char *dataset)
+/* Return the result of the dataForProbeSample method, for the given probe
+ * and sample, of the dataset that biOmicsMatchDataset finds.  Returns
+ * DBL_NULL if no dataset is found. */
+{
+struct biOmics *match = biOmicsMatchDataset(br->datasets, dataset);
+if (match == NULL)
+    return DBL_NULL;
+
+double value = match->dataForProbeSample(match, probe, sample);
+return value;
+}
+
+
+struct biData *biResultsDataForProbeSample(struct biResults *br, char *probe, char *sample)
+/* Build a biData named "<probe>-<sample>" holding one slDouble per dataset
+ * in br, each the result of that dataset's dataForProbeSample method.  The
+ * values end up in br->datasets order (added at the head, then reversed) and
+ * each one is also entered in the biData's hash under the dataset name.
+ * The name buffer is sized from the inputs, so long probe or sample names
+ * are not limited by a fixed-size buffer. */
+{
+int nameSize = (int)(strlen(probe) + strlen(sample) + 2);   /* '-' plus NUL */
+char *name = needMem(nameSize);
+safef(name, nameSize, "%s-%s", probe, sample);
+struct biData *bd = biDataNew(name);
+freeMem(name);      /* biDataNew keeps its own clone of the name */
+
+struct slDouble *sd;
+struct biOmics *bi;
+for (bi = br->datasets; bi; bi = bi->next)
+    {
+    double val = bi->dataForProbeSample(bi, probe, sample);
+    sd = slDoubleNew(val);
+    slAddHead(&bd->data, sd);
+    hashAdd(bd->hash, bi->name, sd);
+    }
+slReverse(&bd->data);
+return bd;
+}
+
+void biResultsConvertToZscores(struct biResults *br)
+/* Invoke the toZscores method of every dataset in br. */
+{
+struct biOmics *ds = br->datasets;
+while (ds != NULL)
+    {
+    ds->toZscores(ds);
+    ds = ds->next;
+    }
+}
+
+void biResultsConvertToLogP(struct biResults *br)
+/* Invoke the toLogP method of every dataset in br. */
+{
+struct biOmics *ds = br->datasets;
+while (ds != NULL)
+    {
+    ds->toLogP(ds);
+    ds = ds->next;
+    }
+}
+
+
+
+struct biResults *biResultsNew(void)
+/* Allocate a biResults with an empty dataset list and every method pointer
+ * bound to the matching biResults* function in this file. */
+{
+struct biResults *results;
+AllocVar(results);
+
+results->datasets = NULL;
+
+/* Listing methods */
+results->allDatasets = biResultsGetDatasets;
+results->allProbes = biResultsGetProbes;
+results->allProbesInDataset = biResultsGetProbesInDataset;
+results->allSamples = biResultsGetSamples;
+results->allSamplesInCommon = biResultsGetSamplesInCommon;
+results->allSamplesInDataset = biResultsGetSamplesInDataset;
+
+/* Gene-to-probe lookup methods */
+results->probesForGene = biResultsProbesForGene;
+results->probesForGeneInDataset = biResultsProbesForGeneInDataset;
+
+/* Data retrieval methods */
+results->dataForProbe = biResultsDataForProbe;
+results->dataForProbeInDataset = biResultsDataForProbeInDataset;
+results->dataForSample = biResultsDataForSample;
+results->dataForSampleInDataset = biResultsDataForSampleInDataset;
+results->dataForProbeSample = biResultsDataForProbeSample;
+results->dataForProbeSampleInDataset = biResultsDataForProbeSampleInDataset;
+
+/* Conversion methods */
+results->toZscores = biResultsConvertToZscores;
+results->toLogP = biResultsConvertToLogP;
+
+return results;
+}
+
+void biResultsFree(struct biResults **pEl)
+/* Free a biResults: its dataset list (biOmicsFreeList) and the struct
+ * itself, leaving *pEl set to NULL.  Does nothing when *pEl is NULL. */
+{
+struct biResults *el;
+
+if ((el = *pEl) == NULL) return;
+biOmicsFreeList(&el->datasets);
+
+/* The struct comes from AllocVar in biResultsNew; previously it was leaked
+ * and the caller's pointer was left pointing at it. */
+freeMem(el);
+*pEl = NULL;
+}
+
+
+void biResultsAddBiQuery(struct biResults *br, struct biQuery *bq)
+/* Create a biOmics for the query's db and dataset, fill it with its
+ * populateAll method when bq->getAllProbes is set and with populate
+ * otherwise, and put it at the head of br->datasets. */
+{
+struct biOmics *omics = newBiOmics(bq->db, bq->dataset);
+
+if (!bq->getAllProbes)
+    omics->populate(omics, bq);
+else
+    omics->populateAll(omics, bq);
+
+slAddHead(&br->datasets, omics);
+}
+
+struct biResults *biQueryResults(struct biQuery *bqList)
+/* Create a new biResults and add every query of bqList to it with
+ * biResultsAddBiQuery. */
+{
+struct biResults *results = biResultsNew();
+struct biQuery *query = bqList;
+while (query != NULL)
+    {
+    biResultsAddBiQuery(results, query);
+    query = query->next;
+    }
+return results;
+}
+
+void biResultsAddBiQueryRandom(struct biResults *br, struct biQuery *bq, int numProbes)
+/* Create a biOmics for the query's db and dataset, fill it through its
+ * populateRandom method with the given numProbes, and put it at the head of
+ * br->datasets. */
+{
+struct biOmics *omics = newBiOmics(bq->db, bq->dataset);
+
+omics->populateRandom(omics, bq, numProbes);
+slAddHead(&br->datasets, omics);
+}
+
+struct biResults *biQueryResultsRandomize(struct biQuery *bqList, int numProbes)
+/* Create a new biResults and add every query of bqList to it with
+ * biResultsAddBiQueryRandom, passing numProbes along. */
+{
+struct biResults *results = biResultsNew();
+struct biQuery *query = bqList;
+while (query != NULL)
+    {
+    biResultsAddBiQueryRandom(results, query, numProbes);
+    query = query->next;
+    }
+return results;
+}
+
+/*** End biResults code ***/
+
+
+/*** biQuery code ****/
+
+
+void biQueryAppend(struct biQuery **bqList, struct biQuery *bq)
+/* Add bq to *bqList using slAddHead.  A NULL bqList pointer is ignored. */
+{
+if (bqList != NULL)
+    {
+    slAddHead(bqList, bq);
+    }
+}
+
+int biQueryAddPathways(struct biQuery *bq, char *pathways, char sep)
+/* Add the sep-delimited entries of pathways to bq->pathwayList through
+ * addDelimStringToList and return its result (0 when pathways is NULL,
+ * 1 otherwise). */
+{
+struct slName **target = &bq->pathwayList;
+return addDelimStringToList(target, pathways, sep);
+}
+
+int biQueryAddProbes(struct biQuery *bq, char *probes, char sep)
+/* Add the sep-delimited entries of probes to bq->probeList through
+ * addDelimStringToList and return its result (0 when probes is NULL,
+ * 1 otherwise). */
+{
+struct slName **target = &bq->probeList;
+return addDelimStringToList(target, probes, sep);
+}
+
+int biQueryAddGenes(struct biQuery *bq, char *genes, char sep)
+/* Add the sep-delimited entries of genes to bq->geneList through
+ * addDelimStringToList and return its result (0 when genes is NULL,
+ * 1 otherwise). */
+{
+struct slName **target = &bq->geneList;
+return addDelimStringToList(target, genes, sep);
+}
+
+int biQueryAddSamples(struct biQuery *bq, char *samples, char sep)
+/* Add the sep-delimited entries of samples to bq->sampleList through
+ * addDelimStringToList and return its result (0 when samples is NULL,
+ * 1 otherwise). */
+{
+struct slName **target = &bq->sampleList;
+return addDelimStringToList(target, samples, sep);
+}
+
+int biQueryAddFeatureVals(struct biQuery *bq, char *featureVals, char sep)
+/* Add the sep-delimited entries of featureVals to bq->featureValList through
+ * addDelimStringToList and return its result (0 when featureVals is NULL,
+ * 1 otherwise). */
+{
+struct slName **target = &bq->featureValList;
+return addDelimStringToList(target, featureVals, sep);
+}
+
+int biQueryAddFeatureCodes(struct biQuery *bq, char *featureCodes, char sep)
+/* Add the sep-delimited entries of featureCodes to bq->featureCodeList
+ * through addDelimStringToList and return its result (0 when featureCodes is
+ * NULL, 1 otherwise). */
+{
+struct slName **target = &bq->featureCodeList;
+return addDelimStringToList(target, featureCodes, sep);
+}
+
+struct biQuery *biQueryNew(char *db, char *dataset)
+/* Allocate a biQuery holding clones of db and dataset, with getAllProbes
+ * FALSE, all six selection lists empty, and the add* method pointers bound
+ * to the matching biQueryAdd* functions. */
+{
+struct biQuery *query;
+AllocVar(query);
+
+/* Identity of the data being queried */
+query->db = cloneString(db);
+query->dataset = cloneString(dataset);
+query->getAllProbes = FALSE;
+
+/* Selection lists start out empty */
+query->pathwayList = NULL;
+query->probeList = NULL;
+query->geneList = NULL;
+query->sampleList = NULL;
+query->featureValList = NULL;
+query->featureCodeList = NULL;
+
+/* Methods */
+query->addPathways = biQueryAddPathways;
+query->addProbes = biQueryAddProbes;
+query->addSamples = biQueryAddSamples;
+query->addGenes = biQueryAddGenes;
+query->addFeatureVals = biQueryAddFeatureVals;
+query->addFeatureCodes = biQueryAddFeatureCodes;
+
+return query;
+}
+
+void biQueryFree(struct biQuery **pEl)
+/* Free a biQuery: its db and dataset strings, its six slName selection
+ * lists, and the struct itself, leaving *pEl set to NULL.  Does nothing when
+ * *pEl is NULL. */
+{
+struct biQuery *el;
+if ((el = *pEl) == NULL) return;
+
+freeMem(el->db);
+freeMem(el->dataset);
+
+slNameFreeList(&el->pathwayList);
+slNameFreeList(&el->probeList);
+slNameFreeList(&el->geneList);
+slNameFreeList(&el->sampleList);
+slNameFreeList(&el->featureValList);
+slNameFreeList(&el->featureCodeList);
+
+/* The struct comes from AllocVar in biQueryNew; previously it was leaked
+ * and the caller's pointer still referred to the gutted element. */
+freeMem(el);
+*pEl = NULL;
+}
+
+void biQueryFreeList(struct biQuery **pList)
+/* Run biQueryFree on every element of *pList, then set *pList to NULL. */
+{
+struct biQuery *cur = *pList;
+while (cur != NULL)
+    {
+    struct biQuery *following = cur->next;   /* saved before cur is released */
+    biQueryFree(&cur);
+    cur = following;
+    }
+*pList = NULL;
+}
+
+void biDataAppendName(struct biData *bd, char *name)
+/* Extend the name of bd with name.  When bd has no name yet it simply
+ * receives a clone of name; otherwise the name becomes "<old>,<name>".
+ * A NULL name leaves an existing bd->name unchanged.  The joined string is
+ * sized from its parts, so it is not limited by a fixed-size buffer. */
+{
+if (!bd->name)
+    {
+    bd->name = cloneString(name);
+    return;
+    }
+if (name == NULL)
+    return;     /* nothing to append; avoids passing NULL to %s */
+
+int size = (int)(strlen(bd->name) + strlen(name) + 2);    /* ',' plus NUL */
+char *joined = needMem(size);
+safef(joined, size, "%s,%s", bd->name, name);
+freeMem(bd->name);
+bd->name = joined;
+}
+
+struct biData *biDataFind(struct biData *bdList, char *name)
+/* Return the first element of bdList whose name equals name (sameString),
+ * or NULL when there is none.  Elements without a name (biDataNew accepts a
+ * NULL name) are skipped, and a NULL name never matches. */
+{
+if (name == NULL)
+    return NULL;
+
+struct biData *bd;
+for (bd = bdList; bd; bd = bd->next)
+    {
+    /* Guard first: sameString must not be handed a NULL pointer. */
+    if (bd->name != NULL && sameString(bd->name, name))
+        return bd;
+    }
+return NULL;
+}
+
+struct biData *biDataNew(char *name)
+/* Allocate a biData with a clone of name (or a NULL name when name is NULL),
+ * no type, an empty data list and a fresh hash from hashNew(0). */
+{
+struct biData *fresh;
+AllocVar(fresh);
+
+fresh->name = (name != NULL) ? cloneString(name) : NULL;
+fresh->type = NULL;
+fresh->data = NULL;
+fresh->hash = hashNew(0);
+
+return fresh;
+}
+
+void biDataFree(struct biData **pEl)
+/* Free a biData: its name and type strings, its data list, its hash, and
+ * the struct itself, leaving *pEl set to NULL.  Does nothing when *pEl is
+ * NULL. */
+{
+struct biData *el;
+
+if ((el = *pEl) == NULL) return;
+
+freeMem(el->name);
+freeMem(el->type);
+slFreeList(&el->data);
+freeHash(&el->hash);
+
+/* The struct comes from AllocVar in biDataNew; previously it was leaked
+ * and the caller's pointer still referred to the gutted element. */
+freeMem(el);
+*pEl = NULL;
+}
+
+void biDataFreeList(struct biData **pList)
+/* Run biDataFree on every element of *pList, then set *pList to NULL. */
+{
+struct biData *cur = *pList;
+while (cur != NULL)
+    {
+    struct biData *following = cur->next;   /* saved before cur is released */
+    biDataFree(&cur);
+    cur = following;
+    }
+*pList = NULL;
+}
+
+/**** End biQuery and biData code *****/