src/hg/instinct/bioInt2/bioIntDriver.c 1.3

1.3 2009/04/27 06:15:48 jsanborn
updated lots of stuff, will break older implementation of database
Index: src/hg/instinct/bioInt2/bioIntDriver.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/bioIntDriver.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 4 -r1.2 -r1.3
--- src/hg/instinct/bioInt2/bioIntDriver.c	24 Mar 2009 05:21:54 -0000	1.2
+++ src/hg/instinct/bioInt2/bioIntDriver.c	27 Apr 2009 06:15:48 -0000	1.3
@@ -15,8 +15,27 @@
 static char *heatMapDbProfile = "localDb";
 
 /* Begin helper functions */
 
+struct hash *createIdHash(struct sqlConnection *biConn, char *tableName, char *fieldName)
+/* Return a new hash mapping each row's fieldName value to that row's integer id in tableName. */
+{
+struct hash *hash = hashNew(0);
+char query[128];
+safef(query, sizeof(query), "select id, %s from %s", fieldName, tableName);
+
+struct sqlResult *sr = sqlGetResult(biConn, query);
+char **row = NULL;
+while ((row = sqlNextRow(sr)) != NULL)
+    {
+    unsigned int id = sqlUnsigned(row[0]);
+    /* The hash keeps its own copy of the key, so cloning row[1] first only leaked the clone. */
+    hashAddInt(hash, row[1], id);
+    }
+sqlFreeResult(&sr);
+return hash;
+}  
+
 int addDelimStringToList(struct slName **list, char *str, char sep)
 {
 if (!str)
     return 0;
@@ -37,8 +56,20 @@
     fprintf(stdout, "%s ", sl->name);
 fprintf(stdout, "\n");
 }
 
+struct analysisVals *cloneAnalysisVals(struct analysisVals *av)
+{ /* Return a new element copying av's sample_id, feature_id, val and conf; NULL if av is NULL. */
+struct analysisVals *newAv;
+if (av == NULL) return NULL;  /* failed lookups (e.g. dataForFeatureSample) hand us NULL */
+AllocVar(newAv);
+newAv->sample_id = av->sample_id;
+newAv->feature_id = av->feature_id;
+newAv->val = av->val;
+newAv->conf = av->conf;
+return newAv;
+}
+
 struct hash *uniqueHashFromSlNameList(void *list)
 /* Create a unique hash out of a list of slNames or any kind of list where the */
 /* first field is the next pointer and the second is the name. 
  * -- Adapted from hashFromSlNameList in hash.h */
@@ -71,11 +102,48 @@
 slNameFreeList(&list);
 freeHash(&hash);
 return slList;
 }
+
+int analysisValsCmp(const void *va, const void *vb)
+/* Compare function to sort analysisVals by feature_id, then by sample_id. */
+{
+const struct analysisVals *a = *((struct analysisVals *const *)va);
+const struct analysisVals *b = *((struct analysisVals *const *)vb);
+
+/* Compare the ids directly instead of subtracting them: the difference of
+ * two ids need not fit in an int, which would give the wrong sign. */
+if (a->feature_id < b->feature_id)
+    return -1;
+else if (a->feature_id > b->feature_id)
+    return 1;
+
+// feature_id's are the same
+if (a->sample_id < b->sample_id)
+    return -1;
+else if (a->sample_id > b->sample_id)
+    return 1;
+
+return 0;
+}  
+
+void storeAnalysisValsInDb(struct sqlConnection *biConn, char *tableName,
+			   struct analysisVals *avList)
+{ /* Create tableName if it is missing, then save every element of avList in analysisValsCmp order. */
+if (!sqlTableExists(biConn, tableName))
+    createAnalysisValsTable(biConn, tableName);
+
+/* slSort takes the ADDRESS of the list head; afterwards the caller's own head may sit mid-list. */
+slSort(&avList, analysisValsCmp);
+struct analysisVals *av;
+for (av = avList; av; av = av->next)
+    analysisValsSaveToDb(biConn, av, tableName, 50);
+}   
+
 /* End helper functions */
 
 
+
 /* Begin database functions */
 void addSample(struct biOmics *bi, struct samples *sa)
 {
 struct hashEl *el = hashLookup(bi->samplesHash, sa->name);
@@ -274,237 +342,96 @@
 /* Load samples matching feature-code pairs, e.g. ER = Positive */
 loadSamplesMatchingFeatureCodes(conn, bi, bq->featureCodeList);
 }
 
-void addProbeInfo(struct biOmics *bi, struct probeInfo *pi)
+void addAnalysisFeature(struct biOmics *bi, struct analysisFeatures *af)
 {
-struct hashEl *el = hashLookup(bi->probeInfoHash, pi->name);
+struct hashEl *el = hashLookup(bi->featuresHash, af->feature_name);
 if (el)
     {
-    probeInfoFree(&pi);
+    analysisFeaturesFree(&af);
     return;  // already there;
     }
 
-slAddHead(&bi->probeInfo, pi);
-hashAdd(bi->probeInfoHash, pi->name, pi);
+slAddHead(&bi->features, af);
+hashAdd(bi->featuresHash, af->feature_name, af);
 
 /* Allocate space for slPair data */
-char probe_id[128];
-safef(probe_id, sizeof(probe_id), "%d", pi->id);
+char feature_id[128];
+safef(feature_id, sizeof(feature_id), "%d", af->id);
 struct slPair *sp;
 AllocVar(sp);
-sp->name = cloneString(pi->name);
+sp->name = cloneString(af->feature_name);
 sp->val = NULL;
-slAddHead(&bi->probes, sp);
-hashAdd(bi->probesHash, probe_id, sp);
+slAddHead(&bi->data, sp);
+hashAdd(bi->dataHash, feature_id, sp);
 }
 
-void loadPathways(struct sqlConnection *conn, struct biOmics *bi, struct slName *pathwayList)
+void loadGenesets(struct sqlConnection *conn, struct biOmics *bi, struct slName *genesetList)
 {
-if (!pathwayList)
-    return;
-
-struct slName *sl;
-struct datasets *da = bi->dataset;
-char *pInfo = da->probe_table;
-char *p2g = da->probe_to_gene_table;
-
-if (!pInfo || !p2g)
-    return;
-
-struct dyString *dy = newDyString(100);
-dyStringPrintf(dy, 
-	       "select %s.* from %s join %s on %s.probe_id = %s.id "
-	       "join %s on %s.gene_id = %s.gene_id "
-	       "join %s on %s.id = %s.id "
-	       "where %s.name in (",
-	       pInfo, pInfo, p2g, p2g, pInfo, 
-	       PG_TABLE, PG_TABLE, p2g, 
-	       PA_TABLE, PA_TABLE, PG_TABLE, 
-	       PA_TABLE);
-
-for (sl = pathwayList; sl; sl = sl->next)
-    {
-    dyStringPrintf(dy, "'%s'", sl->name);
-    if (sl->next)
-	dyStringPrintf(dy, ",");
-    }
-dyStringPrintf(dy, ");");
-char *query = dyStringCannibalize(&dy);
-
-struct sqlResult *sr = sqlGetResult(conn, query);
-char **row = NULL;
-while ((row = sqlNextRow(sr)) != NULL)
-    {
-    struct probeInfo *pi = probeInfoLoad(row);
-    addProbeInfo(bi, pi);
-    }
-sqlFreeResult(&sr);
+return;
 }
 
-void loadGenes(struct sqlConnection *conn, struct biOmics *bi, struct slName *geneList)
+void loadFeatures(struct sqlConnection *conn, struct biOmics *bi, 
+		  struct slName *featureList, boolean allData)
 {
-if (!geneList)
+if (!featureList && !allData)
     return;
 
 struct slName *sl;
-struct datasets *da = bi->dataset;
-char *pInfo = da->probe_table;
-char *p2g = da->probe_to_gene_table;
-
-if (!pInfo || !p2g)
-    return;
 
 struct dyString *dy = newDyString(100);
-dyStringPrintf(dy, 
-	       "select * from %s join %s on %s.probe_id = %s.id "
-	       "join %s on %s.id = %s.gene_id where kgId in (", 
-	       pInfo, p2g, p2g, pInfo, 
-	       GL_TABLE, GL_TABLE, p2g);
+dyStringPrintf(dy, "select * from %s ", AF_TABLE); 
 
-for (sl = geneList; sl; sl = sl->next)
+if (featureList)
     {
-    dyStringPrintf(dy, "'%s'", sl->name);
-    if (sl->next)
-	dyStringPrintf(dy, ",");
-    }
-dyStringPrintf(dy, ");");
-char *query = dyStringCannibalize(&dy);
-
-struct sqlResult *sr = sqlGetResult(conn, query);
-char **row = NULL;
-while ((row = sqlNextRow(sr)) != NULL)
-    {
-    struct probeInfo *pi = probeInfoLoad(row);
-    addProbeInfo(bi, pi);
-    }
-sqlFreeResult(&sr);
-}                  
-
-void loadProbeInfo(struct sqlConnection *conn, struct biOmics *bi, 
-		   struct slName *probeList, boolean allData)
-{
-if (!probeList && !allData)
-    return;
-
-struct datasets *da = bi->dataset;
-
-struct slName *sl;
-struct dyString *dy = newDyString(100);
-dyStringPrintf(dy, "select * from %s ", da->probe_table);
-
-if (probeList)
-    {
-    dyStringPrintf(dy, "where name in (");
-    for (sl = probeList; sl; sl = sl->next)
+    dyStringPrintf(dy, "where feature_name in (");
+    for (sl = featureList; sl; sl = sl->next)
 	{
 	dyStringPrintf(dy, "'%s'", sl->name);
 	if (sl->next)
 	    dyStringPrintf(dy, ",");
 	}
     dyStringPrintf(dy, ");");
     }
+dyStringPrintf(dy, " where type = \"gene\"");
 
 char *query = dyStringCannibalize(&dy);
 
 struct sqlResult *sr = sqlGetResult(conn, query);
 char **row = NULL;
 while ((row = sqlNextRow(sr)) != NULL)
     {
-    struct probeInfo *pi = probeInfoLoad(row);
-    addProbeInfo(bi, pi);
+    struct analysisFeatures *af = analysisFeaturesLoad(row);
+    addAnalysisFeature(bi, af);
     }
 sqlFreeResult(&sr);
 }
 
 
-void loadProbeInfoRandom(struct sqlConnection *conn, struct biOmics *bi, int numProbes)
-{
-if (numProbes <= 0)
-    return;
-
-struct datasets *da = bi->dataset;
-
-char q[256];
-safef(q, sizeof(q), "select max(id) from %s;", da->probe_table);
-int maxProbes = sqlQuickNum(conn, q);
-
-if (numProbes >= maxProbes) // what's the point?!?
-    errAbort("Number of random probes greater than probes in table!.");
-
-int i = 0, total = 0;
-char rStr[128];
-struct hash *hash = hashNew(0);
-while (i < numProbes && total < maxProbes * 2)
-    {
-    total++;
-    int r = random() % maxProbes;
-    safef(rStr, sizeof(rStr), "%d", r);
-    if (hashLookup(hash, rStr))
-	continue;
-    hashAddInt(hash, rStr, 1);
-    i++;
-    }
-struct slInt *si, *siList = NULL;
-struct hashCookie cookie = hashFirst(hash);
-char *name;
-while ((name = hashNextName(&cookie)) != NULL)
-    {
-    si = slIntNew(atoi(name));
-    slAddHead(&siList, si);
-    }
-slSort(&siList, slIntCmp);
-hashFree(&hash);
-
-if (slCount(siList) != numProbes)
-    errAbort("Random list not correct length %d != %d.", slCount(siList), numProbes);
-
-struct dyString *dy = newDyString(100);
-dyStringPrintf(dy, "select * from %s where id in (", da->probe_table);
-for (si = siList; si; si = si->next)
-    {
-    dyStringPrintf(dy, "%d", si->val);
-    if (si->next)
-	dyStringPrintf(dy, ",");
-    }
-dyStringPrintf(dy, ");");
-
-char *query = dyStringCannibalize(&dy);
-slFreeList(&siList);
-
-struct sqlResult *sr = sqlGetResult(conn, query);
-char **row = NULL;
-while ((row = sqlNextRow(sr)) != NULL)
-    {
-    struct probeInfo *pi = probeInfoLoad(row);
-    addProbeInfo(bi, pi);
-    }
-sqlFreeResult(&sr);
-}
-
-void loadProbeSampleValData(struct sqlConnection *conn, struct biOmics *bi, boolean allData)
+void loadAnalysisValsData(struct sqlConnection *conn, struct biOmics *bi, boolean allData)
 { /* boolean allData is unused in this function */
 struct datasets *da = bi->dataset;
 
 struct dyString *dy = newDyString(100);
 dyStringPrintf(dy, "select * from %s", da->data_table);
 
-if (bi->probeInfo)
+if (bi->features)
     {
-    struct probeInfo *pi;
-    dyStringPrintf(dy, " where probe_id in (");
-    for (pi = bi->probeInfo; pi; pi = pi->next)
+    struct analysisFeatures *af;
+    dyStringPrintf(dy, " where feature_id in (");
+    for (af = bi->features; af; af = af->next)
 	{
-	dyStringPrintf(dy, "%d", pi->id);
-	if (pi->next)
+	dyStringPrintf(dy, "%d", af->id);
+	if (af->next)
 	    dyStringPrintf(dy, ",");
 	}
     dyStringPrintf(dy, ")");
     }
 
 if (bi->samples)
     {
-    if (bi->probeInfo)
+    if (bi->features)
 	dyStringPrintf(dy, " and");
     else 
 	dyStringPrintf(dy, " where");
 
@@ -525,390 +452,121 @@
 char **row = NULL;
 
 int count = 0;
 struct slPair *sp;
-struct probeSampleVal *pv;
+struct analysisVals *av;
 while ((row = sqlNextRow(sr)) != NULL)
     {
-    char *probe_id = row[0];
-    struct hashEl *el = hashLookup(bi->probesHash, probe_id);
+    char *feature_id = row[1];
+    struct hashEl *el = hashLookup(bi->dataHash, feature_id);
     if (!el)
 	continue;
     sp = el->val;
-    pv = probeSampleValLoad(row);
-    slAddHead(&sp->val, pv);
+    av = analysisValsLoad(row);
+    slAddHead(&sp->val, av);
     count++;
     }
 if (DEBUG)
-    fprintf(stdout, "found %d probes\n", count);
+    fprintf(stdout, "found %d features\n", count);
 
 sqlFreeResult(&sr); 
 }
 
-void loadProbeValsData(struct sqlConnection *conn, struct biOmics *bi, boolean allData)
+struct biData *analysisValsForFeature(struct biOmics *bi, char *feature)
 {
-if (!bi->probeInfo && !allData)
-    return;
-struct datasets *da = bi->dataset;
-
-struct probeInfo *pi;
-struct dyString *dy = newDyString(100);
-dyStringPrintf(dy, "select * from %s ", da->data_table);
-
-if (bi->probeInfo && !allData)
-    {
-    dyStringPrintf(dy, "where probe_id in (");
-    for (pi = bi->probeInfo; pi; pi = pi->next)
-	{
-	dyStringPrintf(dy, "%d", pi->id);
-	if (pi->next)
-	    dyStringPrintf(dy, ",");
-	}
-    dyStringPrintf(dy, ")");
-    }
-char *query = dyStringCannibalize(&dy);
-
-struct sqlResult *sr = sqlGetResult(conn, query);
-char **row = NULL;
-
-int count = 0;
-struct slPair *sp;
-struct probeVals *pv;
-while ((row = sqlNextRow(sr)) != NULL)
-    {
-    char *probe_id = row[0];
-    struct hashEl *el = hashLookup(bi->probesHash, probe_id);
-    if (!el)
-	continue;
-    sp = el->val;
-    pv = probeValsLoad(row);
-    count += pv->sample_count;
-    slAddHead(&sp->val, pv);
-    }
-if (DEBUG)
-    fprintf(stdout, "found %d probes\n", count);
-
-sqlFreeResult(&sr); 
-}
-
-struct biData *probeValsForProbe(struct biOmics *bi, char *probe)
-{
-struct hashEl *el = hashLookup(bi->probeInfoHash, probe);
+struct hashEl *el = hashLookup(bi->featuresHash, feature);
 if (!el)
     return NULL;
-struct probeInfo *pi = el->val;
-int probeId = pi->id;
+struct analysisFeatures *af = el->val;
+int id = af->id;
 
 char pStr[128];
-safef(pStr, sizeof(pStr), "%d", probeId);
-el = hashLookup(bi->probesHash, pStr);
+safef(pStr, sizeof(pStr), "%d", id);
+el = hashLookup(bi->dataHash, pStr);
 if (!el)
     return NULL;
 struct slPair *sp = el->val;
 
-struct biData *bd = biDataNew(probe);
+struct biData *bd = biDataNew(feature);
 
 struct samples *sa;
-struct probeVals *pv;
-struct slDouble *sd;
-for (pv = sp->val; pv; pv = pv->next)
+struct analysisVals *av;
+for (av = sp->val; av; av = av->next)
     {
     for (sa = bi->samples; sa; sa = sa->next)
 	{
-	sd = slDoubleNew(pv->sample_data[sa->exp_id]);
-	slAddHead(&bd->data, sd);
-	hashAdd(bd->hash, sa->name, sd);
-	}
-    }
-slReverse(&bd->data);
-return bd;
-}
-
-struct biData *probeValsForSample(struct biOmics *bi, char *sample)
-{
-struct hashEl *el = hashLookup(bi->samplesHash, sample);
-if (!el)
-    return NULL;
-struct samples *sa = el->val;
-int expId = sa->exp_id;
-
-struct biData *bd = biDataNew(NULL);
-struct slDouble *sd;
-struct slPair *sp;
-for (sp = bi->probes; sp; sp = sp->next)
-    {
-    struct probeVals *pv;
-    for (pv = sp->val; pv; pv = pv->next)
-	{
-	if (expId >= pv->sample_count)
-	    errAbort("expId is greater than bd->vals[] array length.");
-	sd = slDoubleNew(pv->sample_data[expId]);
-	slAddHead(&bd->data, sd);
-	hashAdd(bd->hash, sp->name, sd);
-	}
-    }
-slReverse(&bd->data);
-return bd;
-}
-
-double probeValsForProbeSample(struct biOmics *bi, 
-			       char *probe, char *sample)
-{
-struct hashEl *el = hashLookup(bi->probeInfoHash, probe);
-if (!el)
-    return DBL_NULL;
-struct probeInfo *pi = el->val;
-int probeId = pi->id;
-
-char pStr[128];
-safef(pStr, sizeof(pStr), "%d", probeId);
-el = hashLookup(bi->probesHash, pStr);
-if (!el)
-    return DBL_NULL;
-struct slPair *sp = el->val;
-
-el = hashLookup(bi->samplesHash, sample);
-if (!el)
-    return DBL_NULL;
-struct samples *sa = el->val;
-int expId = sa->exp_id;
-
-struct probeVals *pv = sp->val;
-return pv->sample_data[expId];
-}
-
-boolean calcMedianMAD(struct biOmics *bi, double *median, double *mad)
-{
-int count = 0;
-
-struct samples *sa;
-struct slPair *sp;
-struct probeVals *pv;
-struct slDouble *sd, *sdList = NULL;
-for (sp = bi->probes; sp; sp = sp->next)
-    {    
-    pv = sp->val; 
-    for (sa = bi->samples; sa; sa = sa->next)
-	{
-	sd = slDoubleNew(pv->sample_data[sa->exp_id]);
-	slAddHead(&sdList, sd);
-	count++;
-	}
-    }
-if (!sdList)
-    return FALSE;
-
-if (count <= 1)
-    {
-    slFreeList(&sdList);
-    return FALSE;
-    }
-double medianTmp = slDoubleMedian(sdList);
-
-/* Manually calculate approx std dev according to 99.7% cut-off (3 sigma) */
-slSort(sdList, slDoubleCmp);
-int low = round(count * (0.0015));
-int high = round(count * (1.0 - 0.0015));
-
-sd = slElementFromIx(sdList, low);
-double lowVal = sd->val;
-
-sd = slElementFromIx(sdList, high);
-double highVal = sd->val;
-
-*median = medianTmp;
-*mad = max(fabs(lowVal - medianTmp), fabs(highVal - medianTmp))/3.0;
-
-slFreeList(&sdList);
-return TRUE;
-}
-
-///* Calculate median absolute deviation for a more 
-// * robust measure than standard deviation */
-//for (sd = sdList; sd; sd = sd->next) 
-//   sd->val = fabs(sd->val - medianTmp);
-
-//*median = medianTmp;
-//*mad = slDoubleMedian(sdList);
-//slFreeList(&sdList);
-//return TRUE;
-//}
-
-
-void probeValsConvertToLogP(struct biOmics *bi)
-{ /* tranforms all data to -log(p-value) based on normal z-score transform */
-double median, mad;
-if (!calcMedianMAD(bi, &median, &mad))
-    return;
-
-/* To estimate std deviation from mad, use std = 1.43 * mad 
- *    Brideau et al. J Biomol Screen, 2003 */
-double std = mad * 1.43;
-
-fprintf(stderr, "median = %f, mad = %f, std = %f\n", median, mad, std);
-if (std == 0.0)
-    return;
-
-double maxLogP = 88.0;  /* max, in case p-value comes back zero */
-struct samples *sa;
-struct slPair *sp;
-struct probeVals *pv;
-double z, p;
-double val;
-for (sp = bi->probes; sp; sp = sp->next)
-    {    
-    pv = sp->val;
-    for (sa = bi->samples; sa; sa = sa->next)
-	{
-	z = (pv->sample_data[sa->exp_id] - median)/std;
-	p = ndtr(-1.0*fabs(z));
-	if (p > 0)
-	    val = min(-log(p)/log(10.0), maxLogP);
-	else
-	    val = maxLogP;
-
-	if (z < 0.0)
-	    val = -1.0*val;  // signed log(p-value)
-	pv->sample_data[sa->exp_id] = val;
-	}
-    }
-}
-
-void probeValsConvertToZscores(struct biOmics *bi)
-{ /* transforms all data to z-scores, median-centered */
-double median, mad;
-if (!calcMedianMAD(bi, &median, &mad))
-    return;
-
-/* To estimate std deviation from mad, use std = 1.43 * mad 
- *    Brideau et al. J Biomol Screen, 2003 */
-double std = mad * 1.43;
-fprintf(stderr, "median = %f, mad = %f, std = %f\n", median, mad, std);
-if (std == 0.0)
-    return;
-
-struct samples *sa;
-struct slPair *sp;
-struct probeVals *pv;
-double z;
-for (sp = bi->probes; sp; sp = sp->next)
-    {    
-    pv = sp->val;
-    for (sa = bi->samples; sa; sa = sa->next)
-	{
-	z = (pv->sample_data[sa->exp_id] - median)/std;
-	pv->sample_data[sa->exp_id] = z;
-	}
-    }
-}
-
-void probeSampleValConvertToLogP(struct biOmics *bi)
-{
-return;
-}
-
-void probeSampleValConvertToZscores(struct biOmics *bi)
-{
-return;
-}
-
-struct biData *probeSampleValForProbe(struct biOmics *bi, char *probe)
-{
-struct hashEl *el = hashLookup(bi->probeInfoHash, probe);
-if (!el)
-    return NULL;
-struct probeInfo *pi = el->val;
-int probeId = pi->id;
-
-char pStr[128];
-safef(pStr, sizeof(pStr), "%d", probeId);
-el = hashLookup(bi->probesHash, pStr);
-if (!el)
-    return NULL;
-struct slPair *sp = el->val;
-
-struct biData *bd = biDataNew(probe);
-
-struct samples *sa;
-struct probeSampleVal *pv;
-struct slDouble *sd;
-for (pv = sp->val; pv; pv = pv->next)
-    {
-    for (sa = bi->samples; sa; sa = sa->next)
-	{
-	if (!sa->exp_id == pv->sample_id)
+	if (sa->id != av->sample_id)
 	    continue;
 	
-	sd = slDoubleNew(pv->val);
-	slAddHead(&bd->data, sd);
-	hashAdd(bd->hash, sa->name, sd);
+	struct analysisVals *newAv = cloneAnalysisVals(av);
+	slAddHead(&bd->data, newAv);
+	hashAdd(bd->hash, sa->name, newAv);  /* index the copy held in bd->data, as analysisValsForSample does */
 	break;
 	}
     }
 slReverse(&bd->data);
 return bd;
 }
 
-struct biData *probeSampleValForSample(struct biOmics *bi, char *sample)
+struct biData *analysisValsForSample(struct biOmics *bi, char *sample)
 {
 struct hashEl *el = hashLookup(bi->samplesHash, sample);
 if (!el)
     return NULL;
 struct samples *sa = el->val;
-int expId = sa->exp_id;
+int id = sa->id;
 
 struct biData *bd = biDataNew(sample);
 
-struct slDouble *sd;
 struct slPair *sp;
-for (sp = bi->probes; sp; sp = sp->next)
+for (sp = bi->data; sp; sp = sp->next)
     {
-    struct probeSampleVal *pv;
-    for (pv = sp->val; pv; pv = pv->next)
+    struct analysisVals *av;
+    for (av = sp->val; av; av = av->next)
 	{
-	if (expId != pv->sample_id)
+	if (id != av->sample_id)
 	    continue;
-	sd = slDoubleNew(pv->val);
-	slAddHead(&bd->data, sd);
-	hashAdd(bd->hash, sp->name, sd);
+	struct analysisVals *newAv = cloneAnalysisVals(av);
+	slAddHead(&bd->data, newAv);
+	hashAdd(bd->hash, sp->name, newAv);
 	}
     }
 
 slReverse(&bd->data);
 return bd;
 }
 
-double probeSampleValForProbeSample(struct biOmics *bi, 
-				    char *probe, char *sample)
+struct analysisVals *analysisValsForFeatureSample(struct biOmics *bi, 
+						  char *feature, char *sample)
 {
-struct hashEl *el = hashLookup(bi->probeInfoHash, probe);
+struct hashEl *el = hashLookup(bi->featuresHash, feature);
 if (!el)
-    return DBL_NULL;
-struct probeInfo *pi = el->val;
-int probeId = pi->id;
+    return NULL;
+struct analysisFeatures *af = el->val;
+int id = af->id;
 
 char pStr[128];
-safef(pStr, sizeof(pStr), "%d", probeId);
-el = hashLookup(bi->probesHash, pStr);
+safef(pStr, sizeof(pStr), "%d", id);
+el = hashLookup(bi->dataHash, pStr);
 if (!el)
-    return DBL_NULL;
-
+    return NULL;
 struct slPair *sp = el->val;
+
 el = hashLookup(bi->samplesHash, sample);
 if (!el)
-    return DBL_NULL;
+    return NULL;
 struct samples *sa = el->val;
-int expId = sa->exp_id;
+id = sa->id;
 
-struct probeSampleVal *pv = sp->val;
-for (pv = sp->val; pv; pv = pv->next)
+struct analysisVals *av = sp->val;
+for (av = sp->val; av; av = av->next)
     {
-    if (expId != pv->sample_id)
+    if (id != av->sample_id)
 	continue;
-    return pv->val;
+    return av;
     }
 
-return DBL_NULL;
+return NULL;
 }
 
 void setDataType(struct sqlConnection *conn, struct biOmics *bi)
 {
@@ -923,27 +581,15 @@
 if (!dt)
     errAbort("Datatype with id = %d not found in database", da->type_id);
 
 bi->type = cloneString(dt->name);
-if (sameString(dt->format, "probeVals"))
+if (sameString(dt->format, "analysisVals"))
     {
-    bi->loadData           = loadProbeValsData;
-    bi->freeData           = slPairValsFreeList;
-    bi->dataForProbe       = probeValsForProbe;
-    bi->dataForSample      = probeValsForSample;
-    bi->dataForProbeSample = probeValsForProbeSample;
-    bi->toZscores          = probeValsConvertToZscores;
-    bi->toLogP             = probeValsConvertToLogP;
-    }
-else if (sameString(dt->format, "probeSampleVal"))
-    {
-    bi->loadData           = loadProbeSampleValData;
-    bi->freeData           = slPairSampleValFreeList;
-    bi->dataForProbe       = probeSampleValForProbe;
-    bi->dataForSample      = probeSampleValForSample;
-    bi->dataForProbeSample = probeSampleValForProbeSample;
-    bi->toZscores          = probeSampleValConvertToZscores;
-    bi->toLogP             = probeSampleValConvertToLogP;
+    bi->loadData             = loadAnalysisValsData;
+    bi->freeData             = slPairAnalysisValsFreeList;
+    bi->dataForFeature       = analysisValsForFeature;
+    bi->dataForSample        = analysisValsForSample;
+    bi->dataForFeatureSample = analysisValsForFeatureSample;
     }
 else
     errAbort("Unrecognized datatype");
 
@@ -990,9 +636,9 @@
 loadDataset(biConn, bi);
 
 loadSamples(biConn, bi, bq);
 
-loadProbeInfo(biConn, bi, NULL, TRUE);
+loadFeatures(biConn, bi, NULL, TRUE);
 
 bi->loadData(biConn, bi, TRUE);
 
 hFreeConn(&biConn);
@@ -1013,51 +659,26 @@
 loadSamples(biConn, bi, bq);
 if (DEBUG)
     uglyTime("Load Samples");
 
-loadPathways(biConn, bi, bq->pathwayList);
+loadGenesets(biConn, bi, bq->genesetList);
 if (DEBUG)
     uglyTime("Load Pathways");
 
-loadGenes(biConn, bi, bq->geneList);
+loadFeatures(biConn, bi, bq->featureList, FALSE);
 if (DEBUG)
-    uglyTime("Load Genes");
-
-loadProbeInfo(biConn, bi, bq->probeList, FALSE);
-if (DEBUG)
-    uglyTime("Load Probe Info");
+    uglyTime("Load Features");
 
 bi->loadData(biConn, bi, FALSE);
 if (DEBUG)
-    uglyTime("Load Probe");
+    uglyTime("Load Data");
 
 hFreeConn(&biConn);
 return 0;
 }
 
 int biOmicsPopulateRandom(struct biOmics *bi, struct biQuery *bq, int numProbes)
 {
-struct sqlConnection *biConn = hAllocConnProfile(heatMapDbProfile, bi->db);
-
-if (DEBUG)
-    uglyTime(NULL);
-loadDataset(biConn, bi);
-if (DEBUG)
-    uglyTime("Load Dataset");
-
-loadSamples(biConn, bi, bq);
-if (DEBUG)
-    uglyTime("Load Samples");
-
-loadProbeInfoRandom(biConn, bi, numProbes);
-if (DEBUG)
-    uglyTime("Load Random Probe Info");
-
-bi->loadData(biConn, bi, FALSE);
-if (DEBUG)
-    uglyTime("Load Probe");
-
-hFreeConn(&biConn);
 return 0;
 }
 
 struct slName *biOmicsGetSamples(struct biOmics *bi)
@@ -1071,149 +692,73 @@
 slReverse(&slList);
 return slList;
 }
 
-struct slName *biOmicsGetProbes(struct biOmics *bi)
+struct slName *biOmicsGetFeatures(struct biOmics *bi)
 {
 struct slName *slList = NULL;
-struct probeInfo *pi;
-for (pi = bi->probeInfo; pi; pi = pi->next)
-    slNameAddHead(&slList, pi->name);
+struct analysisFeatures *af;
+for (af = bi->features; af; af = af->next)
+    slNameAddHead(&slList, af->feature_name);
 slReverse(&slList);
 return slList;
 }
 
-void populateAliases(struct biOmics *bi)
-{
-struct sqlConnection *biConn = hAllocConnProfile(heatMapDbProfile, bi->db);
-struct datasets *da = bi->dataset;
-
-fprintf(stderr, "loading gene aliases...\n");
-if (DEBUG)
-    uglyTime(NULL);
-char query[1024];
-safef(query, sizeof(query), 
-      "select DISTINCT kgXref.geneSymbol, %s.name from %s "
-      "join %s on %s.id = %s.probe_id "
-      "join %s on %s.id = %s.gene_id "
-      "join kgXref on kgXref.kgId = %s.kgId;", 
-      da->probe_table, da->probe_table, 
-      da->probe_to_gene_table, da->probe_table, da->probe_to_gene_table, 
-      GL_TABLE, GL_TABLE, da->probe_to_gene_table, 
-      GL_TABLE);
-
-struct sqlResult *sr = sqlGetResult(biConn, query);
-char **row = NULL;
-while ((row = sqlNextRow(sr)) != NULL)
-    {
-    char *gene = row[0];
-    char *probe = cloneString(row[1]);
-    hashAdd(bi->geneAliases, gene, probe);
-    }
-sqlFreeResult(&sr);
-
-if (DEBUG)
-    uglyTime("loaded gene aliases");
-hFreeConn(&biConn);
-}
-
-struct slName *biOmicsGetProbesForGene(struct biOmics *bi, char *gene)
-{
-if (hashNumEntries(bi->geneAliases) == 0)
-    populateAliases(bi);
-
-struct hashEl *el;
-struct slName *slList = NULL;
-for(el = hashLookup(bi->geneAliases, gene); el != NULL; el = hashLookupNext(el))
-    slNameAddHead(&slList, (char *) el->val);
-    
-return slList;
-}
-
-
-
 struct biOmics *newBiOmics(char *db, char *dataset)
 {
 struct biOmics *bi;
 AllocVar(bi);
 bi->db          = cloneString(db);
 bi->name        = cloneString(dataset);
 
 bi->sampleIndices = hashNew(0);
-bi->geneAliases   = hashNew(0);
 
 bi->dataset       = NULL;
 bi->samples       = NULL;
 bi->samplesHash   = hashNew(0);
-bi->probeInfo     = NULL;
-bi->probeInfoHash = hashNew(0);
-bi->probes        = NULL;
-bi->probesHash    = hashNew(0);
+bi->features      = NULL;
+bi->featuresHash  = hashNew(0);
+bi->data          = NULL;
+bi->dataHash      = hashNew(0);
 
 /* Methods */
 bi->populate               = biOmicsPopulate;
 bi->populateAll            = biOmicsPopulateAll;
 bi->populateRandom         = biOmicsPopulateRandom;
-bi->allProbes              = biOmicsGetProbes;
+bi->allFeatures            = biOmicsGetFeatures;
 bi->allSamples             = biOmicsGetSamples;
-bi->probesForGene          = biOmicsGetProbesForGene;
 
 /* These are set according to dataType */
 bi->loadData               = NULL;
 bi->freeData               = NULL;
-bi->dataForProbe           = NULL;
+bi->dataForFeature           = NULL;
 bi->dataForSample          = NULL;
-bi->dataForProbeSample     = NULL;
-bi->toZscores              = NULL;
+bi->dataForFeatureSample     = NULL;
 
 return bi;
 }
 
 
-void slPairSampleValFree(struct slPair **pEl)
-{
-struct slPair *el;
-
-if ((el = *pEl) == NULL) return;
-
-freeMem(el->name);
-struct probeSampleVal *pv = el->val;
-probeSampleValFree(&pv);
-freez(pEl);
-}
-
-void slPairSampleValFreeList(struct slPair **pList)
-{
-struct slPair *el, *next;
-
-for (el = *pList; el != NULL; el = next)
-    {
-    next = el->next;
-    slPairSampleValFree(&el);
-    }
-*pList = NULL;
-}         
-
-void slPairValsFree(struct slPair **pEl)
+void slPairAnalysisValsFree(struct slPair **pEl)
 {
 struct slPair *el;
 
 if ((el = *pEl) == NULL) return;
 
 freeMem(el->name);
-struct probeVals *pv = el->val;
-probeValsFreeList(&pv);
+struct analysisVals *av = el->val;
+analysisValsFreeList(&av);  /* el->val is a whole list built with slAddHead, not a single element */
 freez(pEl);
 }
 
-void slPairValsFreeList(struct slPair **pList)
+void slPairAnalysisValsFreeList(struct slPair **pList)
 {
 struct slPair *el, *next;
 
 for (el = *pList; el != NULL; el = next)
     {
     next = el->next;
-    slPairValsFree(&el);
+    slPairAnalysisValsFree(&el);
     }
 *pList = NULL;
 }         
 
@@ -1226,19 +771,18 @@
 freeMem(el->db);
 freeMem(el->name);
 
 freeHash(&el->sampleIndices);
-freeHash(&el->geneAliases);
 
 datasetsFreeList(&el->dataset);
 samplesFreeList(&el->samples);
 freeHash(&el->samplesHash);
 
-probeInfoFreeList(&el->probeInfo);
-freeHash(&el->probeInfoHash);
+analysisFeaturesFreeList(&el->features);
+freeHash(&el->featuresHash);
 
-el->freeData(&el->probes);
-freeHash(&el->probesHash);
+el->freeData(&el->data);
+freeHash(&el->dataHash);
 
 *pEl = NULL;
 }
 
@@ -1264,25 +809,25 @@
 
 return slList;
 }
 
-struct slName *biResultsGetProbesInDataset(struct biResults *br, char *dataset)
+struct slName *biResultsGetFeaturesInDataset(struct biResults *br, char *dataset)
 {
 struct biOmics *bi = biOmicsMatchDataset(br->datasets, dataset);
 if (!bi)
     return NULL;
 
-return bi->allProbes(bi);
+return bi->allFeatures(bi);
 }
 
-struct slName *biResultsGetProbes(struct biResults *br)
+struct slName *biResultsGetFeatures(struct biResults *br)
 {
 struct slName *slList = NULL;
 struct biOmics *bi;
 for (bi = br->datasets; bi; bi = bi->next)
     {
-    struct slName *probes = bi->allProbes(bi);
-    slList = slCat(slList, probes);
+    struct slName *features = bi->allFeatures(bi);
+    slList = slCat(slList, features);
     }
 return slNameUniqueList(slList);
 }
 
@@ -1353,46 +898,25 @@
 
 return matched;
 }
 
-
-struct slName *biResultsProbesForGeneInDataset(struct biResults *br, char *gene, char *dataset)
+struct biData *biResultsDataForFeatureInDataset(struct biResults *br, 
+						char *feature, char *dataset)
 {
 struct biOmics *bi = biOmicsMatchDataset(br->datasets, dataset);
 if (!bi)
     return NULL;
 
-return bi->probesForGene(bi, gene);
+return bi->dataForFeature(bi, feature);
 }
 
-struct slName *biResultsProbesForGene(struct biResults *br, char *gene)
-{
-struct slName *slList = NULL;
-struct biOmics *bi;
-for (bi = br->datasets; bi; bi = bi->next)
-    {
-    struct slName *probes = bi->probesForGene(bi, gene);
-    slList = slCat(slList, probes);
-    }
-return slNameUniqueList(slList);
-}
-
-struct biData *biResultsDataForProbeInDataset(struct biResults *br, char *probe, char *dataset)
-{
-struct biOmics *bi = biOmicsMatchDataset(br->datasets, dataset);
-if (!bi)
-    return NULL;
-
-return bi->dataForProbe(bi, probe);
-}
-
-struct biData *biResultsDataForProbe(struct biResults *br, char *probe)
+struct biData *biResultsDataForFeature(struct biResults *br, char *feature)
 {
 struct biData *bd,*bdList = NULL;
 struct biOmics *bi;
 for (bi = br->datasets; bi; bi = bi->next)
     {
-    bd = bi->dataForProbe(bi, probe);
+    bd = bi->dataForFeature(bi, feature);
     biDataAppendName(bd, bi->name);
     slAddHead(&bdList, bd);
     }
 return bdList;
@@ -1421,53 +945,37 @@
     }
 return bdList;
 }
 
-double biResultsDataForProbeSampleInDataset(struct biResults *br, 
-					    char *probe, char *sample, char *dataset)
+struct analysisVals *biResultsDataForFeatureSampleInDataset(struct biResults *br, 
+							    char *feature, char *sample, 
+							    char *dataset)
 {
 struct biOmics *bi = biOmicsMatchDataset(br->datasets, dataset);
 if (!bi)
-    return DBL_NULL;
+    return NULL;
 
-return bi->dataForProbeSample(bi, probe, sample);
+return bi->dataForFeatureSample(bi, feature, sample);
 }
 
 
-struct biData *biResultsDataForProbeSample(struct biResults *br, char *probe, char *sample)
+struct biData *biResultsDataForFeatureSample(struct biResults *br, char *feature, char *sample)
 {
 char name[128];
-safef(name, sizeof(name), "%s-%s", probe, sample);
-struct slDouble *sd;
+safef(name, sizeof(name), "%s-%s", feature, sample);
 struct biData *bd = biDataNew(name);
 struct biOmics *bi;
 for (bi = br->datasets; bi; bi = bi->next)
     {
-    double val = bi->dataForProbeSample(bi, probe, sample);
-    sd = slDoubleNew(val);
-    slAddHead(&bd->data, sd);
-    hashAdd(bd->hash, bi->name, sd);
+    struct analysisVals *av = bi->dataForFeatureSample(bi, feature, sample);
+    struct analysisVals *newAv = cloneAnalysisVals(av);
+    slAddHead(&bd->data, newAv);
+    hashAdd(bd->hash, bi->name, newAv);
     }
 slReverse(&bd->data);
 return bd;
 }
 
-void biResultsConvertToZscores(struct biResults *br)
-{
-struct biOmics *bi;
-for (bi = br->datasets; bi; bi = bi->next)
-    bi->toZscores(bi);
-}
-
-void biResultsConvertToLogP(struct biResults *br)
-{
-struct biOmics *bi;
-for (bi = br->datasets; bi; bi = bi->next)
-    bi->toLogP(bi);
-}
-
-
-
 struct biResults *biResultsNew(void)
 {
 struct biResults *br;
 AllocVar(br);
@@ -1476,29 +984,23 @@
 
 /* Methods */
 br->allDatasets            = biResultsGetDatasets;
 
-br->allProbes              = biResultsGetProbes;
-br->allProbesInDataset     = biResultsGetProbesInDataset;
+br->allFeatures              = biResultsGetFeatures;
+br->allFeaturesInDataset     = biResultsGetFeaturesInDataset;
 
 br->allSamples             = biResultsGetSamples;
 br->allSamplesInCommon     = biResultsGetSamplesInCommon;
 br->allSamplesInDataset    = biResultsGetSamplesInDataset;
 
-br->probesForGene          = biResultsProbesForGene;
-br->probesForGeneInDataset = biResultsProbesForGeneInDataset;
-
-br->dataForProbe           = biResultsDataForProbe;
-br->dataForProbeInDataset  = biResultsDataForProbeInDataset;
+br->dataForFeature            = biResultsDataForFeature;
+br->dataForFeatureInDataset   = biResultsDataForFeatureInDataset;
 
 br->dataForSample          = biResultsDataForSample;
 br->dataForSampleInDataset = biResultsDataForSampleInDataset;
 
-br->dataForProbeSample          = biResultsDataForProbeSample;
-br->dataForProbeSampleInDataset = biResultsDataForProbeSampleInDataset;
-
-br->toZscores              = biResultsConvertToZscores;
-br->toLogP                 = biResultsConvertToLogP;
+br->dataForFeatureSample          = biResultsDataForFeatureSample;
+br->dataForFeatureSampleInDataset = biResultsDataForFeatureSampleInDataset;
 
 return br;
 }
 
@@ -1514,9 +1016,9 @@
 void biResultsAddBiQuery(struct biResults *br, struct biQuery *bq)
 {
 struct biOmics *bi = newBiOmics(bq->db, bq->dataset);
 
-if (bq->getAllProbes)
+if (bq->getAllFeatures)
     bi->populateAll(bi, bq);
 else
     bi->populate(bi, bq);
 
@@ -1532,22 +1034,22 @@
 
 return br;
 }
 
-void biResultsAddBiQueryRandom(struct biResults *br, struct biQuery *bq, int numProbes)
+void biResultsAddBiQueryRandom(struct biResults *br, struct biQuery *bq, int numFeatures)
+/* Create a biOmics for bq->db / bq->dataset, fill it through its
+ * populateRandom method (passing bq and numFeatures), and push it on the
+ * head of br->datasets. */
 {
 struct biOmics *bi = newBiOmics(bq->db, bq->dataset);
-bi->populateRandom(bi, bq, numProbes);
+bi->populateRandom(bi, bq, numFeatures);
 
 slAddHead(&br->datasets, bi); 
 }
 
-struct biResults *biQueryResultsRandomize(struct biQuery *bqList, int numProbes)
+struct biResults *biQueryResultsRandomize(struct biQuery *bqList, int numFeatures)
+/* Build a new biResults and call biResultsAddBiQueryRandom once for every
+ * query in bqList, passing numFeatures through unchanged.  Each dataset is
+ * added at the head of br->datasets and the list is not reversed here, so
+ * the datasets end up in the opposite order to bqList. */
 {
 struct biResults *br = biResultsNew();
 struct biQuery *bq;
 for (bq = bqList; bq; bq = bq->next)
-    biResultsAddBiQueryRandom(br, bq, numProbes);
+    biResultsAddBiQueryRandom(br, bq, numFeatures);
 
 return br;
 }
 
@@ -1563,21 +1065,16 @@
     return;
 slAddHead(bqList, bq);
 }
 
-int biQueryAddPathways(struct biQuery *bq, char *pathways, char sep)
-{
-return addDelimStringToList(&bq->pathwayList, pathways, sep); 
-}
-
-int biQueryAddProbes(struct biQuery *bq, char *probes, char sep)
+int biQueryAddGenesets(struct biQuery *bq, char *genesets, char sep)
+/* Hand the 'sep'-delimited string 'genesets' to addDelimStringToList so it
+ * is added to bq->genesetList.  Returns that helper's result, which is 0
+ * when 'genesets' is NULL. */
 {
-return addDelimStringToList(&bq->probeList, probes, sep); 
+return addDelimStringToList(&bq->genesetList, genesets, sep); 
 }
 
-int biQueryAddGenes(struct biQuery *bq, char *genes, char sep)
+int biQueryAddFeatures(struct biQuery *bq, char *features, char sep)
+/* Hand the 'sep'-delimited string 'features' to addDelimStringToList so it
+ * is added to bq->featureList.  Returns that helper's result, which is 0
+ * when 'features' is NULL. */
 {
-return addDelimStringToList(&bq->geneList, genes, sep); 
+return addDelimStringToList(&bq->featureList, features, sep); 
 }
 
 int biQueryAddSamples(struct biQuery *bq, char *samples, char sep)
 {
@@ -1599,21 +1096,19 @@
 struct biQuery *bq;
 AllocVar(bq);
 bq->db = cloneString(db);
 bq->dataset = cloneString(dataset);
-bq->getAllProbes = FALSE;
+bq->getAllFeatures  = FALSE;
 
-bq->pathwayList = NULL;
-bq->probeList   = NULL;
-bq->geneList    = NULL;
+bq->genesetList     = NULL;
+bq->featureList     = NULL;
 bq->sampleList  = NULL;
 bq->featureValList  = NULL;
 bq->featureCodeList = NULL;
 
-bq->addPathways = biQueryAddPathways;
-bq->addProbes   = biQueryAddProbes;
+bq->addGenesets     = biQueryAddGenesets;
 bq->addSamples  = biQueryAddSamples;
-bq->addGenes    = biQueryAddGenes;
+bq->addFeatures     = biQueryAddFeatures;
 bq->addFeatureVals  = biQueryAddFeatureVals;
 bq->addFeatureCodes = biQueryAddFeatureCodes;
 
 return bq;
@@ -1626,11 +1121,10 @@
 
 freeMem(el->db);
 freeMem(el->dataset);
 
-slNameFreeList(&el->pathwayList);
-slNameFreeList(&el->probeList);
-slNameFreeList(&el->geneList);
+slNameFreeList(&el->genesetList);
+slNameFreeList(&el->featureList);
 slNameFreeList(&el->sampleList);
 slNameFreeList(&el->featureValList);
 slNameFreeList(&el->featureCodeList);
 }
@@ -1694,9 +1188,9 @@
 if ((el = *pEl) == NULL) return;
 
 freeMem(el->name);
 freeMem(el->type);
-slFreeList(&el->data);
+analysisValsFreeList(&el->data);
 freeHash(&el->hash);
 }
 
 void biDataFreeList(struct biData **pList)