e70152e44cc66cc599ff6b699eb8adc07f3e656a
kent
  Sat May 24 21:09:34 2014 -0700
Adding Copyright NNNN Regents of the University of California to all files I believe with reasonable certainty were developed under UCSC employ or as part of Genome Browser copyright assignment.
diff --git src/hg/lib/microarray.c src/hg/lib/microarray.c
index 3643c85..c667489 100644
--- src/hg/lib/microarray.c
+++ src/hg/lib/microarray.c
@@ -1,1180 +1,1183 @@
+/* Copyright (C) 2014 The Regents of the University of California 
+ * See README in this or parent directory for licensing information. */
+
 #include "common.h"
 #include "obscure.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "jksql.h"
 #include "bed.h"
 #include "cart.h"
 #include "expData.h"
 #include "expRecord.h"
 #include "hdb.h"
 #include "microarray.h"
 
 /* Make a 2D array out of the list of expDatas' expScores. */
 struct maExpDataMatrix
 {
     int nrow;
     int ncol;
     float **data;
 };
 
 float **maExpDataArray(struct expData *exps, int *pNrow, int *pNcol)
 /* Make a 2D array out of a list of expDatas' expScores. */
 {
 struct expData *cur;
 float **ret;
 int i = 0;
 if (!exps)
     return NULL;
 *pNrow = slCount(exps);
 *pNcol = exps->expCount;
 AllocArray(ret, *pNrow);
 for (cur = exps; cur != NULL; cur = cur->next)
     {
     float *thecopy = CloneArray(cur->expScores, *pNcol);
     ret[i++] = thecopy;
     }
 return ret;
 }
 
 struct maExpDataMatrix *makeMatrix(struct expData *exps)
 /* Convert the expData list into an easier-to-manipulate matrix. */
 {
 struct maExpDataMatrix *ret;
 AllocVar(ret);
 ret->data = maExpDataArray(exps, &ret->nrow, &ret->ncol);
 return ret;
 }
 
 void freeExpDataMatrix(struct maExpDataMatrix **pMat)
 /* Free up an maExpDataMatrix. */
 {
 int i;
 struct maExpDataMatrix *mat;
 if (!pMat || !(*pMat))
     return;
 mat = *pMat;
 for (i = 0; i < mat->nrow; i++)
     freeMem(mat->data[i]);
 freeMem(mat->data);
 freez(pMat);
 }
 
 void transposeExpDataMatrix(struct maExpDataMatrix **pMat)
 /* Transpose the matrix i.e. mat[i][j] = mat[j][i]. */
 /* This may come in handy later if the job is to combine genes instead of */
 /* arrays... or both. */
 {
 int i, j;
 struct maExpDataMatrix *mat, *newMat;
 if (!pMat || !(*pMat))
     return;
 mat = *pMat;
 AllocVar(newMat);
 AllocArray(newMat->data, mat->ncol);
 for (i = 0; i < mat->ncol; i++)
     {
     AllocArray(newMat->data[i], mat->nrow);
     for (j = 0; j < mat->nrow; j++)
 	newMat->data[i][j] = mat->data[j][i];
     }
 newMat->nrow = mat->ncol;
 newMat->ncol = mat->nrow;
 for (i = 0; i < mat->nrow; i++)
     freeMem(mat->data[i]);
 *pMat = newMat;
 }
 
 /* 2D int array to keep track of indeces in a grouping situation. FOr example */
 /* the mapArray as follows x = */
 /* [[ 2 0 2 0 0 0 ]  */
 /*  [ 1 4 0 0 0 0 ]  */
 /*  [ 2 1 3 0 0 0 ]  */
 /* x is 3 x 6, so the original array had 5 columns.  The number of rows x has */
 /* is the new number of columns.  The first number in each row of x keeps */
 /* track of y number of columns being grouped, and the next y numbers in that */
 /* row are the column indeces of the columns being grouped.  For example, */
 /* using x, then if a = [[ 2 3 4 5 6 ]], then b grouped by x is */
 /* b = [[ 3 6 4 ]]. */
 struct mapArray 
 {
     int nrow; 
     int ncol;
     int **data;
 };
 
 double maDoubleMeanHandleNA(int count, double *array)
 /* Calculate the mean value of an array, skipping the NA vals. */
 {
 int i, num = 0;
 double result = MICROARRAY_MISSING_DATA, sum = 0;
 for (i = 0; i < count; i++)
     if (array[i] > MICROARRAY_MISSING_DATA)
 	{
 	sum += array[i];
 	num++;
 	}
 if (num > 0) 
     result = sum/num;
 return result;
 }
 
 double maDoubleMedianHandleNA(int count, double *array)
 /* Return median value in array, skipping the NAs. */
 /* This one will sort the inputted array as a side-effect. */
 {
 double median, *pastNA;
 int countSansNA = count;
 doubleSort(count, array);
 pastNA = array;
 while ((countSansNA > 0) && (*pastNA <= MICROARRAY_MISSING_DATA))
     {
     pastNA++;
     countSansNA--;
     }
 if (countSansNA < 1)
     return MICROARRAY_MISSING_DATA;
 if ((countSansNA&1) == 1)
     median = pastNA[countSansNA>>1];
 else
     {
     countSansNA >>= 1;
     median = (pastNA[countSansNA] + pastNA[countSansNA-1]) * 0.5;
     }
 return median;
 }
 
 static double *floatArrayToDoubleArray(int count, float *array)
 /* Pretty stupid function but saves a line or two later. */
 {
 double *ret;
 int i;
 AllocArray(ret, count);
 for (i = 0; i < count; i++)
     ret[i] = array[i];
 return ret;
 }
 
 static float maCalcNewScore(float *expScores, int *mapRow, enum maCombineMethod method, int minExps)
 /* Calculate a median, mean, etc for a group of scores and return it. */
 {
 double ret = MICROARRAY_MISSING_DATA;
 double *grouping;
 int k;
 int realCount = 0;
 if (!expScores || !mapRow)
     return MICROARRAY_MISSING_DATA;
 AllocArray(grouping, mapRow[0]);
 for (k = 0; k < mapRow[0]; k++)
     {
     grouping[k] = (double)expScores[mapRow[k+1]];
     if (grouping[k] > MICROARRAY_MISSING_DATA)
 	realCount++;
     }
 if ((realCount == 0) || ((minExps > 0) && (realCount < minExps)))
     ret = MICROARRAY_MISSING_DATA;
 else if (method == useMedian)
     ret = maDoubleMedianHandleNA(mapRow[0], grouping);
 else if (method == useMean)
     ret = maDoubleMeanHandleNA(mapRow[0], grouping);
 freez(&grouping);
 return (float)ret;
 }
 
 struct maExpDataMatrix *maCombineCols(struct maExpDataMatrix *origMat, struct mapArray *mapping, enum maCombineMethod method, int minExps)
 /* Make a new matrix combining the columns of the given one however then */
 /* return it. */
 {
 struct maExpDataMatrix *newMat;
 int i, j;
 if (!origMat)
     return NULL;
 AllocVar(newMat);
 newMat->nrow = origMat->nrow;
 newMat->ncol = mapping->nrow;
 AllocArray(newMat->data, newMat->nrow);
 for (i = 0; i < newMat->nrow; i++)
     {
     AllocArray(newMat->data[i], newMat->ncol);
     for (j = 0; j < newMat->ncol; j++)
 	newMat->data[i][j] = maCalcNewScore(origMat->data[i], mapping->data[j], method, minExps);
     }
 return newMat;
 }
 
 struct expData *maExpDataCombineCols(struct expData *exps, struct mapArray *mapping, enum maCombineMethod method, int minExps)
 /* median, mean, etc. the columns of the expData according to the grouping */
 /* from the mapping array. Return a new expData list. */
 {
 struct expData *cur, *newList = NULL;
 struct maExpDataMatrix *mat, *combinedMat;
 int i;
 if (!exps)
     return NULL;
 mat = makeMatrix(exps);
 combinedMat = maCombineCols(mat, mapping, method, minExps);
 for (cur = exps, i = 0; cur != NULL; cur = cur->next, i++)
     {
     struct expData *newExp;
     AllocVar(newExp);
     newExp->name = cloneString(cur->name);
     newExp->expCount = mapping->nrow;
     newExp->expScores = combinedMat->data[i];
     slAddHead(&newList, newExp);
     }
 slReverse(&newList);
 freeMem(mat->data);
 freez(&mat);
 freez(&combinedMat);
 return newList;
 }
 
 struct mapArray *maMappingFromGrouping(struct maGrouping *grouping)
 /* Convert a microarrayGroups.ra style grouping to the array. */
 {
 struct mapArray *ret;
 int i, offset = 0;
 AllocVar(ret);
 ret->nrow = grouping->numGroups;
 ret->ncol = grouping->size + 1;
 AllocArray(ret->data, ret->ncol);
 for (i = 0; i < ret->nrow; i++)
     {
     int j; /* Iterator for grouping->groupSizes */
     int k = 1; /* Iterator for ret->data[i][] */
     AllocArray(ret->data[i], ret->ncol);
     ret->data[i][0] = grouping->groupSizes[i];
     for (j = offset; j < offset+grouping->groupSizes[i]; j++)
 	ret->data[i][k++] = grouping->expIds[j];
     offset += grouping->groupSizes[i];
     }
 return ret;
 }
 
 struct mapArray *mappingFromExpRecords(struct expRecord *erList, int extrasIndex)
 /* Make a mapping matrix out of a list of expRecord structs using the */
 /* specific grouping provided by the expRecord->extras[extrasIndex].  */
 {
 struct expRecord *er;
 struct mapArray *ret;
 struct slName *erNames = NULL;
 int i;
 if (!erList)
     return NULL;
 if ((extrasIndex < 0) || (extrasIndex >= erList->numExtras))
     return NULL;
 for (er = erList; er != NULL; er = er->next)
     slNameStore(&erNames, er->extras[extrasIndex]);
 slReverse(&erNames);
 AllocVar(ret);
 ret->nrow = slCount(erNames);
 ret->ncol = slCount(erList) + 1;
 AllocArray(ret->data, ret->nrow);
 for (i = 0; i < ret->nrow; i++)
     AllocArray(ret->data[i], ret->ncol);
 for (er = erList, i = 0; er != NULL; er = er->next, i++)
     {
     int rowIx = slNameFindIx(erNames, er->extras[extrasIndex]);
     ret->data[rowIx][++ret->data[rowIx][0]] = i;    
     }
 slNameFreeList(&erNames);
 return ret;
 }
 
 struct maMedSpec *maMedSpecReadAll(char *fileName)
 /* Read in file and parse it into maMedSpecs. */
 {
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 struct maMedSpec *medList = NULL, *med;
 char *row[256], *line;
 int i, count;
 while (lineFileNextReal(lf, &line))
     {
     char *name = nextQuotedWord(&line);
     char *group = nextQuotedWord(&line);
     if (group == NULL)
 	lineFileShort(lf);
     count = chopLine(line, row);
     if (count < 1)
         lineFileShort(lf);
     if (count >= ArraySize(row))
         errAbort("Too many experiments in median line %d of %s",
 		lf->lineIx, lf->fileName);
     AllocVar(med);
     med->name = cloneString(name);
     med->group = cloneString(group);
     med->count = count;
     AllocArray(med->ids, count);
     for (i=0; i<count; ++i)
         {
 	char *asc = row[i];
 	if (!isdigit(asc[0]))
 	    errAbort("Expecting number got %s line %d of %s", asc,
 	    	lf->lineIx, lf->fileName);
 	med->ids[i] = atoi(asc);
 	}
     slAddHead(&medList, med);
     }
 lineFileClose(&lf);
 slReverse(&medList);
 return medList;
 }
 
 struct mapArray *mappingFromMedSpec(struct maMedSpec *specList)
 /* Make an array for mapping out of the maMedSpec.ra files */
 /* used by hgMedianMicroarray for the gene sorter. */
 {
 struct maMedSpec *spec;
 struct mapArray *ret;
 int numTotalExps = 0;
 int i;
 if (specList == NULL)
     return NULL;
 for (spec = specList; spec != NULL; spec = spec->next)
     numTotalExps += spec->count;
 AllocVar(ret);
 ret->nrow = slCount(specList);
 ret->ncol = numTotalExps + 1;
 AllocArray(ret->data, ret->nrow);
 for (spec = specList, i = 0; spec != NULL; spec = spec->next, i++)
     {
     int j;
     AllocArray(ret->data[i], ret->ncol);
     for (j = 0; j < spec->count; j++)
 	{
 	ret->data[i][0]++;
 	ret->data[i][j+1] = spec->ids[j];
 	}
     }
 return ret;
 }
 
 struct expData *maExpDataGroupByExtras(struct expData *exps, struct expRecord *erList, int extrasIndex, enum maCombineMethod method, int minExps)
 /* Combine experiments using mean or median and the grouping defined by the */
 /* expRecord/extrasIndex combination. */
 {
 struct mapArray *mapping = mappingFromExpRecords(erList, extrasIndex);
 struct expData *ret = maExpDataCombineCols(exps, mapping, method, minExps);
 freeMem(mapping->data);
 freez(&mapping);
 return ret;
 }
 
 struct expData *maExpDataMeanByExtras(struct expData *exps, struct expRecord *erList, int extrasIndex, int minExps)
 /* Given the data, the expRecords, and the index for the grouping, mean the */
 /* columns in the data. */
 {
 return maExpDataGroupByExtras(exps, erList, extrasIndex, useMean, minExps);
 }
 
 struct expData *maExpDataMedianByExtras(struct expData *exps, struct expRecord *erList, int extrasIndex, int minExps)
 /* Given the data, the expRecords, and the index for the grouping, median the */
 /* columns in the data. */
 {
 return maExpDataGroupByExtras(exps, erList, extrasIndex, useMedian, minExps);
 }
 
 struct expData *maExpDataMedianFromSpec(struct expData *exps, struct maMedSpec *specs, int minExps)
 /* Given the data, the and a maMedSpec list, median the columns in the data. */
 {
 struct mapArray *mapping = mappingFromMedSpec(specs);
 struct expData *ret = maExpDataCombineCols(exps, mapping, useMedian, minExps);
 freeMem(mapping->data);
 freez(&mapping);
 return ret;
 }
 
 int maExpDataNumBadScoresForProbe(struct expData *exps, char *probeName)
 /* Given a probe name, find that probe's data in the list and report */
 /* the # of NA (< -9999) values in the expScores. I.e. how many bad */
 /* experiments are there. Return -1 if name not found. */
 {
 struct expData *exp;
 int numBad = 0;
 boolean foundName = FALSE;
 if (probeName == NULL)
     return -1;
 for (exp = exps; exp != NULL; exp = exp->next)
     if (sameString(probeName, exp->name))
 	{
 	int i;
 	foundName = TRUE;
 	for (i = 0; i < exp->expCount; i++)
 	    if (exp->expScores[i] <= MICROARRAY_MISSING_DATA)
 		numBad++;
 	break;
 	}
 return (foundName) ? numBad : -1;
 }
 
 int maExpDataNumBadScoresForOneArray(struct expData *exps, struct expRecord *ers, char *arrayName)
 /* Find # of bad datapoints for a certain column (array) in the dataset. */
 /* return -1 if not found. */
 {
 int expId = -1, numBad = 0;
 struct expRecord *findMe;
 struct expData *exp;
 if (arrayName == NULL)
     return -1;
 for (findMe = ers; findMe != NULL; findMe = findMe->next)
     if (sameString(findMe->name, arrayName))
 	{
 	expId = findMe->id;
 	break;
 	}
 if (expId < 0)
     return -1;
 for (exp = exps; exp != NULL; exp = exp->next)
     {
     if (expId >= exp->expCount)
 	return -1;
     if (exp->expScores[expId] <= MICROARRAY_MISSING_DATA)
 	numBad++;
     }
 return numBad;
 }
 
 float maExpDataOverallMean(struct expData *exps)
 /* Return the overall mean for the expression data. */
 {
 double sum = 0;
 long goodVals = 0;
 int i;
 struct expData *exp;
 if (exps == NULL)
     return -1;
 for (exp = exps; exp != NULL; exp = exp->next)
     {
     for (i = 0; i < exp->expCount; i++)
 	{
 	if (exp->expScores[i] > MICROARRAY_MISSING_DATA)
 	    {
 	    goodVals++;
 	    sum += exp->expScores[i];
 	    }
 	}
     }
 return (float)(sum/(double)goodVals);
 }
 
 float maExpDataOverallMedian(struct expData *exps)
 /* Return the overall median for the expression data. */
 {
 double *expVals;
 int goodVals = 0;
 int i;
 float ret = 0;
 struct expData *exp;
 if (exps == NULL)
     return -1;
 AllocArray(expVals, slCount(exps) * exps->expCount);
 for (exp = exps; exp != NULL; exp = exp->next)
     {
     for (i = 0; i < exp->expCount; i++)
 	{
 	if (exp->expScores[i] > MICROARRAY_MISSING_DATA)
 	    expVals[goodVals++] = exp->expScores[i];
 	}
     }
 ret = (float)doubleMedian(goodVals, expVals);
 freez(&expVals);
 return ret;
 }
 
 void maExpDataClipMin(struct expData *exps, float minGood, float minVal)
 /* If an expData->expScores datapoint is < minGood, set it to minVal. */
 {
 struct expData *exp;
 for (exp = exps; exp != NULL; exp = exp->next)
     {
     int i;
     for (i = 0; i < exp->expCount; i++)
 	if ((exp->expScores[i] > MICROARRAY_MISSING_DATA) && (exp->expScores[i] < minGood))
 	    exp->expScores[i] = minVal;
     }
 }
 
 struct expData *maExpDataTranspose(struct expData *exps)
 /* Return a transposed copy of the expData. */
 {
 struct expData *transposed = NULL;
 int probeCount, expCount, i;
 if (!exps)
     errAbort("Tried to transpose empty expData");
 expCount = exps->expCount;
 probeCount = slCount(exps);
 for (i = 0; i < expCount; i++)
     {
     struct expData *newProbe, *cur;
     char name[256];
     int j;
     AllocVar(newProbe);
     safef(name, sizeof(name), "array-%d", i);
     newProbe->name = cloneString(name);
     newProbe->expCount = probeCount;
     AllocArray(newProbe->expScores, newProbe->expCount);
     for (j = 0, cur = exps; (cur != NULL) && (j < probeCount); cur = cur->next, j++)
 	{
 	if (i >= cur->expCount)
 	    errAbort("There needs to be a uniform expCount in given expData in order to transpose");
 	newProbe->expScores[j] = cur->expScores[i];
 	}
     slAddHead(&transposed, newProbe);
     }
 slReverse(&transposed);
 return transposed;
 }
 
 void maExpDataUntranspose(struct expData *exps, struct expData *transpose)
 /* Copy values over from the transposed expData. */
 {
 int expCount = transpose->expCount;
 int probeCount = exps->expCount;
 int i; 
 struct expData *exp;
 for (exp = exps, i = 0; (exp != NULL) && (i < expCount); exp = exp->next, i++)
     {
     struct expData *trans;
     int j;
     for (j = 0, trans = transpose; (j < probeCount) && (trans != NULL); j++, trans = trans->next)
 	exp->expScores[j] = trans->expScores[i];
     }
 }
 
 void maExpDataDoLogRatioMeanOrMedian(struct expData *exps, boolean mean)
 /* For the M x N sized expression matrix, change each value to be the ratio */
 /* of that value to the mean or median of values in that value's row (gene). */
 {
 struct expData *exp;
 for (exp = exps; exp != NULL; exp = exp->next)
     {
     double *tmpVals;
     int i;
     double q, invQ;
     AllocArray(tmpVals, exp->expCount);
     for (i = 0; i < exp->expCount; i++)
 	tmpVals[i] = exp->expScores[i];
     q = (mean) ? maDoubleMeanHandleNA(exp->expCount, tmpVals) : maDoubleMedianHandleNA(exp->expCount, tmpVals);
     freez(&tmpVals);
     if (q <= 0)
 	{
 	for (i = 0; i < exp->expCount; i++)
 	    exp->expScores[i] = MICROARRAY_MISSING_DATA;
 	}
     else
 	{
 	invQ = 1/q;
 	for (i = 0; i < exp->expCount; i++)
 	    if (exp->expScores[i] > MICROARRAY_MISSING_DATA)
 		exp->expScores[i] = logBase2(exp->expScores[i] * invQ);
 	}
     }
 }
 
 void maExpDataDoLogRatioTranspose(struct expData *exps, boolean mean)
 /* For the M x N sized expression matrix, change each value to be the ratio */
 /* of that value to the mean or median of values in that value's column (probe). */
 /* This involves no clumping of experiments to calculate the median/mean. */
 {
 struct expData *transpose = maExpDataTranspose(exps);
 maExpDataDoLogRatioMeanOrMedian(transpose, mean);
 maExpDataUntranspose(exps, transpose);
 expDataFreeList(&transpose);
 }
 
 void maExpDataDoLogRatioClumping(struct expData *exps, struct mapArray *mapping, enum maCombineMethod method)
 /* Sort of an intermediary function to be called by maExpDataDoLogRatioMedianOfMedians or */
 /* maExpDataDoLogRatioMeanOfMeans. */
 {
 struct expData *cur;
 struct maExpDataMatrix *originalMat = makeMatrix(exps);
 struct maExpDataMatrix *clumpedMat = maCombineCols(originalMat, mapping, method, 0);
 int i, j;
 if (!clumpedMat)
     return;
 for (i = 0, cur = exps; (cur != NULL) && (i < clumpedMat->nrow); cur = cur->next, i++)
     {
     double *tmpArray = floatArrayToDoubleArray(clumpedMat->ncol, clumpedMat->data[i]);
     double q = (method == useMean) ? maDoubleMeanHandleNA(clumpedMat->ncol, tmpArray) :
 	maDoubleMedianHandleNA(clumpedMat->ncol, tmpArray);
     double invQ;
     freeMem(tmpArray);
     if (q <= 0)
 	{
 	for (j = 0; j < cur->expCount; j++) 
 	    cur->expScores[j] = MICROARRAY_MISSING_DATA;
 	}
     else
 	{
 	invQ = 1/q;
 	for (j = 0; j < cur->expCount; j++)
 	    cur->expScores[j] = (cur->expScores[j] > 0) ? 
 		logBase2(invQ * cur->expScores[j]) : cur->expScores[j];
 	}
     }
 freeExpDataMatrix(&originalMat);
 freeExpDataMatrix(&clumpedMat);
 }
 
 void maExpDataDoLogRatioClumpExpRecord(struct expData *exps, struct expRecord *erList, int extrasIndex, enum maCombineMethod method)
 /* Log ratio the expData list.  The ratio of the denominator is the median of */
 /* medians or the mean of means of each group of experiments.  This grouping */
 /* is defined by the expRecord/extrasIndex combination. The log is base 2. */
 {
 struct mapArray *mapping = mappingFromExpRecords(erList, extrasIndex);
 maExpDataDoLogRatioClumping(exps, mapping, method);
 freeMem(mapping->data);
 freez(&mapping);
 }
 
 void maExpDataDoLogRatioGivenMedSpec(struct expData *exps, struct maMedSpec *specList, enum maCombineMethod method)
 /* Same as maExpDataDoLogRatioClumpExpRecord except use the maMedSpec as the */
 /* thing to base the groupings off of. */
 {
 struct mapArray *mapping;
 if (specList == NULL) 
     {
     if (method == useMedian)
 	maExpDataDoLogRatioMeanOrMedian(exps, FALSE);
     else if (method == useMean)
 	maExpDataDoLogRatioMeanOrMedian(exps, TRUE);
     return;
     }
 mapping = mappingFromMedSpec(specList);
 maExpDataDoLogRatioClumping(exps, mapping, method);
 freeMem(mapping->data);
 freez(&mapping);
 }
 
 /* static void uglyPrintSlInt(struct slInt *list) */
 /* { */
 /* struct slInt *item; */
 /* int i = 0; */
 /* for (item = list; item != NULL; item = item->next) */
 /*     uglyf("i = %d, item->val = %d\n", i++, item->val); */
 /* } */
 
 static struct slRef *groupingLoL(struct maGrouping *grouping)
 /* return the grouping groups array as an slRef list of slInts */
 {
 struct slRef *list = NULL;
 int i;
 int off = 0;
 for (i = 0; i < grouping->numGroups; i++)
     {
     struct slInt *ints = NULL;
     int j;
     for (j = off; j < off + grouping->groupSizes[i]; j++)
 	{
 	struct slInt *oneInt = slIntNew(grouping->expIds[j]);
 	slAddHead(&ints, oneInt);
 	}
     slReverse(&ints);
     refAdd(&list, ints);
     off += grouping->groupSizes[i];
     }
 slReverse(&list);
 return list;
 }
 
 static void maPruneLoL(struct slRef *lol, struct slInt *subsetList)
 /* remove things from list where the subset */
 {
 struct slRef *ref;
 for (ref = lol; ref != NULL; ref = ref->next)
     {
     struct slInt *intList = (struct slInt *)ref->val;
     struct slInt *cur;
     struct slInt *newList = NULL;
     while ((cur = slPopHead(&intList)) != NULL)
 	{
 	if (slIntFind(subsetList, cur->val))
 	    slAddHead(&newList, cur);
 	else
 	    freeMem(cur);
 	}
     if (newList)
 	slSort(&newList, slIntCmp);
     ref->val = newList;
     }
 }
 
 static struct slInt *makeSubsetSlInt(struct maGrouping *subset, int subsetOffset)
 /* just make a linked-list out of an array */
 {
 struct slInt *list = NULL;
 if (subset)
     {
     int i;
     int j = 0;
     for (i = 0; i < subsetOffset; i++)
 	j += subset->groupSizes[i];
     for (i = j; i < j + subset->groupSizes[subsetOffset]; i++)
 	{
 	struct slInt *newint = slIntNew(subset->expIds[i]);
 	slAddHead(&list, newint);
 	}
     slReverse(&list);
     }
 return list;
 }
 
 static void maPruneGroupingWithSubset(struct maGrouping *grouping, struct maGrouping *subset, int subsetOffset)
 /* remove all the expIds not contained in the subset from the given grouping, */
 /* and remove groups that have no expIds left */
 {
 if (grouping && subset && (subsetOffset < subset->numGroups) && (subsetOffset >= 0))
     {
     int i, j, k;
     int newSize = subset->groupSizes[subsetOffset];
     struct slRef *lol = groupingLoL(grouping);
     struct slRef *cur;
     struct slInt *ssList = makeSubsetSlInt(subset, subsetOffset);
     int *newExpIds;
     int newNumGroups = 0;
     char **newNames;
     int *newGroupSizes;
     maPruneLoL(lol, ssList);
     for (cur = lol; cur != NULL; cur = cur->next)
 	{
 	struct slInt *curList = cur->val;
 	if (slCount(curList) > 0)
 	    newNumGroups++;
 	}
     AllocArray(newGroupSizes, newNumGroups);
     AllocArray(newNames, newNumGroups);
     AllocArray(newExpIds, newSize);
     i = 0;
     j = 0;
     for (k = 0, cur = lol; (k < grouping->numGroups) && (cur != NULL); k++, cur = cur->next)
 	{
 	struct slInt *curList = cur->val;
 	int groupSize = slCount(curList);
 	if (groupSize > 0)
 	    {
 	    struct slInt *intcur;
 	    for (intcur = curList; intcur != NULL; intcur = intcur->next)
 		newExpIds[j++] = intcur->val;
 	    newNames[i] = cloneString(grouping->names[k]);
 	    newGroupSizes[i] = groupSize;
 	    i++;
 	    }
 	freeMem(grouping->names[k]);
 	}
     freeMem(grouping->names);
     freeMem(grouping->groupSizes);
     freeMem(grouping->expIds);
     grouping->names = newNames;
     grouping->groupSizes = newGroupSizes;
     grouping->numGroups = newNumGroups;
     grouping->expIds = newExpIds;
     grouping->size = newSize;
     }
 }
 
 struct expData *maExpDataClumpGivenGrouping(struct expData *exps, struct maGrouping *grouping)
 /* Clump expDatas from a grouping from a .ra file. */
 {
 struct mapArray *mapping;
 struct expData *ret = NULL;
 enum maCombineMethod combine = useMedian;
 char *combType;
 /* if (!grouping->type || sameWord(grouping->type, "all")) */
 /*     return NULL; */
 combType = cloneString(grouping->type);
 eraseWhiteSpace(combType);
 if (sameWord(combType, "combinemean"))
     combine = useMean;
 mapping = maMappingFromGrouping(grouping);
 ret = maExpDataCombineCols(exps, mapping, combine, 0);
 freeMem(mapping->data);
 freez(&mapping);
 freeMem(combType);
 return ret;
 }
 
 void maExpDataAddConstant(struct expData *exps, double c)
 /* Add a constant c to all of the microarray data. Ensure that NA values */
 /* aren't inadvertantly created. */
 {
 struct expData *exp;
 for (exp = exps; exp != NULL; exp = exp->next)
     {
     int i;
     for (i = 0; i < exp->expCount; i++)
 	{
 	if (c <= MICROARRAY_MISSING_DATA)
 	    errAbort("error: Constant used caused expData value to fall into the NA range");
 	if (exp->expScores[i] > MICROARRAY_MISSING_DATA)
 	    exp->expScores[i] = exp->expScores[i] + c;
 	}
     }
 }
 
 /* Functions for getting information from the microarrayGroups.ra files */
 /* that are in the hgCgiData/<org>/<database> .ra tree. */
 
 void maGroupingFree(struct maGrouping **pMag)
 /* free up a maGrouping */
 {
 int i, size;
 struct maGrouping *mag;
 if (!pMag || !*pMag)
     return;
 mag = *pMag;
 size = sameString(mag->type, "all") ? mag->size : mag->numGroups;
 freeMem(mag->name);
 freeMem(mag->description);
 freeMem(mag->expIds);
 freeMem(mag->groupSizes);
 freeMem(mag->type);
 for (i = 0; i < size; i++)
     freeMem(mag->names[i]);
 freeMem(mag->names);
 freez(pMag);
 }
 
 void maGroupingFreeList(struct maGrouping **pList)
 /* Free up a list of maGroupings. */
 {
 struct maGrouping *el, *next;
 for (el = *pList; el != NULL; el = next)
     {
     next = el->next;
     maGroupingFree(&el);
     }
 *pList = NULL;
 }
 
 void microarrayGroupsFree(struct microarrayGroups **pGroups)
 /* Free up the microarrayGroups struct. */
 {
 struct microarrayGroups *groups;
 if (!pGroups || ((groups = *pGroups) == NULL))
     return;
 maGroupingFree(&groups->allArrays);
 maGroupingFreeList(&groups->combineSettings);
 maGroupingFreeList(&groups->subsetSettings);
 freez(pGroups);
 }
 
 struct maGrouping *maHashToMaGrouping(struct hash *oneGroup)
 /* This converts a single "stanza" of the microarrayGroups.ra file to a */
 /* maGrouping struct. */
 {
 struct maGrouping *ret;
 char *s;
 struct slName *wordList = NULL, *oneWord;
 int i = 0;
 AllocVar(ret);
 /* required settings. */
 ret->name = cloneString((char *)hashMustFindVal(oneGroup, "name"));
 ret->type = cloneString((char *)hashMustFindVal(oneGroup, "type"));
 ret->description = cloneString((char *)hashMustFindVal(oneGroup, "description"));
 s = hashMustFindVal(oneGroup, "expIds");
 wordList = slNameListFromComma(s);
 ret->size = slCount(wordList);
 AllocArray(ret->expIds, ret->size);
 for (oneWord = wordList; oneWord != NULL; oneWord = oneWord->next)
     ret->expIds[i++] = atoi(oneWord->name);
 slNameFreeList(&wordList);
 s = hashMustFindVal(oneGroup, "names");
 wordList = slNameListFromComma(s);
 i = 0;
 ret->numGroups = slCount(wordList);
 AllocArray(ret->names, ret->numGroups);
 for (oneWord = wordList; oneWord != NULL; oneWord = oneWord->next)
     ret->names[i++] = cloneString(oneWord->name);
 slNameFreeList(&wordList);
 s = hashMustFindVal(oneGroup, "groupSizes");
 wordList = slNameListFromComma(s);
 i = 0;
 AllocArray(ret->groupSizes, ret->numGroups);
 if (ret->numGroups != slCount(wordList))
     errAbort("Bad format of microarrayGroups.ra in %s, groupSizes size "
 	     "= %d, != to names size = %d", ret->name, slCount(wordList), ret->numGroups);
 for (oneWord = wordList; oneWord != NULL; oneWord = oneWord->next)
     ret->groupSizes[i++] = atoi(oneWord->name);
 slNameFreeList(&wordList);
 return ret;
 }
 
 struct maGrouping *maGetGroupingsFromList(char *commaList, struct hash *allGroupings)
 /* Load all the maGroupings from a comma-delimited list. */
 {
 struct slName *list = slNameListFromComma(commaList), *oneItem;
 struct maGrouping *retList = NULL;
 for (oneItem = list; oneItem != NULL; oneItem = oneItem->next)
     {
     struct hash *oneGrouping = hashMustFindVal(allGroupings, oneItem->name);
     struct maGrouping *newGroup = maHashToMaGrouping(oneGrouping);
     slAddHead(&retList, newGroup);
     }
 slNameFreeList(&list);
 slReverse(&retList);
 return retList;
 }
 
 struct maGrouping *maGetGroupingFromCt(struct customTrack *ct)
 /* Spoof an "all" maGrouping from a customTrack. */
 {
 struct maGrouping *ret;
 char *words = trackDbRequiredSetting(ct->tdb, "expNames");
 struct slName *list = slNameListFromComma(words), *oneItem;
 int i;
 AllocVar(ret);
 /* ret->name not used in custom tracks. */
 ret->type = cloneString("all");
 ret->description = cloneString("Custom Track");
 ret->size = slCount(list);
 ret->numGroups = ret->size;
 AllocArray(ret->expIds, ret->size);
 AllocArray(ret->groupSizes, ret->size);
 AllocArray(ret->names, ret->size);
 for (oneItem = list, i = 0; (oneItem != NULL) && (i < ret->size); oneItem = oneItem->next, i++)
     {
     ret->expIds[i] = i;
     ret->groupSizes[i] = 1;
     ret->names[i] = cloneString(oneItem->name);
     }
 slFreeList(&list);
 return ret;
 }
 
 struct bed *ctLoadMultScoresBedDb(struct customTrack *ct, char *chrom, int start, int end)
 /* If the custom track is stored in a database, load it. */
 {
 int rowOffset;
 char **row;
 struct sqlConnection *conn = hAllocConn(CUSTOM_TRASH);
 struct sqlResult *sr;
 struct bed *bed, *bedList = NULL;
 sr = hRangeQuery(conn, ct->dbTableName, chrom, start, end,
 		 NULL, &rowOffset);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     bed = bedLoadN(row+rowOffset, ct->fieldCount);
     slAddHead(&bedList, bed);
     }
 sqlFreeResult(&sr);
 hFreeConn(&conn);
 slReverse(&bedList);
 return bedList;
 }
 
 struct microarrayGroups *maGetTrackGroupings(char *database, struct trackDb *tdb)
 /* Get the settings from the .ra files and put them in a convenient struct. */
 {
 struct microarrayGroups *ret;
 char *groupings = trackDbSetting(tdb, "groupings");
 char *s = NULL;
 struct hash *allGroups;
 struct hash *mainGroup;
 struct hash *tmpGroup;
 struct hash *hashList; 
 if (groupings == NULL)
     return NULL;
 hashList = 
     hgReadRa(hGenome(database), database, "hgCgiData", 
 	     "microarrayGroups.ra", &allGroups);
 if (allGroups == NULL)
     errAbort("Could not get group settings for track.");
 AllocVar(ret);
 mainGroup = hashMustFindVal(allGroups, groupings);
 s = hashMustFindVal(mainGroup, "all");
 tmpGroup = hashMustFindVal(allGroups, s);
 ret->allArrays = maHashToMaGrouping(tmpGroup);
 s = hashFindVal(mainGroup, "combine");
 if (s)
     {
     ret->combineSettings = maGetGroupingsFromList(s, allGroups);
     s = hashFindVal(mainGroup, "combine.default");
     if (s)
 	{
 	struct maGrouping *cur;
 	if (lastChar(s) == ',')
 	    s[strlen(s)-1] = '\0';
 	for (cur = ret->combineSettings; cur != NULL; cur = cur->next)
 	    if (sameWord(s, cur->name))
 		{
 		ret->defaultCombine = cur;
 		break;
 		}
 	if (ret->defaultCombine == NULL)
 	    errAbort("$%s$ not a valid combine.default in %s", s, groupings);
 	}
     }
 s = hashFindVal(mainGroup, "subset");
 if (s)
     ret->subsetSettings = maGetGroupingsFromList(s, allGroups);
 ret->numCombinations = slCount(ret->combineSettings);
 ret->numSubsets = slCount(ret->subsetSettings);
 hashFreeList(&hashList);
 return ret;
 }
 
 struct maGrouping *maGetGrouping(struct microarrayGroups *groupings, char *name)
 /* Return the specfic grouping (combine or subset), or NULL if not found */
 {
 struct maGrouping *ret;
 for (ret = groupings->combineSettings; ret != NULL; ret = ret->next)
     if (sameString(ret->name, name))
 	return ret;
 for (ret = groupings->subsetSettings; ret != NULL; ret = ret->next)
     if (sameString(ret->name, name))
 	return ret;
 return NULL;
 }
 
 struct maGrouping *maCombineGroupingFromCart(struct microarrayGroups *groupings, 
 				    struct cart *cart, char *trackName)
 /* Determine which grouping to use based on the cart status or lack thereof. */
 {
 char *setting = NULL;
 char *cartVar = expRatioCombineDLName(trackName);
 /* Possibly NULL from custom trackness. */
 if (!groupings)
     return NULL;
 setting = cartUsualString(cart, cartVar, NULL);
 if (setting && sameWord(groupings->allArrays->name, setting))
     return groupings->allArrays;
 if (setting)
     {
     struct maGrouping *cur;
     for (cur = groupings->combineSettings; cur != NULL; cur = cur->next)
 	if (sameWord(cur->name, setting))
 	    return cur;
     }
 if (groupings->defaultCombine)
     return groupings->defaultCombine;
 return groupings->allArrays;
 }
 
 /* int maSubsetOffsetFromCart(struct microarrayGroups *groupings, struct cart *cart, char *trackName) */
 
 struct maGrouping *maSubsetGroupingFromCart(struct microarrayGroups *groupings, 
 				    struct cart *cart, char *trackName)
 /* Determine which grouping to use based on the cart status or lack thereof. */
 {
 char *setting = NULL;
 char *cartVar = expRatioSubsetRadioName(trackName, groupings);
 /* Possibly NULL from custom trackness. */
 if (!groupings)
     return NULL;
 setting = cartUsualString(cart, cartVar, NULL);
 if (setting)
     {
     struct maGrouping *cur;
     for (cur = groupings->subsetSettings; cur != NULL; cur = cur->next)
 	if (sameWord(cur->name, setting))
 	    return cur;
     }
 return NULL;
 }
 
 int maSubsetOffsetFromCart(struct maGrouping *subset, struct cart* cart, char *trackName)
 {
 int setting;
 char *cartVar;
 if (!subset)
     return -1;
 cartVar = expRatioSubsetDLName(trackName, subset);
 setting = cartUsualInt(cart, cartVar, -1);
 return setting;
 }
 
 /********* Dealing with BED. ************/
 
 struct expData *maExpDataFromExpBed(struct bed *oneBed)
 /* Convert a bed record's expScores into an expData. */
 {
 struct expData *ret = NULL;
 AllocVar(ret);
 ret->name = cloneString(oneBed->name);
 ret->expCount = oneBed->expCount;
 if (ret->expCount > 0) 
     ret->expScores = CloneArray(oneBed->expScores, oneBed->expCount);
 return ret;
 }
 
 struct expData *maExpDataListFromExpBedList(struct bed *bedList)
 /* Convert list of bed 15 records to a list of expDatas. */
 {
 struct expData *newList = NULL;
 struct bed *cur;
 for (cur = bedList; cur != NULL; cur = cur->next)
     {
     struct expData *addMe = maExpDataFromExpBed(cur);
     slAddHead(&newList, addMe);
     }
 slReverse(&newList);
 return newList;
 }
 
 void maBedClumpGivenGrouping(struct bed *bedList, struct maGrouping *grouping, struct maGrouping *subset, int subsetOffset)
 /* Clump (mean/median) a bed 15 given the grouping kind. */
 {
 struct expData *exps = maExpDataListFromExpBedList(bedList);
 struct expData *clumpedExps;
 struct bed *bed;
 struct expData *exp;
 if (subset)
     maPruneGroupingWithSubset(grouping, subset, subsetOffset);
 clumpedExps = maExpDataClumpGivenGrouping(exps, grouping);
 expDataFreeList(&exps);
 if (!clumpedExps)
     return;
 /* Go through each bed and copy over the new expDatas */
 /* and free up what was there. */
 for (bed = bedList, exp = clumpedExps; (bed != NULL) && (exp != NULL); 
      bed = bed->next, exp = exp->next)
     {
     int i;
     bed->expCount = exp->expCount;
     if (bed->expScores)
 	freeMem(bed->expScores);
     if (bed->expIds)
 	freeMem(bed->expIds);
     bed->expScores = CloneArray(exp->expScores, exp->expCount);
     AllocArray(bed->expIds, bed->expCount);
     for (i = 0; i < bed->expCount; i++)
 	bed->expIds[i] = i;
     }
 expDataFreeList(&clumpedExps);
 }
 
 enum expColorType getExpColorType(char *colorScheme)
 /* From a color type return the respective enum. */
 {
 if (sameString(colorScheme, "redBlue"))
     return redBlue;
 if (sameString(colorScheme, "yellowBlue"))
     return yellowBlue;
 if (sameString(colorScheme, "redBlueOnWhite"))
     return redBlueOnWhite;
 if (sameString(colorScheme, "redBlueOnYellow"))
     return redBlueOnYellow;
 return redGreen;
 }
 
 char *expRatioCombineDLName(char *trackName)
 {
 char dropDownName[128];
 safef(dropDownName, sizeof(dropDownName), "%s.combine", trackName);
 return cloneString(dropDownName);
 }
 
 char *expRatioSubsetRadioName(char *trackName, struct microarrayGroups *groupings)
 {
 char radioVarName[128];
 safef(radioVarName, sizeof(radioVarName), "%s.subset", trackName);
 return cloneString(radioVarName);
 }
 
 char *expRatioSubsetDLName(char *trackName, struct maGrouping *group)
 {
 char dropVarName[256];
 safef(dropVarName, sizeof(dropVarName), "%s.subset.%s", trackName, group->name);
 return cloneString(dropVarName);
 }