2028f58f234220d59b228639ff4e4ead0d83d2c0
aamp
  Tue May 17 15:03:28 2011 -0700
Stuff to do with microarray subsetting.  It might need a little smoothing over
diff --git src/hg/lib/microarray.c src/hg/lib/microarray.c
index 6e16290..54036d3 100644
--- src/hg/lib/microarray.c
+++ src/hg/lib/microarray.c
@@ -635,39 +635,165 @@
 struct mapArray *mapping;
 if (specList == NULL) 
     {
     if (method == useMedian)
 	maExpDataDoLogRatioMeanOrMedian(exps, FALSE);
     else if (method == useMean)
 	maExpDataDoLogRatioMeanOrMedian(exps, TRUE);
     return;
     }
 mapping = mappingFromMedSpec(specList);
 maExpDataDoLogRatioClumping(exps, mapping, method);
 freeMem(mapping->data);
 freez(&mapping);
 }
 
+/* static void uglyPrintSlInt(struct slInt *list) */
+/* { */
+/* struct slInt *item; */
+/* int i = 0; */
+/* for (item = list; item != NULL; item = item->next) */
+/*     uglyf("i = %d, item->val = %d\n", i++, item->val); */
+/* } */
+
+static struct slRef *groupingLoL(struct maGrouping *grouping)
+/* Turn a grouping into a list of lists: one slRef per group, each val being an */
+/* slInt list of that group's expIds in the order they appear in the array. */
+{
+struct slRef *groupRefs = NULL;
+int groupIx, groupStart = 0;
+for (groupIx = 0; groupIx < grouping->numGroups; groupIx++)
+    {
+    struct slInt *idList = NULL;
+    int idIx, groupEnd = groupStart + grouping->groupSizes[groupIx];
+    /* Walk the slice backwards so head-insertion leaves it in forward order. */
+    for (idIx = groupEnd - 1; idIx >= groupStart; idIx--)
+	{
+	struct slInt *idNode = slIntNew(grouping->expIds[idIx]);
+	slAddHead(&idList, idNode);
+	}
+    refAdd(&groupRefs, idList);
+    groupStart = groupEnd;
+    }
+slReverse(&groupRefs);
+return groupRefs;
+}
+
+static void maPruneLoL(struct slRef *lol, struct slInt *subsetList)
+/* For every int list referenced from lol, keep only the nodes whose val is found */
+/* in subsetList and free the others; each list that keeps anything is sorted. */
+{
+struct slRef *group;
+for (group = lol; group != NULL; group = group->next)
+    {
+    struct slInt *remaining = group->val, *kept = NULL, *node;
+    for (node = slPopHead(&remaining); node != NULL; node = slPopHead(&remaining))
+	{
+	if (!slIntFind(subsetList, node->val))
+	    freeMem(node);
+	else
+	    slAddHead(&kept, node);
+	}
+    /* Kept nodes were pushed onto the head, so order comes from the sort. */
+    if (kept != NULL)
+	slSort(&kept, slIntCmp);
+    group->val = kept;
+    }
+}
+
+static struct slInt *makeSubsetSlInt(struct maGrouping *subset, int subsetOffset)
+/* Return group number subsetOffset of subset as an slInt list of its expIds, */
+/* in array order.  Gives NULL when subset is NULL. */
+{
+struct slInt *idList = NULL;
+int groupIx, first = 0, idIx;
+if (subset == NULL)
+    return NULL;
+/* The chosen group's ids begin after all of the earlier groups' ids. */
+for (groupIx = 0; groupIx < subsetOffset; groupIx++)
+    first += subset->groupSizes[groupIx];
+/* Head-insert from the back of the slice so no reversal is needed. */
+for (idIx = first + subset->groupSizes[subsetOffset] - 1; idIx >= first; idIx--)
+    {
+    struct slInt *idNode = slIntNew(subset->expIds[idIx]);
+    slAddHead(&idList, idNode);
+    }
+return idList;
+}
+
+static void maPruneGroupingWithSubset(struct maGrouping *grouping, struct maGrouping *subset, int subsetOffset)
+/* Drop from grouping every expId not in group subsetOffset of subset, plus any */
+/* group left empty; names, groupSizes, expIds, numGroups and size are rewritten */
+/* in place.  No-op if a pointer is NULL or subsetOffset is out of range. */
+{
+if (grouping && subset && (subsetOffset < subset->numGroups) && (subsetOffset >= 0))
+    {
+    int i = 0, j = 0, k;
+    struct slRef *cur, *lol = groupingLoL(grouping);
+    struct slInt *ssList = makeSubsetSlInt(subset, subsetOffset);
+    int newSize = 0, newNumGroups = 0;
+    int *newExpIds, *newGroupSizes;
+    char **newNames;
+    maPruneLoL(lol, ssList);
+    /* Size the new arrays from what survived, not from the subset: an id */
+    /* listed in several groups, or absent from grouping, makes them differ. */
+    for (cur = lol; cur != NULL; cur = cur->next)
+	{
+	int groupSize = slCount(cur->val);
+	newSize += groupSize;
+	if (groupSize > 0)
+	    newNumGroups++;
+	}
+    AllocArray(newGroupSizes, newNumGroups);
+    AllocArray(newNames, newNumGroups);
+    AllocArray(newExpIds, newSize);
+    for (k = 0, cur = lol; (k < grouping->numGroups) && (cur != NULL); k++, cur = cur->next)
+	{
+	struct slInt *intcur;
+	int groupSize = slCount(cur->val);
+	for (intcur = cur->val; intcur != NULL; intcur = intcur->next)
+	    newExpIds[j++] = intcur->val;
+	if (groupSize > 0)
+	    {
+	    newNames[i] = cloneString(grouping->names[k]);
+	    newGroupSizes[i++] = groupSize;
+	    }
+	freeMem(grouping->names[k]);
+	slFreeList(&cur->val);    /* the pruned id list was only scratch */
+	}
+    slFreeList(&lol);
+    slFreeList(&ssList);
+    freeMem(grouping->names);
+    freeMem(grouping->groupSizes);
+    freeMem(grouping->expIds);
+    grouping->names = newNames;
+    grouping->groupSizes = newGroupSizes;
+    grouping->numGroups = newNumGroups;
+    grouping->expIds = newExpIds;
+    grouping->size = newSize;
+    }
+}
+
 struct expData *maExpDataClumpGivenGrouping(struct expData *exps, struct maGrouping *grouping)
 /* Clump expDatas from a grouping from a .ra file. */
 {
 struct mapArray *mapping;
 struct expData *ret = NULL;
 enum maCombineMethod combine = useMedian;
 char *combType;
-if (!grouping->type || sameWord(grouping->type, "all"))
-    return NULL;
+/* NOTE(review): the early return for a NULL or "all" type is disabled here, so a */
+/* NULL grouping->type now reaches cloneString/eraseWhiteSpace/sameWord -- confirm. */
 combType = cloneString(grouping->type);
 eraseWhiteSpace(combType);
 if (sameWord(combType, "combinemean"))
     combine = useMean;
 mapping = maMappingFromGrouping(grouping);
 ret = maExpDataCombineCols(exps, mapping, combine, 0);
 freeMem(mapping->data);
 freez(&mapping);
 freeMem(combType);
 return ret;
 }
 
 void maExpDataAddConstant(struct expData *exps, double c)
 /* Add a constant c to all of the microarray data. Ensure that NA values */
 /* aren't inadvertantly created. */
@@ -876,88 +1002,136 @@
 		break;
 		}
 	if (ret->defaultCombine == NULL)
 	    errAbort("$%s$ not a valid combine.default in %s", s, groupings);
 	}
     }
 s = hashFindVal(mainGroup, "subset");
 if (s)
     ret->subsetSettings = maGetGroupingsFromList(s, allGroups);
 ret->numCombinations = slCount(ret->combineSettings);
 ret->numSubsets = slCount(ret->subsetSettings);
 hashFreeList(&hashList);
 return ret;
 }
 
+struct maGrouping *maGetGrouping(struct microarrayGroups *groupings, char *name)
+/* Find a grouping by exact name, combine settings first, then subset settings. */
+/* Returns NULL when neither list holds that name. */
+{
+struct maGrouping *hit = groupings->combineSettings;
+while (hit != NULL && !sameString(hit->name, name))
+    hit = hit->next;
+if (hit == NULL)
+    for (hit = groupings->subsetSettings; hit != NULL && !sameString(hit->name, name); )
+	hit = hit->next;
+return hit;
+}
+
 struct maGrouping *maCombineGroupingFromCart(struct microarrayGroups *groupings, 
 				    struct cart *cart, char *trackName)
 /* Determine which grouping to use based on the cart status or lack thereof. */
 {
 char *setting = NULL;
-char cartVar[512];
+char *cartVar = expRatioCombineDLName(trackName);  /* cloned name; no return path below frees it */
 /* Possibly NULL from custom trackness. */
 if (!groupings)
     return NULL;
-safef(cartVar, sizeof(cartVar), "%s.combine", trackName);
 setting = cartUsualString(cart, cartVar, NULL);
 if (setting && sameWord(groupings->allArrays->name, setting))
     return groupings->allArrays;
 if (setting)
     {
     struct maGrouping *cur;
     for (cur = groupings->combineSettings; cur != NULL; cur = cur->next)
 	if (sameWord(cur->name, setting))
 	    return cur;
     }
 return groupings->defaultCombine;
 }
 
+/* int maSubsetOffsetFromCart(struct microarrayGroups *groupings, struct cart *cart, char *trackName) */
+
+struct maGrouping *maSubsetGroupingFromCart(struct microarrayGroups *groupings,
+				    struct cart *cart, char *trackName)
+/* Return the subset grouping named by the track's subset cart variable, or NULL */
+/* if groupings is NULL (custom tracks), the variable is unset, or nothing matches. */
+{
+char *setting, *cartVar;
+struct maGrouping *cur;
+if (!groupings)
+    return NULL;
+cartVar = expRatioSubsetRadioName(trackName, groupings);
+setting = cartUsualString(cart, cartVar, NULL);
+freeMem(cartVar);    /* the name is a fresh clone; only the cart value is needed */
+if (!setting)
+    return NULL;
+for (cur = groupings->subsetSettings; cur != NULL; cur = cur->next)
+    if (sameWord(cur->name, setting))
+	return cur;
+return NULL;
+}
+
+int maSubsetOffsetFromCart(struct maGrouping *subset, struct cart* cart, char *trackName)
+/* Cart value of subset's drop-down variable for this track; -1 if unset or no subset. */
+{
+int setting = -1;
+char *cartVar = subset ? expRatioSubsetDLName(trackName, subset) : NULL;
+if (cartVar)
+    setting = cartUsualInt(cart, cartVar, -1);
+freeMem(cartVar);    /* the variable name is a clone made just for this lookup */
+return setting;
+}
+
 /********* Dealing with BED. ************/
 
 struct expData *maExpDataFromExpBed(struct bed *oneBed)
 /* Convert a bed record's expScores into an expData. */
 {
 struct expData *ret = NULL;
 AllocVar(ret);
 ret->name = cloneString(oneBed->name);
 ret->expCount = oneBed->expCount;
 if (ret->expCount > 0) 
     ret->expScores = CloneArray(oneBed->expScores, oneBed->expCount);
 return ret;
 }
 
 struct expData *maExpDataListFromExpBedList(struct bed *bedList)
 /* Convert list of bed 15 records to a list of expDatas. */
 {
 struct expData *newList = NULL;
 struct bed *cur;
 for (cur = bedList; cur != NULL; cur = cur->next)
     {
     struct expData *addMe = maExpDataFromExpBed(cur);
     slAddHead(&newList, addMe);
     }
 slReverse(&newList);
 return newList;
 }
 
-void maBedClumpGivenGrouping(struct bed *bedList, struct maGrouping *grouping)
+void maBedClumpGivenGrouping(struct bed *bedList, struct maGrouping *grouping, struct maGrouping *subset, int subsetOffset)
 /* Clump (mean/median) a bed 15 given the grouping kind. */
 {
 struct expData *exps = maExpDataListFromExpBedList(bedList);
-struct expData *clumpedExps = maExpDataClumpGivenGrouping(exps, grouping);
+struct expData *clumpedExps;
 struct bed *bed;
 struct expData *exp;
+if (subset)
+    maPruneGroupingWithSubset(grouping, subset, subsetOffset);
+clumpedExps = maExpDataClumpGivenGrouping(exps, grouping);
 expDataFreeList(&exps);
 if (!clumpedExps)
     return;
 /* Go through each bed and copy over the new expDatas */
 /* and free up what was there. */
 for (bed = bedList, exp = clumpedExps; (bed != NULL) && (exp != NULL); 
      bed = bed->next, exp = exp->next)
     {
     int i;
     bed->expCount = exp->expCount;
     if (bed->expScores)
 	freeMem(bed->expScores);
     if (bed->expIds)
 	freeMem(bed->expIds);
     bed->expScores = CloneArray(exp->expScores, exp->expCount);
@@ -969,15 +1143,36 @@
 }
 
 enum expColorType getExpColorType(char *colorScheme)
 /* From a color type return the respective enum. */
 {
 if (sameString(colorScheme, "redBlue"))
     return redBlue;
 if (sameString(colorScheme, "yellowBlue"))
     return yellowBlue;
 if (sameString(colorScheme, "redBlueOnWhite"))
     return redBlueOnWhite;
 if (sameString(colorScheme, "redBlueOnYellow"))
     return redBlueOnYellow;
 return redGreen;
 }
+
+char *expRatioCombineDLName(char *trackName)
+/* Return "<trackName>.combine" as a newly allocated string.  Built without a */
+/* fixed-size scratch buffer, so the track name length is not capped at 128. */
+{
+return catTwoStrings(trackName, ".combine");
+}
+
+char *expRatioSubsetRadioName(char *trackName, struct microarrayGroups *groupings)
+/* Return "<trackName>.subset" as a newly allocated string; groupings is not */
+/* used.  No fixed-size scratch buffer, so the name length is not capped at 128. */
+{
+return catTwoStrings(trackName, ".subset");
+}
+
+char *expRatioSubsetDLName(char *trackName, struct maGrouping *group)
+{   /* Returns cloneString's copy of "<trackName>.subset.<group->name>". */
+char cartVarName[256];
+safef(cartVarName, sizeof cartVarName, "%s.subset.%s", trackName, group->name);
+return cloneString(cartVarName);
+}