1e7f2a7a806fccac0d5633d8191b8475821b0480
ceisenhart
  Sat Aug 23 11:55:44 2014 -0700
ExpData.c takes in a matrix of data and a corresponding matrix of names.The output is a .json file which can be used for visualizations.
forceLayout.html is a d3 visualization that is generated with  a .json file.
radialDend.html is a d3 visualization that is generated with a .json
file.

hacTree.c, refactored the code slightly to remove uneccesary merge
calls.

bigWigCluster.c
runs on a list of bigWig files, uses the hacTree library to cluster the
bigWigs into a binary tree. The output is a .json file which can be used
for visualizations

diff --git src/utils/bigWigCluster/bigWigCluster.c src/utils/bigWigCluster/bigWigCluster.c
new file mode 100644
index 0000000..e7ae234
--- /dev/null
+++ src/utils/bigWigCluster/bigWigCluster.c
@@ -0,0 +1,195 @@
+/* bigWigCluster - Cluster bigWigs using a hactree. */
+#include "sqlNum.h"
+#include "common.h"
+#include "linefile.h"
+#include "hash.h"
+#include "options.h"
+#include "hacTree.h"
+#include "rainbow.h"
+
+
+void usage()
+/* Explain usage and exit. */
+{
+errAbort(
+  "bigWigCluster - Cluster bigWigs using a hactree\n"
+  "usage:\n"
+  "   bigWigCluster input.list chrom.sizes output.json\n"
+  "options:\n"
+  );
+}
+
+/* Command line validation table. */
+static struct optionSpec options[] = {
+   {NULL, 0},
+};
+double longest = 0;
+int nameCount = 0;
+
+
+
+struct bigWig
+{
+struct bigWig *next;  //next item in series
+char *name;	//name of the bigWig filei
+struct rgbColor color; //for coloring
+};
+
+struct bigWig *getBigWigs(char* input)
+// get the bigWig files 
+{
+struct bigWig **list;
+AllocVar(list);
+char* line = NULL;
+int i = 0;
+struct lineFile *lf = lineFileOpen(input,TRUE);
+while(lineFileNext(lf, &line, NULL))
+    {
+    ++i;
+    struct bigWig *bw;
+    AllocVar(bw);
+    bw->name = line;
+    slAddHead(&list,bw);
+    }
+slReverse(&list);
+return *list;
+}
+
+
+
+static void rPrintHierarchicalJson(FILE *f, struct hacTree *tree, int level, double distance,
+		int normConstant, int cgConstant)
+/* Recursively prints out the elements of the hierarchical .json file. */
+{
+struct bigWig *bio = (struct bigWig *)tree->itemOrCluster;
+char *tissue = bio->name;
+struct rgbColor colors = bio->color;
+if (tree->childDistance > longest)
+    // the first distance will be the longest, and is used for normalization
+    longest = tree->childDistance;
+int i;
+for (i = 0;  i < level;  i++)
+    fputc(' ', f); // correct spacing for .json format
+if (tree->left == NULL && tree->right == NULL)
+    {
+    // Prints out the leaf objects
+ //   fprintf(f, "{\"name\": \"%s\",\"similarity\": %f,\"linkGroup\": \" \"", tissue, distance);
+    fprintf(f, "{\"%s\"%s \"%s\"%s\"%s\"%s %f %s\"%s\"%s \"rgb(%i,%i,%i)\"}", "name", ":", tissue, ", ",
+    		"similarity", ":", distance, "," , "colorGroup", ":", colors.r, colors.g, colors.b);
+    return;
+    }
+else if (tree->left == NULL || tree->right == NULL)
+    errAbort("\nHow did we get a node with one NULL kid??");
+
+// Prints out the node object and opens a new children block
+fprintf(f, "{\"%s\"%s \"%s\"%s", "name", ":", " ", ",");
+fprintf(f, "\"colorGroup\": \"rgb(%i,%i,%i)\",", colors.r, colors.g, colors.b );
+fprintf(f, "\"%s\"%s \"%f\"%s\n", "distance", ":", normConstant * (tree->childDistance/longest), ",");
+for (i = 0;  i < level + 1;  i++)
+    fputc(' ', f);
+fprintf(f, "\"%s\"%s\n", "children", ": [");
+distance = tree->childDistance/longest; 
+rPrintHierarchicalJson(f, tree->left, level+1, distance, normConstant, cgConstant);
+fputs(",\n", f);
+rPrintHierarchicalJson(f, tree->right, level+1, distance, normConstant, cgConstant);
+fputc('\n', f);
+// Closes the children block for node objects
+for (i=0;  i < level + 1;  i++)
+    fputc(' ', f);
+fputs("]\n", f);
+for (i = 0;  i < level;  i++)
+    fputc(' ', f);
+fputs("}", f);
+}
+
+void printHierarchicalJson(FILE *f, struct hacTree *tree, int normConstant, int cgConstant)
+/* Prints out the binary tree into .json format intended for d3
+ * hierarchical layouts */
+{
+if (tree == NULL)
+    {
+    fputs("Empty tree.\n", f);
+    return;
+    }
+double distance = 0;
+rPrintHierarchicalJson(f, tree, 0, distance, normConstant, cgConstant);
+fputc('\n', f);
+}
+
+
+
+
+double slBigWigDistance(const struct slList *item1, const struct slList *item2, void *extraData)
+/* Return the absolute difference between the two kids' values. 
+ * Designed for HAC tree use*/
+{
+verbose(1,"Calculating Distance...\n");
+const struct bigWig *kid1 = (const struct bigWig *)item1;
+const struct bigWig *kid2 = (const struct bigWig *)item2;
+char cmd[1024];
+safef(cmd, 1024, "bigWigCorrelate %s %s > output", kid1->name, kid2->name);
+double diff = 0;
+mustSystem(cmd);
+struct lineFile *lf = lineFileOpen("output",TRUE);
+char* line = NULL;
+if (!lineFileNext(lf, &line, NULL))
+    errAbort("no difference output, check bigWigCorrelate");
+diff = sqlDouble(line);
+remove("output");
+return diff;
+}
+
+
+struct slList *slBigWigMerge(const struct slList *item1, const struct slList *item2,
+				void *unusedExtraData)
+/* Make a new slPair where the name is the children names concattenated and the 
+ * value is the average of kids' values.
+ * Designed for HAC tree use*/
+{
+verbose(1,"Merging...\n");
+++nameCount;
+struct bigWig *result;
+AllocVar(result);
+const struct bigWig *kid1 = (const struct bigWig *)item1;
+const struct bigWig *kid2 = (const struct bigWig *)item2;
+char cmd1[1024];
+char cmd2[1024];
+safef(cmd1, 1024, "bigWigMerge %s %s output -verbose=0", kid1->name, kid2->name);
+char name[1024];
+safef(name,1024, "%i", nameCount);
+safef(cmd2, 1024, "bedGraphToBigWig output chrom.sizes %s", name);
+mustSystem(cmd1);
+mustSystem(cmd2);
+result->name = name;
+return (struct slList *)result;
+}
+
+void bigWigCluster(char *inputList, char* chromSizes, char* output)
+/* bigWigCluster - Cluster bigWigs using a hactree. */
+{
+struct bigWig *list = getBigWigs(inputList);
+FILE *f = mustOpen(output,"w");
+struct lm *localMem = lmInit(0);
+struct hacTree *clusters = hacTreeFromItems((struct slList *)list, localMem,
+					    slBigWigDistance, slBigWigMerge, NULL, NULL);
+printHierarchicalJson(f, clusters, 20, 20);
+
+// some cleanup
+int i;
+for (i = 0 ; i <= nameCount; ++i)
+   {
+   char name[1024];
+   safef(name,1024, "%i", i);
+   remove(name);
+   }
+}
+
+int main(int argc, char *argv[])
+/* Process command line. */
+{
+optionInit(&argc, argv, options);
+if (argc != 4)
+    usage();
+bigWigCluster(argv[1], argv[2], argv[3]);
+return 0;
+}