1e7f2a7a806fccac0d5633d8191b8475821b0480
ceisenhart
  Sat Aug 23 11:55:44 2014 -0700
ExpData.c takes in a matrix of data and a corresponding matrix of names. The output is a .json file which can be used for visualizations.
forceLayout.html is a d3 visualization that is generated from a .json file.
radialDend.html is a d3 visualization that is generated from a .json file.

hacTree.c: refactored the code slightly to remove unnecessary merge calls.

bigWigCluster.c runs on a list of bigWig files and uses the hacTree library to cluster the bigWigs into a binary tree. The output is a .json file which can be used for visualizations.

diff --git src/lib/hacTree.c src/lib/hacTree.c
index e843b26..39fb47d 100644
--- src/lib/hacTree.c
+++ src/lib/hacTree.c
@@ -49,64 +49,73 @@
 /* Use cmpF and extraData to sort wrapped leaves so that identical leaves will be adjacent. */
 {
 struct sortWrapper *leafWraps = lmAlloc(localMem, itemCount * sizeof(struct sortWrapper));
 int i;
 for (i=0;  i < itemCount;  i++)
     {
     leafWraps[i].node = &(leafNodes[i]);
     leafWraps[i].cmpF = cmpF;
     leafWraps[i].extraData = extraData;
     }
 qsort(leafWraps, itemCount, sizeof(struct sortWrapper), sortWrapCmp);
 return leafWraps;
 }
 
 INLINE void initNode(struct hacTree *node, const struct hacTree *left, const struct hacTree *right,
-		     hacDistanceFunction *distF, hacMergeFunction *mergeF, void *extraData)
+		     hacDistanceFunction *distF, hacMergeFunction *mergeF, void *extraData, bool distOnly)
 /* Initialize node to have left and right as its children.  Leave parent pointers
  * alone -- they would be unstable during tree construction. */
 {
 node->left = (struct hacTree *)left;
 node->right = (struct hacTree *)right;
 if (left != NULL && right != NULL)
     {
+    if (distOnly)
+        {
+	struct slList * el;
+	AllocVar(el);
+        node->childDistance = distF(left->itemOrCluster, right->itemOrCluster, extraData);
+        node->itemOrCluster = el;
+        }
+    else {
         node->childDistance = distF(left->itemOrCluster, right->itemOrCluster, extraData);
         node->itemOrCluster = mergeF(left->itemOrCluster, right->itemOrCluster, extraData);
         }
     }
+}
 
 INLINE struct hacTree preClusterNodes(const struct sortWrapper *leafWraps, int i, int runLength,
 				      hacDistanceFunction *distF, hacMergeFunction *mergeF,
 				      void *extraData, struct lm *localMem)
 /* Caller has allocated a node, and this returns what to store there:
  * a recursively constructed cluster of nodes extracted from wrapped
  * leafNodes (leafWraps) starting at i, for runLength items. */
 {
 struct hacTree ret = {NULL, NULL, NULL, NULL, 0, NULL};
 if (runLength > 2)
     {
     struct hacTree *newClusters = lmAlloc(localMem, 2 * sizeof(struct hacTree));
     int halfLength = runLength/2;
     newClusters[0] = preClusterNodes(leafWraps, i, halfLength,
 				     distF, mergeF, extraData, localMem);
     newClusters[1] = preClusterNodes(leafWraps, i+halfLength, runLength-halfLength,
 				     distF, mergeF, extraData, localMem);
-    initNode(&ret, &(newClusters[0]), &(newClusters[1]), distF, mergeF, extraData);
+    initNode(&ret, &(newClusters[0]), &(newClusters[1]), distF, mergeF, extraData, FALSE);
     }
 else if (runLength == 2)
     {
-    initNode(&ret, leafWraps[i].node, leafWraps[i+1].node, distF, mergeF, extraData);
+    initNode(&ret, leafWraps[i].node, leafWraps[i+1].node, distF, mergeF, extraData, FALSE);
     }
 else
     ret = *(leafWraps[i].node);
 return ret;
 }
 
 static struct hacTree *sortAndPreCluster(struct hacTree *leafNodes, int *retItemCount,
 					 struct lm *localMem, hacDistanceFunction *distF,
 					 hacMergeFunction *mergeF, hacCmpFunction *cmpF,
 					 void *extraData)
 /* Use cmpF and extraData to sort wrapped leaf nodes so that identical leaves will be adjacent,
  * then replace leaves with clusters of identical leaves where possible.  Place new
  * (hopefully smaller) item count in retItemCount. */
 {
 int itemCount = *retItemCount;
@@ -132,38 +141,38 @@
 static struct hacTree *pairUpItems(const struct slList *itemList, int itemCount,
 				   int *retPairCount, struct lm *localMem,
 				   hacDistanceFunction *distF, hacMergeFunction *mergeF,
 				   hacCmpFunction *cmpF, void *extraData)
 /* Allocate & initialize leaf nodes and all possible pairings of leaf nodes
  * which will be our seed clusters.  If cmpF is given, pre-sort the leaf nodes
  * and pre-cluster identical leaves before generating seed clusters. */
 {
 struct hacTree *leafNodes = leafNodesFromItems(itemList, itemCount, localMem);
 if (cmpF != NULL)
     leafNodes = sortAndPreCluster(leafNodes, &itemCount, localMem,
 				  distF, mergeF, cmpF, extraData);
 int pairCount = (itemCount == 1) ? 1 : (itemCount * (itemCount-1) / 2);
 struct hacTree *pairPool = lmAlloc(localMem, pairCount * sizeof(struct hacTree));
 if (itemCount == 1)
-    initNode(pairPool, leafNodes, NULL, distF, mergeF, extraData);
+    initNode(pairPool, leafNodes, NULL, distF, mergeF, extraData, FALSE);
 else
     {
     int i, j, pairIx;
     for (i=0, pairIx=0;  i < itemCount-1;  i++)
 	for (j=i+1;  j < itemCount;  j++, pairIx++)
 	    initNode(&(pairPool[pairIx]), &(leafNodes[i]), &(leafNodes[j]), distF, mergeF,
-		     extraData);
+		     extraData, TRUE);
     }
 *retPairCount = pairCount;
 return pairPool;
 }
 
 struct hacTree *hacTreeFromItems(const struct slList *itemList, struct lm *localMem,
 				 hacDistanceFunction *distF, hacMergeFunction *mergeF,
 				 hacCmpFunction *cmpF, void *extraData)
 /* Using distF, mergeF, optionally cmpF and binary tree operations,
  * perform a hierarchical agglomerative (bottom-up) clustering of
  * items.  To free the resulting tree, lmCleanup(&localMem). */
 //
 // Implementation:
 //
 // Create a pool containing all pairs of items (N*(N-1)/2), and build
@@ -216,45 +225,46 @@
     {
     // Scan pool for node with lowest childDistance; swap that node w/head
     int bestIx = 0;
     double minScore = poolHead[0].childDistance;
     int i;
     for (i=1;  i < poolLength;  i++)
 	if (poolHead[i].childDistance < minScore)
 	    {
 	    minScore = poolHead[i].childDistance;
 	    bestIx = i;
 	    }
     if (bestIx != 0)
 	swapBytes((char *)&(poolHead[0]), (char *)&(poolHead[bestIx]), sizeof(struct hacTree));
     // Pop the best (lowest-distance) node from poolHead, make it root (for now).
     root = poolHead;
+    root->itemOrCluster = mergeF(root->left->itemOrCluster, root->right->itemOrCluster, extraData);
     poolHead = &(poolHead[1]);
     poolLength--;
     // Where root->left is found in the pool, replace it with root.
     // Where root->right is found, drop that node so it doesn't become
     // a duplicate of the replacement cases.
     int numNodesToDelete = 0;
     for (i=0;  i < poolLength;  i++)
 	{
 	struct hacTree *node = &(poolHead[i]);
 	if (node->left == root->left)
 	    // found root->left; replace node->left with root (merge root with node->right):
-	    initNode(node, root, node->right, distF, mergeF, extraData);
+	    initNode(node, root, node->right, distF, mergeF, extraData, FALSE);
 	else if (node->right == root->left)
 	    // found root->left; replace node->right with root (merge root with node->left):
-	    initNode(node, node->left, root, distF, mergeF, extraData);
+	    initNode(node, node->left, root, distF, mergeF, extraData, FALSE);
 	else if (node->left == root->right || node->right == root->right)
 	    // found root->right; mark this node for deletion:
 	    nodesToDelete[numNodesToDelete++] = i;
 	}
     if (numNodesToDelete > 0)
 	{
 	int newPoolLen = nodesToDelete[0];
 	// This will be "next node to delete" for the last marked node:
 	nodesToDelete[numNodesToDelete] = poolLength;
 	for (i = 0;  i < numNodesToDelete;  i++)
 	    {
 	    int nodeToDel = nodesToDelete[i];
 	    int nextNodeToDel = nodesToDelete[i+1];
 	    int blkSize = nextNodeToDel - (nodeToDel+1);
 	    if (blkSize == 0)