1e7f2a7a806fccac0d5633d8191b8475821b0480 ceisenhart Sat Aug 23 11:55:44 2014 -0700 ExpData.c takes in a matrix of data and a corresponding matrix of names.The output is a .json file which can be used for visualizations. forceLayout.html is a d3 visualization that is generated with a .json file. radialDend.html is a d3 visualization that is generated with a .json file. hacTree.c, refactored the code slightly to remove uneccesary merge calls. bigWigCluster.c runs on a list of bigWig files, uses the hacTree library to cluster the bigWigs into a binary tree. The output is a .json file which can be used for visualizations diff --git src/lib/hacTree.c src/lib/hacTree.c index e843b26..39fb47d 100644 --- src/lib/hacTree.c +++ src/lib/hacTree.c @@ -49,64 +49,73 @@ /* Use cmpF and extraData to sort wrapped leaves so that identical leaves will be adjacent. */ { struct sortWrapper *leafWraps = lmAlloc(localMem, itemCount * sizeof(struct sortWrapper)); int i; for (i=0; i < itemCount; i++) { leafWraps[i].node = &(leafNodes[i]); leafWraps[i].cmpF = cmpF; leafWraps[i].extraData = extraData; } qsort(leafWraps, itemCount, sizeof(struct sortWrapper), sortWrapCmp); return leafWraps; } INLINE void initNode(struct hacTree *node, const struct hacTree *left, const struct hacTree *right, - hacDistanceFunction *distF, hacMergeFunction *mergeF, void *extraData) + hacDistanceFunction *distF, hacMergeFunction *mergeF, void *extraData, bool distOnly) /* Initialize node to have left and right as its children. Leave parent pointers * alone -- they would be unstable during tree construction. */ { node->left = (struct hacTree *)left; node->right = (struct hacTree *)right; if (left != NULL && right != NULL) { + if (distOnly) + { + struct slList * el; + AllocVar(el); + node->childDistance = distF(left->itemOrCluster, right->itemOrCluster, extraData); + node->itemOrCluster = el; + } + else { node->childDistance = distF(left->itemOrCluster, right->itemOrCluster, extraData); node->itemOrCluster = mergeF(left->itemOrCluster, right->itemOrCluster, extraData); } } +} INLINE struct hacTree preClusterNodes(const struct sortWrapper *leafWraps, int i, int runLength, hacDistanceFunction *distF, hacMergeFunction *mergeF, void *extraData, struct lm *localMem) /* Caller has allocated a node, and this returns what to store there: * a recursively constructed cluster of nodes extracted from wrapped * leafNodes (leafWraps) starting at i, for runLength items. */ { struct hacTree ret = {NULL, NULL, NULL, NULL, 0, NULL}; if (runLength > 2) { struct hacTree *newClusters = lmAlloc(localMem, 2 * sizeof(struct hacTree)); int halfLength = runLength/2; newClusters[0] = preClusterNodes(leafWraps, i, halfLength, distF, mergeF, extraData, localMem); newClusters[1] = preClusterNodes(leafWraps, i+halfLength, runLength-halfLength, distF, mergeF, extraData, localMem); - initNode(&ret, &(newClusters[0]), &(newClusters[1]), distF, mergeF, extraData); + initNode(&ret, &(newClusters[0]), &(newClusters[1]), distF, mergeF, extraData, FALSE); } else if (runLength == 2) { - initNode(&ret, leafWraps[i].node, leafWraps[i+1].node, distF, mergeF, extraData); + initNode(&ret, leafWraps[i].node, leafWraps[i+1].node, distF, mergeF, extraData, FALSE); } else ret = *(leafWraps[i].node); return ret; } static struct hacTree *sortAndPreCluster(struct hacTree *leafNodes, int *retItemCount, struct lm *localMem, hacDistanceFunction *distF, hacMergeFunction *mergeF, hacCmpFunction *cmpF, void *extraData) /* Use cmpF and extraData to sort wrapped leaf nodes so that identical leaves will be adjacent, * then replace leaves with clusters of identical leaves where possible. Place new * (hopefully smaller) item count in retItemCount. */ { int itemCount = *retItemCount; @@ -132,38 +141,38 @@ static struct hacTree *pairUpItems(const struct slList *itemList, int itemCount, int *retPairCount, struct lm *localMem, hacDistanceFunction *distF, hacMergeFunction *mergeF, hacCmpFunction *cmpF, void *extraData) /* Allocate & initialize leaf nodes and all possible pairings of leaf nodes * which will be our seed clusters. If cmpF is given, pre-sort the leaf nodes * and pre-cluster identical leaves before generating seed clusters. */ { struct hacTree *leafNodes = leafNodesFromItems(itemList, itemCount, localMem); if (cmpF != NULL) leafNodes = sortAndPreCluster(leafNodes, &itemCount, localMem, distF, mergeF, cmpF, extraData); int pairCount = (itemCount == 1) ? 1 : (itemCount * (itemCount-1) / 2); struct hacTree *pairPool = lmAlloc(localMem, pairCount * sizeof(struct hacTree)); if (itemCount == 1) - initNode(pairPool, leafNodes, NULL, distF, mergeF, extraData); + initNode(pairPool, leafNodes, NULL, distF, mergeF, extraData, FALSE); else { int i, j, pairIx; for (i=0, pairIx=0; i < itemCount-1; i++) for (j=i+1; j < itemCount; j++, pairIx++) initNode(&(pairPool[pairIx]), &(leafNodes[i]), &(leafNodes[j]), distF, mergeF, - extraData); + extraData, TRUE); } *retPairCount = pairCount; return pairPool; } struct hacTree *hacTreeFromItems(const struct slList *itemList, struct lm *localMem, hacDistanceFunction *distF, hacMergeFunction *mergeF, hacCmpFunction *cmpF, void *extraData) /* Using distF, mergeF, optionally cmpF and binary tree operations, * perform a hierarchical agglomerative (bottom-up) clustering of * items. To free the resulting tree, lmCleanup(&localMem). */ // // Implementation: // // Create a pool containing all pairs of items (N*(N-1)/2), and build @@ -216,45 +225,46 @@ { // Scan pool for node with lowest childDistance; swap that node w/head int bestIx = 0; double minScore = poolHead[0].childDistance; int i; for (i=1; i < poolLength; i++) if (poolHead[i].childDistance < minScore) { minScore = poolHead[i].childDistance; bestIx = i; } if (bestIx != 0) swapBytes((char *)&(poolHead[0]), (char *)&(poolHead[bestIx]), sizeof(struct hacTree)); // Pop the best (lowest-distance) node from poolHead, make it root (for now). root = poolHead; + root->itemOrCluster = mergeF(root->left->itemOrCluster, root->right->itemOrCluster, extraData); poolHead = &(poolHead[1]); poolLength--; // Where root->left is found in the pool, replace it with root. // Where root->right is found, drop that node so it doesn't become // a duplicate of the replacement cases. int numNodesToDelete = 0; for (i=0; i < poolLength; i++) { struct hacTree *node = &(poolHead[i]); if (node->left == root->left) // found root->left; replace node->left with root (merge root with node->right): - initNode(node, root, node->right, distF, mergeF, extraData); + initNode(node, root, node->right, distF, mergeF, extraData, FALSE); else if (node->right == root->left) // found root->left; replace node->right with root (merge root with node->left): - initNode(node, node->left, root, distF, mergeF, extraData); + initNode(node, node->left, root, distF, mergeF, extraData, FALSE); else if (node->left == root->right || node->right == root->right) // found root->right; mark this node for deletion: nodesToDelete[numNodesToDelete++] = i; } if (numNodesToDelete > 0) { int newPoolLen = nodesToDelete[0]; // This will be "next node to delete" for the last marked node: nodesToDelete[numNodesToDelete] = poolLength; for (i = 0; i < numNodesToDelete; i++) { int nodeToDel = nodesToDelete[i]; int nextNodeToDel = nodesToDelete[i+1]; int blkSize = nextNodeToDel - (nodeToDel+1); if (blkSize == 0)