c0963f33ec59a8b8db383ea4e8bf6ca30c9b7bbf tdreszer Tue Feb 5 17:40:04 2013 -0800 Name changes to clarify code, made at Angie's request. No functional changes here. diff --git src/lib/elmTree.c src/lib/elmTree.c index c136416..0beb908 100644 --- src/lib/elmTree.c +++ src/lib/elmTree.c @@ -1,275 +1,274 @@ // elmTree.c/.h - Extensible local memory tree grown from an slList of objects, via // NEIGHBOR JOINING and a supplied compare routine. Unlike hacTree, this is not a binary tree // and any node can have original content, a single parent and N children. Superficially similar // to running slSort(), running elmTreeGrow() creates a tree from a single "root". All branches // and leaves are represented as "tree nodes" which contain pointers to a user defined object, // a single parent, a first child (if the node is not a leaf) and a next sibling. All nodes are // additionally joined together as an slList with the single "root" as the first member. Thus // the tree can be traversed linerarly through node->next or hierarchically through recursive // node->firstChild,child->sibling access. // The tree is grown through neighbor joins and involves figuring out whether each successive // content should be attached as a branch node or leaf node to the existing tree. To facilitate // neighbor joining, each node has cumulative content from the single root node (whcih has NULL // content). An example might be a tree of bitmaps where each node contains all the bits of its // parent plus additional bits. // Since an elmTree is build with local memory, free the tree by lmCleanup(). #include "common.h" #include "elmTree.h" struct elmNode *treeNewNode(struct lm *lm,struct elmNode **tree,struct slList *content) // Returns a new node to attach to a tree. { assert((*tree != NULL && content != NULL) || (*tree == NULL && content == NULL)); struct elmNode *node; lmAllocVar(lm,node); if (tree != NULL) // Keep all nodes in a simple slList slAddHead(tree,node); node->selfAndDescendants = 1; node->content = content; return node; } // NOTE the root of the tree always has NULL content. #define treePlant(lm,tree) treeNewNode(lm,tree,NULL) -inline void treeSproutLeaf(struct elmNode *parent,struct elmNode *leaf) -// Adds a leaf to a node. Note that the leaf may itself have children. +inline void treeSprout(struct elmNode *parent,struct elmNode *node) +// Adds a leaf or branch node to an existing tree node. { -leaf->parent = parent; -leaf->sibling = parent->firstChild; -parent->firstChild = leaf; -parent->selfAndDescendants += leaf->selfAndDescendants; +node->parent = parent; +node->sibling = parent->firstChild; +parent->firstChild = node; +parent->selfAndDescendants += node->selfAndDescendants; } -struct elmNode *treeClipLeaf(struct elmNode *leaf) -// Clips leaf and return parent +struct elmNode *treeClip(struct elmNode *node) +// Clips leaf or branch node from a tree returning the node's former parent { -assert(leaf->parent != NULL); -struct elmNode *parent = leaf->parent; -leaf->parent = NULL; +assert(node->parent != NULL); +struct elmNode *parent = node->parent; +node->parent = NULL; -// leaf may not be first child, so find leaf in children +// node may not be first child, so find node in children struct elmNode *child = parent->firstChild; struct elmNode *prevChild = NULL; struct elmNode *nextChild = NULL; for (; child != NULL; prevChild = child, child = nextChild) { nextChild = child->sibling; - if (child == leaf) + if (child == node) { child->sibling = NULL; if (prevChild != NULL) prevChild->sibling = nextChild; else parent->firstChild = nextChild; break; } } -parent->selfAndDescendants -= leaf->selfAndDescendants; +parent->selfAndDescendants -= node->selfAndDescendants; return parent; } -inline void treeInsertBranchBefore(struct elmNode *branch,struct elmNode *node) +inline void treeBranchBefore(struct elmNode *branch,struct elmNode *node) +// Adds a new 'branch' node just before the current 'node' { -struct elmNode *parent = treeClipLeaf(node); -treeSproutLeaf(parent,branch); -treeSproutLeaf(branch,node); +struct elmNode *parent = treeClip(node); +treeSprout(parent,branch); +treeSprout(branch,node); } enum elmNodeOverlap rTreeBranchFindMaxWeight(struct elmNode *branch, struct slList *element, elmNodeCompareFunction *neighborCmp, int *pWeight, void *extra) // Using the compare routine, the max weight is found an element and a candidate branch { int bestWeight = 0; int topNodeResult = neighborCmp(branch->content,element,&bestWeight,extra); if (topNodeResult != enoEqual && topNodeResult != enoSuperset // could do better && bestWeight > 0 && bestWeight >= *pWeight) // reason to look for more { struct elmNode *child = branch->firstChild; for ( ;child != NULL; child = child->sibling) { int weight = bestWeight; // will not recurse if new weight is less than best int result = rTreeBranchFindMaxWeight(child,element,neighborCmp,&weight,extra); if (bestWeight < weight) bestWeight = weight; if (result == enoEqual || result == enoSuperset) break; // don't return result of compare to child, but do select this best weight } } *pWeight = bestWeight; return topNodeResult; } static enum elmNodeOverlap treeChooseBranch(struct elmNode **pBranch, struct slList *element, elmNodeCompareFunction *neighborCmp, void *extra) // Using the compare routine, a branch is chosen that most closely matches the element being tested // Simple case: one of two branches has best weight so follow that branch. More subtle case: // both branches have equal weight. In this case using recursive routine will look deeper into // each branch to find the best weight before choosing a branch. { struct elmNode *branch = *pBranch; assert(branch->content != NULL); struct elmNode *bestBranch = branch; int bestWeight = 0; // NOTE: using recursive routine will look deeper into a branch to find the best weight //int bestResult = neighborCmp(bestBranch->content,element,&bestWeight,extra); int bestResult = rTreeBranchFindMaxWeight(bestBranch,element,neighborCmp,&bestWeight,extra); // Is there need to compare further? if (bestResult != enoEqual && bestResult != enoSuperset) // Could do better { for (branch = branch->sibling;branch != NULL; branch = branch->sibling) { int weight = bestWeight; // will not recurse if new weight is less than best //int result = neighborCmp(branch->content,element,&weight,extra); int result = rTreeBranchFindMaxWeight(branch,element,neighborCmp,&weight,extra); - - if (result == enoEqual || result == enoSuperset // best that we can do - || bestWeight < weight) // better than last branch + if (bestWeight < weight) // better than last branch // NOTE: preferring larger of 2 equal branches does not help. //|| (bestWeight == weight // prefer largest branch // && branch->selfAndDescendants > bestBranch->selfAndDescendants)) { bestBranch = branch; bestWeight = weight; bestResult = result; if (bestResult == enoEqual || bestResult == enoSuperset) // doesn't get any better break; } } } *pBranch = bestBranch; return bestResult; } struct elmNode *elmTreeGrow(struct lm *lm,struct slList *collection, void *extra, elmNodeCompareFunction *neighborCmp, elmNodeJoiningFunction *neighborJoin) // Given a collection of content and a function to compare elements of the content, // returns a tree based on neighbor joining. The nodes of the tree will have content // which is cumulative out to the leaves: the root node has null content, while any child // has content that is the superset of all it's parents. Fill 'extra' with anything (like lm) // that you want passed into your compare and joining functions. { if (collection == NULL) return NULL; // Begin tree -struct elmNode *tree = NULL; -struct elmNode *root = treePlant(lm,&tree); +struct elmNode *treeNodeList = NULL; +struct elmNode *root = treePlant(lm,&treeNodeList); // Add first element: struct slList *element = collection; -struct elmNode *newNode = treeNewNode(lm,&tree,element); -treeSproutLeaf(root,newNode); +struct elmNode *newNode = treeNewNode(lm,&treeNodeList,element); +treeSprout(root,newNode); element = element->next; // for each additional element, walks current tree level by level, choosing the best branch and // ultimately adding the element as a leaf. Current leaves can become branches and // current branches may get split to add a new leaf sprout in the middle. struct elmNode *curNode = NULL; struct elmNode *newBranch = NULL; for ( ;element != NULL; element = element->next) { for (curNode = root->firstChild; // For each element, start at just off root curNode != NULL; curNode = curNode->firstChild) // walks down { int result = treeChooseBranch(&curNode,element,neighborCmp,extra); // chooses across if (result == enoEqual) // Replaces current { // Should only get here if a previous node has the same content as this new element struct slList *replacement = neighborJoin(curNode->content,element,extra); curNode->content = replacement; } else if (result == enoExcluding) // New leaf { - newNode = treeNewNode(lm,&tree,element); - treeSproutLeaf(curNode->parent,newNode); // shares nothing + newNode = treeNewNode(lm,&treeNodeList,element); + treeSprout(curNode->parent,newNode); // shares nothing } else if (result == enoSubset) // on to children or new leaf { if (curNode->firstChild != NULL) continue; // Only case where we continue! - newNode = treeNewNode(lm,&tree,element); - treeSproutLeaf(curNode,newNode); // shares all of curNode + newNode = treeNewNode(lm,&treeNodeList,element); + treeSprout(curNode,newNode); // shares all of curNode } else if (result == enoSuperset) // New branch with old node as child { - newNode = treeNewNode(lm,&tree,element); - treeInsertBranchBefore(newNode,curNode); + newBranch = treeNewNode(lm,&treeNodeList,element); + treeBranchBefore(newBranch,curNode); } else if (result == enoMixed) // New branch with both nodes as children { struct slList *matching = neighborJoin(curNode->content,element,extra); - newBranch = treeNewNode(lm,&tree,matching); - treeInsertBranchBefore(newBranch,curNode); - newNode = treeNewNode(lm,&tree,element); - treeSproutLeaf(newBranch,newNode); + newBranch = treeNewNode(lm,&treeNodeList,matching); + treeBranchBefore(newBranch,curNode); + newNode = treeNewNode(lm,&treeNodeList,element); + treeSprout(newBranch,newNode); } break; // done, move on to next element } } -slReverse(&tree); -assert(elmNodeIsRoot(tree)); -return tree; +slReverse(&treeNodeList); +assert(elmNodeIsRoot(treeNodeList) && root == treeNodeList); +return root; } boolean rElmTreeClimb(const struct elmNode *node, struct slList *parent, void *extra, elmNodePickFunction *elmPick, struct slList **results) // Recursively climbs tree and examines every node using the supplied function. // Each call on a node iterates through its siblings, recursively calling itself on any children. // This function might be used to build a list of objects from each or a subset of nodes. // If all examinations resulted in a structure, then the list will be in REVERSE traversal order. // If you immediately slReverse(results) then the list will ascend to the furthest leaf before // moving on to sibling leaves, twigs and branches. // Note: if results are returned, then "parent" is filled with nearest parent's result. // Return FALSE from the elmPick function to stop the traversal. Thus, a complete traversal // returns TRUE, but one that has been stopped (after finding one node?) returns FALSE. { const struct elmNode *sibling = elmNodeIsRoot(node) ? node->firstChild: node; for (;sibling!=NULL;sibling = sibling->sibling) { // Some nodes are subsets rather than alleles struct slList *localParent = NULL; struct slList *result = NULL; boolean ret = elmPick(sibling->content,parent,extra,&result); if (result) { //assert(results != NULL); slAddHead(results,result); localParent = result; result = NULL; } else localParent = parent; if (!ret) return FALSE; // Stop traversing // a node points to only one child, but that child may have siblings struct elmNode *child = sibling->firstChild; // additional children are siblings if (child) { assert(child->content != NULL); ret = rElmTreeClimb(child, localParent, extra, elmPick, &result); if (result != NULL) { //assert(results != NULL); *results = slCat(result,*results); // newer results go to front! } if (!ret) return FALSE; // Stop traversing } } return TRUE; // continue traversing }