752bae9723eeeabb02d950157ab0f9512fd689b3 tdreszer Mon Jan 28 15:05:59 2013 -0800 Checking in 'elmTree' which uses neighbor joining to build a tree in local memory. Also checking in 2 sets of changes to vcf. First, added support for a 'reuse' local memory pool which aid traversing giant files. Second, added routines for bitmap based analysis of variants. diff --git src/inc/elmTree.h src/inc/elmTree.h new file mode 100644 index 0000000..92c5df0 --- /dev/null +++ src/inc/elmTree.h @@ -0,0 +1,122 @@ +// elmTree.c/.h - Extensible local memory tree grown from an slList of objects, via +// NEIGHBOR JOINING and a supplied compare routine. Unlike hacTree, this is not a binary tree +// and any node can have original content, a single parent and N children. Superficially similar +// to running slSort(), running elmTreeGrow() creates a tree from a single "root". All branches +// and leaves are represented as "tree nodes" which contain pointers to a user defined object, +// a single parent, a first child (if the node is not a leaf) and a next sibling. All nodes are +// additionally joined together as an slList with the single "root" as the first member. Thus +// the tree can be traversed linerarly through node->next or hierarchically through recursive +// node->firstChild,child->sibling access. +// The tree is grown through neighbor joins and involves figuring out whether each successive +// content should be attached as a branch node or leaf node to the existing tree. To facilitate +// neighbor joining, each node has cumulative content from the single root node (whcih has NULL +// content). An example might be a tree of bitmaps where each node contains all the bits of its +// parent plus additional bits. +// Since an elmTree is build with local memory, free the tree by lmCleanup(). + +#ifndef ELMTREE_H +#define ELMTREE_H + +#include "common.h" +#include "localmem.h" + +struct elmNode { +// A tree node may be either a leaf or a branch, with branches having children +// NOTE the root of the tree always has NULL content. + struct elmNode * next; // slList of all nodes in tree starting with root + struct elmNode * parent; // NULL: root of tree, else only one parent allowed + struct elmNode * firstChild; // NULL: leaf, or firstChild (successive are siblings) + struct elmNode * sibling; // children are singly linked as on "next" sibling + int selfAndDescendants; // count of self and descendants, but not siblings and parents + void *content; // Content of node, such as bitMap that defines node. +}; + +enum elmNodeOverlap { + enoExcluding = 0, // Nothing in common between 2 elements + enoEqual = 1, // Two node elements are identical + enoSuperset = 2, // First element completely covers second + enoSubset = 3, // First element is a subset of second + enoMixed = 4 // Elements match some and differ some (use weight to distinguish) +}; + + +// - - - - - growing a tree: +typedef enum elmNodeOverlap (elmNodeCompareFunction)(const struct slList *elA, + const struct slList *elB, + int *joinWeight, void *extra); +// Prototype of variable compare routines. Caller may request joinWeight. Fill this with a +// weight of the commonality between two elements, with a greater weight representing a better +// possible join. An example might be bit compares and the weight being the number of bits +// that match with the max being the number of possible matches. This value will be used to +// help decide what branch a new node should join. + +typedef struct slList *(elmNodeJoiningFunction)(const struct slList *elA, + const struct slList *elB,void *extra); +// Prototype of function for creating the common content of 2 elements that are part of a tree. +// As the tree grows, elements will get attached to common branches and those branches will +// have content created by this function. An example application might be two of your bitmaps +// representing actual data being used to create a third bitmap with is the common bits set +// but not representing one of your original element bitmaps (actual data). + +struct elmNode *elmTreeGrow(struct lm *lm,struct slList *collection, void *extra, + elmNodeCompareFunction *neighborCmp, + elmNodeJoiningFunction *neighborJoin); +// Given a collection of content and a function to compare elements of the content, +// returns a tree based on neighbor joining. The nodes of the tree will have content +// which is cumulative out to the leaves: the root node has null content, while any child +// has content that is the superset of all it's parents. Fill 'extra' with anything (like lm) +// that you want passed into your compare and joining functions. + + +// - - - - - picking nodes from a tree: +typedef boolean (elmNodePickFunction)(struct slList *element, + struct slList *parent,void *extra, + struct slList **result); +// Prototype for picking nodes by using recursive tree climbing routine. This user provided +// function receives the content of a node and any extra structure provided. +// The routine can return a result if desired. When the function returns a result, then the +// result of the nearest "parent" (if applicable) will be provided as well. All results will +// ultimately be strung together in a list in traversal order. The routine should +// return TRUE to keep traversing the tree and FALSE if tree traversal should stop. + +boolean rElmTreeClimb(const struct elmNode *node, struct slList *parent, void *extra, + elmNodePickFunction *elmPick, struct slList **results); +// Recursively climbs tree and examines every node using the supplied function. +// Each call on a node iterates through its siblings, recursively calling itself on any children. +// This function might be used to build a list of objects from each or a subset of nodes. +// If all examinations resulted in a structure, then the list will be in REVERSE traversal order. +// If you immediately slReverse(results) then the list will ascend to the furthest leaf before +// moving on to sibling leaves, twigs and branches. +// Note: if results are returned, then "parent" is filled with nearest parent's result. +// Return FALSE from the elmPick function to stop the traversal. Thus, a complete traversal +// returns TRUE, but one that has been stopped (after finding one node?) returns FALSE. + + +// - - - - - general tree examination +INLINE int elmNodeCountSiblings(const struct elmNode *node) +// Counts siblings, including current +{ +int count = 0; +const struct elmNode *sibling = node; +for (;sibling != NULL; sibling = sibling->sibling) + count++; +return count; +} +#define elmNodeCountChildren(node) elmNodeCountSiblings((node)->firstChild) + +INLINE int elmNodeCountGenerations(const struct elmNode *node) +// Counts direct generations (first children only), including current +{ +int count = 0; +const struct elmNode *child = node; +for (;child != NULL; child = child->firstChild) + count++; +return count; +} + +#define elmNodeIsBranch(node) (node->firstChild != NULL) +#define elmNodeIsLeaf(node) (node->firstChild == NULL) +#define elmNodeIsRoot(node) (node->parent == NULL) + + +#endif//ndef ELMTREE_H