f8addec505c50bc03231acb7e418fc056dc4cf7e ceisenhart Mon Mar 7 12:31:28 2016 -0800 Adding in a sibling comparison function to the multi threaded function, refs #16216 diff --git src/inc/hacTree.h src/inc/hacTree.h index 1bf994b..595a3e8 100644 --- src/inc/hacTree.h +++ src/inc/hacTree.h @@ -43,56 +43,61 @@ #include "localmem.h" /* Caller-provided function to measure distance between two items or clusters */ /* Note: this must be able to handle identical inputs or NULL inputs */ /* Note: this should be monotonic and nonnegative, *not* a + or - * difference (as would be used for comparing/ordering/sorting) */ typedef double hacDistanceFunction(const struct slList *item1, const struct slList *item2, void *extraData); /* Caller-provided function to merge two items or clusters into a new cluster */ /* Note: this must be able to handle NULL inputs */ typedef struct slList *(hacMergeFunction)(const struct slList *item1, const struct slList *item2, void *extraData); +/* Optional caller-provided function to compare two children and determine first born. */ +typedef bool hacSibCmpFunction(const struct slList *item1, const struct slList *item2, + void *extraData); + /* Optional caller-provided function to compare two items or clusters for pre-sorting * and pre-clustering of identical items. */ typedef int hacCmpFunction(const struct slList *item1, const struct slList *item2, void *extraData); /* Structure of nodes of the binary tree that contains a hierarchical clustering of inputs */ struct hacTree { struct hacTree *next; // Can have an unordered list of these struct hacTree *parent; // Cluster that contains this cluster, NULL for root node struct hacTree *left; // Left child, NULL for leaf node struct hacTree *right; // Right child, NULL for leaf node double childDistance; // Result of distance function on left and right kids struct slList *itemOrCluster; // If leaf node, one of the items passed in; // otherwise, result of merging left and right kids' itemOrClusters }; struct hacTree *hacTreeFromItems(const struct slList *itemList, struct lm *localMem, hacDistanceFunction *distF, hacMergeFunction *mergeF, hacCmpFunction *cmpF, void *extraData); /* Using distF, mergeF, optionally cmpF and binary tree operations, * perform a hierarchical agglomerative (bottom-up) clustering of * items. To free the resulting tree, lmCleanup(&localMem). */ struct hacTree *hacTreeMultiThread(int threadCount, struct slList *itemList, struct lm *localMem, hacDistanceFunction *distF, hacMergeFunction *mergeF, + hacSibCmpFunction *cmpF, void *extraData, struct hash *precalcDistanceHash); /* Construct hacTree minimizing number of merges called, and doing distance calls * in parallel when possible. Do a lmCleanup(localMem) to free returned tree. * The inputs are * threadCount - number of threads - at least one, recommended no more than 15 * itemList - list of items to tree up. Format can vary, but must start with a * pointer to next item in list. * localMem - memory pool where hacTree and a few other things are allocated from * distF - function that calculates distance between two items, passed items and extraData * mergeF - function that creates a new item in same format as itemList from two passed * in items and the extraData. Typically does average of two input items * extraData - parameter passed through to distF and mergeF, otherwise unused, may be NULL * precalcDistanceHash - a hash containing at least some of the pairwise distances * between items on itemList, set with hacTreeDistanceHashAdd. * As a side effect this hash will be expanded to include all distances