74b1b9642714b6cf8043087a393cb5aa89bc32c1 kent Mon Aug 25 15:48:44 2014 -0700 Reversing ordering, and refactoring to set up for parallelization of distance calcs. diff --git src/lib/hacTree.c src/lib/hacTree.c index 3196d82..cdf91bd 100644 --- src/lib/hacTree.c +++ src/lib/hacTree.c @@ -272,31 +272,34 @@ if (root->left != NULL) root->left->parent = root; if (root->right != NULL) root->right->parent = root; } // This shouldn't be necessary as long as initNode leaves parent pointers alone, // but just in case that changes: root->parent = NULL; return root; } /** The code from here on down is an alternative implementation that calls the merge ** function and for that matter the distance function much less than the function ** above. */ -static double findClosestPair(struct dlList *list, struct hash *distanceHash, +typedef double hacPairingFunction(struct dlList *list, struct hash *distanceHash, + hacDistanceFunction *distF, void *distData, struct dlNode **retNodeA, struct dlNode **retNodeB); + +static double pairSerially(struct dlList *list, struct hash *distanceHash, hacDistanceFunction *distF, void *extraData, struct dlNode **retNodeA, struct dlNode **retNodeB) /* Loop through list returning closest two nodes */ { struct dlNode *aNode; double closestDistance = BIGDOUBLE; struct dlNode *closestA = NULL, *closestB = NULL; for (aNode = list->head; !dlEnd(aNode); aNode = aNode->next) { struct hacTree *aHt = aNode->val; struct slList *a = aHt->itemOrCluster; struct dlNode *bNode; for (bNode = aNode->next; !dlEnd(bNode); bNode = bNode->next) { struct hacTree *bHt = bNode->val; char key[64]; @@ -310,75 +313,84 @@ } double d = *pd; if (d < closestDistance) { closestDistance = d; closestA = aNode; closestB = bNode; } } } *retNodeA = closestA; *retNodeB = closestB; return closestDistance; } -static void lmDlAddValTail(struct lm *lm, struct dlList *list, void *val) +static void lmDlAddValHead(struct lm *lm, struct dlList *list, void *val) /* Allocate new dlNode out of lm, initialize it with val, and add it to end of list */ { struct dlNode *node; lmAllocVar(lm, node); node->val = val; -dlAddTail(list, node); +dlAddHead(list, node); } -struct hacTree *hacTreeForCostlyMerges(struct slList *itemList, struct lm *localMem, +struct hacTree *hacTreeVirtualPairing(struct slList *itemList, struct lm *localMem, hacDistanceFunction *distF, hacMergeFunction *mergeF, - void *extraData) + void *extraData, hacPairingFunction *pairingF) /* Construct hacTree using a method that will minimize the number of calls to * the distance and merge functions, assuming they are expensive. Do a lmCleanup(localMem) * to free the returned tree. */ { /* Make up a doubly-linked list in 'remaining' with all items in it */ struct dlList remaining; dlListInit(&remaining); struct slList *item; int count = 0; struct hash *distanceHash = hashNew(0); for (item = itemList; item != NULL; item = item->next) { struct hacTree *ht; lmAllocVar(localMem, ht); ht->itemOrCluster = item; - lmDlAddValTail(localMem, &remaining, ht); + lmDlAddValHead(localMem, &remaining, ht); count += 1; } /* Loop through finding closest and merging until only one node left on remaining. */ int i; for (i=1; i<count; ++i) { /* Find closest pair and take them off of remaining list */ struct dlNode *aNode, *bNode; - double distance = findClosestPair(&remaining, distanceHash, distF, extraData, &aNode, &bNode); + double distance = pairingF(&remaining, distanceHash, distF, extraData, &aNode, &bNode); dlRemove(aNode); dlRemove(bNode); /* Allocated new hacTree item for them and fill it in with a merged value. */ struct hacTree *ht; lmAllocVar(localMem, ht); struct hacTree *left = ht->left = aNode->val; struct hacTree *right = ht->right = bNode->val; left->parent = right->parent = ht; ht->itemOrCluster = mergeF(left->itemOrCluster, right->itemOrCluster, extraData); ht->childDistance = distance; /* Put merged item onto remaining list. */ - lmDlAddValTail(localMem, &remaining, ht); + lmDlAddValHead(localMem, &remaining, ht); } /* Clean up and go home. */ hashFree(&distanceHash); struct dlNode *lastNode = dlPopHead(&remaining); return lastNode->val; } +struct hacTree *hacTreeForCostlyMerges(struct slList *itemList, struct lm *localMem, + hacDistanceFunction *distF, hacMergeFunction *mergeF, + void *extraData) +/* Construct hacTree using a method that will minimize the number of calls to + * the distance and merge functions, assuming they are expensive. Do a lmCleanup(localMem) + * to free the returned tree. */ +{ +return hacTreeVirtualPairing(itemList, localMem, distF, mergeF, extraData, pairSerially); +}