74b1b9642714b6cf8043087a393cb5aa89bc32c1
kent
  Mon Aug 25 15:48:44 2014 -0700
Reversing ordering, and refactoring to set up for parallelization of distance calcs.
diff --git src/lib/hacTree.c src/lib/hacTree.c
index 3196d82..cdf91bd 100644
--- src/lib/hacTree.c
+++ src/lib/hacTree.c
@@ -272,31 +272,34 @@
     if (root->left != NULL)
 	root->left->parent = root;
     if (root->right != NULL)
 	root->right->parent = root;
     }
 // This shouldn't be necessary as long as initNode leaves parent pointers alone,
 // but just in case that changes:
 root->parent = NULL;
 return root;
 }
 
 /** The code from here on down is an alternative implementation that calls the merge
  ** function and for that matter the distance function much less than the function
  ** above.  */
 
-static double findClosestPair(struct dlList *list, struct hash *distanceHash, 
+typedef double hacPairingFunction(struct dlList *list, struct hash *distanceHash, 
+    hacDistanceFunction *distF, void *distData, struct dlNode **retNodeA, struct dlNode **retNodeB);
+
+static double pairSerially(struct dlList *list, struct hash *distanceHash, 
     hacDistanceFunction *distF, void *extraData, struct dlNode **retNodeA, struct dlNode **retNodeB)
 /* Loop through list returning closest two nodes */
 {
 struct dlNode *aNode;
 double closestDistance = BIGDOUBLE;
 struct dlNode *closestA = NULL, *closestB = NULL;
 for (aNode = list->head; !dlEnd(aNode); aNode = aNode->next)
     {
     struct hacTree *aHt = aNode->val;
     struct slList *a = aHt->itemOrCluster;
     struct dlNode *bNode;
     for (bNode = aNode->next; !dlEnd(bNode); bNode = bNode->next)
         {
 	struct hacTree *bHt = bNode->val;
 	char key[64];
@@ -310,75 +313,84 @@
 	     }
 	double d = *pd;
 	if (d < closestDistance)
 	    {
 	    closestDistance = d;
 	    closestA = aNode;
 	    closestB = bNode;
 	    }
 	}
     }
 *retNodeA = closestA;
 *retNodeB = closestB;
 return closestDistance;
 }
 
-static void lmDlAddValTail(struct lm *lm, struct dlList *list, void *val)
+static void lmDlAddValHead(struct lm *lm, struct dlList *list, void *val)
 /* Allocate new dlNode out of lm, initialize it with val, and add it to end of list */
 {
 struct dlNode *node;
 lmAllocVar(lm, node);
 node->val = val;
-dlAddTail(list, node);
+dlAddHead(list, node);
 }
 
-struct hacTree *hacTreeForCostlyMerges(struct slList *itemList, struct lm *localMem,
+struct hacTree *hacTreeVirtualPairing(struct slList *itemList, struct lm *localMem,
 				 hacDistanceFunction *distF, hacMergeFunction *mergeF,
-				 void *extraData)
+				 void *extraData, hacPairingFunction *pairingF)
 /* Construct hacTree using a method that will minimize the number of calls to
  * the distance and merge functions, assuming they are expensive.  Do a lmCleanup(localMem)
  * to free the returned tree. */
 {
 /* Make up a doubly-linked list in 'remaining' with all items in it */
 struct dlList remaining;
 dlListInit(&remaining);
 struct slList *item;
 int count = 0;
 struct hash *distanceHash = hashNew(0);
 for (item = itemList; item != NULL; item = item->next)
     {
     struct hacTree *ht;
     lmAllocVar(localMem, ht);
     ht->itemOrCluster = item;
-    lmDlAddValTail(localMem, &remaining, ht);
+    lmDlAddValHead(localMem, &remaining, ht);
     count += 1;
     }
 
 /* Loop through finding closest and merging until only one node left on remaining. */
 int i;
 for (i=1; i<count; ++i)
     {
     /* Find closest pair and take them off of remaining list */
     struct dlNode *aNode, *bNode;
-    double distance = findClosestPair(&remaining, distanceHash, distF, extraData, &aNode, &bNode);
+    double distance = pairingF(&remaining, distanceHash, distF, extraData, &aNode, &bNode);
     dlRemove(aNode);
     dlRemove(bNode);
 
     /* Allocated new hacTree item for them and fill it in with a merged value. */
     struct hacTree *ht;
     lmAllocVar(localMem, ht);
     struct hacTree *left = ht->left = aNode->val;
     struct hacTree *right = ht->right = bNode->val;
     left->parent = right->parent = ht;
     ht->itemOrCluster = mergeF(left->itemOrCluster, right->itemOrCluster, extraData);
     ht->childDistance = distance;
 
     /* Put merged item onto remaining list. */
-    lmDlAddValTail(localMem, &remaining, ht);
+    lmDlAddValHead(localMem, &remaining, ht);
     }
 
 /* Clean up and go home. */
 hashFree(&distanceHash);
 struct dlNode *lastNode = dlPopHead(&remaining);
 return lastNode->val;
 }
 
+struct hacTree *hacTreeForCostlyMerges(struct slList *itemList, struct lm *localMem,
+				 hacDistanceFunction *distF, hacMergeFunction *mergeF,
+				 void *extraData)
+/* Construct hacTree using a method that will minimize the number of calls to
+ * the distance and merge functions, assuming they are expensive.  Do a lmCleanup(localMem)
+ * to free the returned tree. */
+{
+return hacTreeVirtualPairing(itemList, localMem, distF, mergeF, extraData, pairSerially);
+}