c69992a8a18bf93e03b4d57af505b05df2213f45
angie
  Wed Aug 17 16:43:41 2011 -0700
Feature #3711 (VCF haplotype sorting display): Performance improvements.1. lib/hacTree.c: instead of doing a zillion incremental memcpy's to drop nodes,
build up a set of nodes to delete and then do the minimal memmove's afterwards.
Use memmove per memcpy's man page, which says not to use memcpy if regions overlap!
2. lib/vcf.c: avoid unnecessary calls to vcfFilePooledStr from inner loops.
Also, if ref is all nucleotides (not symbolic as for SV's), use it to set chromEnd.
It may still be overwritten if the END keyword is used in the info column.

diff --git src/lib/hacTree.c src/lib/hacTree.c
index f7e4bdc..c017520 100644
--- src/lib/hacTree.c
+++ src/lib/hacTree.c
@@ -194,67 +194,83 @@
 //
 // Now say ((A, B), D) is the closest remaining pair, and is popped from
 // the head of the pool.  We substitute into pairs containing (A, B) and
 // delete pairs containing D.  After the replacement step, the pool is
 // down to a single element:
 //
 //    (((A, B), D), C)   [X]
 {
 if (itemList == NULL)
     return NULL;
 struct hacTree *root = NULL;
 int itemCount = slCount(itemList);
 int pairCount = 0;
 struct hacTree *leafPairs = pairUpItems(itemList, itemCount, &pairCount, localMem,
 					distF, mergeF, cmpF, extraData);
+int *nodesToDelete = needMem(pairCount * sizeof(int));
 struct hacTree *poolHead = leafPairs;
 int poolLength = pairCount;
 while (poolLength > 0)
     {
     // Scan pool for node with lowest childDistance; swap that node w/head
-    int i;
     int bestIx = 0;
     double minScore = poolHead[0].childDistance;
+    int i;
     for (i=1;  i < poolLength;  i++)
 	if (poolHead[i].childDistance < minScore)
 	    {
 	    minScore = poolHead[i].childDistance;
 	    bestIx = i;
 	    }
     if (bestIx != 0)
 	swapBytes((char *)&(poolHead[0]), (char *)&(poolHead[bestIx]), sizeof(struct hacTree));
     // Pop the best (lowest-distance) node from poolHead, make it root (for now).
     root = poolHead;
     poolHead = &(poolHead[1]);
     poolLength--;
     // Where root->left is found in the pool, replace it with root.
     // Where root->right is found, drop that node so it doesn't become
     // a duplicate of the replacement cases.
+    int numNodesToDelete = 0;
     for (i=0;  i < poolLength;  i++)
 	{
 	struct hacTree *node = &(poolHead[i]);
 	if (node->left == root->left)
 	    // found root->left; replace node->left with root (merge root with node->right):
 	    initNode(node, root, node->right, distF, mergeF, extraData);
 	else if (node->right == root->left)
 	    // found root->left; replace node->right with root (merge root with node->left):
-	    initNode(node, root, node->left, distF, mergeF, extraData);
+	    initNode(node, node->left, root, distF, mergeF, extraData);
 	else if (node->left == root->right || node->right == root->right)
+	    // found root->right; mark this node for deletion:
+	    nodesToDelete[numNodesToDelete++] = i;
+	}
+    if (numNodesToDelete > 0)
 	    {
-	    // found root->right; drop this node:
-	    if (i < poolLength-1)
-		memcpy(node, &(poolHead[i+1]), (poolLength-i-1)*sizeof(struct hacTree));
-	    poolLength--;
-	    i--;
+	int newPoolLen = nodesToDelete[0];
+	// This will be "next node to delete" for the last marked node:
+	nodesToDelete[numNodesToDelete] = poolLength;
+	for (i = 0;  i < numNodesToDelete;  i++)
+	    {
+	    int nodeToDel = nodesToDelete[i];
+	    int nextNodeToDel = nodesToDelete[i+1];
+	    int blkSize = nextNodeToDel - (nodeToDel+1);
+	    if (blkSize == 0)
+		continue;
+	    struct hacTree *fromNode = &(poolHead[nodeToDel+1]);
+	    struct hacTree *toNode = &(poolHead[newPoolLen]);
+	    memmove(toNode, fromNode, blkSize * sizeof(struct hacTree));
+	    newPoolLen += blkSize;
 	    }
+	poolLength = newPoolLen;
 	}
     // root now has a stable address, unlike nodes still in the pool, so set parents here:
     if (root->left != NULL)
 	root->left->parent = root;
     if (root->right != NULL)
 	root->right->parent = root;
     }
 // This shouldn't be necessary as long as initNode leaves parent pointers alone,
 // but just in case that changes:
 root->parent = NULL;
 return root;
 }