b4a8c2460a293048399a03b8f35ddfc588cd9f21
angie
  Wed Aug 10 15:46:02 2011 -0700
Feature #3711 (vcfTabix haplotype sorting display): Draw hierarchical tree in left label area (as much as can fit there).
diff --git src/hg/hgTracks/vcfTrack.c src/hg/hgTracks/vcfTrack.c
index 2e73653..0a06af2 100644
--- src/hg/hgTracks/vcfTrack.c
+++ src/hg/hgTracks/vcfTrack.c
@@ -206,31 +206,31 @@
     struct hapCluster *cR = (struct hapCluster *)ht->right->itemOrCluster;
     if (cL->leafCount >= cR->leafCount)
 	{
 	rSetGtHapOrder(ht->left, gtHapOrder, retGtHapEnd);
 	rSetGtHapOrder(ht->right, gtHapOrder, retGtHapEnd);
 	}
     else
 	{
 	rSetGtHapOrder(ht->right, gtHapOrder, retGtHapEnd);
 	rSetGtHapOrder(ht->left, gtHapOrder, retGtHapEnd);
 	}
     }
 }
 
 static unsigned short *clusterChroms(const struct vcfFile *vcff, int centerIx,
-				     unsigned short *retGtHapEnd)
+				     unsigned short *retGtHapEnd, struct hacTree **retTree)
 /* Given a bunch of VCF records with phased genotypes, build up one haplotype string
  * per chromosome that is the sequence of alleles in all variants (simplified to one base
  * per variant).  Each individual/sample will have two haplotype strings (unless haploid
  * like Y or male X).  Independently cluster the haplotype strings using hacTree with the 
  * center-weighted alpha functions above. Return an array of genotype+haplotype indices
  * in the order determined by the hacTree, and set retGtHapEnd to its length/end. */
 {
 int len = slCount(vcff->records);
 // Should alpha depend on len?  Should the penalty drop off with distance?  Seems like
 // straight-up exponential will cause the signal to drop to nothing pretty quickly...
 double alpha = 0.5;
 struct lm *lm = lmInit(0);
 struct cwaExtraData helper = { centerIx, len, alpha, lm };
 int ploidy = 2; // Assuming diploid genomes here, no XXY, tetraploid etc.
 int gtCount = vcff->genotypeCount;
@@ -287,30 +287,31 @@
 	// Some array items will have an empty cluster for missing hap2 --
 	// trim those from the linked list.
 	struct hapCluster *c = hapArray[0];
 	while (c != NULL && c->next != NULL)
 	    {
 	    if (c->next->leafCount == 0)
 		c->next = c->next->next;
 	    c = c->next;
 	    }
 	}
     }
 struct hacTree *ht = hacTreeFromItems((struct slList *)(hapArray[0]), lm,
 				      cwaDistance, cwaMerge, cwaCmp, &helper);
 unsigned short *gtHapOrder = needMem(vcff->genotypeCount * 2 * sizeof(unsigned short));
 rSetGtHapOrder(ht, gtHapOrder, retGtHapEnd);
+*retTree = ht;
 return gtHapOrder;
 }
 
 INLINE Color pgSnpColor(char *allele)
 /* Color allele by first base according to pgSnp palette. */
 {
 if (allele[0] == 'A')
     return revCmplDisp ? MG_MAGENTA : MG_RED;
 else if (allele[0] == 'C')
     return revCmplDisp ? darkGreenColor : MG_BLUE;
 else if (allele[0] == 'G')
     return revCmplDisp ? MG_BLUE : darkGreenColor;
 else if (allele[0] == 'T')
     return revCmplDisp ? MG_RED : MG_MAGENTA;
 else
@@ -354,45 +355,45 @@
 	       rec->alleles[1], rec->alleles[1], gtAltAltCount);
 if (gtOtherCount > 0)
     dyStringPrintf(dy, " other:%d", gtOtherCount);
 // Restore original values of pooled strings.
 if (revCmplDisp)
     {
     for (i=0;  i < rec->alleleCount;  i++)
 	reverseComplement(rec->alleles[i], strlen(rec->alleles[i]));
     }
 return dy->string;
 }
 
 // This is initialized when we start drawing:
 static Color purple = 0;
 
-static void drawOneRec(struct vcfRecord *rec, unsigned short *gtHapOrder, int gtHapEnd,
+static void drawOneRec(struct vcfRecord *rec, unsigned short *gtHapOrder, unsigned short gtHapCount,
 		       struct track *tg, struct hvGfx *hvg, int xOff, int yOff, int width,
 		       boolean isCenter, boolean colorByRefAlt)
 /* Draw a stack of genotype bars for this record */
 {
 const double scale = scaleForPixels(width);
 int x1 = round((double)(rec->chromStart-winStart)*scale) + xOff;
 int x2 = round((double)(rec->chromEnd-winStart)*scale) + xOff;
 int w = x2-x1;
 if (w <= 1)
     {
     x1--;
     w = 3;
     }
-double hapsPerPix = (2 * (double)rec->file->genotypeCount / tg->height);
+double hapsPerPix = (double)gtHapCount / (tg->height-1);
 int pixIx;
 for (pixIx = 0;  pixIx < tg->height;  pixIx++)
     {
     int gtHapOrderIxStart = round(hapsPerPix * pixIx);
     int gtHapOrderIxEnd = round(hapsPerPix * (pixIx + 1));
     if (gtHapOrderIxEnd == gtHapOrderIxStart)
 	gtHapOrderIxEnd++;
     int unks = 0, refs = 0, alts = 0;
     int gtHapOrderIx;
     for (gtHapOrderIx = gtHapOrderIxStart;  gtHapOrderIx < gtHapOrderIxEnd;  gtHapOrderIx++)
 	{
 	int gtHapIx = gtHapOrder[gtHapOrderIx];
 	int hapIx = gtHapIx & 1;
 	int gtIx = gtHapIx >>1;
 	struct vcfGenotype *gt = &(rec->genotypes[gtIx]);
@@ -464,72 +465,228 @@
     int centerPos = cartInt(cart, cartVar);
     int winSize = seqEnd - seqStart;
     if (centerPos > (seqStart - winSize) && centerPos < (seqEnd + winSize))
 	{
 	int i;
 	struct vcfRecord *rec;
 	for (rec = records, i = 0;  rec != NULL;  rec = rec->next, i++)
 	    if (rec->chromStart >= centerPos)
 		return i;
 	return i-1;
 	}
     }
 return defaultIx;
 }
 
+/* Pixel y offset return type for recursive tree-drawing: */
+enum yRetType
+    {
+    yrtMidPoint,
+    yrtStart,
+    yrtEnd,
+    };
+
+/* Callback for calculating y (in pixels) for a cluster node: */
+typedef int yFromNodeFunc(const struct slList *itemOrCluster, void *extraData,
+			  enum yRetType yType);
+
+static int rDrawTreeInLabelArea(struct hacTree *ht, struct hvGfx *hvg, enum yRetType yType, int x,
+				yFromNodeFunc *yFromNode, void *extraData)
+/* Recursively draw the haplotype clustering tree in the left label area.
+ * Returns pixel height for use at non-leaf levels of tree. */
+{
+const int branchW = 4;
+int labelEnd = leftLabelX + leftLabelWidth;
+if (yType == yrtStart || yType == yrtEnd)
+    {
+    // We're just getting vertical span of a leaf cluster, not drawing any lines.
+    int yLeft, yRight;
+    if (ht->left)
+	yLeft = rDrawTreeInLabelArea(ht->left, hvg, yType, x, yFromNode, extraData);
+    else
+	yLeft = yFromNode(ht->itemOrCluster, extraData, yType);
+    if (ht->right)
+	yRight = rDrawTreeInLabelArea(ht->right, hvg, yType, x, yFromNode, extraData);
+    else
+	yRight = yFromNode(ht->itemOrCluster, extraData, yType);
+    if (yType == yrtStart)
+	return min(yLeft, yRight);
+    else
+	return max(yLeft, yRight);
+    }
+// Otherwise yType is yrtMidPoint.  If we have 2 children, we'll be drawing some lines:
+if (ht->left != NULL && ht->right != NULL)
+    {
+    int midY;
+    if (ht->childDistance == 0 || x+(2*branchW) > labelEnd)
+	{
+	// Treat this as a leaf cluster.
+	// Recursing twice is wasteful. Could be avoided if this, and yFromNode,
+	// returned both yStart and yEnd. However, the time to draw a tree of
+	// 2188 hap's (1kG phase1 interim) is in the noise, so I consider it
+	// not worth the effort of refactoring to save a sub-millisecond here.
+	int yStartLeft = rDrawTreeInLabelArea(ht->left, hvg, yrtStart, x+branchW,
+					      yFromNode, extraData);
+	int yEndLeft = rDrawTreeInLabelArea(ht->left, hvg, yrtEnd, x+branchW,
+					    yFromNode, extraData);
+	int yStartRight = rDrawTreeInLabelArea(ht->right, hvg, yrtStart, x+branchW,
+					       yFromNode, extraData);
+	int yEndRight = rDrawTreeInLabelArea(ht->right, hvg, yrtEnd, x+branchW,
+					     yFromNode, extraData);
+	int yStart = min(yStartLeft, yStartRight);
+	int yEnd = max(yEndLeft, yEndRight);
+	midY = (yStart + yEnd) / 2;
+	hvGfxLine(hvg, x+branchW-1, yStart, x+branchW-1, yEnd-1, MG_BLACK);
+	hvGfxLine(hvg, x+branchW, yStart, labelEnd, yStart, MG_BLACK);
+	hvGfxLine(hvg, x+branchW, yEnd-1, labelEnd, yEnd-1, MG_BLACK);
+	}
+    else
+	{
+	int leftMid = rDrawTreeInLabelArea(ht->left, hvg, yrtMidPoint, x+branchW,
+					   yFromNode, extraData);
+	int rightMid = rDrawTreeInLabelArea(ht->right, hvg, yrtMidPoint, x+branchW,
+					    yFromNode, extraData);
+	midY = (leftMid + rightMid) / 2;
+	hvGfxLine(hvg, x+branchW-1, leftMid, x+branchW-1, rightMid, MG_BLACK);
+	}
+    hvGfxLine(hvg, x, midY, x+branchW-1, midY, MG_BLACK);
+    return midY;
+    }
+else if (ht->left != NULL)
+    return rDrawTreeInLabelArea(ht->left, hvg, yType, x, yFromNode, extraData);
+else if (ht->right != NULL)
+    return rDrawTreeInLabelArea(ht->right, hvg, yType, x, yFromNode, extraData);
+// Leaf node -- return pixel height. Draw a line if yType is midpoint.
+int y = yFromNode(ht->itemOrCluster, extraData, yType);
+if (yType == yrtMidPoint && x < labelEnd)
+    hvGfxLine(hvg, x, y, labelEnd, y, MG_BLACK);
+return y;
+}
+
+struct yFromNodeHelper
+/* Pre-computed mapping from cluster nodes' gtHapIx to pixel heights. */
+    {
+    unsigned short gtHapCount;
+    unsigned short *gtHapIxToPxStart;
+    unsigned short *gtHapIxToPxEnd;
+    };
+
+void initYFromNodeHelper(struct yFromNodeHelper *helper, int yOff, int height,
+			 unsigned short gtHapCount, unsigned short *gtHapOrder)
+/* Build a mapping of genotype and haplotype to pixel y coords. */
+{
+helper->gtHapCount = gtHapCount;
+helper->gtHapIxToPxStart = needMem(gtHapCount * sizeof(unsigned short));
+helper->gtHapIxToPxEnd = needMem(gtHapCount * sizeof(unsigned short));
+double pxPerHap = (double)height / gtHapCount;
+int i;
+for (i = 0;  i < gtHapCount;  i++)
+    {
+    int yStart = round(i * pxPerHap);
+    int yEnd = round((i+1) * pxPerHap);
+    if (yEnd == yStart)
+	yEnd++;
+    int gtHapIx = gtHapOrder[i];
+    helper->gtHapIxToPxStart[gtHapIx] = yOff + yStart;
+    helper->gtHapIxToPxEnd[gtHapIx] = yOff + yEnd;
+    }
+}
+
+static int yFromHapNode(const struct slList *itemOrCluster, void *extraData,
+			enum yRetType yType)
+/* Extract the gtHapIx from hapCluster (hacTree node item), find out its relative order
+ * and translate that to a pixel height. */
+{
+unsigned short gtHapIx = ((const struct hapCluster *)itemOrCluster)->gtHapIx;
+struct yFromNodeHelper *helper = extraData;
+if (gtHapIx >= helper->gtHapCount)
+    errAbort("vcfTrack.c: gtHapIx %d out of range [0,%d).", gtHapIx, helper->gtHapCount);
+int y;
+if (yType == yrtStart)
+    y = helper->gtHapIxToPxStart[gtHapIx];
+else if (yType == yrtEnd)
+    y = helper->gtHapIxToPxEnd[gtHapIx];
+else
+    y = (helper->gtHapIxToPxStart[gtHapIx] + helper->gtHapIxToPxEnd[gtHapIx]) / 2;
+return y;
+}
+
+static void drawTreeInLabelArea(struct hacTree *ht, struct hvGfx *hvg, int yOff, int height,
+				unsigned short gtHapCount, unsigned short *gtHapOrder)
+/* Draw the haplotype clustering in the left label area (as much as fits there). */
+{
+// Figure out which hvg to use, save current clipping, and clip to left label coords:
+struct hvGfx *hvgLL = (hvgSide != NULL) ? hvgSide : hvg;
+int clipXBak, clipYBak, clipWidthBak, clipHeightBak;
+hvGfxGetClip(hvgLL, &clipXBak, &clipYBak, &clipWidthBak, &clipHeightBak);
+hvGfxUnclip(hvgLL);
+hvGfxSetClip(hvgLL, leftLabelX, yOff, leftLabelWidth, height);
+// Draw the tree:
+int x = leftLabelX;
+struct yFromNodeHelper helper = {0, NULL, NULL};
+initYFromNodeHelper(&helper, yOff, height-1, gtHapCount, gtHapOrder);
+(void)rDrawTreeInLabelArea(ht, hvgLL, yrtMidPoint, x, yFromHapNode, &helper);
+// Restore the prior clipping:
+hvGfxUnclip(hvgLL);
+hvGfxSetClip(hvgLL, clipXBak, clipYBak, clipWidthBak, clipHeightBak);
+}
+
 static void vcfHapClusterDraw(struct track *tg, int seqStart, int seqEnd,
 			      struct hvGfx *hvg, int xOff, int yOff, int width,
 			      MgFont *font, Color color, enum trackVisibility vis)
 /* Split samples' chromosomes (haplotypes), cluster them by center-weighted
  * alpha similarity, and draw in the order determined by clustering. */
 {
 const struct vcfFile *vcff = tg->extraUiData;
 if (vcff->records == NULL)
     return;
 purple = hvGfxFindColorIx(hvg, 0x99, 0x00, 0xcc);
 boolean compositeLevel = isNameAtCompositeLevel(tg->tdb, tg->tdb->track);
 char *colorBy = cartUsualStringClosestToHome(cart, tg->tdb, compositeLevel,
 					     VCF_HAP_COLORBY_VAR, VCF_HAP_COLORBY_REFALT);
 boolean colorByRefAlt = sameString(colorBy, VCF_HAP_COLORBY_REFALT);
-unsigned short gtHapEnd = 0;
+unsigned short gtHapCount = 0;
 int ix, centerIx = getCenterVariantIx(tg, seqStart, seqEnd, vcff->records);
-unsigned short *gtHapOrder = clusterChroms(vcff, centerIx, &gtHapEnd);
+struct hacTree *ht = NULL;
+unsigned short *gtHapOrder = clusterChroms(vcff, centerIx, &gtHapCount, &ht);
 struct vcfRecord *rec, *centerRec = NULL;
 for (rec = vcff->records, ix=0;  rec != NULL;  rec = rec->next, ix++)
     {
     if (ix == centerIx)
 	centerRec = rec;
     else
-	drawOneRec(rec, gtHapOrder, gtHapEnd, tg, hvg, xOff, yOff, width, FALSE, colorByRefAlt);
+	drawOneRec(rec, gtHapOrder, gtHapCount, tg, hvg, xOff, yOff, width, FALSE, colorByRefAlt);
     }
 // Draw the center rec on top, outlined with black lines, to make sure it is very visible:
-drawOneRec(centerRec, gtHapOrder, gtHapEnd, tg, hvg, xOff, yOff, width, TRUE, colorByRefAlt);
+drawOneRec(centerRec, gtHapOrder, gtHapCount, tg, hvg, xOff, yOff, width, TRUE, colorByRefAlt);
+// Draw as much of the tree as can fit in the left label area:
+drawTreeInLabelArea(ht, hvg, yOff, tg->height, gtHapCount, gtHapOrder);
 }
 
 static int vcfHapClusterTotalHeight(struct track *tg, enum trackVisibility vis)
 /* Return height of haplotype graph (2 * #samples * lineHeight);
  * 2 because we're assuming diploid genomes here, no XXY, tetraploid etc. */
 {
 // Should we make it single-height when on chrY?
 const struct vcfFile *vcff = tg->extraUiData;
 if (vcff->records == NULL)
     return 0;
 int ploidy = sameString(chromName, "chrY") ? 1 : 2;
 int simpleHeight = ploidy * vcff->genotypeCount * tg->lineHeight;
 int defaultHeight = min(simpleHeight, VCF_DEFAULT_HAP_HEIGHT);
 int cartHeight = cartOrTdbInt(cart, tg->tdb, VCF_HAP_HEIGHT_VAR, defaultHeight);
-tg->height = min(cartHeight, maximumTrackHeight(tg));
+tg->height = min(cartHeight+1, maximumTrackHeight(tg));
 return tg->height;
 }
 
 static char *vcfHapClusterTrackName(struct track *tg, void *item)
 /* If someone asks for itemName/mapItemName, just send name of track like wiggle. */
 {
 return tg->track;
 }
 
 static void vcfHapClusterOverloadMethods(struct track *tg, struct vcfFile *vcff)
 /* If we confirm at load time that we can draw a haplotype graph, use
  * this to overwrite the methods for the rest of execution: */
 {
 tg->heightPer = (tg->visibility == tvSquish) ? (tl.fontHeight/4) : (tl.fontHeight / 2);
 tg->lineHeight = tg->heightPer + 1;