7f3ea18de77e114f05f283db03f117151e1e199b
angie
  Fri May 13 16:06:16 2011 -0700
Feature #3711 (VCF center-weighted haplotype clustering): David request:make the variant used as the center really stand out visually.

diff --git src/hg/hgTracks/vcfTrack.c src/hg/hgTracks/vcfTrack.c
index 2c6994d..f3c697f 100644
--- src/hg/hgTracks/vcfTrack.c
+++ src/hg/hgTracks/vcfTrack.c
@@ -183,47 +183,45 @@
     struct hapCluster *cL = (struct hapCluster *)ht->left->itemOrCluster;
     struct hapCluster *cR = (struct hapCluster *)ht->right->itemOrCluster;
     if (cL->leafCount >= cR->leafCount)
 	{
 	rSetGtHapOrder(ht->left, gtHapOrder, retGtHapEnd);
 	rSetGtHapOrder(ht->right, gtHapOrder, retGtHapEnd);
 	}
     else
 	{
 	rSetGtHapOrder(ht->right, gtHapOrder, retGtHapEnd);
 	rSetGtHapOrder(ht->left, gtHapOrder, retGtHapEnd);
 	}
     }
 }
 
-static unsigned short *clusterChroms(const struct vcfFile *vcff, unsigned short *retGtHapEnd)
+static unsigned short *clusterChroms(const struct vcfFile *vcff, int centerIx,
+				     unsigned short *retGtHapEnd)
 /* Given a bunch of VCF records with phased genotypes, build up one haplotype string
  * per chromosome that is the sequence of alleles in all variants (simplified to one base
  * per variant).  Each individual/sample will have two haplotype strings (unless haploid
  * like Y or male X).  Independently cluster the haplotype strings using hacTree with the 
  * center-weighted alpha functions above. Return an array of genotype+haplotype indices
  * in the order determined by the hacTree, and set retGtHapEnd to its length/end. */
 {
 int len = slCount(vcff->records);
-// Use the median variant in the window as the center; would be even nicer to allow
-// the user to choose a variant (or position) to use as center:
-int center = len / 2;
 // Should alpha depend on len?  Should the penalty drop off with distance?  Seems like
 // straight-up exponential will cause the signal to drop to nothing pretty quickly...
 double alpha = 0.5;
 struct lm *lm = lmInit(0);
-struct cwaExtraData helper = { center, len, alpha, lm };
+struct cwaExtraData helper = { centerIx, len, alpha, lm };
 int ploidy = 2; // Assuming diploid genomes here, no XXY, tetraploid etc.
 int gtCount = vcff->genotypeCount;
 // Make an slList of hapClusters, but allocate in a big block so I can use
 // array indexing.
 struct hapCluster **hapArray = lmAlloc(lm, sizeof(struct hapCluster *) * gtCount * ploidy);
 int i;
 for (i=0;  i < ploidy * gtCount;  i++)
     {
     hapArray[i] = lmHapCluster(&helper);
     if (i > 0)
 	hapArray[i-1]->next = hapArray[i];
     }
 boolean haveHaploid = FALSE;
 int varIx;
 struct vcfRecord *rec;
@@ -300,47 +298,57 @@
     return shadesOfGray[5];
 // Copying pgSnp color scheme here, using first base of allele which is not ideal for multibase
 // but allows us to simplify it to 5 colors:
 else if (allele[0] == 'A')
     return MG_RED;
 else if (allele[0] == 'C')
     return MG_BLUE;
 else if (allele[0] == 'G')
     return darkGreenColor;
 else if (allele[0] == 'T')
     return MG_MAGENTA;
 else
     return shadesOfGray[5];
 }
 
-INLINE Color colorFromRefAlt(struct vcfGenotype *gt, int hapIx, boolean grayUnphasedHet)
-/* Color allele red for alternate allele, blue for reference allele. */
+INLINE Color colorFromRefAlt(struct vcfGenotype *gt, int hapIx, boolean grayUnphasedHet,
+			     boolean isCenter)
+/* Color allele red for alternate allele, blue for reference allele -- 
+ * except for special center variant, make it yellow/green for contrast. */
 {
 if (grayUnphasedHet && !gt->isPhased && gt->hapIxA != gt->hapIxB)
     return shadesOfGray[5];
 int alIx = hapIx ? gt->hapIxB : gt->hapIxA;
+if (isCenter)
+    return alIx ? MG_YELLOW : MG_GREEN;
 return alIx ? MG_RED : MG_BLUE;
 }
 
 
 INLINE int drawOneHap(struct vcfGenotype *gt, int hapIx,
 		      char *ref, char *altAlleles[], int altCount,
-		      struct hvGfx *hvg, int x1, int y, int w, int itemHeight, int lineHeight)
+		      struct hvGfx *hvg, int x1, int y, int w, int itemHeight, int lineHeight,
+		      boolean isCenter)
 /* Draw a base-colored box for genotype[hapIx].  Return the new y offset. */
 {
-Color color = colorHapByRefAlt ? colorFromRefAlt(gt, hapIx, TRUE) :
+Color color = colorHapByRefAlt ? colorFromRefAlt(gt, hapIx, TRUE, isCenter) :
 				 colorFromGt(gt, hapIx, ref, altAlleles, altCount, TRUE);
+if (w == 1)
+    {
+    x1--;
+    w = 3;
+    }
 hvGfxBox(hvg, x1, y, w, itemHeight+1, color);
 y += itemHeight+1;
 return y;
 }
 
 INLINE char *gtSummaryString(struct vcfRecord *rec, char **altAlleles, int altCount)
 // Make pgSnp-like mouseover text, but with genotype counts instead of allele counts.
 // NOTE 1: Returned string is statically allocated, don't free it!
 // NOTE 2: if revCmplDisp is set, this reverse-complements rec->ref and altAlleles!
 {
 static struct dyString *dy = NULL;
 if (dy == NULL)
     dy = dyStringNew(0);
 dyStringClear(dy);
 const struct vcfFile *vcff = rec->file;
@@ -361,73 +369,99 @@
 if (revCmplDisp)
     {
     reverseComplement(rec->ref, strlen(rec->ref));
     for (i=0;  i < altCount;  i++)
 	reverseComplement(altAlleles[i], strlen(altAlleles[i]));
     }
 
 dyStringPrintf(dy, "%s/%s:%d %s/%s:%d %s/%s:%d", rec->ref, rec->ref, gtRefRefCount,
 	       rec->ref, altAlleles[0], gtRefAltCount,
 	       altAlleles[0], altAlleles[0], gtAltAltCount);
 if (gtOtherCount > 0)
     dyStringPrintf(dy, " other:%d", gtOtherCount);
 return dy->string;
 }
 
-static void vcfHapClusterDraw(struct track *tg, int seqStart, int seqEnd,
-			      struct hvGfx *hvg, int xOff, int yOff, int width,
-			      MgFont *font, Color color, enum trackVisibility vis)
-/* Split samples' chromosomes (haplotypes), cluster them by center-weighted
- * alpha similarity, and draw in the order determined by clustering. */
+static void drawOneRec(struct vcfRecord *rec, boolean isCenter,
+		       unsigned short *gtHapOrder, int gtHapEnd,
+		       struct track *tg, struct hvGfx *hvg, int xOff, int yOff, int width)
+/* Draw a stack of genotype bars for this record */
 {
-const struct vcfFile *vcff = tg->extraUiData;
-if (vcff->records == NULL)
-    return;
-unsigned short gtHapEnd = 0;
-unsigned short *gtHapOrder = clusterChroms(vcff, &gtHapEnd);
-struct dyString *tmp = dyStringNew(0);
-struct vcfRecord *rec;
+static struct dyString *tmp = NULL;
+if (tmp == NULL)
+    tmp = dyStringNew(0);
+char *altAlleles[256];
+int altCount;
 const int lineHeight = tg->lineHeight;
 const int itemHeight = tg->heightPer;
 const double scale = scaleForPixels(width);
-for (rec = vcff->records;  rec != NULL;  rec = rec->next)
-    {
-    dyStringClear(tmp);
-    dyStringAppend(tmp, rec->alt);
-    char *altAlleles[256];
-    int altCount = chopCommas(tmp->string, altAlleles);
     int x1 = round((double)(rec->chromStart-winStart)*scale) + xOff;
     int x2 = round((double)(rec->chromEnd-winStart)*scale) + xOff;
     int w = x2-x1;
     if (w < 1)
 	w = 1;
     int y = yOff;
+dyStringClear(tmp);
+dyStringAppend(tmp, rec->alt);
+altCount = chopCommas(tmp->string, altAlleles);
     int gtHapOrderIx;
     for (gtHapOrderIx = 0;  gtHapOrderIx < gtHapEnd;  gtHapOrderIx++)
 	{
 	int gtHapIx = gtHapOrder[gtHapOrderIx];
 	int hapIx = gtHapIx & 1;
 	int gtIx = gtHapIx >>1;
 	struct vcfGenotype *gt = &(rec->genotypes[gtIx]);
 	y = drawOneHap(gt, hapIx, rec->ref, altAlleles, altCount,
-		       hvg, x1, y, w, itemHeight, lineHeight);
+		   hvg, x1, y, w, itemHeight, lineHeight, isCenter);
 	}
     mapBoxHgcOrHgGene(hvg, rec->chromStart, rec->chromEnd, x1, yOff, w, tg->height, tg->track,
 		      rec->name, gtSummaryString(rec, altAlleles, altCount),
 		      NULL, TRUE, NULL);
     }
-// left labels?
+
+static void vcfHapClusterDraw(struct track *tg, int seqStart, int seqEnd,
+			      struct hvGfx *hvg, int xOff, int yOff, int width,
+			      MgFont *font, Color color, enum trackVisibility vis)
+/* Split samples' chromosomes (haplotypes), cluster them by center-weighted
+ * alpha similarity, and draw in the order determined by clustering. */
+{
+const struct vcfFile *vcff = tg->extraUiData;
+if (vcff->records == NULL)
+    return;
+unsigned short gtHapEnd = 0;
+// Use the median variant in the window as the center; would be even nicer to allow
+// the user to choose a variant (or position) to use as center:
+int ix, centerIx = (slCount(vcff->records)-1) / 2;
+unsigned short *gtHapOrder = clusterChroms(vcff, centerIx, &gtHapEnd);
+struct vcfRecord *rec, *centerRec = NULL;
+for (rec = vcff->records, ix=0;  rec != NULL;  rec = rec->next, ix++)
+    {
+    boolean isCenter = (ix == centerIx);
+    drawOneRec(rec, isCenter, gtHapOrder, gtHapEnd, tg, hvg, xOff, yOff, width);
+    if (isCenter)
+	centerRec = rec;
+    }
+// Draw the center rec on top, outlined with black lines, to make sure it is very visible:
+drawOneRec(centerRec, TRUE, gtHapOrder, gtHapEnd, tg, hvg, xOff, yOff, width);
+const double scale = scaleForPixels(width);
+int x1 = round((double)(centerRec->chromStart-winStart)*scale) + xOff;
+int x2 = round((double)(centerRec->chromEnd-winStart)*scale) + xOff;
+int yBot = yOff + tg->height - 2;
+hvGfxLine(hvg, x1-2, yOff, x1-2, yBot, MG_BLACK);
+hvGfxLine(hvg, x1-2, yOff, x2+2, yOff, MG_BLACK);
+hvGfxLine(hvg, x2+2, yOff, x2+2, yBot, MG_BLACK);
+hvGfxLine(hvg, x1-2, yBot, x2+2, yBot, MG_BLACK);
 }
 
 static int vcfHapClusterTotalHeight(struct track *tg, enum trackVisibility vis)
 /* Return height of haplotype graph (2 * #samples * lineHeight);
  * 2 because we're assuming diploid genomes here, no XXY, tetraploid etc. */
 {
 // Should we make it single-height when on chrY?
 const struct vcfFile *vcff = tg->extraUiData;
 if (vcff->records == NULL)
     return 0;
 int ploidy = 2;
 tg->height = ploidy * vcff->genotypeCount * tg->lineHeight;
 return tg->height;
 }