66c9381654739f32b29ad55e3f1188a138b12b66
angie
  Mon Jul 23 08:44:19 2012 -0700
Track #7964 (1000 Genomes Phase 1): added recently-released chrY.Fixed bugs with display of haploid genotype data.
1000 Genomes' chrY has tons of missing calls, and it turns out that clustering
results are much better if missing info is completely omitted from distance
measure -- although the tree calls things identical that clearly aren't.

diff --git src/hg/hgTracks/vcfTrack.c src/hg/hgTracks/vcfTrack.c
index c15662b..1e7c199 100644
--- src/hg/hgTracks/vcfTrack.c
+++ src/hg/hgTracks/vcfTrack.c
@@ -255,40 +255,42 @@
 static double cwaDistance(const struct slList *item1, const struct slList *item2, void *extraData)
 /* Center-weighted alpha sequence distance function for hacTree clustering of haplotype seqs */
 // This is inner-loop so I am not doing defensive checks.  Caller must ensure:
 // 1. kids's sequences' lengths are both equal to helper->len
 // 2. 0 <= helper->center <= len
 // 3. 0.0 < helper->alpha <= 1.0
 {
 const struct hapCluster *kid1 = (const struct hapCluster *)item1;
 const struct hapCluster *kid2 = (const struct hapCluster *)item2;
 struct cwaExtraData *helper = extraData;
 double distance = 0;
 double weight = 1; // start at center: alpha to the 0th power
 int i;
 for (i=helper->center;  i >= 0;  i--)
     {
-    if (isRef(kid1, i) != isRef(kid2, i))
+    if (hasUnk(kid1, i) != hasUnk(kid2, i))
+	; // no penalty!
+    else if (isRef(kid1, i) != isRef(kid2, i))
 	distance += weight;
-    else if (hasUnk(kid1, i) != hasUnk(kid2, i))
-	distance += weight/2;
     weight *= helper->alpha;
     }
 weight = helper->alpha; // start at center+1: alpha to the 1st power
 for (i=helper->center+1;  i < helper->len;  i++)
     {
-    if (isRef(kid1, i) != isRef(kid2, i))
+    if (hasUnk(kid1, i) != hasUnk(kid2, i))
+	; // no penalty!
+    else if (isRef(kid1, i) != isRef(kid2, i))
 	distance += weight;
     weight *= helper->alpha;
     }
 return distance;
 }
 
 static struct hapCluster *lmHapCluster(struct cwaExtraData *helper)
 /* Use localMem to allocate a new cluster of the given len. */
 {
 struct hapCluster *c = lmAlloc(helper->localMem, sizeof(struct hapCluster));
 c->refCounts = lmAlloc(helper->localMem, helper->len * sizeof(unsigned short));
 c->unkCounts = lmAlloc(helper->localMem, helper->len * sizeof(unsigned short));
 return c;
 }
 
@@ -396,65 +398,69 @@
 for (varIx = 0, rec = vcff->records;  rec != NULL && varIx < endIx;  varIx++, rec = rec->next)
     {
     if (varIx < startIx)
 	continue;
     int countIx = varIx - startIx;
     int gtIx;
     for (gtIx=0;  gtIx < gtCount;  gtIx++)
 	{
 	struct vcfGenotype *gt = &(rec->genotypes[gtIx]);
 	struct hapCluster *c1 = hapArray[gtIx];
 	struct hapCluster *c2 = hapArray[gtCount + gtIx]; // hardwired ploidy=2
 	if (gt->isPhased || gt->isHaploid || (gt->hapIxA == gt->hapIxB))
 	    {
 	    // first chromosome:
 	    c1->leafCount = 1;
-	    c1->gtHapIx = gtIx << 1;
 	    if (gt->hapIxA < 0)
 		c1->unkCounts[countIx] = 1;
 	    else if (gt->hapIxA == 0)
 		c1->refCounts[countIx] = 1;
 	    if (gt->isHaploid)
+		{
 		haveHaploid = TRUE;
+		c1->gtHapIx = gtIx;
+		}
 	    else
 		{
-		c2->leafCount = 1;
+		c1->gtHapIx = gtIx << 1;
 		c2->gtHapIx = (gtIx << 1) | 1;
+		c2->leafCount = 1;
 		if (gt->hapIxB < 0)
 		    c2->unkCounts[countIx] = 1;
 		else if (gt->hapIxB == 0)
 		    c2->refCounts[countIx] = 1;
 		}
 	    }
 	else
 	    {
 	    // Missing data or unphased heterozygote, don't use haplotype info for clustering
 	    c1->leafCount = c2->leafCount = 1;
 	    c1->gtHapIx = gtIx << 1;
 	    c2->gtHapIx = (gtIx << 1) | 1;
 	    c1->unkCounts[countIx] = c2->unkCounts[countIx] = 1;
 	    }
 	}
     if (haveHaploid)
 	{
 	// Some array items will have an empty cluster for missing hap2 --
 	// trim those from the linked list.
 	struct hapCluster *c = hapArray[0];
 	while (c != NULL && c->next != NULL)
 	    {
 	    if (c->next->leafCount == 0)
 		c->next = c->next->next;
+	    else
 	    c = c->next;
 	    }
 	}
     }
 struct hacTree *ht = hacTreeFromItems((struct slList *)(hapArray[0]), lm,
 				      cwaDistance, cwaMerge, cwaCmp, &helper);
 unsigned short *gtHapOrder = needMem(vcff->genotypeCount * 2 * sizeof(unsigned short));
 rSetGtHapOrder(ht, gtHapOrder, retGtHapEnd);
 *retTree = ht;
 return gtHapOrder;
 }
 
 INLINE Color pgSnpColor(char *allele)
 /* Color allele by first base according to pgSnp palette. */
 {
@@ -469,53 +475,61 @@
 else
     return shadesOfGray[5];
 }
 
 INLINE void gtSummaryString(struct vcfRecord *rec, struct dyString *dy)
 // Make pgSnp-like mouseover text, but with genotype counts instead of allele counts.
 {
 if (isNotEmpty(rec->name) && !sameString(rec->name, "."))
     dyStringPrintf(dy, "%s: ", rec->name);
 if (rec->alleleCount < 2)
     {
     dyStringAppendC(dy, '?');
     return;
     }
 const struct vcfFile *vcff = rec->file;
-int gtRefRefCount = 0, gtRefAltCount = 0, gtAltAltCount = 0, gtOtherCount = 0;
+int gtRefRefCount = 0, gtRefAltCount = 0, gtAltAltCount = 0, gtUnkCount = 0, gtOtherCount = 0;
 int i;
 for (i=0;  i < vcff->genotypeCount;  i++)
     {
     struct vcfGenotype *gt = &(rec->genotypes[i]);
     if (gt->hapIxA == 0 && gt->hapIxB == 0)
 	gtRefRefCount++;
     else if (gt->hapIxA == 1 && gt->hapIxB == 1)
 	gtAltAltCount++;
     else if ((gt->hapIxA == 0 && gt->hapIxB == 1) || (gt->hapIxA == 1 && gt->hapIxB == 0))
 	gtRefAltCount++;
+    else if (gt->hapIxA < 0 || gt->hapIxB < 0)
+	gtUnkCount++;
     else
 	gtOtherCount++;
     }
 // These are pooled strings! Restore when done.
 if (revCmplDisp)
     {
     for (i=0;  i < rec->alleleCount;  i++)
 	reverseComplement(rec->alleles[i], strlen(rec->alleles[i]));
     }
+if (sameString(chromName, "chrY"))
+    dyStringPrintf(dy, "%s:%d %s:%d",
+		   rec->alleles[0], gtRefRefCount, rec->alleles[1], gtRefAltCount);
+else
 dyStringPrintf(dy, "%s/%s:%d %s/%s:%d %s/%s:%d", rec->alleles[0], rec->alleles[0], gtRefRefCount,
 	       rec->alleles[0], rec->alleles[1], gtRefAltCount,
 	       rec->alleles[1], rec->alleles[1], gtAltAltCount);
+if (gtUnkCount > 0)
+    dyStringPrintf(dy, " unknown:%d", gtUnkCount);
 if (gtOtherCount > 0)
     dyStringPrintf(dy, " other:%d", gtOtherCount);
 // Restore original values of pooled strings.
 if (revCmplDisp)
     {
     for (i=0;  i < rec->alleleCount;  i++)
 	reverseComplement(rec->alleles[i], strlen(rec->alleles[i]));
     }
 }
 
 void mapBoxForCenterVariant(struct vcfRecord *rec, struct hvGfx *hvg, struct track *tg,
 			    int xOff, int yOff, int width)
 /* Special mouseover for center variant */
 {
 struct dyString *dy = dyStringNew(0);
@@ -616,46 +630,59 @@
     hapHeight -= extraPixel*2;
     }
 double hapsPerPix = (double)gtHapCount / hapHeight;
 int pixIx;
 for (pixIx = 0;  pixIx < hapHeight;  pixIx++)
     {
     int gtHapOrderIxStart = (int)(hapsPerPix * pixIx);
     int gtHapOrderIxEnd = round(hapsPerPix * (pixIx + 1));
     if (gtHapOrderIxEnd == gtHapOrderIxStart)
 	gtHapOrderIxEnd++;
     int unks = 0, refs = 0, alts = 0;
     int gtHapOrderIx;
     for (gtHapOrderIx = gtHapOrderIxStart;  gtHapOrderIx < gtHapOrderIxEnd;  gtHapOrderIx++)
 	{
 	int gtHapIx = gtHapOrder[gtHapOrderIx];
-	int hapIx = gtHapIx & 1;
-	int gtIx = gtHapIx >>1;
+	int ploidy = (sameString(chromName, "chrY")) ? 1 : 2;
+	int hapIx = (ploidy == 1) ? 0 : gtHapIx & 1;
+	int gtIx = (ploidy == 1) ? gtHapIx : gtHapIx >>1;
 	struct vcfGenotype *gt = &(rec->genotypes[gtIx]);
+	if (ploidy == 2)
+	    {
 	if (!gt->isPhased && gt->hapIxA != gt->hapIxB)
 	    unks++;
 	else
 	    {
 	    int alIx = hapIx ? gt->hapIxB : gt->hapIxA;
 	    if (alIx < 0)
 		unks++;
 	    else if (alIx > 0)
 		alts++;
 	    else
 		refs++;
 	    }
 	}
+	else
+	    {
+	    if (gt->hapIxA < 0)
+		unks++;
+	    else if (gt->hapIxA > 0)
+		alts++;
+	    else
+		refs++;
+	    }
+	}
     int y = yOff + extraPixel + pixIx;
     Color col;
     if (colorMode == baseMode)
 	col = colorByBase(refs, alts, unks, rec->alleles[0], rec->alleles[1]);
     else if (colorMode == refAltMode)
 	col = colorByRefAlt(refs, alts, unks);
     else
 	col = colorByAltOnly(refs, alts, unks);
     if (col != MG_WHITE)
 	hvGfxLine(hvg, x1, y, x2, y, col);
     }
 int yBot = yOff + tg->height - CLIP_PAD - 1;
 if (isCenter)
     {
     if (colorMode == altOnlyMode)