9a49290fedc8b4f9c7a631dedcfe8ac3aa2b4cee
chinhli
  Thu Oct 20 14:10:43 2011 -0700
merge conflict resolved
diff --git src/hg/hgTracks/vcfTrack.c src/hg/hgTracks/vcfTrack.c
index 14ab5c6..dd49567 100644
--- src/hg/hgTracks/vcfTrack.c
+++ src/hg/hgTracks/vcfTrack.c
@@ -1,895 +1,974 @@
 /* vcfTrack -- handlers for Variant Call Format data. */
 
 #include "common.h"
 #include "bigWarn.h"
 #include "dystring.h"
 #include "errCatch.h"
 #include "hacTree.h"
 #include "hdb.h"
 #include "hgTracks.h"
 #include "pgSnp.h"
 #include "trashDir.h"
 #include "vcf.h"
 #include "vcfUi.h"
 #if (defined USE_TABIX && defined KNETFILE_HOOKS)
 #include "knetUdc.h"
 #include "udc.h"
 #endif//def USE_TABIX && KNETFILE_HOOKS
 
 #ifdef USE_TABIX
 
 static boolean getMinQual(struct trackDb *tdb, double *retMinQual, boolean compositeLevel)
-/* Return TRUE and set retMinQual id cart contains minimum QUAL filter */
+/* Return TRUE and set retMinQual if cart contains minimum QUAL filter */
 {
 char cartVar[512];
 safef(cartVar, sizeof(cartVar), "%s."VCF_APPLY_MIN_QUAL_VAR, tdb->track);
 if (cartUsualBooleanClosestToHome(cart, tdb, compositeLevel,
 				  VCF_APPLY_MIN_QUAL_VAR, VCF_DEFAULT_APPLY_MIN_QUAL))
     {
     if (retMinQual != NULL)
 	*retMinQual = cartUsualDoubleClosestToHome(cart, tdb, compositeLevel, VCF_MIN_QUAL_VAR,
 						   VCF_DEFAULT_MIN_QUAL);
     return TRUE;
     }
 return FALSE;
 }
 
 static boolean minQualFail(struct vcfRecord *record, double minQual)
 /* Return TRUE if record's QUAL column value is non-numeric or is less than minQual. */
 {
 if (isEmpty(record->qual) ||
     (record->qual[0] != '-' && !isdigit(record->qual[0])) ||
     atof(record->qual) < minQual)
     return TRUE;
 return FALSE;
 }
 
 static boolean getFilterValues(struct trackDb *tdb, struct slName **retValues,
 			       boolean compositeLevel)
 /* Return TRUE and set retValues if cart contains FILTER column values to exclude */
 {
 char cartVar[512];
 safef(cartVar, sizeof(cartVar), "%s."VCF_EXCLUDE_FILTER_VAR, tdb->track);
 if (cartListVarExists(cart, cartVar))
     {
     struct slName *selectedValues = cartOptionalSlNameList(cart, cartVar);
     if (retValues != NULL)
 	*retValues = selectedValues;
     return TRUE;
     }
 return FALSE;
 }
 
 static boolean filterColumnFail(struct vcfRecord *record, struct slName *filterValues)
 /* Return TRUE if record's FILTER column value(s) matches one of filterValues (from cart). */
 {
 int i;
 for (i = 0;  i < record->filterCount;  i++)
     if (slNameInList(filterValues, record->filters[i]))
 	return TRUE;
 return FALSE;
 }
 
+static boolean getMinFreq(struct trackDb *tdb, double *retMinFreq, boolean compositeLevel)
+/* Return TRUE and set retMinFreq if cart contains nonzero minimum minor allele frequency. */
+{
+char cartVar[512];
+//#*** is there an ExistsClosestToHome?
+safef(cartVar, sizeof(cartVar), "%s."VCF_MIN_ALLELE_FREQ_VAR, tdb->track);
+if (cartVarExists(cart, cartVar))
+    {
+    double minFreq = cartUsualDoubleClosestToHome(cart, tdb, compositeLevel,
+					    VCF_MIN_ALLELE_FREQ_VAR, VCF_DEFAULT_MIN_ALLELE_FREQ);
+    if (minFreq > 0)
+	{
+	if (retMinFreq != NULL)
+	    *retMinFreq = minFreq;
+	return TRUE;
+	}
+    }
+return FALSE;
+}
+
+static boolean minFreqFail(struct vcfRecord *record, double minFreq)
+/* Return TRUE if record's INFO include AF (alternate allele frequencies) or AC+AN
+ * (alternate allele counts and total count of observed alleles) and the minor allele
+ * frequency < minFreq -- or rather, major allele frequency > (1 - minFreq) because
+ * variants with > 2 alleles might have some significant minor frequencies along with
+ * tiny minor frequencies). */
+{
+struct vcfFile *vcff = record->file;
+boolean gotInfo = FALSE;
+double refFreq = 1.0;
+double maxAltFreq = 0.0;
+int i;
+const struct vcfInfoElement *afEl = vcfRecordFindInfo(record, "AF");
+const struct vcfInfoDef *afDef = vcfInfoDefForKey(vcff, "AF");
+if (afEl != NULL && afDef != NULL && afDef->type == vcfInfoFloat)
+    {
+    // If INFO includes alt allele freqs, use them directly.
+    gotInfo = TRUE;
+    for (i = 0;  i < afEl->count;  i++)
+	{
+	double altFreq = afEl->values[i].datFloat;
+	refFreq -= altFreq;
+	if (altFreq > maxAltFreq)
+	    maxAltFreq = altFreq;
+	}
+    }
+else
+    {
+    // Calculate alternate allele freqs from AC and AN:
+    const struct vcfInfoElement *acEl = vcfRecordFindInfo(record, "AC");
+    const struct vcfInfoDef *acDef = vcfInfoDefForKey(vcff, "AC");
+    const struct vcfInfoElement *anEl = vcfRecordFindInfo(record, "AN");
+    const struct vcfInfoDef *anDef = vcfInfoDefForKey(vcff, "AN");
+    if (acEl != NULL && acDef != NULL && acDef->type == vcfInfoInteger &&
+	anEl != NULL && anDef != NULL && anDef->type == vcfInfoInteger && anEl->count == 1)
+	{
+	gotInfo = TRUE;
+	int totalCount = anEl->values[0].datFloat;
+	for (i = 0;  i < acEl->count;  i++)
+	    {
+	    int altCount = acEl->values[i].datFloat;
+	    double altFreq = (double)altCount / totalCount;
+	    refFreq -= altFreq;
+	    if (altFreq < maxAltFreq)
+		maxAltFreq = altFreq;
+	    }
+	}
+    }
+if (gotInfo)
+    {
+    double majorAlFreq = max(refFreq, maxAltFreq);
+    if (majorAlFreq > (1.0 - minFreq))
+	return TRUE;
+    }
+return FALSE;
+}
+
 static void filterRecords(struct vcfFile *vcff, struct trackDb *tdb)
 /* If a filter is specified in the cart, remove any records that don't pass filter. */
 {
 boolean compositeLevel = isNameAtCompositeLevel(tdb, tdb->track);
-double minQual = 0;
+double minQual = VCF_DEFAULT_MIN_QUAL;
 struct slName *filterValues = NULL;
+double minFreq = VCF_DEFAULT_MIN_ALLELE_FREQ;
 boolean gotQualFilter = getMinQual(tdb, &minQual, compositeLevel);
 boolean gotFilterFilter = getFilterValues(tdb, &filterValues, compositeLevel);
-if (! (gotQualFilter || gotFilterFilter) )
+boolean gotMinFreqFilter = getMinFreq(tdb, &minFreq, compositeLevel);
+if (! (gotQualFilter || gotFilterFilter || gotMinFreqFilter) )
     return;
 
 struct vcfRecord *rec, *nextRec, *newList = NULL;
 for (rec = vcff->records;  rec != NULL;  rec = nextRec)
     {
     nextRec = rec->next;
     if (! ((gotQualFilter && minQualFail(rec, minQual)) ||
-	   (gotFilterFilter && filterColumnFail(rec, filterValues))) )
+	   (gotFilterFilter && filterColumnFail(rec, filterValues)) ||
+	   (gotMinFreqFilter && minFreqFail(rec, minFreq)) ))
 	slAddHead(&newList, rec);
     }
 slReverse(&newList);
 vcff->records = newList;
 }
 
 static struct pgSnp *vcfFileToPgSnp(struct vcfFile *vcff, struct trackDb *tdb)
 /* Convert vcff's records to pgSnp; don't free vcff until you're done with pgSnp
  * because it contains pointers into vcff's records' chrom. */
 {
 struct pgSnp *pgsList = NULL;
 struct vcfRecord *rec;
 int maxLen = 33;
+int maxAlCount = 5;
 for (rec = vcff->records;  rec != NULL;  rec = rec->next)
     {
     struct pgSnp *pgs = pgSnpFromVcfRecord(rec);
     // Insertion sequences can be quite long; abbreviate here for display.
     int len = strlen(pgs->name);
     if (len > maxLen)
 	{
-	if (strchr(pgs->name, '/') != NULL)
-	    {
-	    char *copy = cloneString(pgs->name);
-	    char *allele[8];
-	    int cnt = chopByChar(copy, '/', allele, pgs->alleleCount);
-	    int maxAlLen = maxLen / pgs->alleleCount;
+	int maxAlLen = maxLen / min(rec->alleleCount, maxAlCount);
 	    pgs->name[0] = '\0';
 	    int i;
-	    for (i = 0;  i < cnt;  i++)
+	for (i = 0;  i < rec->alleleCount;  i++)
 		{
 		if (i > 0)
 		    safencat(pgs->name, len+1, "/", 1);
-		if (strlen(allele[i]) > maxAlLen-3)
-		    strcpy(allele[i]+maxAlLen-3, "...");
-		safencat(pgs->name, len+1, allele[i], maxAlLen);
+	    if (i >= maxAlCount)
+		{
+		safecat(pgs->name, len+1, "...");
+		pgs->alleleCount = maxAlCount;
+		break;
 		}
+	    if (strlen(rec->alleles[i]) > maxAlLen-3)
+		strcpy(rec->alleles[i]+maxAlLen-3, "...");
+	    safencat(pgs->name, len+1, rec->alleles[i], maxAlLen);
 	    }
-	else
-	    strcpy(pgs->name+maxLen-3, "...");
 	}
     slAddHead(&pgsList, pgs);
     }
 slReverse(&pgsList);
 return pgsList;
 }
 
 
 // Center-weighted alpha clustering of haplotypes -- see Redmine #3711, #2823 note 7
 // It might be nice to use an allele-frequency representation here instead of [ACGTN] strings
 // with "N" for missing info or differences, but keep it simple.
 
 struct cwaExtraData
 /* Helper data for hacTree clustering of haplotypes by center-weighted alpha distance */
     {
     int center;    // index from which each point's contribution to distance is to be weighted
     int len;       // total length of haplotype strings
     double alpha;  // weighting factor for distance from center
     struct lm *localMem;
     };
 
 // This is the representation of a cluster of up to 65,535 haplotypes of equal length,
 // where each variant's alleles are specified as 0 (reference) or 1 (alternate)
 // [or possibly 2 for second alternate, but those are rare so I'll ignore them].
 // When an individual is heterozygous and unphased for some variant, we need to
 // account for missing data.
 struct hapCluster
 {
     struct hapCluster *next;   // hacTree wants slList of items
     unsigned short *refCounts; // per-variant count of reference alleles observed
     unsigned short *unkCounts; // per-variant count of unknown (or unphased het) alleles
     unsigned short leafCount;  // number of leaves under this node (or 1 if leaf)
     unsigned short gtHapIx;    // if leaf, (genotype index << 1) + hapIx (0 or 1 for diploid)
 };
 
 INLINE boolean isRef(const struct hapCluster *c, int varIx)
 // Return TRUE if the leaves of cluster have at least as many reference alleles
 // as alternate alleles for variant varIx.
 {
 unsigned short altCount = c->leafCount - c->refCounts[varIx] - c->unkCounts[varIx];
 return (c->refCounts[varIx] >= altCount);
 }
 
 INLINE boolean hasUnk(const struct hapCluster *c, int varIx)
 // Return TRUE if at least one haplotype in this cluster has an unknown/unphased value at varIx.
 {
 return (c->unkCounts[varIx] > 0);
 }
 
 static double cwaDistance(const struct slList *item1, const struct slList *item2, void *extraData)
 /* Center-weighted alpha sequence distance function for hacTree clustering of haplotype seqs */
 // This is inner-loop so I am not doing defensive checks.  Caller must ensure:
 // 1. kids's sequences' lengths are both equal to helper->len
 // 2. 0 <= helper->center <= len
 // 3. 0.0 < helper->alpha <= 1.0
 {
 const struct hapCluster *kid1 = (const struct hapCluster *)item1;
 const struct hapCluster *kid2 = (const struct hapCluster *)item2;
 struct cwaExtraData *helper = extraData;
 double distance = 0;
 double weight = 1; // start at center: alpha to the 0th power
 int i;
 for (i=helper->center;  i >= 0;  i--)
     {
     if (isRef(kid1, i) != isRef(kid2, i))
 	distance += weight;
     else if (hasUnk(kid1, i) != hasUnk(kid2, i))
 	distance += weight/2;
     weight *= helper->alpha;
     }
 weight = helper->alpha; // start at center+1: alpha to the 1st power
 for (i=helper->center+1;  i < helper->len;  i++)
     {
     if (isRef(kid1, i) != isRef(kid2, i))
 	distance += weight;
     weight *= helper->alpha;
     }
 return distance;
 }
 
 static struct hapCluster *lmHapCluster(struct cwaExtraData *helper)
 /* Use localMem to allocate a new cluster of the given len. */
 {
 struct hapCluster *c = lmAlloc(helper->localMem, sizeof(struct hapCluster));
 c->refCounts = lmAlloc(helper->localMem, helper->len * sizeof(unsigned short));
 c->unkCounts = lmAlloc(helper->localMem, helper->len * sizeof(unsigned short));
 return c;
 }
 
 static struct slList *cwaMerge(const struct slList *item1, const struct slList *item2,
 			       void *extraData)
 /* Make a consensus haplotype from two input haplotypes, for hacTree clustering by
  * center-weighted alpha distance. */
 // This is inner-loop so I am not doing defensive checks.  Caller must ensure that
 // kids's sequences' lengths are both equal to helper->len.
 {
 const struct hapCluster *kid1 = (const struct hapCluster *)item1;
 const struct hapCluster *kid2 = (const struct hapCluster *)item2;
 struct cwaExtraData *helper = extraData;
 struct hapCluster *consensus = lmHapCluster(helper);
 consensus->leafCount = kid1->leafCount + kid2->leafCount;
 consensus->gtHapIx = kid1->gtHapIx;
 int i;
 for (i=0;  i < helper->len;  i++)
     {
     consensus->refCounts[i] = kid1->refCounts[i] + kid2->refCounts[i];
     consensus->unkCounts[i] = kid1->unkCounts[i] + kid2->unkCounts[i];
     }
 return (struct slList *)consensus;
 }
 
 INLINE void hapClusterToString(const struct hapCluster *c, struct dyString *dy, int len)
 /* Write a text representation of hapCluster's alleles into dy.  */
 {
 dyStringClear(dy);
 int i;
 for (i=0;  i < len;  i++)
     dyStringAppendC(dy, (isRef(c, i) ? '0': '1'));
 }
 
 static int cwaCmp(const struct slList *item1, const struct slList *item2, void *extraData)
 /* Convert hapCluster to allele strings for easy comparison by strcmp. */
 {
 const struct hapCluster *c1 = (const struct hapCluster *)item1;
 const struct hapCluster *c2 = (const struct hapCluster *)item2;
 struct cwaExtraData *helper = extraData;
 static struct dyString *dy1 = NULL, *dy2 = NULL;
 if (dy1 == NULL)
     {
     dy1 = dyStringNew(0);
     dy2 = dyStringNew(0);
     }
 hapClusterToString(c1, dy1, helper->len);
 hapClusterToString(c2, dy2, helper->len);
 return strcmp(dy1->string, dy2->string);
 }
 
 void rSetGtHapOrder(struct hacTree *ht, unsigned short *gtHapOrder, unsigned short *retGtHapEnd)
 /* Traverse hacTree and build an ordered array of genotype + haplotype indices. */
 {
 if (ht->left == NULL && ht->right == NULL)
     {
     struct hapCluster *c = (struct hapCluster *)ht->itemOrCluster;
     gtHapOrder[(*retGtHapEnd)++] = c->gtHapIx;
     }
 else if (ht->left == NULL)
     rSetGtHapOrder(ht->right, gtHapOrder, retGtHapEnd);
 else if (ht->right == NULL)
     rSetGtHapOrder(ht->left, gtHapOrder, retGtHapEnd);
 else
     {
     struct hapCluster *cL = (struct hapCluster *)ht->left->itemOrCluster;
     struct hapCluster *cR = (struct hapCluster *)ht->right->itemOrCluster;
     if (cL->leafCount >= cR->leafCount)
 	{
 	rSetGtHapOrder(ht->left, gtHapOrder, retGtHapEnd);
 	rSetGtHapOrder(ht->right, gtHapOrder, retGtHapEnd);
 	}
     else
 	{
 	rSetGtHapOrder(ht->right, gtHapOrder, retGtHapEnd);
 	rSetGtHapOrder(ht->left, gtHapOrder, retGtHapEnd);
 	}
     }
 }
 
 static unsigned short *clusterChroms(const struct vcfFile *vcff, int centerIx,
 				     unsigned short *retGtHapEnd, struct hacTree **retTree)
 /* Given a bunch of VCF records with phased genotypes, build up one haplotype string
  * per chromosome that is the sequence of alleles in all variants (simplified to one base
  * per variant).  Each individual/sample will have two haplotype strings (unless haploid
  * like Y or male X).  Independently cluster the haplotype strings using hacTree with the 
  * center-weighted alpha functions above. Return an array of genotype+haplotype indices
  * in the order determined by the hacTree, and set retGtHapEnd to its length/end. */
 {
 int len = slCount(vcff->records);
 // Limit the number of variants that we compare, to keep from timing out:
 // (really what we should limit is the number of distinct haplo's passed to hacTree!)
 const int maxVariantsPerSide = 20;
 int startIx = max(0, centerIx - maxVariantsPerSide);
 int endIx = min(len, centerIx+1 + maxVariantsPerSide);
 double alpha = 0.5;
 struct lm *lm = lmInit(0);
 struct cwaExtraData helper = { centerIx-startIx, endIx-startIx, alpha, lm };
 int ploidy = 2; // Assuming diploid genomes here, no XXY, tetraploid etc.
 int gtCount = vcff->genotypeCount;
 // Make an slList of hapClusters, but allocate in a big block so I can use
 // array indexing.
 struct hapCluster **hapArray = lmAlloc(lm, sizeof(struct hapCluster *) * gtCount * ploidy);
 int i;
 for (i=0;  i < ploidy * gtCount;  i++)
     {
     hapArray[i] = lmHapCluster(&helper);
     if (i > 0)
 	hapArray[i-1]->next = hapArray[i];
     }
 boolean haveHaploid = FALSE;
 int varIx;
 struct vcfRecord *rec;
 for (varIx = 0, rec = vcff->records;  rec != NULL && varIx < endIx;  varIx++, rec = rec->next)
     {
     if (varIx < startIx)
 	continue;
     int countIx = varIx - startIx;
     int gtIx;
     for (gtIx=0;  gtIx < gtCount;  gtIx++)
 	{
 	struct vcfGenotype *gt = &(rec->genotypes[gtIx]);
 	struct hapCluster *c1 = hapArray[gtIx];
 	struct hapCluster *c2 = hapArray[gtCount + gtIx]; // hardwired ploidy=2
 	if (gt->isPhased || gt->isHaploid || (gt->hapIxA == gt->hapIxB))
 	    {
 	    // first chromosome:
 	    c1->leafCount = 1;
 	    c1->gtHapIx = gtIx << 1;
 	    if (gt->hapIxA == 0)
 		c1->refCounts[countIx] = 1;
 	    if (gt->isHaploid)
 		haveHaploid = TRUE;
 	    else
 		{
 		c2->leafCount = 1;
 		c2->gtHapIx = (gtIx << 1) | 1;
 		if (gt->hapIxB == 0)
 		    c2->refCounts[countIx] = 1;
 		}
 	    }
 	else
 	    {
 	    // Unphased heterozygote, don't use haplotype info for clustering
 	    c1->leafCount = c2->leafCount = 1;
 	    c1->gtHapIx = gtIx << 1;
 	    c2->gtHapIx = (gtIx << 1) | 1;
 	    c1->unkCounts[countIx] = c2->unkCounts[countIx] = 1;
 	    }
 	}
     if (haveHaploid)
 	{
 	// Some array items will have an empty cluster for missing hap2 --
 	// trim those from the linked list.
 	struct hapCluster *c = hapArray[0];
 	while (c != NULL && c->next != NULL)
 	    {
 	    if (c->next->leafCount == 0)
 		c->next = c->next->next;
 	    c = c->next;
 	    }
 	}
     }
 struct hacTree *ht = hacTreeFromItems((struct slList *)(hapArray[0]), lm,
 				      cwaDistance, cwaMerge, cwaCmp, &helper);
 unsigned short *gtHapOrder = needMem(vcff->genotypeCount * 2 * sizeof(unsigned short));
 rSetGtHapOrder(ht, gtHapOrder, retGtHapEnd);
 *retTree = ht;
 return gtHapOrder;
 }
 
 INLINE Color pgSnpColor(char *allele)
 /* Color allele by first base according to pgSnp palette. */
 {
 if (allele[0] == 'A')
     return revCmplDisp ? MG_MAGENTA : MG_RED;
 else if (allele[0] == 'C')
     return revCmplDisp ? darkGreenColor : MG_BLUE;
 else if (allele[0] == 'G')
     return revCmplDisp ? MG_BLUE : darkGreenColor;
 else if (allele[0] == 'T')
     return revCmplDisp ? MG_RED : MG_MAGENTA;
 else
     return shadesOfGray[5];
 }
 
 INLINE char *gtSummaryString(struct vcfRecord *rec)
 // Make pgSnp-like mouseover text, but with genotype counts instead of allele counts.
 // NOTE: Returned string is statically allocated, don't free it!
 {
 static struct dyString *dy = NULL;
 if (dy == NULL)
     dy = dyStringNew(0);
 else
     dyStringClear(dy);
 if (rec->alleleCount < 2)
     return "";
 const struct vcfFile *vcff = rec->file;
 int gtRefRefCount = 0, gtRefAltCount = 0, gtAltAltCount = 0, gtOtherCount = 0;
 int i;
 for (i=0;  i < vcff->genotypeCount;  i++)
     {
     struct vcfGenotype *gt = &(rec->genotypes[i]);
     if (gt->hapIxA == 0 && gt->hapIxB == 0)
 	gtRefRefCount++;
     else if (gt->hapIxA == 1 && gt->hapIxB == 1)
 	gtAltAltCount++;
     else if ((gt->hapIxA == 0 && gt->hapIxB == 1) || (gt->hapIxA == 1 && gt->hapIxB == 0))
 	gtRefAltCount++;
     else
 	gtOtherCount++;
     }
 // These are pooled strings! Restore when done.
 if (revCmplDisp)
     {
     for (i=0;  i < rec->alleleCount;  i++)
 	reverseComplement(rec->alleles[i], strlen(rec->alleles[i]));
     }
 dyStringPrintf(dy, "%s/%s:%d %s/%s:%d %s/%s:%d", rec->alleles[0], rec->alleles[0], gtRefRefCount,
 	       rec->alleles[0], rec->alleles[1], gtRefAltCount,
 	       rec->alleles[1], rec->alleles[1], gtAltAltCount);
 if (gtOtherCount > 0)
     dyStringPrintf(dy, " other:%d", gtOtherCount);
 // Restore original values of pooled strings.
 if (revCmplDisp)
     {
     for (i=0;  i < rec->alleleCount;  i++)
 	reverseComplement(rec->alleles[i], strlen(rec->alleles[i]));
     }
 return dy->string;
 }
 
 // This is initialized when we start drawing:
 static Color purple = 0;
 
 static void drawOneRec(struct vcfRecord *rec, unsigned short *gtHapOrder, unsigned short gtHapCount,
 		       struct track *tg, struct hvGfx *hvg, int xOff, int yOff, int width,
 		       boolean isCenter, boolean colorByRefAlt)
 /* Draw a stack of genotype bars for this record */
 {
 const double scale = scaleForPixels(width);
 int x1 = round((double)(rec->chromStart-winStart)*scale) + xOff;
 int x2 = round((double)(rec->chromEnd-winStart)*scale) + xOff;
 int w = x2-x1;
 if (w <= 1)
     {
     x1--;
     w = 3;
     }
 double hapsPerPix = (double)gtHapCount / (tg->height-1);
 int pixIx;
 for (pixIx = 0;  pixIx < tg->height;  pixIx++)
     {
     int gtHapOrderIxStart = round(hapsPerPix * pixIx);
     // Watch out for overflow:
     if (gtHapOrderIxStart >= gtHapCount)
 	break;
     int gtHapOrderIxEnd = round(hapsPerPix * (pixIx + 1));
     if (gtHapOrderIxEnd == gtHapOrderIxStart)
 	gtHapOrderIxEnd++;
     int unks = 0, refs = 0, alts = 0;
     int gtHapOrderIx;
     for (gtHapOrderIx = gtHapOrderIxStart;  gtHapOrderIx < gtHapOrderIxEnd;  gtHapOrderIx++)
 	{
 	int gtHapIx = gtHapOrder[gtHapOrderIx];
 	int hapIx = gtHapIx & 1;
 	int gtIx = gtHapIx >>1;
 	struct vcfGenotype *gt = &(rec->genotypes[gtIx]);
 	if (!gt->isPhased && gt->hapIxA != gt->hapIxB)
 	    unks++;
 	else
 	    {
 	    int alIx = hapIx ? gt->hapIxB : gt->hapIxA;
 	    if (alIx)
 		alts++;
 	    else
 		refs++;
 	    }
 	}
     const int fudgeFactor = 4;
     Color col = MG_BLACK;
     if (unks > (refs + alts))
 	col = shadesOfGray[5];
     else if (alts > fudgeFactor * refs)
 	col = colorByRefAlt ? MG_RED : pgSnpColor(rec->alleles[1]);
     else if (refs > fudgeFactor * alts)
 	col = colorByRefAlt ? MG_BLUE : pgSnpColor(rec->alleles[0]);
     else
 	col = colorByRefAlt ? purple : shadesOfGray[5];
     int y = yOff + pixIx;
     hvGfxLine(hvg, x1, y, x2, y, col);
     }
 char *mouseoverText = gtSummaryString(rec);
 if (isCenter)
     {
     // Thick black lines to distinguish this variant:
     int yBot = yOff + tg->height - 2;
     hvGfxBox(hvg, x1-3, yOff, 3, tg->height, MG_BLACK);
     hvGfxBox(hvg, x2, yOff, 3, tg->height, MG_BLACK);
     hvGfxLine(hvg, x1-2, yOff, x2+2, yOff, MG_BLACK);
     hvGfxLine(hvg, x1-2, yBot, x2+2, yBot, MG_BLACK);
     // Special mouseover instructions:
     static struct dyString *dy = NULL;
     if (dy == NULL)
 	dy = dyStringNew(0);
     dyStringPrintf(dy, "%s   Haplotypes sorted on ", mouseoverText);
     char cartVar[512];
     safef(cartVar, sizeof(cartVar), "%s.centerVariantChrom", tg->tdb->track);
     char *centerChrom = cartOptionalString(cart, cartVar);
     if (centerChrom == NULL || !sameString(chromName, centerChrom))
 	dyStringAppend(dy, "middle variant by default. ");
     else
 	dyStringAppend(dy, "this variant. ");
     dyStringAppend(dy, "To anchor sorting to a different variant, click on that variant and "
 		   "then click on the 'Use this variant' button below the variant name.");
     mouseoverText = dy->string;
     }
 mapBoxHgcOrHgGene(hvg, rec->chromStart, rec->chromEnd, x1, yOff, w, tg->height, tg->track,
 		  rec->name, mouseoverText, NULL, TRUE, NULL);
 }
 
 static int getCenterVariantIx(struct track *tg, int seqStart, int seqEnd,
 			      struct vcfRecord *records)
 // If the user hasn't specified a local variant/position to use as center,
 // just use the median variant in window.
 {
 int defaultIx = (slCount(records)-1) / 2;
 char cartVar[512];
 safef(cartVar, sizeof(cartVar), "%s.centerVariantChrom", tg->tdb->track);
 char *centerChrom = cartOptionalString(cart, cartVar);
 if (centerChrom != NULL && sameString(chromName, centerChrom))
     {
     safef(cartVar, sizeof(cartVar), "%s.centerVariantPos", tg->tdb->track);
     int centerPos = cartInt(cart, cartVar);
     int winSize = seqEnd - seqStart;
     if (centerPos > (seqStart - winSize) && centerPos < (seqEnd + winSize))
 	{
 	int i;
 	struct vcfRecord *rec;
 	for (rec = records, i = 0;  rec != NULL;  rec = rec->next, i++)
 	    if (rec->chromStart >= centerPos)
 		return i;
 	return i-1;
 	}
     }
 return defaultIx;
 }
 
 /* Pixel y offset return type for recursive tree-drawing: */
 enum yRetType
     {
     yrtMidPoint,
     yrtStart,
     yrtEnd,
     };
 
 /* Callback for calculating y (in pixels) for a cluster node: */
 typedef int yFromNodeFunc(const struct slList *itemOrCluster, void *extraData,
 			  enum yRetType yType);
 
 static int rDrawTreeInLabelArea(struct hacTree *ht, struct hvGfx *hvg, enum yRetType yType, int x,
 				yFromNodeFunc *yFromNode, void *extraData)
 /* Recursively draw the haplotype clustering tree in the left label area.
  * Returns pixel height for use at non-leaf levels of tree. */
 {
 const int branchW = 3;
 const Color boundaryColor = MG_RED;
 const int labelEnd = leftLabelX + leftLabelWidth;
 const int rightEdge = labelEnd - 1;
 if (yType == yrtStart || yType == yrtEnd)
     {
     // We're just getting vertical span of a leaf cluster, not drawing any lines.
     int yLeft, yRight;
     if (ht->left)
 	yLeft = rDrawTreeInLabelArea(ht->left, hvg, yType, x, yFromNode, extraData);
     else
 	yLeft = yFromNode(ht->itemOrCluster, extraData, yType);
     if (ht->right)
 	yRight = rDrawTreeInLabelArea(ht->right, hvg, yType, x, yFromNode, extraData);
     else
 	yRight = yFromNode(ht->itemOrCluster, extraData, yType);
     if (yType == yrtStart)
 	return min(yLeft, yRight);
     else
 	return max(yLeft, yRight);
     }
 // Otherwise yType is yrtMidPoint.  If we have 2 children, we'll be drawing some lines:
 if (ht->left != NULL && ht->right != NULL)
     {
     const int nextX = x + branchW;
     int midY;
     if (ht->childDistance == 0 || x+(2*branchW) > labelEnd)
 	{
 	// Treat this as a leaf cluster.
 	// Recursing twice is wasteful. Could be avoided if this, and yFromNode,
 	// returned both yStart and yEnd. However, the time to draw a tree of
 	// 2188 hap's (1kG phase1 interim) is in the noise, so I consider it
 	// not worth the effort of refactoring to save a sub-millisecond here.
 	int yStartLeft = rDrawTreeInLabelArea(ht->left, hvg, yrtStart, nextX,
 					      yFromNode, extraData);
 	int yEndLeft = rDrawTreeInLabelArea(ht->left, hvg, yrtEnd, nextX,
 					    yFromNode, extraData);
 	int yStartRight = rDrawTreeInLabelArea(ht->right, hvg, yrtStart, nextX,
 					       yFromNode, extraData);
 	int yEndRight = rDrawTreeInLabelArea(ht->right, hvg, yrtEnd, nextX,
 					     yFromNode, extraData);
 	int yStart = min(yStartLeft, yStartRight);
 	int yEnd = max(yEndLeft, yEndRight);
 	midY = (yStart + yEnd) / 2;
 	Color branchColor = (ht->childDistance == 0) ? MG_BLACK : shadesOfGray[5];
 	hvGfxLine(hvg, x, midY, rightEdge, midY, branchColor);
 	hvGfxLine(hvg, rightEdge, yStart, rightEdge, yEnd-1, branchColor);
 	hvGfxLine(hvg, max(rightEdge-1, x), yStart, rightEdge, yStart, boundaryColor);
 	hvGfxLine(hvg, max(rightEdge-1, x), yEnd-1, rightEdge, yEnd-1, boundaryColor);
 	}
     else
 	{
 	int leftMid = rDrawTreeInLabelArea(ht->left, hvg, yrtMidPoint, nextX,
 					   yFromNode, extraData);
 	int rightMid = rDrawTreeInLabelArea(ht->right, hvg, yrtMidPoint, nextX,
 					    yFromNode, extraData);
 	midY = (leftMid + rightMid) / 2;
 	hvGfxLine(hvg, nextX-1, leftMid, nextX-1, rightMid, MG_BLACK);
 	hvGfxLine(hvg, x, midY, nextX-1, midY, MG_BLACK);
 	}
     return midY;
     }
 else if (ht->left != NULL)
     return rDrawTreeInLabelArea(ht->left, hvg, yType, x, yFromNode, extraData);
 else if (ht->right != NULL)
     return rDrawTreeInLabelArea(ht->right, hvg, yType, x, yFromNode, extraData);
 // Leaf node -- return pixel height. Draw a line if yType is midpoint.
 int y = yFromNode(ht->itemOrCluster, extraData, yType);
 if (yType == yrtMidPoint && x < labelEnd)
     {
     hvGfxLine(hvg, x, y, rightEdge, y, MG_BLACK);
     hvGfxLine(hvg, max(rightEdge-1, x), y, rightEdge, y, boundaryColor);
     }
 return y;
 }
 
 struct yFromNodeHelper
 /* Pre-computed mapping from cluster nodes' gtHapIx to pixel heights. */
     {
     unsigned short gtHapCount;
     unsigned short *gtHapIxToPxStart;
     unsigned short *gtHapIxToPxEnd;
     };
 
 void initYFromNodeHelper(struct yFromNodeHelper *helper, int yOff, int height,
 			 unsigned short gtHapCount, unsigned short *gtHapOrder)
 /* Build a mapping of genotype and haplotype to pixel y coords. */
 {
 helper->gtHapCount = gtHapCount;
 helper->gtHapIxToPxStart = needMem(gtHapCount * sizeof(unsigned short));
 helper->gtHapIxToPxEnd = needMem(gtHapCount * sizeof(unsigned short));
 double pxPerHap = (double)height / gtHapCount;
 int i;
 for (i = 0;  i < gtHapCount;  i++)
     {
     int yStart = round(i * pxPerHap);
     int yEnd = round((i+1) * pxPerHap);
     if (yEnd == yStart)
 	yEnd++;
     int gtHapIx = gtHapOrder[i];
     helper->gtHapIxToPxStart[gtHapIx] = yOff + yStart;
     helper->gtHapIxToPxEnd[gtHapIx] = yOff + yEnd;
     }
 }
 
 static int yFromHapNode(const struct slList *itemOrCluster, void *extraData,
 			enum yRetType yType)
 /* Extract the gtHapIx from hapCluster (hacTree node item), find out its relative order
  * and translate that to a pixel height. */
 {
 unsigned short gtHapIx = ((const struct hapCluster *)itemOrCluster)->gtHapIx;
 struct yFromNodeHelper *helper = extraData;
 if (gtHapIx >= helper->gtHapCount)
     errAbort("vcfTrack.c: gtHapIx %d out of range [0,%d).", gtHapIx, helper->gtHapCount);
 int y;
 if (yType == yrtStart)
     y = helper->gtHapIxToPxStart[gtHapIx];
 else if (yType == yrtEnd)
     y = helper->gtHapIxToPxEnd[gtHapIx];
 else
     y = (helper->gtHapIxToPxStart[gtHapIx] + helper->gtHapIxToPxEnd[gtHapIx]) / 2;
 return y;
 }
 
 static void drawTreeInLabelArea(struct hacTree *ht, struct hvGfx *hvg, int yOff, int height,
 				unsigned short gtHapCount, unsigned short *gtHapOrder)
 /* Draw the haplotype clustering in the left label area (as much as fits there). */
 {
 // Figure out which hvg to use, save current clipping, and clip to left label coords:
 struct hvGfx *hvgLL = (hvgSide != NULL) ? hvgSide : hvg;
 int clipXBak, clipYBak, clipWidthBak, clipHeightBak;
 hvGfxGetClip(hvgLL, &clipXBak, &clipYBak, &clipWidthBak, &clipHeightBak);
 hvGfxUnclip(hvgLL);
 hvGfxSetClip(hvgLL, leftLabelX, yOff, leftLabelWidth, height);
 // Draw the tree:
 int x = leftLabelX;
 struct yFromNodeHelper helper = {0, NULL, NULL};
 initYFromNodeHelper(&helper, yOff, height-1, gtHapCount, gtHapOrder);
 (void)rDrawTreeInLabelArea(ht, hvgLL, yrtMidPoint, x, yFromHapNode, &helper);
 // Restore the prior clipping:
 hvGfxUnclip(hvgLL);
 hvGfxSetClip(hvgLL, clipXBak, clipYBak, clipWidthBak, clipHeightBak);
 }
 
 static void ignoreEm(char *format, va_list args)
 /* Ignore warnings from genotype parsing -- when there's one, there
  * are usually hundreds more just like it. */
 {
 }
 
 static void vcfHapClusterDraw(struct track *tg, int seqStart, int seqEnd,
 			      struct hvGfx *hvg, int xOff, int yOff, int width,
 			      MgFont *font, Color color, enum trackVisibility vis)
 /* Split samples' chromosomes (haplotypes), cluster them by center-weighted
  * alpha similarity, and draw in the order determined by clustering. */
 {
 const struct vcfFile *vcff = tg->extraUiData;
 if (vcff->records == NULL)
     return;
 purple = hvGfxFindColorIx(hvg, 0x99, 0x00, 0xcc);
 boolean compositeLevel = isNameAtCompositeLevel(tg->tdb, tg->tdb->track);
 char *colorBy = cartUsualStringClosestToHome(cart, tg->tdb, compositeLevel,
 					     VCF_HAP_COLORBY_VAR, VCF_HAP_COLORBY_REFALT);
 boolean colorByRefAlt = sameString(colorBy, VCF_HAP_COLORBY_REFALT);
 pushWarnHandler(ignoreEm);
 struct vcfRecord *rec;
 for (rec = vcff->records;  rec != NULL;  rec = rec->next)
     vcfParseGenotypes(rec);
 popWarnHandler();
 unsigned short gtHapCount = 0;
 int ix, centerIx = getCenterVariantIx(tg, seqStart, seqEnd, vcff->records);
 struct hacTree *ht = NULL;
 unsigned short *gtHapOrder = clusterChroms(vcff, centerIx, &gtHapCount, &ht);
 struct vcfRecord *centerRec = NULL;
 for (rec = vcff->records, ix=0;  rec != NULL;  rec = rec->next, ix++)
     {
     if (ix == centerIx)
 	centerRec = rec;
     else
 	drawOneRec(rec, gtHapOrder, gtHapCount, tg, hvg, xOff, yOff, width, FALSE, colorByRefAlt);
     }
 // Draw the center rec on top, outlined with black lines, to make sure it is very visible:
 drawOneRec(centerRec, gtHapOrder, gtHapCount, tg, hvg, xOff, yOff, width, TRUE, colorByRefAlt);
 // Draw as much of the tree as can fit in the left label area:
 drawTreeInLabelArea(ht, hvg, yOff, tg->height, gtHapCount, gtHapOrder);
 }
 
 static int vcfHapClusterTotalHeight(struct track *tg, enum trackVisibility vis)
 /* Return height of haplotype graph (2 * #samples * lineHeight);
  * 2 because we're assuming diploid genomes here, no XXY, tetraploid etc. */
 {
 // Should we make it single-height when on chrY?
 const struct vcfFile *vcff = tg->extraUiData;
 if (vcff->records == NULL)
     return 0;
 int ploidy = sameString(chromName, "chrY") ? 1 : 2;
 int simpleHeight = ploidy * vcff->genotypeCount * tg->lineHeight;
 int defaultHeight = min(simpleHeight, VCF_DEFAULT_HAP_HEIGHT);
 int cartHeight = cartOrTdbInt(cart, tg->tdb, VCF_HAP_HEIGHT_VAR, defaultHeight);
 tg->height = min(cartHeight+1, maximumTrackHeight(tg));
 return tg->height;
 }
 
 static char *vcfHapClusterTrackName(struct track *tg, void *item)
 /* If someone asks for itemName/mapItemName, just send name of track like wiggle. */
 {
 return tg->track;
 }
 
 static void vcfHapClusterOverloadMethods(struct track *tg, struct vcfFile *vcff)
 /* If we confirm at load time that we can draw a haplotype graph, use
  * this to overwrite the methods for the rest of execution: */
 {
 tg->heightPer = (tg->visibility == tvSquish) ? (tl.fontHeight/4) : (tl.fontHeight / 2);
 tg->lineHeight = tg->heightPer + 1;
 tg->drawItems = vcfHapClusterDraw;
 tg->totalHeight = vcfHapClusterTotalHeight;
 tg->itemHeight = tgFixedItemHeight;
 tg->itemName = vcfHapClusterTrackName;
 tg->mapItemName = vcfHapClusterTrackName;
 tg->itemStart = tgItemNoStart;
 tg->itemEnd = tgItemNoEnd;
 tg->mapsSelf = TRUE;
 tg->extraUiData = vcff;
 }
 
 static void vcfTabixLoadItems(struct track *tg)
 /* Load items in window from VCF file using its tabix index file. */
 {
 char *fileOrUrl = NULL;
 /* Figure out url or file name. */
 if (tg->parallelLoading)
     {
     /* do not use mysql uring parallel-fetch load */
     fileOrUrl = trackDbSetting(tg->tdb, "bigDataUrl");
     }
 else
     {
     // TODO: may need to handle per-chrom files like bam, maybe fold bamFileNameFromTable into this:
     struct sqlConnection *conn = hAllocConnTrack(database, tg->tdb);
     fileOrUrl = bbiNameFromSettingOrTable(tg->tdb, conn, tg->table);
     hFreeConn(&conn);
     }
 int vcfMaxErr = -1;
 struct vcfFile *vcff = NULL;
 boolean compositeLevel = isNameAtCompositeLevel(tg->tdb, tg->tdb->track);
 boolean hapClustEnabled = cartUsualBooleanClosestToHome(cart, tg->tdb, compositeLevel,
 							VCF_HAP_ENABLED_VAR, TRUE);
 /* protect against temporary network error */
 struct errCatch *errCatch = errCatchNew();
 if (errCatchStart(errCatch))
     {
     vcff = vcfTabixFileMayOpen(fileOrUrl, chromName, winStart, winEnd, vcfMaxErr);
     if (vcff != NULL)
 	{
 	filterRecords(vcff, tg->tdb);
 	if (hapClustEnabled && vcff->genotypeCount > 1 && vcff->genotypeCount < 3000 &&
 	    (tg->visibility == tvPack || tg->visibility == tvSquish))
 	    vcfHapClusterOverloadMethods(tg, vcff);
 	else
 	    {
 	    tg->items = vcfFileToPgSnp(vcff, tg->tdb);
 	    // pgSnp bases coloring/display decision on count of items:
 	    tg->customInt = slCount(tg->items);
 	    }
 	// Don't vcfFileFree here -- we are using its string pointers!
 	}
     }
 errCatchEnd(errCatch);
 if (errCatch->gotError || vcff == NULL)
     {
     if (isNotEmpty(errCatch->message->string))
 	tg->networkErrMsg = cloneString(errCatch->message->string);
     tg->drawItems = bigDrawWarning;
     tg->totalHeight = bigWarnTotalHeight;
     }
 errCatchFree(&errCatch);
 }
 
 void vcfTabixMethods(struct track *track)
 /* Methods for VCF + tabix files. */
 {
 pgSnpMethods(track);
 track->loadItems = vcfTabixLoadItems;
 track->canPack = TRUE;
 }
 
 #else // no USE_TABIX:
 
 // If code was not built with USE_TABIX=1, but there are vcfTabix tracks, display a message
 // in place of the tracks (instead of annoying "No track handler" warning messages).
 
 static void drawUseVcfTabixWarning(struct track *tg, int seqStart, int seqEnd, struct hvGfx *hvg,
 				   int xOff, int yOff, int width, MgFont *font, Color color,
 				   enum trackVisibility vis)
 /* Draw a message saying that the code needs to be built with USE_TABIX=1. */
 {
 char message[512];
 safef(message, sizeof(message),
       "Get tabix from samtools.sourceforge.net and recompile kent/src with USE_TABIX=1");
 Color yellow = hvGfxFindRgb(hvg, &undefinedYellowColor);
 hvGfxBox(hvg, xOff, yOff, width, tg->heightPer, yellow);
 hvGfxTextCentered(hvg, xOff, yOff, width, tg->heightPer, MG_BLACK, font, message);
 }
 
 void vcfTabixMethods(struct track *track)
 /* Methods for VCF alignment files, in absence of tabix lib. */
 {
 #if (defined USE_TABIX && defined KNETFILE_HOOKS)
 knetUdcInstall();
 #endif//def USE_TABIX && KNETFILE_HOOKS
 messageLineMethods(track);
 track->drawItems = drawUseVcfTabixWarning;
 }
 
 #endif // no USE_TABIX