9ead4a885ce52795137d26e290c397f165d163da
angie
  Wed Aug 17 16:50:12 2011 -0700
Feature #3711 (VCF haplotype sorting display): For now, limit the numberof variants on either side of the center variant to 20 instead of 50 --
I happened to land in a small region that had many variants that seemed
to have no linkage whatsoever in 1000 Genomes phase 1 interim VCF, e.g.
chr1:1,225,165-1,225,869 would take 42 seconds to cluster and draw!
Limiting to 20 reduces that ~0.5s (also paired w/performance improvements,
but it's mostly the number of distinct haplotypes that we pass into
hacTreeFromItems).
Also, a rounding overflow (transforming pixels to gtHapOrder index) was
sporadically causing apache-only SEGVs; check that range!  :)

diff --git src/hg/hgTracks/vcfTrack.c src/hg/hgTracks/vcfTrack.c
index 102c23a..f09ae03 100644
--- src/hg/hgTracks/vcfTrack.c
+++ src/hg/hgTracks/vcfTrack.c
@@ -216,31 +216,32 @@
 	}
     }
 }
 
 static unsigned short *clusterChroms(const struct vcfFile *vcff, int centerIx,
 				     unsigned short *retGtHapEnd, struct hacTree **retTree)
 /* Given a bunch of VCF records with phased genotypes, build up one haplotype string
  * per chromosome that is the sequence of alleles in all variants (simplified to one base
  * per variant).  Each individual/sample will have two haplotype strings (unless haploid
  * like Y or male X).  Independently cluster the haplotype strings using hacTree with the 
  * center-weighted alpha functions above. Return an array of genotype+haplotype indices
  * in the order determined by the hacTree, and set retGtHapEnd to its length/end. */
 {
 int len = slCount(vcff->records);
 // Limit the number of variants that we compare, to keep from timing out:
-const int maxVariantsPerSide = 50;
+// (really what we should limit is the number of distinct haplo's passed to hacTree!)
+const int maxVariantsPerSide = 20;
 int startIx = max(0, centerIx - maxVariantsPerSide);
 int endIx = min(len, centerIx+1 + maxVariantsPerSide);
 double alpha = 0.5;
 struct lm *lm = lmInit(0);
 struct cwaExtraData helper = { centerIx-startIx, endIx-startIx, alpha, lm };
 int ploidy = 2; // Assuming diploid genomes here, no XXY, tetraploid etc.
 int gtCount = vcff->genotypeCount;
 // Make an slList of hapClusters, but allocate in a big block so I can use
 // array indexing.
 struct hapCluster **hapArray = lmAlloc(lm, sizeof(struct hapCluster *) * gtCount * ploidy);
 int i;
 for (i=0;  i < ploidy * gtCount;  i++)
     {
     hapArray[i] = lmHapCluster(&helper);
     if (i > 0)
@@ -378,30 +379,33 @@
 {
 const double scale = scaleForPixels(width);
 int x1 = round((double)(rec->chromStart-winStart)*scale) + xOff;
 int x2 = round((double)(rec->chromEnd-winStart)*scale) + xOff;
 int w = x2-x1;
 if (w <= 1)
     {
     x1--;
     w = 3;
     }
 double hapsPerPix = (double)gtHapCount / (tg->height-1);
 int pixIx;
 for (pixIx = 0;  pixIx < tg->height;  pixIx++)
     {
     int gtHapOrderIxStart = round(hapsPerPix * pixIx);
+    // Watch out for overflow:
+    if (gtHapOrderIxStart >= gtHapCount)
+	break;
     int gtHapOrderIxEnd = round(hapsPerPix * (pixIx + 1));
     if (gtHapOrderIxEnd == gtHapOrderIxStart)
 	gtHapOrderIxEnd++;
     int unks = 0, refs = 0, alts = 0;
     int gtHapOrderIx;
     for (gtHapOrderIx = gtHapOrderIxStart;  gtHapOrderIx < gtHapOrderIxEnd;  gtHapOrderIx++)
 	{
 	int gtHapIx = gtHapOrder[gtHapOrderIx];
 	int hapIx = gtHapIx & 1;
 	int gtIx = gtHapIx >>1;
 	struct vcfGenotype *gt = &(rec->genotypes[gtIx]);
 	if (!gt->isPhased && gt->hapIxA != gt->hapIxB)
 	    unks++;
 	else
 	    {