2c0f74b4fbfefc533d27bf3ccdc18e4ecfdecae7
angie
  Fri Oct 5 17:07:18 2012 -0700
Follow-up to ce70491d:
1. Move hgTracks left-base-of-indel trimming code up to vcf.[ch] for
sharing w/hgc.
2. Correct chromStart in hgTracks mapBox links to hgc when we have
trimmed a left base.
3. In hgc, abbreviate long sequences (e.g. 40kb deletion) and show
trimmed left base in parentheses for consistency with VCF file (and
sometimes INFO fields that use left-inclusive coords/seqs).
4. In pgSnpFromVcfRecord, don't truncate long alleles because hgTracks
and hgc do their own abbreviating.

diff --git src/hg/hgTracks/vcfTrack.c src/hg/hgTracks/vcfTrack.c
index 856496a..11156ce 100644
--- src/hg/hgTracks/vcfTrack.c
+++ src/hg/hgTracks/vcfTrack.c
@@ -159,76 +159,53 @@
     return;
 
 struct vcfRecord *rec, *nextRec, *newList = NULL;
 for (rec = vcff->records;  rec != NULL;  rec = nextRec)
     {
     nextRec = rec->next;
     if (! ((gotQualFilter && minQualFail(rec, minQual)) ||
 	   (gotFilterFilter && filterColumnFail(rec, filterValues)) ||
 	   (gotMinFreqFilter && minFreqFail(rec, minFreq)) ))
 	slAddHead(&newList, rec);
     }
 slReverse(&newList);
 vcff->records = newList;
 }
 
-static void trimIndelAlleles(struct vcfFile *vcff)
-/* For indels, VCF includes the left neighboring base; for example, if the alleles are
- * AA/- following a G base, then the VCF record will start one base to the left and have
- * "GAA" and "G" as the alleles.  That is not nice for display for two reasons:
- * 1. Indels appear one base wider than their dbSNP entries.
- * 2. In pgSnp display mode, the two alleles are always the same color.
- * So here we take the liberty of trimming that left neighboring base for display. */
+struct pgSnpVcfStart
+/* Extends struct pgSnp by tacking on the original VCF chromStart at the end, for use by
+ * indelTweakMapItem below.  pgs must stay the first member so this can be cast to struct pgSnp *. */
 {
-struct vcfRecord *rec;
-for (rec = vcff->records;  rec != NULL;  rec = rec->next)
-    if (rec->alleleCount > 1)
-	{
-	boolean allSameFirstBase = TRUE;
-	char firstBase = rec->alleles[0][0];
-	int i;
-	for (i = 1;  i < rec->alleleCount;  i++)
-	    if (rec->alleles[i][0] != firstBase)
-		{
-		allSameFirstBase = FALSE;
-		break;
-		}
-	if (allSameFirstBase)
-	    {
-	    rec->chromStart++;
-	    for (i = 0;  i < rec->alleleCount;  i++)
-		{
-		if (rec->alleles[i][1] == '\0')
-		    rec->alleles[i] = vcfFilePooledStr(vcff, "-");
-		else
-		    rec->alleles[i] = vcfFilePooledStr(vcff, rec->alleles[i]+1);
-		}
-	    }
-	}
-}
+    struct pgSnp pgs;         // copy of the converted record; first member so casts to pgSnp work
+    unsigned int vcfStart;    // value returned by vcfRecordTrimIndelLeftBase for this record
+};
 
 static struct pgSnp *vcfFileToPgSnp(struct vcfFile *vcff, struct trackDb *tdb)
 /* Convert vcff's records to pgSnp; don't free vcff until you're done with pgSnp
  * because it contains pointers into vcff's records' chrom. */
 {
 struct pgSnp *pgsList = NULL;
 struct vcfRecord *rec;
 int maxLen = 33;
 int maxAlCount = 5;
 for (rec = vcff->records;  rec != NULL;  rec = rec->next)
     {
+    struct pgSnpVcfStart *psvs = needMem(sizeof(*psvs));
+    psvs->vcfStart = vcfRecordTrimIndelLeftBase(rec);
     struct pgSnp *pgs = pgSnpFromVcfRecord(rec);
+    memcpy(&(psvs->pgs), pgs, sizeof(*pgs));
+    pgs = (struct pgSnp *)psvs; // leak mem
     // Insertion sequences can be quite long; abbreviate here for display.
     int len = strlen(pgs->name);
     if (len > maxLen)
 	{
 	int maxAlLen = (maxLen / min(rec->alleleCount, maxAlCount)) - 1;
 	pgs->name[0] = '\0';
 	int i;
 	for (i = 0;  i < rec->alleleCount;  i++)
 	    {
 	    if (i > 0)
 		safencat(pgs->name, len+1, "/", 1);
 	    if (i >= maxAlCount)
 		{
 		safecat(pgs->name, len+1, "...");
 		pgs->alleleCount = maxAlCount;
@@ -623,30 +600,31 @@
 if (alts > fudgeFactor * refs)
     return pgSnpColor(altAl);
 if (refs > fudgeFactor * alts)
     return pgSnpColor(refAl);
 return shadesOfGray[5];
 }
 
 // tg->height needs an extra pixel at the bottom; it's eaten by the clipping rectangle:
 #define CLIP_PAD 1
 
 static void drawOneRec(struct vcfRecord *rec, unsigned short *gtHapOrder, unsigned short gtHapCount,
 		       struct track *tg, struct hvGfx *hvg, int xOff, int yOff, int width,
 		       boolean isClustered, boolean isCenter, enum hapColorMode colorMode)
 /* Draw a stack of genotype bars for this record */
 {
+unsigned int chromStartMap = vcfRecordTrimIndelLeftBase(rec);
 const double scale = scaleForPixels(width);
 int x1 = round((double)(rec->chromStart-winStart)*scale) + xOff;
 int x2 = round((double)(rec->chromEnd-winStart)*scale) + xOff;
 int w = x2-x1;
 if (w <= 1)
     {
     x1--;
     w = 3;
     }
 // When coloring mode is altOnly, we draw one extra pixel row at the top & one at bottom
 // to show the locations of variants, since the reference alleles are invisible:
 int extraPixel = 0;
 int hapHeight = tg->height - CLIP_PAD;
 if (colorMode == altOnlyMode)
     {
@@ -714,37 +692,37 @@
 	{
 	// Colorful outline to distinguish this variant:
 	hvGfxLine(hvg, x1-1, yOff, x1-1, yBot, purple);
 	hvGfxLine(hvg, x2+1, yOff, x2+1, yBot, purple);
 	hvGfxLine(hvg, x1-1, yOff, x2+1, yOff, purple);
 	hvGfxLine(hvg, x1-1, yBot, x2+1, yBot, purple);
 	}
     else
 	{
 	// Thick black lines to distinguish this variant:
 	hvGfxBox(hvg, x1-3, yOff, 3, tg->height, MG_BLACK);
 	hvGfxBox(hvg, x2, yOff, 3, tg->height, MG_BLACK);
 	hvGfxLine(hvg, x1-2, yOff, x2+2, yOff, MG_BLACK);
 	hvGfxLine(hvg, x1-2, yBot, x2+2, yBot, MG_BLACK);
 	}
-    // Mouseover is handled separately by mapBoxForCenterVariant
+    // Mouseover was handled already by mapBoxForCenterVariant
     }
 else
     {
     struct dyString *dy = dyStringNew(0);
     gtSummaryString(rec, dy);
-    mapBoxHgcOrHgGene(hvg, rec->chromStart, rec->chromEnd, x1, yOff, w, tg->height, tg->track,
+    mapBoxHgcOrHgGene(hvg, chromStartMap, rec->chromEnd, x1, yOff, w, tg->height, tg->track,
 		      rec->name, dy->string, NULL, TRUE, NULL);
     }
 if (colorMode == altOnlyMode)
     hvGfxLine(hvg, x1, yBot, x2, yBot, (isClustered ? purple : shadesOfGray[5]));
 }
 
 static int getCenterVariantIx(struct track *tg, int seqStart, int seqEnd,
 			      struct vcfRecord *records)
 // If the user hasn't specified a local variant/position to use as center,
 // just use the median variant in window.
 {
 int defaultIx = (slCount(records)-1) / 2;
 char *centerChrom = cartOptionalStringClosestToHome(cart, tg->tdb, FALSE, "centerVariantChrom");
 if (centerChrom != NULL && sameString(chromName, centerChrom))
     {
@@ -1155,61 +1133,70 @@
     fileOrUrl = bbiNameFromSettingOrTableChrom(tg->tdb, conn, tg->table, chromName);
     hFreeConn(&conn);
     }
 int vcfMaxErr = -1;
 struct vcfFile *vcff = NULL;
 boolean hapClustEnabled = cartUsualBooleanClosestToHome(cart, tg->tdb, FALSE,
 							VCF_HAP_ENABLED_VAR, TRUE);
 /* protect against temporary network error */
 struct errCatch *errCatch = errCatchNew();
 if (errCatchStart(errCatch))
     {
     vcff = vcfTabixFileMayOpen(fileOrUrl, chromName, winStart, winEnd, vcfMaxErr, -1);
     if (vcff != NULL)
 	{
 	filterRecords(vcff, tg->tdb);
-	trimIndelAlleles(vcff);
 	if (hapClustEnabled && vcff->genotypeCount > 1 && vcff->genotypeCount < 3000 &&
 	    (tg->visibility == tvPack || tg->visibility == tvSquish))
 	    vcfHapClusterOverloadMethods(tg, vcff);
 	else
 	    {
 	    tg->items = vcfFileToPgSnp(vcff, tg->tdb);
 	    // pgSnp bases coloring/display decision on count of items:
 	    tg->customInt = slCount(tg->items);
 	    }
 	// Don't vcfFileFree here -- we are using its string pointers!
 	}
     }
 errCatchEnd(errCatch);
 if (errCatch->gotError || vcff == NULL)
     {
     if (isNotEmpty(errCatch->message->string))
 	tg->networkErrMsg = cloneString(errCatch->message->string);
     tg->drawItems = bigDrawWarning;
     tg->totalHeight = bigWarnTotalHeight;
     }
 errCatchFree(&errCatch);
 }
 
+static void indelTweakMapItem(struct track *tg, struct hvGfx *hvg, void *item,
+        char *itemName, char *mapItemName, int start, int end, int x, int y, int width, int height)
+/* mapItem handler: item must be a struct pgSnpVcfStart.  Ignores start and instead passes the
+ * stored vcfStart to pgSnpMapItem, so hgc gets the original VCF start even after left-base trim. */
+{
+struct pgSnpVcfStart *psvs = item;  // items are allocated as pgSnpVcfStart in vcfFileToPgSnp
+pgSnpMapItem(tg, hvg, item, itemName, mapItemName, psvs->vcfStart, end, x, y, width, height);
+}
+
 void vcfTabixMethods(struct track *track)
 /* Methods for VCF + tabix files. */
 {
 #ifdef KNETFILE_HOOKS
 knetUdcInstall();
 #endif
 pgSnpMethods(track);
+track->mapItem = indelTweakMapItem;  // set after pgSnpMethods so map links use the stored vcfStart
 // Disinherit next/prev flag and methods since we don't support next/prev:
 track->nextExonButtonable = FALSE;
 track->nextPrevExon = NULL;
 track->nextPrevItem = NULL;
 track->loadItems = vcfTabixLoadItems;
 track->canPack = TRUE;
 }
 
 #else // no USE_TABIX:
 
 // If code was not built with USE_TABIX=1, but there are vcfTabix tracks, display a message
 // in place of the tracks (instead of annoying "No track handler" warning messages).
 
 static void drawUseVcfTabixWarning(struct track *tg, int seqStart, int seqEnd, struct hvGfx *hvg,
 				   int xOff, int yOff, int width, MgFont *font, Color color,