ce70491d6debd76c80dfc0e8ac4102a2315f815f
angie
  Mon Sep 17 09:59:43 2012 -0700
VCF indel alleles always start with a reference base that is the same(e.g. for a dbSNP item with -/C, the VCF item starts with the reference
base to the left like A/AC). Make the display consistent with dbSNP
and other variation tracks by trimming the left neighboring base.

diff --git src/hg/hgTracks/vcfTrack.c src/hg/hgTracks/vcfTrack.c
index 1af8b37..856496a 100644
--- src/hg/hgTracks/vcfTrack.c
+++ src/hg/hgTracks/vcfTrack.c
@@ -159,30 +159,65 @@
     return;
 
 struct vcfRecord *rec, *nextRec, *newList = NULL;
 for (rec = vcff->records;  rec != NULL;  rec = nextRec)
     {
     nextRec = rec->next;
     if (! ((gotQualFilter && minQualFail(rec, minQual)) ||
 	   (gotFilterFilter && filterColumnFail(rec, filterValues)) ||
 	   (gotMinFreqFilter && minFreqFail(rec, minFreq)) ))
 	slAddHead(&newList, rec);
     }
 slReverse(&newList);
 vcff->records = newList;
 }
 
+static void trimIndelAlleles(struct vcfFile *vcff)
+/* For indels, VCF includes the left neighboring base; for example, if the alleles are
+ * AA/- following a G base, then the VCF record will start one base to the left and have
+ * "GAA" and "G" as the alleles.  That is not nice for display for two reasons:
+ * 1. Indels appear one base wider than their dbSNP entries.
+ * 2. In pgSnp display mode, the two alleles are always the same color.
+ * So here we take the liberty of trimming that left neighboring base for display. */
+{
+struct vcfRecord *rec;
+for (rec = vcff->records;  rec != NULL;  rec = rec->next)
+    if (rec->alleleCount > 1)
+	{
+	boolean allSameFirstBase = TRUE;
+	char firstBase = rec->alleles[0][0];
+	int i;
+	for (i = 1;  i < rec->alleleCount;  i++)
+	    if (rec->alleles[i][0] != firstBase)
+		{
+		allSameFirstBase = FALSE;
+		break;
+		}
+	if (allSameFirstBase)
+	    {
+	    rec->chromStart++;
+	    for (i = 0;  i < rec->alleleCount;  i++)
+		{
+		if (rec->alleles[i][1] == '\0')
+		    rec->alleles[i] = vcfFilePooledStr(vcff, "-");
+		else
+		    rec->alleles[i] = vcfFilePooledStr(vcff, rec->alleles[i]+1);
+		}
+	    }
+	}
+}
+
 static struct pgSnp *vcfFileToPgSnp(struct vcfFile *vcff, struct trackDb *tdb)
 /* Convert vcff's records to pgSnp; don't free vcff until you're done with pgSnp
  * because it contains pointers into vcff's records' chrom. */
 {
 struct pgSnp *pgsList = NULL;
 struct vcfRecord *rec;
 int maxLen = 33;
 int maxAlCount = 5;
 for (rec = vcff->records;  rec != NULL;  rec = rec->next)
     {
     struct pgSnp *pgs = pgSnpFromVcfRecord(rec);
     // Insertion sequences can be quite long; abbreviate here for display.
     int len = strlen(pgs->name);
     if (len > maxLen)
 	{
@@ -1120,30 +1155,31 @@
     fileOrUrl = bbiNameFromSettingOrTableChrom(tg->tdb, conn, tg->table, chromName);
     hFreeConn(&conn);
     }
 int vcfMaxErr = -1;
 struct vcfFile *vcff = NULL;
 boolean hapClustEnabled = cartUsualBooleanClosestToHome(cart, tg->tdb, FALSE,
 							VCF_HAP_ENABLED_VAR, TRUE);
 /* protect against temporary network error */
 struct errCatch *errCatch = errCatchNew();
 if (errCatchStart(errCatch))
     {
     vcff = vcfTabixFileMayOpen(fileOrUrl, chromName, winStart, winEnd, vcfMaxErr, -1);
     if (vcff != NULL)
 	{
 	filterRecords(vcff, tg->tdb);
+	trimIndelAlleles(vcff);
 	if (hapClustEnabled && vcff->genotypeCount > 1 && vcff->genotypeCount < 3000 &&
 	    (tg->visibility == tvPack || tg->visibility == tvSquish))
 	    vcfHapClusterOverloadMethods(tg, vcff);
 	else
 	    {
 	    tg->items = vcfFileToPgSnp(vcff, tg->tdb);
 	    // pgSnp bases coloring/display decision on count of items:
 	    tg->customInt = slCount(tg->items);
 	    }
 	// Don't vcfFileFree here -- we are using its string pointers!
 	}
     }
 errCatchEnd(errCatch);
 if (errCatch->gotError || vcff == NULL)
     {