2c0f74b4fbfefc533d27bf3ccdc18e4ecfdecae7
angie
  Fri Oct 5 17:07:18 2012 -0700
Follow-up to ce70491d:
1. Move hgTracks left-base-of-indel trimming code up to vcf.[ch] for
sharing w/hgc.
2. Correct chromStart in hgTracks mapBox links to hgc when we have
trimmed a left base.
3. In hgc, abbreviate long sequences (e.g. 40kb deletion) and show
trimmed left base in parentheses for consistency with VCF file (and
sometimes INFO fields that use left-inclusive coords/seqs).
4. In pgSnpFromVcfRecord, don't truncate long alleles because hgTracks
and hgc do their own abbreviating.

diff --git src/lib/vcf.c src/lib/vcf.c
index 1176273..9acc900 100644
--- src/lib/vcf.c
+++ src/lib/vcf.c
@@ -657,30 +657,67 @@
 struct vcfRecord *vcfNextRecord(struct vcfFile *vcff)
 /* Parse the words in the next line from vcff into a vcfRecord. Return NULL at end of file.
  * Note: this does not store record in vcff->records! */
 {
 char *words[VCF_MAX_COLUMNS];
 int wordCount;
 if ((wordCount = lineFileChop(vcff->lf, words)) <= 0)
     return NULL;
 int expected = 8;
 if (vcff->genotypeCount > 0)
     expected = 9 + vcff->genotypeCount;
 lineFileExpectWords(vcff->lf, expected, wordCount);
 return vcfRecordFromRow(vcff, words);
 }
 
+unsigned int vcfRecordTrimIndelLeftBase(struct vcfRecord *rec)
+/* For indels, VCF includes the left neighboring base: alleles AA/- following a G base are
+ * stored one base to the left as "GAA" and "G".  That is not nice for display because
+ * 1. indels appear one base wider than their dbSNP entries, and
+ * 2. in pgSnp display mode, the two alleles are always the same color.
+ * So if rec has multiple alleles that all start with the same base, modify rec in place:
+ * increment chromStart and drop that base from each allele (a 1-base allele becomes "-").
+ * Return the original chromStart, which hgTracks' mapBox needs to identify the record in hgc. */
+{
+unsigned int chromStartOrig = rec->chromStart;  /* saved before any trimming; returned below */
+struct vcfFile *vcff = rec->file;
+if (rec->alleleCount > 1)
+    {
+    boolean allSameFirstBase = TRUE;
+    char firstBase = rec->alleles[0][0];
+    int i;
+    for (i = 1;  i < rec->alleleCount;  i++)
+	if (rec->alleles[i][0] != firstBase)
+	    {
+	    allSameFirstBase = FALSE;
+	    break;
+	    }
+    if (allSameFirstBase)  /* only first chars are compared; allele lengths are not checked */
+	{
+	rec->chromStart++;  /* record now starts just after the trimmed base */
+	for (i = 0;  i < rec->alleleCount;  i++)
+	    {
+	    if (rec->alleles[i][1] == '\0')  /* nothing would remain after trimming */
+		rec->alleles[i] = vcfFilePooledStr(vcff, "-");
+	    else
+		rec->alleles[i] = vcfFilePooledStr(vcff, rec->alleles[i]+1);
+	    }
+	}
+    }
+return chromStartOrig;
+}
+
 static void vcfParseData(struct vcfFile *vcff, int maxRecords)
 /* Given a vcfFile into which the header has been parsed, and whose lineFile is positioned
  * at the beginning of a data row, parse and store all data rows from lineFile. */
 {
 if (vcff == NULL)
     return;
 int recCount = 0;
 struct vcfRecord *record;
 while ((record = vcfNextRecord(vcff)) != NULL)
     {
     if (maxRecords >= 0 && recCount >= maxRecords)
 	break;
     slAddHead(&(vcff->records), record);
     recCount++;
     }