3640a4d6b3303a6bebc7c5b2fc5abcf7f4fae0b2
angie
  Wed Sep 28 11:56:00 2016 -0700
Partial support for changes in VCF4.2 and latest samtools mpileup output:
- Tolerate 'Number=R' and new INFO attributes Source and Version
- Tolerate mpileup's '<X>' alt (no alternate allele was observed)
- The 4.3 spec includes '<*>' from gVCF, also meaning no alternate allele was observed.
- GT is no longer required; user's example has PL instead, so parse that
into genotypes.
- hgVai now annotates "variants" with <X> and <*> as no_sequence_alteration
- annoFormatVep now uses html encoding for html output in various places so
that "<X>" is displayed properly (custom track labels and various item
names could also have undesirable characters).  I am not encoding the
extras' descriptions because those are internal and some have <a>'s.
refs #15625

diff --git src/hg/lib/pgSnp.c src/hg/lib/pgSnp.c
index 61a3f19..2412e6d 100644
--- src/hg/lib/pgSnp.c
+++ src/hg/lib/pgSnp.c
@@ -829,31 +829,34 @@
 }
 
 struct pgSnp *pgSnpFromVcfRecord(struct vcfRecord *rec)
 /* Convert VCF rec to pgSnp; don't free rec->file (vcfFile) until
  * you're done with pgSnp because pgSnp points to rec->chrom. */
 {
 struct dyString *dy = dyStringNew(0);
 struct pgSnp *pgs;
 AllocVar(pgs);
 pgs->chrom = rec->chrom;
 pgs->chromStart = rec->chromStart;
 pgs->chromEnd = rec->chromEnd;
 // Build up slash-separated allele string from rec->alleles, starting with ref allele:
 dyStringAppend(dy, rec->alleles[0]);
 int alCount = rec->alleleCount, i;
-if (rec->alleleCount == 2 && sameString(rec->alleles[1], "."))
+if (rec->alleleCount == 2 &&
+    (sameString(rec->alleles[1], ".") ||
+     sameString(rec->alleles[1], "<X>") ||
+     sameString(rec->alleles[1], "<*>")))
     // ignore N/A alternate allele
     alCount = 1;
 else if (rec->alleleCount >= 2)
     {
     // append /-sep'd alternate alleles
     for (i = 1;  i < rec->alleleCount;  i++)
 	dyStringPrintf(dy, "/%s", rec->alleles[i]);
     }
 pgs->name = cloneStringZ(dy->string, dy->stringSize+1);
 pgs->alleleCount = alCount;
 pgs->alleleFreq = alleleCountsFromVcfRecord(rec, alCount);
 // Build up comma-sep list... supposed to be per-allele quality scores but I think
 // the VCF spec only gives us one BQ... for the reference position?  should ask.
 dyStringClear(dy);
 for (i = 0;  i < rec->infoCount;  i++)