3640a4d6b3303a6bebc7c5b2fc5abcf7f4fae0b2
angie
  Wed Sep 28 11:56:00 2016 -0700
Partial support for changes in VCF4.2 and latest samtools mpileup output:
- Tolerate 'Number=R' and new INFO attributes Source and Version
- Tolerate mpileup's '<X>' alt (no alternate allele was observed)
- The 4.3 spec includes '<*>' from gVCF, also meaning no alternate allele was observed.
- GT is no longer required; user's example has PL instead, so parse that
into genotypes.
- hgVai now annotates "variants" with <X> and <*> as no_sequence_alteration
- annoFormatVep now uses html encoding for html output in various places so
that "<X>" is displayed properly (custom track labels and various item
names could also have undesirable characters).  I am not encoding the
extras' descriptions because those are internal and some have <a>'s.
refs #15625

diff --git src/hg/lib/gpFx.c src/hg/lib/gpFx.c
index 4eab84d..2d91199 100644
--- src/hg/lib/gpFx.c
+++ src/hg/lib/gpFx.c
@@ -975,42 +975,56 @@
 }
 
 static void checkVariantList(struct variant *variant)
 // check to see that we either have one variant (possibly with multiple
 // alleles) or that if we have a list of variants, they only have
 // one allele a piece.
 {
 if (variant->next == NULL)	 // just one variant
     return;
 
 for(; variant; variant = variant->next)
     if (variant->numAlleles != 1)
 	errAbort("gpFxPredEffect needs either 1 variant, or only 1 allele in all variants");
 }
 
+static struct gpFx *gpFxNoVariation(struct variant *variant, struct lm *lm)
+/* Return a gpFx with SO term no_sequence_alteration, for VCF rows that aren't really variants. */
+{
+char *seq = NULL;  // stays NULL, and is passed to gpFxNew as-is, if no allele has isReference set
+struct allele *allele;
+for (allele = variant->alleles;  allele != NULL;  allele = allele->next)
+    if (allele->isReference)
+        {
+        seq = allele->sequence;
+        // Don't break out of the loop -- pick the last one we see because the first is likely
+        // the "real" reference allele, while the other(s) is something like "<X>" or "<*>".
+        }
+return gpFxNew(seq, "", no_sequence_alteration, none, lm);
+}
+
 struct gpFx *gpFxPredEffect(struct variant *variant, struct genePred *pred,
 			    struct dnaSeq *transcriptSequence, struct lm *lm)
 // return the predicted effect(s) of a variation list on a genePred
 {
 struct gpFx *effectsList = NULL;
 
 // make sure we can deal with the variants that are coming in
 checkVariantList(variant);
 
 for (; variant != NULL;  variant = variant->next)
     {
-    // If only the reference allele has been observed, skip it:
-    //#*** Some might like to keep variants e.g. in VCF output... 
-    //#*** aha, Ensembl has requested a term for 'no change' from SONG.
-    //#*** Add that to soTerm when it exists...
     if (! hasAltAllele(variant->alleles))
-	return NULL;
-
+	effectsList = slCat(effectsList, gpFxNoVariation(variant, lm));
+    else
+        {
         // check to see if SNP is up or downstream
         effectsList = slCat(effectsList, gpFxCheckUpDownstream(variant, pred, lm));
 
         // check to see if SNP is in the transcript
-    effectsList = slCat(effectsList, gpFxCheckTranscript(variant, pred, transcriptSequence, lm));
+        effectsList = slCat(effectsList,
+                            gpFxCheckTranscript(variant, pred, transcriptSequence, lm));
+        }
     }
 
 return effectsList;
 }