380a1b308bd3bb4f4e52d89ef9e1ccb962892bab
angie
  Tue Oct 3 14:10:37 2017 -0700
Major changes to annoGratorGpVar, annoFormatVep and gpFx.c with the addition of functional effect prediction to variantProjector using PSL+CDS from annoStreamDbPslPlus, which enables accurate predictions even when the genome and transcript have indel differences.

struct gpFx includes new members exonCount, txRef and txAlt so that gpFx and variantProjector can compute those and send them forward to annoFormatVep, instead of annoFormatVep computing them assuming that genome and transcript match perfectly.

annoGratorGpVar passes forward the new gpFx members in output columns and, when input is PSL+CDS instead of genePred, uses variantProjector instead of gpFx to do functional predictions.

diff --git src/hg/lib/variant.c src/hg/lib/variant.c
index 2270896..b6f9f71 100644
--- src/hg/lib/variant.c
+++ src/hg/lib/variant.c
@@ -149,15 +149,64 @@
 char *alStr = vcfGetSlashSepAllelesFromWords(words, dyScratch);
 // The reference allele is the first allele in alStr -- and it may be trimmed on both ends with
 // respect to the raw VCF ref allele in words[3], so copy vcfRefAllele back out of alStr.
 // That ensures that variantNew will get the reference allele that matches the slash-separated
 // allele string.
 int refLen = strlen(alStr);
 char *p = strchr(alStr, '/');
 if (p)
     refLen = p - alStr;
 char vcfRefAllele[refLen + 1];
 safencpy(vcfRefAllele, sizeof(vcfRefAllele), alStr, refLen);
 unsigned alCount = countChars(alStr, '/') + 1;
 return variantNew(row->chrom, row->start, row->end, alCount, alStr, vcfRefAllele, lm);
 }
 
+static char *findRefAllele(struct variant *variant)
+/* Return the sequence of variant's reference allele, favoring one that is not symbolic
+ * (passes isAllNt); NULL if no allele is flagged as reference.  Don't free result. */
+{
+char *found = NULL;
+struct allele *al;
+for (al = variant->alleles;  al != NULL;  al = al->next)
+    {
+    if (! al->isReference)
+        continue;
+    found = al->sequence;
+    // An all-nucleotide reference is the preferred answer, so stop at the first one.
+    if (isAllNt(found, strlen(found)))
+        return found;
+    }
+return found;
+}
+
+struct variant *splitAndTrimVariants(struct variant *variantIn, struct lm *lm)
+/* Split variantIn into a list of single-allele variants with redundant ref/alt bases trimmed.
+ * Variants with no all-nucleotide ref allele are relinked into the result unsplit. */
+{
+struct variant *variantList = NULL;
+struct variant *variant, *nextVariant;
+for (variant = variantIn; variant != NULL;  variant = nextVariant)
+    {
+    nextVariant = variant->next;  // save now: slAddHead below overwrites variant->next
+    char *refAllele = findRefAllele(variant);
+    if (refAllele == NULL || !isAllNt(refAllele, strlen(refAllele)))
+        {
+        slAddHead(&variantList, variant);
+        continue;
+        }
+    struct allele *allele;
+    for (allele = variant->alleles;  allele != NULL;  allele = allele->next)
+        {
+        if (allele->isReference)
+            continue;
+        int refLen = strlen(refAllele), altLen = strlen(allele->sequence);
+        char ref[refLen+1], alt[altLen+1];
+        safecpy(ref, sizeof(ref), refAllele);
+        safecpy(alt, sizeof(alt), allele->sequence);
+        uint start = variant->chromStart, end = variant->chromEnd;
+        trimRefAlt(ref, alt, &start, &end, &refLen, &altLen);
+        slAddHead(&variantList, variantNew(variant->chrom, start, end, 1, alt, ref, lm));
+        }
+    }
+return variantList;
+}
+