ec16665b0cbbc7f8e01992a65d328bee629b4e01
angie
  Fri Oct 13 15:01:12 2017 -0700
Multi-base substitutions that straddled a codon boundary were incompletely reported in the {codon,aa}{Old,New} fields.  fixes #20327

diff --git src/hg/lib/gpFx.c src/hg/lib/gpFx.c
index 5d6888c..2b98633 100644
--- src/hg/lib/gpFx.c
+++ src/hg/lib/gpFx.c
@@ -667,31 +667,32 @@
 struct codingChange *cc = &effect->details.codingChange;
 cc->cDnaPosition = txc->startInCdna;
 cc->cdsPosition = startInCds;
 cc->exonNumber = exonIx;
 int pepPos = startInCds / 3;
 // At this point we don't use genePredExt's exonFrames field -- we just assume that
 // the CDS starts in frame.  That's not always the case (e.g. ensGene has some CDSs
 // that begin out of frame), so watch out for early truncation of oldCodingSequence
 // due to stop codon in the wrong frame:
 if (pepPos >= strlen(oldaa))
     return effect;
 cc->pepPosition = pepPos;
 if (cdsBasesAdded % 3 == 0)
     {
     // Common case: substitution, same number of old/new codons/peps:
-    int numOldCodons = (1 + allele->length / 3), numNewCodons = (1 + allele->length / 3);
+    int refPepEnd = (endInCds + 2) / 3;
+    int numOldCodons = refPepEnd - pepPos, numNewCodons = numOldCodons;
     if (cdsBasesAdded > 0)
 	{
 	// insertion: more new codons than old
 	numOldCodons = (cc->cdsPosition % 3) == 0 ? 0 : 1;
 	numNewCodons = numOldCodons + (cdsBasesAdded / 3);
 	}
     else if (cdsBasesAdded < 0)
 	{
 	// deletion: more old codons than new
 	numNewCodons = (cc->cdsPosition % 3) == 0 ? 0 : 1;
 	numOldCodons = numNewCodons + (-cdsBasesAdded / 3);
 	}
     cc->codonOld = lmCloneStringZ(lm, oldCodingSequence + pepPos*3, numOldCodons*3);
     cc->codonNew = lmCloneStringZ(lm, newCodingSequence + pepPos*3, numNewCodons*3);
     cc->aaOld = lmCloneStringZ(lm, oldaa + pepPos, numOldCodons);