ec16665b0cbbc7f8e01992a65d328bee629b4e01
angie
Fri Oct 13 15:01:12 2017 -0700
Multi-base substitutions that straddled codon boundary were incompletely reported in {codon,aa}{Old,New} fields. fixes #20327
diff --git src/hg/lib/gpFx.c src/hg/lib/gpFx.c
index 5d6888c..2b98633 100644
--- src/hg/lib/gpFx.c
+++ src/hg/lib/gpFx.c
@@ -667,31 +667,32 @@
struct codingChange *cc = &effect->details.codingChange;
cc->cDnaPosition = txc->startInCdna;
cc->cdsPosition = startInCds;
cc->exonNumber = exonIx;
int pepPos = startInCds / 3;
// At this point we don't use genePredExt's exonFrames field -- we just assume that
// the CDS starts in frame. That's not always the case (e.g. ensGene has some CDSs
// that begin out of frame), so watch out for early truncation of oldCodingSequence
// due to stop codon in the wrong frame:
if (pepPos >= strlen(oldaa))
return effect;
cc->pepPosition = pepPos;
if (cdsBasesAdded % 3 == 0)
{
// Common case: substitution, same number of old/new codons/peps:
- int numOldCodons = (1 + allele->length / 3), numNewCodons = (1 + allele->length / 3);
+ int refPepEnd = (endInCds + 2) / 3;
+ int numOldCodons = refPepEnd - pepPos, numNewCodons = numOldCodons;
if (cdsBasesAdded > 0)
{
// insertion: more new codons than old
numOldCodons = (cc->cdsPosition % 3) == 0 ? 0 : 1;
numNewCodons = numOldCodons + (cdsBasesAdded / 3);
}
else if (cdsBasesAdded < 0)
{
// deletion: more old codons than new
numNewCodons = (cc->cdsPosition % 3) == 0 ? 0 : 1;
numOldCodons = numNewCodons + (-cdsBasesAdded / 3);
}
cc->codonOld = lmCloneStringZ(lm, oldCodingSequence + pepPos*3, numOldCodons*3);
cc->codonNew = lmCloneStringZ(lm, newCodingSequence + pepPos*3, numNewCodons*3);
cc->aaOld = lmCloneStringZ(lm, oldaa + pepPos, numOldCodons);