ec16665b0cbbc7f8e01992a65d328bee629b4e01 angie Fri Oct 13 15:01:12 2017 -0700 Multi-base substitutions that straddled codon boundary were incompletely reported in {codon,aa}{Old,New} fields. fixes #20327 diff --git src/hg/lib/gpFx.c src/hg/lib/gpFx.c index 5d6888c..2b98633 100644 --- src/hg/lib/gpFx.c +++ src/hg/lib/gpFx.c @@ -667,31 +667,32 @@ struct codingChange *cc = &effect->details.codingChange; cc->cDnaPosition = txc->startInCdna; cc->cdsPosition = startInCds; cc->exonNumber = exonIx; int pepPos = startInCds / 3; // At this point we don't use genePredExt's exonFrames field -- we just assume that // the CDS starts in frame. That's not always the case (e.g. ensGene has some CDSs // that begin out of frame), so watch out for early truncation of oldCodingSequence // due to stop codon in the wrong frame: if (pepPos >= strlen(oldaa)) return effect; cc->pepPosition = pepPos; if (cdsBasesAdded % 3 == 0) { // Common case: substitution, same number of old/new codons/peps: - int numOldCodons = (1 + allele->length / 3), numNewCodons = (1 + allele->length / 3); + int refPepEnd = (endInCds + 2) / 3; + int numOldCodons = refPepEnd - pepPos, numNewCodons = numOldCodons; if (cdsBasesAdded > 0) { // insertion: more new codons than old numOldCodons = (cc->cdsPosition % 3) == 0 ? 0 : 1; numNewCodons = numOldCodons + (cdsBasesAdded / 3); } else if (cdsBasesAdded < 0) { // deletion: more old codons than new numNewCodons = (cc->cdsPosition % 3) == 0 ? 0 : 1; numOldCodons = numNewCodons + (-cdsBasesAdded / 3); } cc->codonOld = lmCloneStringZ(lm, oldCodingSequence + pepPos*3, numOldCodons*3); cc->codonNew = lmCloneStringZ(lm, newCodingSequence + pepPos*3, numNewCodons*3); cc->aaOld = lmCloneStringZ(lm, oldaa + pepPos, numOldCodons);