5a54acb02ba08533f895379e2bfd42d0ef779c8e markd Fri Apr 29 09:45:33 2016 -0700 improved documentation on CDS issues diff --git src/hg/lib/genePred.c src/hg/lib/genePred.c index 6e8bbc1..a00c210 100644 --- src/hg/lib/genePred.c +++ src/hg/lib/genePred.c @@ -1102,34 +1102,39 @@ { /* use the 3' end is used if it's complete, as it is more often accurate when * genes are defined from mRNAs sequenced with reverse-transcriptase. */ int frame = -1; /* map to mRNA coords in CDS since frame for an exon is in direction of * transcription. */ if (psl->strand[0] == '-') reverseIntRange(&start, &end, psl->qSize); if (start < cds->start) start = cds->start; if (end > cds->end) end = cds->end; if (start < end) { - /* Compute from end if it is complete in mRNA and start is not complete. - * This is doesn't as the end is more likely completely. However, so - * code doesn't correctly create CDS to indicate completeness, so don't - * use CDS end unless we know start is incomplete. */ + /* Compute frame from end of RNA if CDS end is marked complete and start + * is not complete. This is done as the end of an RNA is more likely + * complete due to reverse transcriptase not replicating the entire RNA. + * However, code that creates CDS from genePreds doesn't always create a + * CDS specification that indicates incompleteness. So don't use CDS end + * unless we know start is incomplete, meaning the code tried to set it. This is + * not a perfect solution, as handling of CDS specification is naive and + * doesn't account for truncated start or stop. Incomplete codons can + * result in frame shift even if CDS completeness is set correctly. */ if (cds->endComplete && !cds->startComplete) { int fr = (cds->end-start) % 3; frame = (fr == 2) ? 1 : ((fr == 1) ? 2 : 0); } else frame = (start-cds->start) % 3; } return frame; } static boolean shouldMergeBlocks(struct genePred *gene, unsigned tStart, unsigned prevTEnd, unsigned qStart, unsigned prevQEnd, unsigned options,