5a54acb02ba08533f895379e2bfd42d0ef779c8e
markd
  Fri Apr 29 09:45:33 2016 -0700
improved documentation on CDS issues

diff --git src/hg/lib/genePred.c src/hg/lib/genePred.c
index 6e8bbc1..a00c210 100644
--- src/hg/lib/genePred.c
+++ src/hg/lib/genePred.c
@@ -1102,34 +1102,39 @@
 {
 /* use the 3' end is used if it's complete, as it is more often accurate when
  * genes are defined from mRNAs sequenced with reverse-transcriptase. */
 int frame = -1;
 /* map to mRNA coords in CDS since frame for an exon is in direction of
  * transcription. */
 if (psl->strand[0] == '-')
     reverseIntRange(&start, &end, psl->qSize);
 if (start < cds->start)
     start = cds->start;
 if (end > cds->end)
     end = cds->end;
 
 if (start < end)
     {
-    /* Compute from end if it is complete in mRNA and start is not complete.
-     * This is doesn't as the end is more likely completely.  However, so
-     * code doesn't correctly create CDS to indicate completeness, so don't
-     * use CDS end unless we know start is incomplete. */
+    /* Compute frame from end of RNA if CDS end is marked complete and start
+     * is not complete.  This is done as the end of an RNA is more likely
+     * complete due to reverse transcriptase not replicating the entire RNA.
+     * However, code that creates CDS from genePreds doesn't always create a
+     * CDS specification that indicates incompleteness. So don't use CDS end
+     * unless we know start is incomplete, meaning code tried to set it.  This
+     * is not a perfect solution, as handling of CDS specification is naive and
+     * doesn't account for truncated start or stop.  Incomplete codons can
+     * result in a frame shift even if CDS completeness is set correctly. */
     if (cds->endComplete && !cds->startComplete)
         {
         int fr = (cds->end-start) % 3;
         frame = (fr == 2) ? 1 : ((fr == 1) ? 2 : 0);
         }
     else
         frame = (start-cds->start) % 3;
     }
 return frame;
 }
 
 static boolean shouldMergeBlocks(struct genePred *gene, 
                                  unsigned tStart, unsigned prevTEnd,
                                  unsigned qStart, unsigned prevQEnd,
                                  unsigned options,