ee9f92a97d27b6e5708cd0647d470be190aa8b60
angie
  Tue Oct 26 16:12:29 2010 -0700
Code Review #1452 (v243 preview1): comment additions & clarificationsrequested by Melissa.

diff --git src/hg/hgTables/gffOut.c src/hg/hgTables/gffOut.c
index 7f32135..c02dbe7 100644
--- src/hg/hgTables/gffOut.c
+++ src/hg/hgTables/gffOut.c
@@ -40,30 +40,31 @@
     static int namelessIx = 0;
     char buf[64];
     safef(buf, sizeof(buf), "gene%d", ++namelessIx);
     gff.geneId = buf;
     }
 gffTabOut(&gff, stdout);
 }
 
 
 static char *computeFrames(struct bed *bed, int *retStartIndx, int *retStopIndx)
 /* Compute frames, in order dictated by strand.  bed must be BED12.  */
 {
 char *frames = needMem(bed->blockCount);
 boolean gotFirstCds = FALSE;
 int nextPhase = 0, startIndx = 0, stopIndx = 0;
+// If lack of thick region has been represented this way, fix:
 if (bed->thickStart == 0 && bed->thickEnd == 0)
     bed->thickStart = bed->thickEnd = bed->chromStart;
 int i;
 for (i=0;  i < bed->blockCount;  i++)
     {
     int j = (bed->strand[0] == '-') ? bed->blockCount-i-1 : i;
     int exonStart = bed->chromStart + bed->chromStarts[j];
     int exonEnd = exonStart + bed->blockSizes[j];
     if ((exonStart < bed->thickEnd) && (exonEnd > bed->thickStart))
 	{
 	int cdsS = max(exonStart, bed->thickStart);
 	int cdsE = min(exonEnd, bed->thickEnd);
 	int cdsSize = cdsE - cdsS;
 	if (! gotFirstCds)
 	    {
@@ -109,42 +110,48 @@
     {
     if (exonIndx >= bed->blockCount - 1)
 	errAbort("offsetToGenomic: need next exon, but given index of %d (>= %d)",
 		 exonIndx, bed->blockCount - 1);
     int stillNeeded = simpleAnswer - exonEnd;
     int nextExonStart = bed->chromStart + bed->chromStarts[exonIndx+1];
     return offsetToGenomic(bed, exonIndx+1, nextExonStart, stillNeeded);
     }
 return simpleAnswer;
 }
 
 static void addCdsStartStop(struct bed *bed, char *source, int exonCdsStart, int exonCdsEnd,
 			    char *frames, int exonIndx, int cdsStartIndx, int cdsStopIndx,
 			    boolean gtf2StopCodons, char *txName)
 /* Output a CDS record for the trimmed CDS portion of exonIndx, and a start_codon or
- * stop_codon if applicable. Return the number of bases in the next exon (exonIndx+1)
- * that are covered by the stop_codon, in case we need to exclude those from next CDS. */
+ * stop_codon if applicable. */
 {
 boolean isRc = (bed->strand[0] == '-');
 /* start_codon (goes first for + strand) overlaps with CDS */
 if ((exonIndx == cdsStartIndx) && !isRc)
     {
     int startCodonEnd = offsetToGenomic(bed, exonIndx, exonCdsStart, 3);
     addGffLineFromBed(bed, source, "start_codon", exonCdsStart, startCodonEnd, '.', txName);
     }
-/* stop codon does not overlap with CDS in GTF2 -- watch out for 
- * gtf2StopCodons, especially in conjunction with strand. */
+/* If gtf2StopCodons is set, then we follow the GTF2 convention of excluding
+ * the stop codon from the CDS region.  In other GFF flavors, stop_codon is
+ * part of the CDS, which is the case in our table coords too.
+ * This function would already be complicated enough without gtf2StopCodons,
+ * due to strand and the possibility of a start or stop codon being split
+ * across exons.  Excluding the stop codon from CDS complicates it further
+ * because cdsEnd on the current exon may affect the CDS line of the previous
+ * or next exon.  The cdsPortion* variables below are the CDS extremities that
+ * may have the stop codon excised. */
 if (exonIndx == cdsStopIndx)
     {
     if (isRc)
 	{
 	int stopCodonEnd = offsetToGenomic(bed, exonIndx, exonCdsStart, 3);
 	addGffLineFromBed(bed, source, "stop_codon", exonCdsStart, stopCodonEnd, '.', txName);
 	int cdsPortionStart = gtf2StopCodons ? stopCodonEnd : exonCdsStart;
 	if (cdsPortionStart < exonCdsEnd)
 	    addGffLineFromBed(bed, source, "CDS", cdsPortionStart, exonCdsEnd,
 			      frames[exonIndx], txName);
 	}
     else
 	{
 	int stopCodonStart = offsetToGenomic(bed, exonIndx, exonCdsEnd, -3);
 	int cdsPortionEnd = gtf2StopCodons ? stopCodonStart : exonCdsEnd;