a42565e69da8b3a6e72c92afb4108ae522216195
angie
  Mon Mar 28 10:41:35 2011 -0700
Bug #2964 (Table Browser: RefGene output in GTF gives error):An unanticipated condition led to an errAbort: rn4 refGene contained
a transcript (alternate alignment of NM_001008876) with the coding
region annotated as the first base of the first exon.  Now, instead
of errAborting when a requested codon offset is out of bounds, we
don't attempt to print start_codon or stop_codon elements when the
entire codon is not contained in the transcript's exons and coding
region.

diff --git src/hg/hgTables/gffOut.c src/hg/hgTables/gffOut.c
index c02dbe7..a614b19 100644
--- src/hg/hgTables/gffOut.c
+++ src/hg/hgTables/gffOut.c
@@ -77,101 +77,114 @@
 	}
     else
 	{
 	frames[j] = '.';
 	}
     }
 if (retStartIndx)
     *retStartIndx = startIndx;
 if (retStopIndx)
     *retStopIndx = stopIndx;
 return frames;
 }
 
 static int offsetToGenomic(struct bed *bed, int exonIndx, int anchor, int offset)
 /* Return the genomic coord which is offset transcribed bases away from anchor,
- * which must fall in the exonIndx exon of bed. */
+ * which must fall in the exonIndx exon of bed.  If the offset is not contained
+ * by any exon, return -1. */
 {
 int simpleAnswer = anchor + offset;
 int exonStart = bed->chromStart + bed->chromStarts[exonIndx];
 int exonEnd = exonStart + bed->blockSizes[exonIndx];
 if ((offset >= 0 && (anchor >= exonEnd || anchor < exonStart)) ||
     (offset < 0 && (anchor > exonEnd || anchor <= exonStart)))
     errAbort("offsetToGenomic: anchor %d is not in exon %d [%d,%d]",
 	     anchor, exonIndx, exonStart, exonEnd);
 if (offset < 0 && simpleAnswer < exonStart)
     {
     if (exonIndx < 1)
-	errAbort("offsetToGenomic: need previous exon, but given index of %d", exonIndx);
+	return -1;
     int stillNeeded = simpleAnswer - exonStart;
     int prevExonEnd = bed->chromStart + bed->chromStarts[exonIndx-1] + bed->blockSizes[exonIndx-1];
     return offsetToGenomic(bed, exonIndx-1, prevExonEnd, stillNeeded);
     }
 else if (offset > 0 && simpleAnswer > exonEnd)
     {
     if (exonIndx >= bed->blockCount - 1)
-	errAbort("offsetToGenomic: need next exon, but given index of %d (>= %d)",
-		 exonIndx, bed->blockCount - 1);
+	return -1;
     int stillNeeded = simpleAnswer - exonEnd;
     int nextExonStart = bed->chromStart + bed->chromStarts[exonIndx+1];
     return offsetToGenomic(bed, exonIndx+1, nextExonStart, stillNeeded);
     }
 return simpleAnswer;
 }
 
 static void addCdsStartStop(struct bed *bed, char *source, int exonCdsStart, int exonCdsEnd,
 			    char *frames, int exonIndx, int cdsStartIndx, int cdsStopIndx,
 			    boolean gtf2StopCodons, char *txName)
 /* Output a CDS record for the trimmed CDS portion of exonIndx, and a start_codon or
  * stop_codon if applicable. */
 {
 boolean isRc = (bed->strand[0] == '-');
 /* start_codon (goes first for + strand) overlaps with CDS */
 if ((exonIndx == cdsStartIndx) && !isRc)
     {
     int startCodonEnd = offsetToGenomic(bed, exonIndx, exonCdsStart, 3);
+    if (startCodonEnd >= 0 && startCodonEnd <= bed->thickEnd)
     addGffLineFromBed(bed, source, "start_codon", exonCdsStart, startCodonEnd, '.', txName);
     }
 /* If gtf2StopCodons is set, then we follow the GTF2 convention of excluding
  * the stop codon from the CDS region.  In other GFF flavors, stop_codon is
  * part of the CDS, which is the case in our table coords too.
  * This function would already be complicated enough without gtf2StopCodons,
  * due to strand and the possibility of a start or stop codon being split
  * across exons.  Excluding the stop codon from CDS complicates it further
  * because cdsEnd on the current exon may affect the CDS line of the previous
  * or next exon.  The cdsPortion* variables below are the CDS extremities that
  * may have the stop codon excised. */
 if (exonIndx == cdsStopIndx)
     {
     if (isRc)
 	{
 	int stopCodonEnd = offsetToGenomic(bed, exonIndx, exonCdsStart, 3);
+	int cdsPortionStart = exonCdsStart;
+	if (stopCodonEnd >= 0 && stopCodonEnd <= bed->thickEnd)
+	    {
+	    if (gtf2StopCodons)
+		cdsPortionStart = stopCodonEnd;
 	addGffLineFromBed(bed, source, "stop_codon", exonCdsStart, stopCodonEnd, '.', txName);
-	int cdsPortionStart = gtf2StopCodons ? stopCodonEnd : exonCdsStart;
+	    }
 	if (cdsPortionStart < exonCdsEnd)
 	    addGffLineFromBed(bed, source, "CDS", cdsPortionStart, exonCdsEnd,
 			      frames[exonIndx], txName);
 	}
     else
 	{
 	int stopCodonStart = offsetToGenomic(bed, exonIndx, exonCdsEnd, -3);
-	int cdsPortionEnd = gtf2StopCodons ? stopCodonStart : exonCdsEnd;
-	if (cdsPortionEnd > exonCdsStart)
+	int cdsPortionEnd = exonCdsEnd;
+	if (stopCodonStart >= 0 && stopCodonStart >= bed->thickStart)
+	    {
+	    if (gtf2StopCodons)
+		cdsPortionEnd = stopCodonStart;
 	    addGffLineFromBed(bed, source, "CDS", exonCdsStart, cdsPortionEnd,
 			      frames[exonIndx], txName);
 	addGffLineFromBed(bed, source, "stop_codon", stopCodonStart, exonCdsEnd, '.', txName);
 	}
+	else if (cdsPortionEnd > exonCdsStart)
+	    addGffLineFromBed(bed, source, "CDS", exonCdsStart, cdsPortionEnd,
+			      frames[exonIndx], txName);
+	}
     }
 else
     {
     int cdsPortionStart = exonCdsStart;
     int cdsPortionEnd = exonCdsEnd;
     if (gtf2StopCodons)
 	{
 	if (isRc && exonIndx - 1 == cdsStopIndx)
 	    {
 	    int lastExonEnd = (bed->chromStart + bed->chromStarts[exonIndx-1] +
 			       bed->blockSizes[exonIndx-1]);
 	    int basesWereStolen = 3 - (lastExonEnd - bed->thickStart);
 	    if (basesWereStolen > 0)
 		cdsPortionStart += basesWereStolen;
 	    }
@@ -179,30 +192,31 @@
 	    {
 	    int nextExonStart = bed->chromStart + bed->chromStarts[exonIndx+1];
 	    int basesWillBeStolen = 3 - (bed->thickEnd - nextExonStart);
 	    if (basesWillBeStolen > 0)
 		cdsPortionEnd -= basesWillBeStolen;
 	    }
 	}
     if (cdsPortionEnd > cdsPortionStart)
 	addGffLineFromBed(bed, source, "CDS", cdsPortionStart, cdsPortionEnd,
 			  frames[exonIndx], txName);
     }
 /* start_codon (goes last for - strand) overlaps with CDS */
 if ((exonIndx == cdsStartIndx) && isRc)
     {
     int startCodonStart = offsetToGenomic(bed, exonIndx, exonCdsEnd, -3);
+    if (startCodonStart >= 0 && startCodonStart >= bed->thickStart)
     addGffLineFromBed(bed, source, "start_codon", startCodonStart, exonCdsEnd, '.', txName);
     }
 }
 
 
 static int bedToGffLines(struct bed *bedList, struct hTableInfo *hti,
 			 int fieldCount, char *source, boolean gtf2StopCodons)
 /* Translate a (list of) bed into gff and print out.
  * Note that field count (perhaps reduced by bitwise intersection)
  * can in effect override hti. */
 {
 struct hash *nameHash = newHash(20);
 struct bed *bed;
 int i, exonStart, exonEnd;
 char txName[256];