a42565e69da8b3a6e72c92afb4108ae522216195 angie Mon Mar 28 10:41:35 2011 -0700 Bug #2964 (Table Browser: RefGene output in GTF gives error):An unanticipated condition led to an errAbort: rn4 refGene contained a transcript (alternate alignment of NM_001008876) with the coding region annotated as the first base of the first exon. Now, instead of errAborting when a requested codon offset is out of bounds, we don't attempt to print start_codon or stop_codon elements when the entire codon is not contained in the transcript's exons and coding region. diff --git src/hg/hgTables/gffOut.c src/hg/hgTables/gffOut.c index c02dbe7..a614b19 100644 --- src/hg/hgTables/gffOut.c +++ src/hg/hgTables/gffOut.c @@ -77,101 +77,114 @@ } else { frames[j] = '.'; } } if (retStartIndx) *retStartIndx = startIndx; if (retStopIndx) *retStopIndx = stopIndx; return frames; } static int offsetToGenomic(struct bed *bed, int exonIndx, int anchor, int offset) /* Return the genomic coord which is offset transcribed bases away from anchor, - * which must fall in the exonIndx exon of bed. */ + * which must fall in the exonIndx exon of bed. If the offset is not contained + * by any exon, return -1. */ { int simpleAnswer = anchor + offset; int exonStart = bed->chromStart + bed->chromStarts[exonIndx]; int exonEnd = exonStart + bed->blockSizes[exonIndx]; if ((offset >= 0 && (anchor >= exonEnd || anchor < exonStart)) || (offset < 0 && (anchor > exonEnd || anchor <= exonStart))) errAbort("offsetToGenomic: anchor %d is not in exon %d [%d,%d]", anchor, exonIndx, exonStart, exonEnd); if (offset < 0 && simpleAnswer < exonStart) { if (exonIndx < 1) - errAbort("offsetToGenomic: need previous exon, but given index of %d", exonIndx); + return -1; int stillNeeded = simpleAnswer - exonStart; int prevExonEnd = bed->chromStart + bed->chromStarts[exonIndx-1] + bed->blockSizes[exonIndx-1]; return offsetToGenomic(bed, exonIndx-1, prevExonEnd, stillNeeded); } else if (offset > 0 && simpleAnswer > exonEnd) { if (exonIndx >= bed->blockCount - 1) - errAbort("offsetToGenomic: need next exon, but given index of %d (>= %d)", - exonIndx, bed->blockCount - 1); + return -1; int stillNeeded = simpleAnswer - exonEnd; int nextExonStart = bed->chromStart + bed->chromStarts[exonIndx+1]; return offsetToGenomic(bed, exonIndx+1, nextExonStart, stillNeeded); } return simpleAnswer; } static void addCdsStartStop(struct bed *bed, char *source, int exonCdsStart, int exonCdsEnd, char *frames, int exonIndx, int cdsStartIndx, int cdsStopIndx, boolean gtf2StopCodons, char *txName) /* Output a CDS record for the trimmed CDS portion of exonIndx, and a start_codon or * stop_codon if applicable. */ { boolean isRc = (bed->strand[0] == '-'); /* start_codon (goes first for + strand) overlaps with CDS */ if ((exonIndx == cdsStartIndx) && !isRc) { int startCodonEnd = offsetToGenomic(bed, exonIndx, exonCdsStart, 3); + if (startCodonEnd >= 0 && startCodonEnd <= bed->thickEnd) addGffLineFromBed(bed, source, "start_codon", exonCdsStart, startCodonEnd, '.', txName); } /* If gtf2StopCodons is set, then we follow the GTF2 convention of excluding * the stop codon from the CDS region. In other GFF flavors, stop_codon is * part of the CDS, which is the case in our table coords too. * This function would already be complicated enough without gtf2StopCodons, * due to strand and the possibility of a start or stop codon being split * across exons. Excluding the stop codon from CDS complicates it further * because cdsEnd on the current exon may affect the CDS line of the previous * or next exon. The cdsPortion* variables below are the CDS extremities that * may have the stop codon excised. */ if (exonIndx == cdsStopIndx) { if (isRc) { int stopCodonEnd = offsetToGenomic(bed, exonIndx, exonCdsStart, 3); + int cdsPortionStart = exonCdsStart; + if (stopCodonEnd >= 0 && stopCodonEnd <= bed->thickEnd) + { + if (gtf2StopCodons) + cdsPortionStart = stopCodonEnd; addGffLineFromBed(bed, source, "stop_codon", exonCdsStart, stopCodonEnd, '.', txName); - int cdsPortionStart = gtf2StopCodons ? stopCodonEnd : exonCdsStart; + } if (cdsPortionStart < exonCdsEnd) addGffLineFromBed(bed, source, "CDS", cdsPortionStart, exonCdsEnd, frames[exonIndx], txName); } else { int stopCodonStart = offsetToGenomic(bed, exonIndx, exonCdsEnd, -3); - int cdsPortionEnd = gtf2StopCodons ? stopCodonStart : exonCdsEnd; - if (cdsPortionEnd > exonCdsStart) + int cdsPortionEnd = exonCdsEnd; + if (stopCodonStart >= 0 && stopCodonStart >= bed->thickStart) + { + if (gtf2StopCodons) + cdsPortionEnd = stopCodonStart; addGffLineFromBed(bed, source, "CDS", exonCdsStart, cdsPortionEnd, frames[exonIndx], txName); addGffLineFromBed(bed, source, "stop_codon", stopCodonStart, exonCdsEnd, '.', txName); } + else if (cdsPortionEnd > exonCdsStart) + addGffLineFromBed(bed, source, "CDS", exonCdsStart, cdsPortionEnd, + frames[exonIndx], txName); + } } else { int cdsPortionStart = exonCdsStart; int cdsPortionEnd = exonCdsEnd; if (gtf2StopCodons) { if (isRc && exonIndx - 1 == cdsStopIndx) { int lastExonEnd = (bed->chromStart + bed->chromStarts[exonIndx-1] + bed->blockSizes[exonIndx-1]); int basesWereStolen = 3 - (lastExonEnd - bed->thickStart); if (basesWereStolen > 0) cdsPortionStart += basesWereStolen; } @@ -179,30 +192,31 @@ { int nextExonStart = bed->chromStart + bed->chromStarts[exonIndx+1]; int basesWillBeStolen = 3 - (bed->thickEnd - nextExonStart); if (basesWillBeStolen > 0) cdsPortionEnd -= basesWillBeStolen; } } if (cdsPortionEnd > cdsPortionStart) addGffLineFromBed(bed, source, "CDS", cdsPortionStart, cdsPortionEnd, frames[exonIndx], txName); } /* start_codon (goes last for - strand) overlaps with CDS */ if ((exonIndx == cdsStartIndx) && isRc) { int startCodonStart = offsetToGenomic(bed, exonIndx, exonCdsEnd, -3); + if (startCodonStart >= 0 && startCodonStart >= bed->thickStart) addGffLineFromBed(bed, source, "start_codon", startCodonStart, exonCdsEnd, '.', txName); } } static int bedToGffLines(struct bed *bedList, struct hTableInfo *hti, int fieldCount, char *source, boolean gtf2StopCodons) /* Translate a (list of) bed into gff and print out. * Note that field count (perhaps reduced by bitwise intersection) * can in effect override hti. */ { struct hash *nameHash = newHash(20); struct bed *bed; int i, exonStart, exonEnd; char txName[256];