ee9f92a97d27b6e5708cd0647d470be190aa8b60 angie Tue Oct 26 16:12:29 2010 -0700 Code Review #1452 (v243 preview1): comment additions & clarificationsrequested by Melissa. diff --git src/hg/hgTables/gffOut.c src/hg/hgTables/gffOut.c index 7f32135..c02dbe7 100644 --- src/hg/hgTables/gffOut.c +++ src/hg/hgTables/gffOut.c @@ -40,30 +40,31 @@ static int namelessIx = 0; char buf[64]; safef(buf, sizeof(buf), "gene%d", ++namelessIx); gff.geneId = buf; } gffTabOut(&gff, stdout); } static char *computeFrames(struct bed *bed, int *retStartIndx, int *retStopIndx) /* Compute frames, in order dictated by strand. bed must be BED12. */ { char *frames = needMem(bed->blockCount); boolean gotFirstCds = FALSE; int nextPhase = 0, startIndx = 0, stopIndx = 0; +// If lack of thick region has been represented this way, fix: if (bed->thickStart == 0 && bed->thickEnd == 0) bed->thickStart = bed->thickEnd = bed->chromStart; int i; for (i=0; i < bed->blockCount; i++) { int j = (bed->strand[0] == '-') ? bed->blockCount-i-1 : i; int exonStart = bed->chromStart + bed->chromStarts[j]; int exonEnd = exonStart + bed->blockSizes[j]; if ((exonStart < bed->thickEnd) && (exonEnd > bed->thickStart)) { int cdsS = max(exonStart, bed->thickStart); int cdsE = min(exonEnd, bed->thickEnd); int cdsSize = cdsE - cdsS; if (! gotFirstCds) { @@ -109,42 +110,48 @@ { if (exonIndx >= bed->blockCount - 1) errAbort("offsetToGenomic: need next exon, but given index of %d (>= %d)", exonIndx, bed->blockCount - 1); int stillNeeded = simpleAnswer - exonEnd; int nextExonStart = bed->chromStart + bed->chromStarts[exonIndx+1]; return offsetToGenomic(bed, exonIndx+1, nextExonStart, stillNeeded); } return simpleAnswer; } static void addCdsStartStop(struct bed *bed, char *source, int exonCdsStart, int exonCdsEnd, char *frames, int exonIndx, int cdsStartIndx, int cdsStopIndx, boolean gtf2StopCodons, char *txName) /* Output a CDS record for the trimmed CDS portion of exonIndx, and a start_codon or - * stop_codon if applicable. Return the number of bases in the next exon (exonIndx+1) - * that are covered by the stop_codon, in case we need to exclude those from next CDS. */ + * stop_codon if applicable. */ { boolean isRc = (bed->strand[0] == '-'); /* start_codon (goes first for + strand) overlaps with CDS */ if ((exonIndx == cdsStartIndx) && !isRc) { int startCodonEnd = offsetToGenomic(bed, exonIndx, exonCdsStart, 3); addGffLineFromBed(bed, source, "start_codon", exonCdsStart, startCodonEnd, '.', txName); } -/* stop codon does not overlap with CDS in GTF2 -- watch out for - * gtf2StopCodons, especially in conjunction with strand. */ +/* If gtf2StopCodons is set, then we follow the GTF2 convention of excluding + * the stop codon from the CDS region. In other GFF flavors, stop_codon is + * part of the CDS, which is the case in our table coords too. + * This function would already be complicated enough without gtf2StopCodons, + * due to strand and the possibility of a start or stop codon being split + * across exons. Excluding the stop codon from CDS complicates it further + * because cdsEnd on the current exon may affect the CDS line of the previous + * or next exon. The cdsPortion* variables below are the CDS extremities that + * may have the stop codon excised. */ if (exonIndx == cdsStopIndx) { if (isRc) { int stopCodonEnd = offsetToGenomic(bed, exonIndx, exonCdsStart, 3); addGffLineFromBed(bed, source, "stop_codon", exonCdsStart, stopCodonEnd, '.', txName); int cdsPortionStart = gtf2StopCodons ? stopCodonEnd : exonCdsStart; if (cdsPortionStart < exonCdsEnd) addGffLineFromBed(bed, source, "CDS", cdsPortionStart, exonCdsEnd, frames[exonIndx], txName); } else { int stopCodonStart = offsetToGenomic(bed, exonIndx, exonCdsEnd, -3); int cdsPortionEnd = gtf2StopCodons ? stopCodonStart : exonCdsEnd;