7d25bd4ceec23b81514d2d4852f75ddc28b8d87e kent Thu Mar 21 13:34:50 2013 -0700 When turning GFF to gene pred, calculate start/end from exons and cds only, not from every possible type. diff --git src/hg/lib/genePred.c src/hg/lib/genePred.c index 807be2c..5a1e59e 100644 --- src/hg/lib/genePred.c +++ src/hg/lib/genePred.c @@ -765,44 +765,64 @@ * ugly with to many check of isGtf, however the was way to much identical * code the other way. Options are from genePredFromGxfOpts */ { struct genePred *gp; int stopCodonStart = -1, stopCodonEnd = -1; int cdsStart = BIGNUM, cdsEnd = -BIGNUM; int exonCount = 0; boolean haveStartCodon = FALSE, haveStopCodon = FALSE; struct gffLine *gl; unsigned *eStarts, *eEnds; int i; /* should we count on start/stop codon annotation in GFFs? */ boolean useStartStops = isGtf || haveStartStopCodons(gff); +int geneStart = 0, geneEnd = 0; + /* Count up exons and figure out cdsStart and cdsEnd. */ for (gl = group->lineList; gl != NULL; gl = gl->next) { + boolean exonishLine = FALSE; if (ignoreGxfLine(gl, isGtf)) continue; if (isExon(gl->feature, isGtf, exonSelectWord)) + { + exonishLine = TRUE; ++exonCount; + } if (isCds(gl->feature)) { + exonishLine = TRUE; if (gl->start < cdsStart) cdsStart = gl->start; if (gl->end > cdsEnd) cdsEnd = gl->end; } + if (exonishLine) + { + if (geneStart == geneEnd) // Not initialized yet + { + geneStart = gl->start; + geneEnd = gl->end; + } + else + { + geneStart = min(gl->start, geneStart); + geneEnd = max(gl->end, geneEnd); + } + } if (sameWord(gl->feature, "start_codon")) haveStartCodon = TRUE; if (sameWord(gl->feature, "stop_codon")) { /* stop_codon can be split, need bounds for adjusting CDS below */ if ((stopCodonStart < 0) || (gl->start < stopCodonStart)) stopCodonStart = gl->start; if ((stopCodonEnd < 0) || (gl->end > stopCodonEnd)) stopCodonEnd = gl->end; haveStopCodon = TRUE; } } if (exonCount == 0) return NULL; if (cdsStart > cdsEnd) @@ -819,32 +839,32 @@ if (stopCodonEnd > cdsEnd) cdsEnd = stopCodonEnd; } else { if (stopCodonStart < cdsStart) cdsStart = stopCodonStart; } } /* Allocate genePred and fill in values. */ AllocVar(gp); gp->name = cloneString(name); gp->chrom = cloneString(group->seq); gp->strand[0] = group->strand; -gp->txStart = group->start; -gp->txEnd = group->end; +gp->txStart = geneStart; +gp->txEnd = geneEnd; if (cdsStart < cdsEnd) { gp->cdsStart = cdsStart; gp->cdsEnd = cdsEnd; } else { // no CDS, set to txEnd gp->cdsStart = gp->txEnd; gp->cdsEnd = gp->txEnd; } gp->exonStarts = AllocArray(eStarts, exonCount); gp->exonEnds = AllocArray(eEnds, exonCount); gp->optFields = optFields;