7d25bd4ceec23b81514d2d4852f75ddc28b8d87e
kent
  Thu Mar 21 13:34:50 2013 -0700
When converting GFF to genePred, calculate txStart/txEnd from exon and CDS lines only, not from every possible feature type.
diff --git src/hg/lib/genePred.c src/hg/lib/genePred.c
index 807be2c..5a1e59e 100644
--- src/hg/lib/genePred.c
+++ src/hg/lib/genePred.c
@@ -765,44 +765,64 @@
  * ugly with to many check of isGtf, however the was way to much identical
  * code the other way. Options are from genePredFromGxfOpts */
 {
 struct genePred *gp;
 int stopCodonStart = -1, stopCodonEnd = -1;
 int cdsStart = BIGNUM, cdsEnd = -BIGNUM;
 int exonCount = 0;
 boolean haveStartCodon = FALSE, haveStopCodon = FALSE;
 struct gffLine *gl;
 unsigned *eStarts, *eEnds;
 int i;
 
 /* should we count on start/stop codon annotation in GFFs? */
 boolean useStartStops = isGtf || haveStartStopCodons(gff);
 
+int geneStart = 0, geneEnd = 0;
+
 /* Count up exons and figure out cdsStart and cdsEnd. */
 for (gl = group->lineList; gl != NULL; gl = gl->next)
     {
+    boolean exonishLine = FALSE;
     if (ignoreGxfLine(gl, isGtf))
         continue;
     if (isExon(gl->feature, isGtf, exonSelectWord))
+	{
+	exonishLine = TRUE;
 	++exonCount;
+	}
     if (isCds(gl->feature))
         {
+	exonishLine = TRUE;
 	if (gl->start < cdsStart)
             cdsStart = gl->start;
 	if (gl->end > cdsEnd)
             cdsEnd = gl->end;
 	}
+    if (exonishLine)
+        {
+	if (geneStart == geneEnd)  // Not initialized yet
+	     {
+	     geneStart = gl->start;
+	     geneEnd = gl->end;
+	     }
+	else
+	     {
+	     geneStart = min(gl->start, geneStart);
+	     geneEnd = max(gl->end, geneEnd);
+	     }
+	}
     if (sameWord(gl->feature, "start_codon"))
         haveStartCodon = TRUE;
     if (sameWord(gl->feature, "stop_codon"))
         {
         /* stop_codon can be split, need bounds for adjusting CDS below */
         if ((stopCodonStart < 0) || (gl->start < stopCodonStart))
             stopCodonStart = gl->start;
         if ((stopCodonEnd < 0) || (gl->end > stopCodonEnd))
             stopCodonEnd = gl->end;
         haveStopCodon = TRUE;
         }
     }
 if (exonCount == 0)
     return NULL;
 if (cdsStart > cdsEnd)
@@ -819,32 +839,32 @@
         if (stopCodonEnd > cdsEnd)
             cdsEnd = stopCodonEnd;
         }
     else
         {
         if (stopCodonStart < cdsStart)
             cdsStart = stopCodonStart;
         }
     }
 
 /* Allocate genePred and fill in values. */
 AllocVar(gp);
 gp->name = cloneString(name);
 gp->chrom = cloneString(group->seq);
 gp->strand[0] = group->strand;
-gp->txStart = group->start;
-gp->txEnd = group->end;
+gp->txStart = geneStart;
+gp->txEnd = geneEnd;
 if (cdsStart < cdsEnd)
     {
     gp->cdsStart = cdsStart;
     gp->cdsEnd = cdsEnd;
     }
 else
     {
     // no CDS, set to txEnd
     gp->cdsStart = gp->txEnd;
     gp->cdsEnd = gp->txEnd;
     }
 gp->exonStarts = AllocArray(eStarts, exonCount);
 gp->exonEnds = AllocArray(eEnds, exonCount);
 gp->optFields = optFields;