148cd685553c26bb02f7d6e9cde931a153faa7d8
hiram
  Tue Jun 30 14:12:33 2020 -0700
now correctly numbering reverse strand exons refs #25379

diff --git src/hg/genePredToGtf/genePredToGtf.c src/hg/genePredToGtf/genePredToGtf.c
index b0bf513..fb8b4f5 100644
--- src/hg/genePredToGtf/genePredToGtf.c
+++ src/hg/genePredToGtf/genePredToGtf.c
@@ -73,53 +73,61 @@
     hashAddInt(dupeHash, root, 1);
     return root;
     }
 else
     {
     static char buf[256];
     int val = ptToInt(hel->val) + 1;
     hel->val = intToPt(val);
     safef(buf, sizeof(buf), "%s_%d", root, val);
     return buf;
     }
 }
 
 static void writeGtfLine(FILE *f, char *source, char *name, char *geneName,
 	char *chrom, char strand, char *type, 
-	int start, int end, int exonIx, int frame)
+	int start, int end, int exonIx, int frame, int exonCount)
 /* Write a single exon to GTF file */
 {
 assert(start <= end);
 
 /* convert frame to phase */
 char phase = (frame < 0) ? '.' : (frame == 0) ? '0'
     : (frame == 1) ? '2' : '1';
 fprintf(f, "%s\t", chrom);
 fprintf(f, "%s\t", source);
 fprintf(f, "%s\t", type);
 fprintf(f, "%d\t", start+1);
 fprintf(f, "%d\t", end);
 fprintf(f, ".\t");	/* Score. */
 fprintf(f, "%c\t", strand);
 fprintf(f, "%c\t", phase);
 fprintf(f, "gene_id \"%s\"; ", (isNotEmpty(geneName)) ? geneName : name);
 fprintf(f, "transcript_id \"%s\"; ", name);
 if (exonIx >= 0)
     {
+    if (strand == '-')
+	{
+	fprintf(f, "exon_number \"%d\"; ", exonCount-exonIx);
+	fprintf(f, "exon_id \"%s.%d\";", name, exonCount-exonIx);
+	}
+    else
+	{
 	fprintf(f, "exon_number \"%d\"; ", exonIx+1);
 	fprintf(f, "exon_id \"%s.%d\";", name, exonIx+1);
 	}
+    }
 if (isNotEmpty(geneName))
     fprintf(f, " gene_name \"%s\";", geneName);
 fprintf(f, "\n");
 }
 
 static boolean inExon(struct genePred *gp, int iExon, int pos)
 /* determine if pos is in the specified exon */
 {
 return ((gp->exonStarts[iExon] <= pos) && (pos <= gp->exonEnds[iExon]));
 }
 
 static int movePos(struct genePred *gp, int pos, int dist)
 /* Move a position in an exon by dist, which is positive to move forward, and
  * negative to move backwards. Introns are skipped.  Error if can't move
  * distance and stay in exon.
@@ -179,40 +187,40 @@
     int start1, end1;   /* first part of codon */
     int iExon1;         /* exon containing first part */
     int start2, end2;   /* second part of codon if spliced, or 0,0 */
     int iExon2;         /* exon containing second part */
 };
 static struct codonCoords zeroCodonCoords = {0, 0, 0, 0, 0, 0, 0, 0};
 
 static boolean codonComplete(struct codonCoords *codon)
 /* are all bases of codon defined? */
 {
 return (((codon->end1-codon->start1)+(codon->end2-codon->start2)) == 3);
 }
 
 static void writeCodon(FILE *f, char *source, char *name, char *geneName,
                        char *chrom, char strand, char *type, 
-                       struct codonCoords *codon)
+                       struct codonCoords *codon, int exonCount)
 /* write the location of a codon to the GTF, if it is non-zero and complete */
 {
 writeGtfLine(f, source, name, geneName, chrom, strand, type, 
-             codon->start1, codon->end1, codon->iExon1, 0);
+             codon->start1, codon->end1, codon->iExon1, 0, exonCount);
         
 if (codon->start2 < codon->end2)
     writeGtfLine(f, source, name, geneName, chrom, strand, type, 
                  codon->start2, codon->end2, codon->iExon2, 
-                 (codon->end1-codon->start1));
+                 (codon->end1-codon->start1), exonCount);
 }
 
 static struct codonCoords findFirstCodon(struct genePred *gp, int *frames)
 /* get the coordinates of the first codon (start or stop). It must be in
  * correct frame, or zero is returned. */
 {
 if (honorCdsStat && (gp->optFields & genePredCdsStatFld)
     && (gp->cdsStartStat != cdsComplete))
     return zeroCodonCoords;  // not flagged as complete
 
 // find first CDS exon
 int iExon, cdsStart = 0, cdsEnd = 0;
 for (iExon = 0; iExon < gp->exonCount; iExon++)
     {
     if (genePredCdsExon(gp, iExon, &cdsStart, &cdsEnd))
@@ -322,45 +330,45 @@
 }
 
 static void writeFeatures(struct genePred *gp, int i, char *source, char *name,
                           char *chrom, char strand, char *geneName,
                           int firstUtrEnd, int cdsStart, int cdsEnd, int lastUtrStart,
                           int frame, FILE *f)
 /* write exons CDS/UTR features */
 {
 int exonStart = gp->exonStarts[i];
 int exonEnd = gp->exonEnds[i];
 if (utr && (exonStart < firstUtrEnd))
     {
     int end = min(exonEnd, firstUtrEnd);
     writeGtfLine(f, source, name, geneName, chrom, strand,
                  ((strand == '+') ? "5UTR" : "3UTR"),
-                 exonStart, end, i, -1);
+                 exonStart, end, i, -1, gp->exonCount);
     }
 if ((cdsStart < exonEnd) && (cdsEnd > exonStart))
         {
         int start = max(exonStart, cdsStart);
         int end = min(exonEnd, cdsEnd);
         writeGtfLine(f, source, name, geneName, chrom, strand, "CDS",
-                     start, end, i, frame);
+                     start, end, i, frame, gp->exonCount);
         }
 if (utr && (exonEnd > lastUtrStart))
     {
     int start = max(lastUtrStart, exonStart);
     writeGtfLine(f, source, name, geneName, chrom, strand,
                  ((strand == '+') ? "3UTR" : "5UTR"),
-                 start, exonEnd, i, -1);
+                 start, exonEnd, i, -1, gp->exonCount);
     }
 }
 
 void genePredWriteToGtf(struct genePred *gp, char *source, 
 	struct hash *dupeHash, FILE *f)
 /* Write out genePredName to GTF file. */
 {
 int i;
 char *name = findUniqueName(dupeHash, gp->name);
 char *geneName = gp->name2;
 char *chrom = gp->chrom;
 char strand = gp->strand[0];
 int *frames = (gp->optFields & genePredExonFramesFld) ? gp->exonFrames : calcFrames(gp);
 struct codonCoords firstCodon = findFirstCodon(gp, frames);
 struct codonCoords lastCodon = findLastCodon(gp, frames);
@@ -368,57 +376,57 @@
 // figure out bounds of CDS and UTR regions, moving stop codon to outside of
 // CDS.
 int firstUtrEnd = gp->cdsStart, lastUtrStart = gp->cdsEnd;
 int cdsStart = gp->cdsStart, cdsEnd = gp->cdsEnd;
 if ((strand == '+') && codonComplete(&lastCodon))
     cdsEnd = movePos(gp, lastUtrStart, -3);
 if ((strand == '-') && codonComplete(&firstCodon))
     cdsStart = movePos(gp, cdsStart, 3);
 
 if (addComments)
     fprintf(f, "###\n# %s %s:%d-%d (%s) CDS: %d-%d\n#\n",
             gp->name, gp->chrom, gp->txStart, gp->txEnd,
             gp->strand, gp->cdsStart, gp->cdsEnd);
 
 writeGtfLine(f, source, name, geneName, chrom, strand, "transcript", 
-             gp->txStart, gp->txEnd, -1, -1);
+             gp->txStart, gp->txEnd, -1, -1, gp->exonCount);
 for (i=0; i<gp->exonCount; ++i)
     {
     writeGtfLine(f, source, name, geneName, chrom, strand, "exon", 
-             gp->exonStarts[i], gp->exonEnds[i], i, -1);
+             gp->exonStarts[i], gp->exonEnds[i], i, -1, gp->exonCount);
     if (cdsStart < cdsEnd)
         writeFeatures(gp, i, source, name, chrom, strand, geneName, 
                       firstUtrEnd, cdsStart, cdsEnd, lastUtrStart,
                       frames[i], f);
     }
 if (gp->strand[0] == '+')
     {
     if (codonComplete(&firstCodon))
         writeCodon(f, source, name, geneName, chrom, strand, "start_codon", 
-                   &firstCodon);
+                   &firstCodon, gp->exonCount);
     if (codonComplete(&lastCodon))
         writeCodon(f, source, name, geneName, chrom, strand, "stop_codon",
-                   &lastCodon);
+                   &lastCodon, gp->exonCount);
     }
 else
     {
     if (codonComplete(&lastCodon))
         writeCodon(f, source, name, geneName, chrom, strand, "start_codon",
-                   &lastCodon);
+                   &lastCodon, gp->exonCount);
     if (codonComplete(&firstCodon))
         writeCodon(f, source, name, geneName, chrom, strand, "stop_codon",
-                   &firstCodon);
+                   &firstCodon, gp->exonCount);
     }
 if (!(gp->optFields & genePredExonFramesFld))
     freeMem(frames);
 }
 
 void genePredToGtf(char *database, char *table, char *gtfOut)
 /* genePredToGtf - Convert genePredName table or file to gtf.. */
 {
 FILE *f = mustOpen(gtfOut, "w");
 struct hash *dupeHash = newHash(16);
 struct genePred *gpList = NULL, *gp = NULL;
 
 if (sameString(database, "file"))
     {
     gpList = genePredReaderLoadFile(table, NULL);