d6321fcd0162394464a5b65cf47c9adad66e0b63
galt
  Thu Nov 7 15:39:39 2013 -0800
Fixes #12107. This fixes the frame column of hgTables GTF output, at least for tables that have an exonFrames column available, as non-ancient refGene and other genePredExt tables usually do.
diff --git src/hg/hgTables/gffOut.c src/hg/hgTables/gffOut.c
index cec1834..cb0ff47 100644
--- src/hg/hgTables/gffOut.c
+++ src/hg/hgTables/gffOut.c
@@ -33,71 +33,96 @@
 gff.frame = frame;
 gff.group = txName;
 if (bed->name != NULL)
     gff.geneId = bed->name;
 else
     {
     static int namelessIx = 0;
     char buf[64];
     safef(buf, sizeof(buf), "gene%d", ++namelessIx);
     gff.geneId = buf;
     }
 gffTabOut(&gff, stdout);
 }
 
 
-static char *computeFrames(struct bed *bed, int *retStartIndx, int *retStopIndx)
-/* Compute frames, in order dictated by strand.  bed must be BED12.  */
+static char *computeFrames(struct bed *bed, char *exonFrames, int *retStartIndx, int *retStopIndx)
+/* Compute frames, in order dictated by strand.  bed must be BED12.
+ * If exonFrames is non-NULL, convert its per-exon frames to gtf frames instead of computing them. */
 {
+int *ef;
+if (exonFrames)
+    {
+    int efSize = 0;
+    sqlSignedStaticArray(exonFrames, &ef, &efSize); // not thread safe.
+    if (efSize != bed->blockCount)
+	errAbort("Unexpected error, name=%s blockCount=%d but exonFrames size = %d", bed->name, bed->blockCount, efSize);
+    }
 char *frames = needMem(bed->blockCount);
 boolean gotFirstCds = FALSE;
 int nextPhase = 0, startIndx = 0, stopIndx = 0;
 // If lack of thick region has been represented this way, fix:
 if (bed->thickStart == 0 && bed->thickEnd == 0)
     bed->thickStart = bed->thickEnd = bed->chromStart;
 int i;
 for (i=0;  i < bed->blockCount;  i++)
     {
     int j = (bed->strand[0] == '-') ? bed->blockCount-i-1 : i;
     int exonStart = bed->chromStart + bed->chromStarts[j];
     int exonEnd = exonStart + bed->blockSizes[j];
     if ((exonStart < bed->thickEnd) && (exonEnd > bed->thickStart))
 	{
 	int cdsS = max(exonStart, bed->thickStart);
 	int cdsE = min(exonEnd, bed->thickEnd);
 	int cdsSize = cdsE - cdsS;
 	if (! gotFirstCds)
 	    {
 	    gotFirstCds = TRUE;
 	    startIndx = j;
 	    }
+	if (exonFrames)
+	    {
+	    int n = ef[j];
+	    char c = '.';     // -1 exonFrame becomes '.' in gtf 
+	    if (n == 0)
+		c = '0';
+	    else if (n == 1)  // gtf frames are really "phases", so 1 and 2 swap.
+		c = '2';
+	    else if (n == 2)
+		c = '1';
+	    frames[j] = c;
+	    }
+	else
+	    {
 	    frames[j] = '0' + nextPhase;
 	    nextPhase = (3 + ((nextPhase - cdsSize) % 3)) % 3;
+	    }
 	stopIndx = j;
 	}
     else
 	{
 	frames[j] = '.';
 	}
     }
 if (retStartIndx)
     *retStartIndx = startIndx;
 if (retStopIndx)
     *retStopIndx = stopIndx;
 return frames;
 }
 
+
 static void addStartStopCodon(struct bed *bed, int exonIndx, int anchor, int offset, char *codon,
 			      char *source, char *txName)
 /* Output a start or stop codon -- if it is split across multiple exons, output multiple lines. 
  * anchor must fall in the exonIndx exon of bed.
  * If offset is positive, we are computing an end coord; if negative, a start coord. */
 {
 int simpleAnswer = anchor + offset;
 int exonStart = bed->chromStart + bed->chromStarts[exonIndx];
 int exonEnd = exonStart + bed->blockSizes[exonIndx];
 if ((offset >= 0 && (anchor >= exonEnd || anchor < exonStart)) ||
     (offset < 0 && (anchor > exonEnd || anchor <= exonStart)))
     errAbort("addStartStopCodon: anchor %d is not in exon %d [%d,%d]",
 	     anchor, exonIndx, exonStart, exonEnd);
 if (offset < 0)
     {
@@ -190,68 +215,73 @@
 	    int basesWillBeStolen = 3 - (bed->thickEnd - nextExonStart);
 	    if (basesWillBeStolen > 0)
 		cdsPortionEnd -= basesWillBeStolen;
 	    }
 	}
     if (cdsPortionEnd > cdsPortionStart)
 	addGffLineFromBed(bed, source, "CDS", cdsPortionStart, cdsPortionEnd,
 			  frames[exonIndx], txName);
     }
 /* start_codon (goes last for - strand) overlaps with CDS */
 if ((exonIndx == cdsStartIndx) && isRc)
     addStartStopCodon(bed, exonIndx, exonCdsEnd, -3, "start_codon", source, txName);
 }
 
 
-static int bedToGffLines(struct bed *bedList, struct hTableInfo *hti,
+static int bedToGffLines(struct bed *bedList, struct slName *exonFramesList, struct hTableInfo *hti,
 			 int fieldCount, char *source, boolean gtf2StopCodons)
 /* Translate a (list of) bed into gff and print out.
  * Note that field count (perhaps reduced by bitwise intersection)
  * can in effect override hti. */
 {
 struct hash *nameHash = newHash(20);
 struct bed *bed;
+struct slName *exonFrames = exonFramesList;
 int i, exonStart, exonEnd;
 char txName[256];
 int itemCount = 0;
 static int namelessIx = 0;
 
 for (bed = bedList;  bed != NULL;  bed = bed->next)
     {
     /* Enforce unique transcript_ids. */
     if (bed->name != NULL)
 	{
 	struct hashEl *hel = hashLookup(nameHash, bed->name);
 	int dupCount = (hel != NULL ? ptToInt(hel->val) : 0);
 	if (dupCount > 0)
 	    {
 	    safef(txName, sizeof(txName), "%s_dup%d", bed->name, dupCount);
 	    hel->val = intToPt(dupCount + 1);
 	    }
 	else
 	    {
 	    safef(txName, sizeof(txName), "%s", bed->name);
 	    hashAddInt(nameHash, bed->name, 1);
 	    }
 	}
     else
 	safef(txName, sizeof(txName), "tx%d", ++namelessIx);
     if (hti->hasBlocks && hti->hasCDS && fieldCount > 4)
 	{
 	/* first pass: compute frames, in order dictated by strand. */
 	int startIndx = 0, stopIndx = 0;
-	char *frames = computeFrames(bed, &startIndx, &stopIndx);
+	char *frames = NULL;
+	char *ef = NULL;
+	if (exonFramesList)
+    	    ef = exonFrames->name;
+	frames = computeFrames(bed, ef, &startIndx, &stopIndx);
 
 	/* second pass: one exon (possibly CDS, start/stop_codon) per block. */
 	for (i=0;  i < bed->blockCount;  i++)
 	    {
 	    exonStart = bed->chromStart + bed->chromStarts[i];
 	    exonEnd = exonStart + bed->blockSizes[i];
 	    if ((exonStart < bed->thickEnd) && (exonEnd > bed->thickStart))
 		{
 		int exonCdsStart = max(exonStart, bed->thickStart);
 		int exonCdsEnd = min(exonEnd, bed->thickEnd);
 		addCdsStartStop(bed, source, exonCdsStart, exonCdsEnd,
 				frames, i, startIndx, stopIndx, gtf2StopCodons, txName);
 		}
 	    addGffLineFromBed(bed, source, "exon", exonStart, exonEnd, '.', txName);
 	    }
@@ -274,45 +304,82 @@
 	    {
 	    addGffLineFromBed(bed, source, "exon", bed->chromStart, bed->thickStart, '.', txName);
 	    }
 	if (bed->thickEnd > bed->thickStart)
 	    addGffLineFromBed(bed, source, "CDS", bed->thickStart, bed->thickEnd, '0', txName);
 	if (bed->thickEnd < bed->chromEnd)
 	    {
 	    addGffLineFromBed(bed, source, "exon", bed->thickEnd, bed->chromEnd, '.', txName);
 	    }
 	}
     else
 	{
 	addGffLineFromBed(bed, source, "exon", bed->chromStart, bed->chromEnd, '.', txName);
 	}
     itemCount++;
+    if (exonFrames)
+    	exonFrames = exonFrames->next;
     }
 hashFree(&nameHash);
 return itemCount;
 }
 
+static struct slName* getExonFrames(char *table, struct sqlConnection *conn, struct bed *bedList)
+/* Get the real exonFrames from table, one list entry per bed in bedList, in the same order as bedList. */
+{
+struct slName* list = NULL;
+struct bed *bed;
+for (bed = bedList;  bed != NULL;  bed = bed->next)
+    {
+    char sql[1024];
+    sqlSafef(sql, sizeof sql, "select exonFrames "
+	"from %s where " 
+	"name = '%s' and "  // be specific, the same name may align to multiple locations
+	"chrom = '%s' and "
+	"strand = '%c' and "
+	"txStart = %d and "
+	"txEnd = %d"
+	, 
+	table, 
+	bed->name,
+	bed->chrom,
+	bed->strand[0],
+	bed->chromStart,
+	bed->chromEnd
+	);
+    char *exonFrames = sqlQuickString(conn, sql);
+    slNameAddHead(&list, exonFrames);
+    }
+slReverse(&list);
+return list;
+}
+
 void doOutGff(char *table, struct sqlConnection *conn, boolean outputGtf)
 /* Save as GFF/GTF. */
 {
 struct hTableInfo *hti = getHti(database, table, conn);
 struct bed *bedList;
+struct slName *exonFramesList = NULL;
 char source[HDB_MAX_TABLE_STRING];
 int itemCount;
 struct region *region, *regionList = getRegions();
 
 textOpen();
 
+int efIdx = sqlFieldIndex(conn, table, "exonFrames");
+
 safef(source, sizeof(source), "%s_%s", database, table);
 itemCount = 0;
 for (region = regionList; region != NULL; region = region->next)
     {
     struct lm *lm = lmInit(64*1024);
     int fieldCount;
     bedList = cookedBedList(conn, table, region, lm, &fieldCount);
-    itemCount += bedToGffLines(bedList, hti, fieldCount, source, outputGtf);
+    if (efIdx != -1)
+	exonFramesList = getExonFrames(table, conn, bedList);
+    itemCount += bedToGffLines(bedList, exonFramesList, hti, fieldCount, source, outputGtf);
     lmCleanup(&lm);
     }
 if (itemCount == 0)
     hPrintf(NO_RESULTS);
 }