4d2f4049ac3d658165ea79d365cc6ed642e1683e
angie
  Tue May 26 12:25:41 2015 -0700
In order to make it more clear that wiggle values are averaged now,
print out wiggle's value column as "valueAverage" in the header.
refs #14579 note 61

diff --git src/lib/annoFormatTab.c src/lib/annoFormatTab.c
index 9df06f0..d80f3a3 100644
--- src/lib/annoFormatTab.c
+++ src/lib/annoFormatTab.c
@@ -1,275 +1,289 @@
 /* annoFormatTab -- collect fields from all inputs and print them out, tab-separated. */
 
 /* Copyright (C) 2013 The Regents of the University of California 
  * See README in this or parent directory for licensing information. */
 
 #include "annoFormatTab.h"
 #include "annoGratorQuery.h"
 #include "dystring.h"
 
 struct annoFormatTab
     {
     struct annoFormatter formatter;     // External interface
     char *fileName;                     // Output file name, can be "stdout"
     FILE *f;                            // Output file handle
     struct hash *columnVis;             // Hash of columns that have been explicitly selected
                                         // or deselected by user.
     boolean needHeader;			// TRUE if we should print out the header
     };
 
+//#*** config options: avg? more stats? list of values?
+static boolean doAvg = TRUE;
+
 static void makeFullColumnName(char *fullName, size_t size, char *sourceName, char *colName)
 /* If sourceName is non-empty, make fullName sourceName.colName, otherwise just colName. */
 {
 if (isNotEmpty(sourceName))
     safef(fullName, size, "%s.%s", sourceName, colName);
 else
     safecpy(fullName, size, colName);
 }
 
 void annoFormatTabSetColumnVis(struct annoFormatter *vSelf, char *sourceName, char *colName,
                                boolean enabled)
 /* Explicitly include or exclude column in output.  sourceName must be the same
  * as the corresponding annoStreamer source's name. */
 {
 struct annoFormatTab *self = (struct annoFormatTab *)vSelf;
 if (! self->columnVis)
     self->columnVis = hashNew(0);
 char fullName[PATH_LEN];
 makeFullColumnName(fullName, sizeof(fullName), sourceName, colName);
 hashAddInt(self->columnVis, fullName, enabled);
 }
 
 static boolean columnIsIncluded(struct annoFormatTab *self, char *sourceName, char *colName)
 // Return TRUE if column has not been explicitly deselected.
 {
 if (self->columnVis)
     {
     char fullName[PATH_LEN];
     makeFullColumnName(fullName, sizeof(fullName), sourceName, colName);
     int vis = hashIntValDefault(self->columnVis, fullName, 1);
     if (vis == 0)
         return FALSE;
     }
 return TRUE;
 }
 
+static boolean isWiggle(struct asColumn *colList)
+/* Recognize wiggle/bigWig data by its column signature. */
+{
+return (slCount(colList) == 4 &&
+        sameString(colList->name, "chrom") &&
+        sameString(colList->next->name, "chromStart") &&
+        sameString(colList->next->next->name, "chromEnd") &&
+        sameString(colList->next->next->next->name, "value"));
+}
+
 static void printHeaderColumns(struct annoFormatTab *self, struct annoStreamer *source,
                                boolean isFirst)
 /* Print names of included columns from this source. */
 {
 FILE *f = self->f;
 char *sourceName = source->name;
+boolean isAvg = isWiggle(source->asObj->columnList) && doAvg;
 struct asColumn *col;
 int i;
 for (col = source->asObj->columnList, i = 0;  col != NULL;  col = col->next, i++)
     {
     if (columnIsIncluded(self, sourceName, col->name))
         {
         if (isFirst)
             {
             fputc('#', f);
             isFirst = FALSE;
             }
         else
             fputc('\t', f);
         char fullName[PATH_LEN];
         makeFullColumnName(fullName, sizeof(fullName), sourceName, col->name);
+        if (isAvg && sameString(col->name, "value"))
+            safecat(fullName, sizeof(fullName), "Average");
         fputs(fullName, f);
         }
     }
 }
 
 static void aftInitialize(struct annoFormatter *vSelf, struct annoStreamer *primary,
 			  struct annoStreamer *integrators)
 /* Print header, regardless of whether we get any data after this. */
 {
 struct annoFormatTab *self = (struct annoFormatTab *)vSelf;
 if (self->needHeader)
     {
     char *primaryHeader = primary->getHeader(primary);
     if (isNotEmpty(primaryHeader))
 	fprintf(self->f, "# Header from primary input:\n%s", primaryHeader);
     printHeaderColumns(self, primary, TRUE);
     struct annoStreamer *grator;
     for (grator = integrators;  grator != NULL;  grator = grator->next)
 	printHeaderColumns(self, grator, FALSE);
     fputc('\n', self->f);
     self->needHeader = FALSE;
     }
 }
 
 static double wigRowAvg(struct annoRow *row)
 /* Return the average value of floats in row->data. */
 {
 float *vector = row->data;
 int len = row->end - row->start;
 double sum = 0.0;
 int i;
 for (i = 0;  i < len;  i++)
     sum += vector[i];
 return sum / (double)len;
 }
 
 static char **bed4WordsFromAnnoRow(struct annoRow *row, char *fourth)
 /* Return an array of 4 words with row's chrom, chromStart, and chromEnd, and cloned fourth. */
 {
 char **words;
 AllocArray(words, 4);
 words[0] = cloneString(row->chrom);
 char buf[PATH_LEN];
 safef(buf, sizeof(buf), "%u", row->start);
 words[1] = cloneString(buf);
 safef(buf, sizeof(buf), "%u", row->end);
 words[2] = cloneString(buf);
 words[3] = cloneString(fourth);
 return words;
 }
 
 static char **wordsFromWigRowAvg(struct annoRow *row)
 /* Return chrom, chromStart, chromEnd and a string containing the average of values in row. */
 {
 double avg = wigRowAvg(row);
 char avgStr[32];
 safef(avgStr, sizeof(avgStr), "%lf", avg);
 return bed4WordsFromAnnoRow(row, avgStr);
 }
 
 static char **wordsFromWigRowVals(struct annoRow *row)
 /* Return chrom, chromStart, chromEnd and a string containing comma-sep per-base wiggle values. */
 {
 float *vector = row->data;
 int len = row->end - row->start;
 struct dyString *dy = dyStringNew(10*len);
 int i;
 for (i = 0;  i < len;  i++)
     dyStringPrintf(dy, "%g,", vector[i]);
 char **words = bed4WordsFromAnnoRow(row, dy->string);
 dyStringFree(&dy);
 return words;
 }
 
 static char **wordsFromRow(struct annoRow *row, struct annoStreamer *source,
 			   boolean *retFreeWhenDone)
 /* If source->rowType is arWords, return its words.  Otherwise translate row->data into words. */
 {
 if (row == NULL)
     return NULL;
 boolean freeWhenDone = FALSE;
 char **words = NULL;
 if (source->rowType == arWords)
     words = row->data;
 else if (source->rowType == arWig)
     {
     freeWhenDone = TRUE;
-    //#*** config options: avg? more stats? list of values?
-    boolean doAvg = TRUE;
     if (doAvg)
 	words = wordsFromWigRowAvg(row);
     else
 	words = wordsFromWigRowVals(row);
     }
 else
     errAbort("annoFormatTab: unrecognized row type %d from source %s",
 	     source->rowType, source->name);
 if (retFreeWhenDone != NULL)
     *retFreeWhenDone = freeWhenDone;
 return words;
 }
 
 static void printColumns(struct annoFormatTab *self, struct annoStreamer *streamer,
 			 struct annoRow *row, boolean isFirst)
 /* Print columns in streamer's row (if NULL, print the right number of empty fields). */
 {
 FILE *f = self->f;
 char *sourceName = streamer->name;
 boolean freeWhenDone = FALSE;
 char **words = wordsFromRow(row, streamer, &freeWhenDone);
 struct asColumn *col;
 int i;
 for (col = streamer->asObj->columnList, i = 0;  col != NULL;  col = col->next, i++)
     {
     if (columnIsIncluded(self, sourceName, col->name))
         {
         if (isFirst)
             isFirst = FALSE;
         else
             fputc('\t', f);
         if (words != NULL)
             fputs((words[i] ? words[i] : ""), f);
         }
     }
 int wordCount = i;
 if (freeWhenDone)
     {
     for (i = 0;  i < wordCount;  i++)
         freeMem(words[i]);
     freeMem(words);
     }
 }
 
 static void aftComment(struct annoFormatter *fSelf, char *content)
 /* Print out a comment line. */
 {
 if (strchr(content, '\n'))
     errAbort("aftComment: no multi-line input");
 struct annoFormatTab *self = (struct annoFormatTab *)fSelf;
 fprintf(self->f, "# %s\n", content);
 }
 
 static void aftFormatOne(struct annoFormatter *vSelf, struct annoStreamRows *primaryData,
 			 struct annoStreamRows *gratorData, int gratorCount)
 /* Print out tab-separated columns that we have gathered in prior calls to aftCollect,
  * and start over fresh for the next line of output. */
 {
 struct annoFormatTab *self = (struct annoFormatTab *)vSelf;
 // Got one row from primary; what's the largest # of rows from any grator?
 int maxRows = 1;
 int iG;
 for (iG = 0;  iG < gratorCount;  iG++)
     {
     int gratorRowCount = slCount(gratorData[iG].rowList);
     if (gratorRowCount > maxRows)
 	maxRows = gratorRowCount;
     }
 // Print out enough rows to make sure that all grator rows are included.
 int iR;
 for (iR = 0;  iR < maxRows;  iR++)
     {
     printColumns(self, primaryData->streamer, primaryData->rowList, TRUE);
     for (iG = 0;  iG < gratorCount;  iG++)
 	{
 	struct annoRow *gratorRow = slElementFromIx(gratorData[iG].rowList, iR);
 	printColumns(self, gratorData[iG].streamer, gratorRow, FALSE);
 	}
     fputc('\n', self->f);
     }
 }
 
 static void aftClose(struct annoFormatter **pVSelf)
 /* Close file handle, free self. */
 {
 if (pVSelf == NULL)
     return;
 struct annoFormatTab *self = *(struct annoFormatTab **)pVSelf;
 freeMem(self->fileName);
 carefulClose(&(self->f));
 annoFormatterFree(pVSelf);
 }
 
 struct annoFormatter *annoFormatTabNew(char *fileName)
 /* Return a formatter that will write its tab-separated output to fileName. */
 {
 struct annoFormatTab *aft;
 AllocVar(aft);
 struct annoFormatter *formatter = &(aft->formatter);
 formatter->getOptions = annoFormatterGetOptions;
 formatter->setOptions = annoFormatterSetOptions;
 formatter->initialize = aftInitialize;
 formatter->comment = aftComment;
 formatter->formatOne = aftFormatOne;
 formatter->close = aftClose;
 aft->fileName = cloneString(fileName);
 aft->f = mustOpen(fileName, "w");
 aft->needHeader = TRUE;
 return (struct annoFormatter *)aft;
 }