33990deeb6214328424d9acf9bfaf667ae8f3f4f
angie
  Wed Mar 28 15:34:14 2012 -0700
Feature #6152 (Variant Annotation Tool): Added annoStreamWig and annoGrateWig, which led to some significant changes: annoRow is now polymorphic (words[]
vs. wig float[]), annoFilter has a new function to apply filter(s) to a single
number, and annoFormatTab can print out per-base wiggle values or average
wiggle values (hardcoded to do per-base until a real config option is added).
One puny test case was verified against mysql & table browser output.

diff --git src/hg/lib/annoStreamWig.c src/hg/lib/annoStreamWig.c
new file mode 100644
index 0000000..0621bb8
--- /dev/null
+++ src/hg/lib/annoStreamWig.c
@@ -0,0 +1,152 @@
+/* annoStreamWig -- subclass of annoStreamer for wiggle database tables */
+
+#include "annoStreamWig.h"
+#include "annoStreamDb.h"
+#include "jksql.h"
+#include "wiggle.h"
+
+//#*** Should this be defined elsewhere?  autoSql text that annoStreamWigDbNew parses and passes to annoStreamerInit.
+char *annoRowWigAsText = 
+"table annoRowWig\n"
+"\"autoSql description of a single annoRowWig value, for filtering\"\n"
+"    (\n"
+"    float  value;  \"data value for this range\"\n"
+"    )\n"
+    ;
+
+struct annoStreamWig	// annoStreamer subclass: an internal .wig table streamer plus an open .wib file
+    {
+    struct annoStreamer streamer;	// Parent class members & methods / external interface; first member so annoStreamer* casts work
+    // Private members
+    struct annoStreamer *wigStr;	// Internal streamer for .wig as in wiggle db tables
+    FILE *wibF;				// wib file handle; (re)opened by checkWibFile, closed by aswClose
+    char *wibFile;			// name of wib file on which wibF was opened (cloned string, freed by aswClose)
+    };
+
+static void aswSetRegion(struct annoStreamer *vSelf, char *chrom, uint regionStart, uint regionEnd)
+/* Set region on self, then forward the same region to the internal .wig table streamer. */
+{
+struct annoStreamWig *self = (struct annoStreamWig *)vSelf;
+annoStreamerSetRegion(&(self->streamer), chrom, regionStart, regionEnd);
+(self->wigStr->setRegion)(self->wigStr, chrom, regionStart, regionEnd);
+}
+
+static void aswSetQuery(struct annoStreamer *vSelf, struct annoGratorQuery *query)
+/* Set query on self and on the internal .wig streamer (call only from annoGratorQuery, created after streamers). */
+{
+struct annoStreamWig *self = (struct annoStreamWig *)vSelf;
+annoStreamerSetQuery(&(self->streamer), query);
+(self->wigStr->setQuery)(self->wigStr, query);
+}
+
+static void checkWibFile(struct annoStreamWig *self, char *wibFile)
+/* Ensure self->wibF is open on wibFile.  Nothing happens if that file is already the open one;
+ * otherwise any old handle is closed, the old name is freed, and wibFile is opened for reading. */
+{
+boolean alreadyOpen = (self->wibFile != NULL && sameString(self->wibFile, wibFile));
+if (alreadyOpen)
+    return;
+carefulClose(&(self->wibF));
+freeMem(self->wibFile);
+self->wibFile = cloneString(wibFile);
+self->wibF = mustOpen(self->wibFile, "r");
+}
+
+static void paranoidCheckSize(struct wiggle *wiggle)
+/* Sanity check (candidate for removal once code is stable): errAbort unless chromEnd-chromStart == span*count. */
+{
+int bpLen = wiggle->chromEnd - wiggle->chromStart;
+unsigned spanTimesCount = wiggle->span * wiggle->count;
+if (bpLen != spanTimesCount)
+    errAbort("annoStreamWig: length in bases (%u - %u = %d) != span*count (%u * %u = %u)",
+	     wiggle->chromEnd, wiggle->chromStart, bpLen, wiggle->span, wiggle->count, spanTimesCount);
+}
+
+static void getFloatArray(struct annoStreamWig *self, struct wiggle *wiggle,
+			  boolean *retRightFail, int *retValidCount, float *vector)
+/* Expand wiggle's .wib bytes into per-bp floats: vector gets span copies of each of count values;
+ * WIG_NO_DATA and filter failures become NAN, others are tallied in *retValidCount.  Aborts on seek/read failure. */
+{
+paranoidCheckSize(wiggle);	// confirm span*count matches the row's length before any I/O or write to vector
+if (fseek(self->wibF, wiggle->offset, SEEK_SET) != 0)
+    errnoAbort("annoStreamWig: failed to seek to offset %llu in %s", (unsigned long long)wiggle->offset, wiggle->file);
+UBYTE wigBuf[wiggle->count];
+size_t bytesRead = fread(wigBuf, 1, wiggle->count, self->wibF);
+if (bytesRead != wiggle->count)
+    errnoAbort("annoStreamWig: failed to read %u bytes from %s (got %llu)\n",
+	       wiggle->count, wiggle->file, (unsigned long long)bytesRead);
+int i, j, validCount = 0;
+for (i = 0;  i < wiggle->count;  i++)
+    {
+    float value = NAN;		// stays NAN when the byte is WIG_NO_DATA
+    if (wigBuf[i] != WIG_NO_DATA)
+	{
+	value = BIN_TO_VALUE(wigBuf[i], wiggle->lowerLimit, wiggle->dataRange);
+	if (annoFilterWigValueFails(self->streamer.filters, value, retRightFail))
+	    value = NAN;
+	else
+	    validCount++;
+	}
+    int bpOffset = i * wiggle->span;	// each stored value covers span consecutive bases
+    for (j = 0;  j < wiggle->span;  j++)
+	vector[bpOffset + j] = value;
+    }
+if (retValidCount != NULL)
+    *retValidCount = validCount;
+}
+
+static struct annoRow *aswNextRow(struct annoStreamer *vSelf)
+/* Return an annoRow of per-base floats for the next .wig table row worth reporting
+ * (rightFail set, or at least one value survived filtering); NULL when no rows remain. */
+{
+struct annoStreamWig *self = (struct annoStreamWig *)vSelf;
+struct annoRow *wigRow = NULL;
+while ((wigRow = self->wigStr->nextRow(self->wigStr)) != NULL)
+    {
+    struct wiggle wiggle;
+    wiggleStaticLoad((char **)wigRow->data, &wiggle);
+    checkWibFile(self, wiggle.file);
+    // Expand the row's .wib bytes into one float per base of the row's range.
+    int bpLen = wiggle.chromEnd - wiggle.chromStart;
+    float vector[bpLen];
+    boolean rightFail = FALSE;
+    int validCount = 0;
+    getFloatArray(self, &wiggle, &rightFail, &validCount, vector);
+    // A row with no surviving values is skipped unless rightFail was set.
+    boolean keepRow = (rightFail || validCount > 0);
+    struct annoRow *rowOut = NULL;
+    if (keepRow)
+	rowOut = annoRowWigNew(wigRow->chrom, wigRow->start, wigRow->end, rightFail, vector);
+    // The .wig table row is freed on every pass, whether or not it produced output.
+    annoRowFree(&wigRow, WIGGLE_NUM_COLS);
+    if (keepRow)
+	return rowOut;
+    }
+return NULL;
+}
+
+static void aswClose(struct annoStreamer **pVSelf)
+/* Close the .wib file and the internal .wig table streamer, then free self.  No-op if pVSelf or *pVSelf is NULL. */
+{
+if (pVSelf == NULL || *pVSelf == NULL) return;	// nothing to close
+struct annoStreamWig *self = *(struct annoStreamWig **)pVSelf;
+carefulClose(&(self->wibF));
+freeMem(self->wibFile);
+self->wigStr->close(&(self->wigStr));	// release the internal .wig table streamer too
+annoStreamerFree(pVSelf);
+}
+
+struct annoStreamer *annoStreamWigDbNew(char *db, char *table, int maxOutput)
+/* Create an annoStreamer (subclass) object that reads wiggle table db.table.  maxOutput is currently unused. */
+{
+struct annoStreamWig *self = NULL;
+AllocVar(self);
+struct annoStreamer *base = &(self->streamer);
+annoStreamerInit(base, asParseText(annoRowWigAsText));
+base->close = aswClose;
+base->nextRow = aswNextRow;
+base->setQuery = aswSetQuery;
+base->setRegion = aswSetRegion;
+self->wigStr = annoStreamDbNew(db, table, asParseText(wiggleAsText));
+return base;
+}