33990deeb6214328424d9acf9bfaf667ae8f3f4f angie Wed Mar 28 15:34:14 2012 -0700 Feature #6152 (Variant Annotation Tool): Added annoStreamWig and annoGrateWig,which led to some significant changes: annoRow is now polymorphic (words[] vs. wig float[]), annoFilter has a new function to apply filter(s) to a single number, and annoFormatTab can print out per-base wiggle values or average wiggle values (hardcoded to do per-base until a real config option is added). One puny test case was verified against mysql & table browser output. diff --git src/hg/lib/annoStreamWig.c src/hg/lib/annoStreamWig.c new file mode 100644 index 0000000..0621bb8 --- /dev/null +++ src/hg/lib/annoStreamWig.c @@ -0,0 +1,152 @@ +/* annoStreamWig -- subclass of annoStreamer for wiggle database tables */ + +#include "annoStreamWig.h" +#include "annoStreamDb.h" +#include "jksql.h" +#include "wiggle.h" + +//#*** Should this be defined elsewhere? +char *annoRowWigAsText = +"table annoRowWig\n" +"\"autoSql description of a single annoRowWig value, for filtering\"\n" +" (\n" +" float value; \"data value for this range\"\n" +" )\n" + ; + +struct annoStreamWig + { + struct annoStreamer streamer; // Parent class members & methods / external interface + // Private members + struct annoStreamer *wigStr; // Internal streamer for .wig as in wiggle db tables + FILE *wibF; // wib file handle + char *wibFile; // name of wib file on which wibF was opened + }; + +static void aswSetRegion(struct annoStreamer *vSelf, char *chrom, uint regionStart, uint regionEnd) +/* Set region -- and free current sqlResult if there is one. */ +{ +annoStreamerSetRegion(vSelf, chrom, regionStart, regionEnd); +struct annoStreamWig *self = (struct annoStreamWig *)vSelf; +self->wigStr->setRegion(self->wigStr, chrom, regionStart, regionEnd); +} + +static void aswSetQuery(struct annoStreamer *vSelf, struct annoGratorQuery *query) +/* Set query (to be called only by annoGratorQuery which is created after streamers). */ +{ +annoStreamerSetQuery(vSelf, query); +struct annoStreamWig *self = (struct annoStreamWig *)vSelf; +self->wigStr->setQuery((struct annoStreamer *)(self->wigStr), query); +} + +static void checkWibFile(struct annoStreamWig *self, char *wibFile) +/* If self doesn't have a .wib file name and handle open, or if the new wibFile is + * not the same as the old one, update self to use new wibFile. */ +{ +if (self->wibFile == NULL || !sameString(self->wibFile, wibFile)) + { + carefulClose(&(self->wibF)); + freeMem(self->wibFile); + self->wibFile = cloneString(wibFile); + self->wibF = mustOpen(self->wibFile, "r"); + } +} + +static void paranoidCheckSize(struct wiggle *wiggle) +/* paranoid, consider taking this out when code is stable: */ +{ +int bpLen = wiggle->chromEnd - wiggle->chromStart; +if (bpLen != (wiggle->span * wiggle->count)) + errAbort("annoStreamWig: length in bases (%u - %u = %d) != span*count (%u * %u = %u)", + wiggle->chromEnd, wiggle->chromStart, bpLen, + wiggle->span, wiggle->count, (wiggle->span * wiggle->count)); +} + +static void getFloatArray(struct annoStreamWig *self, struct wiggle *wiggle, + boolean *retRightFail, int *retValidCount, float *vector) +/* expand wiggle bytes & spans to per-bp floats; filter values here! */ +{ +fseek(self->wibF, wiggle->offset, SEEK_SET); +UBYTE wigBuf[wiggle->count]; +size_t bytesRead = fread(wigBuf, 1, wiggle->count, self->wibF); +if (bytesRead != wiggle->count) + errnoAbort("annoStreamWig: failed to read %u bytes from %s (got %llu)\n", + wiggle->count, wiggle->file, (unsigned long long)bytesRead); +paranoidCheckSize(wiggle); +int i, j, validCount = 0; +for (i = 0; i < wiggle->count; i++) + { + float value; + if (wigBuf[i] == WIG_NO_DATA) + value = NAN; + else + { + value = BIN_TO_VALUE(wigBuf[i], wiggle->lowerLimit, wiggle->dataRange); + if (annoFilterWigValueFails(self->streamer.filters, value, retRightFail)) + value = NAN; + else + validCount++; + } + int bpOffset = i * wiggle->span; + for (j = 0; j < wiggle->span; j++) + vector[bpOffset + j] = value; + } +if (retValidCount != NULL) + *retValidCount = validCount; +} + +static struct annoRow *aswNextRow(struct annoStreamer *vSelf) +/* Return an annoRow encoding the next chunk of wiggle data, or NULL if there are no more items. */ +{ +struct annoStreamWig *self = (struct annoStreamWig *)vSelf; +struct annoRow *rowOut = NULL; +boolean done = FALSE; +while (!done) + { + struct annoRow *wigRow = self->wigStr->nextRow(self->wigStr); + if (wigRow == NULL) + return NULL; + struct wiggle wiggle; + wiggleStaticLoad((char **)wigRow->data, &wiggle); + checkWibFile(self, wiggle.file); + // translate wigRow + bytes to float vector + boolean rightFail = FALSE; + int validCount = 0; + int bpLen = wiggle.chromEnd - wiggle.chromStart; + float vector[bpLen]; + getFloatArray(self, &wiggle, &rightFail, &validCount, vector); + if (rightFail || validCount > 0) + { + rowOut = annoRowWigNew(wigRow->chrom, wigRow->start, wigRow->end, rightFail, vector); + done = TRUE; + } + annoRowFree(&wigRow, WIGGLE_NUM_COLS); + } +return rowOut; +} + +static void aswClose(struct annoStreamer **pVSelf) +/* Free wiggleDataStream and self. */ +{ +if (pVSelf == NULL) + return; +struct annoStreamWig *self = *(struct annoStreamWig **)pVSelf; +carefulClose(&(self->wibF)); +freeMem(self->wibFile); +annoStreamerFree(pVSelf); +} + +struct annoStreamer *annoStreamWigDbNew(char *db, char *table, int maxOutput) +/* Create an annoStreamer (subclass) object from a wiggle database table. */ +{ +struct annoStreamWig *self = NULL; +AllocVar(self); +struct annoStreamer *streamer = &(self->streamer); +annoStreamerInit(streamer, asParseText(annoRowWigAsText)); +streamer->setRegion = aswSetRegion; +streamer->setQuery = aswSetQuery; +streamer->nextRow = aswNextRow; +streamer->close = aswClose; +self->wigStr = annoStreamDbNew(db, table, asParseText(wiggleAsText)); +return (struct annoStreamer *)self; +}