31f492f7d887cacb8e1300c6e95dd2d16d08ea21 angie Mon Feb 11 10:07:26 2013 -0800 annoGrator:- Added annoGratorInit so annoGrator can be subclassed and extended; - Added enum annoGratorOverlap and setOverlapRule method to specify how to treat overlap between primary and internal rows: ignore it, require it, or forbid it, via integrate method's retRJFilterFailed. - Added setAutoSqlObject method to update both the outward-facing (streamer interface) asObj and internal state derived from it. annoGratorGpVar: - Added cdsOnly property to keep only variants that change CDS (via retRJFilterFailed). - The language of aggvGenRows implies that multiple effect rows may arise from one {variant x gene}. In case that happens, use slCat instead of slAddHead to keep all effect rows. - Added slReverse to make sure output rows are ordered by position. That required shuffling some lines in an expected-results file. - Renamed some variables because there are now 3 types of "self": streamer, grator, gpVar. diff --git src/inc/annoGrator.h src/inc/annoGrator.h index 5bc625a..a5d4fad 100644 --- src/inc/annoGrator.h +++ src/inc/annoGrator.h @@ -1,55 +1,74 @@ /* annoGrator -- annoStreamer that integrates genomic annotations from two annoStreamers */ // Subclasses of annoGrator can add new columns of output such as predicted function given // a variant and a gene; the base class simply intersects by position, returning // all rows from its internal data source that overlap the position of primaryRow. // The interface to an annoGrator is almost the same as the interface to an annoStreamer, // *except* you call integrate() instead of nextRow(). 
#ifndef ANNOGRATOR_H #define ANNOGRATOR_H #include "annoStreamer.h" +enum annoGratorOverlap +/* How integrate() method handles overlap (or non-overlap) of internal rows with primary row */ + { + agoNoConstraint, // Default: overlap with primary row doesn't matter + agoMustOverlap, // integrate() sets RJFilterFail if no internal rows overlap primary + agoMustNotOverlap // integrate() sets RJFilterFail if any internal rows overlap primary + }; + struct annoGrator /* annoStreamer that can integrate an internal annoStreamer's data * with data from a primary source. */ { struct annoStreamer streamer; // external annoStreamer interface // Public method that makes this a 'grator: struct annoRow *(*integrate)(struct annoGrator *self, struct annoRow *primaryRow, boolean *retRJFilterFailed); /* Integrate internal source's data with single row of primary source's data */ + void (*setOverlapRule)(struct annoGrator *self, enum annoGratorOverlap rule); + /* Tell annoGrator how to handle overlap of its rows with primary row. */ + // Private members -- callers are on the honor system to access these using only methods above. 
struct annoStreamer *mySource; // internal source struct annoRow *qHead; // head of FIFO queue of rows from internal source struct annoRow *qTail; // tail of FIFO queue of rows from internal source char *prevPChrom; // for detection of unsorted input from primary uint prevPStart; // for detection of unsorted input from primary boolean eof; // stop asking internal source for rows when it's done - boolean haveRJIncludeFilter; // TRUE if some filter has !isExclude && rightJoin; - // if TRUE and there are no overlapping rows, then RJ fail + boolean haveRJIncludeFilter; // TRUE if some filter has !isExclude && rightJoin + enum annoGratorOverlap overlapRule; // constraint (if any) on overlap of internal & primary }; #endif//ndef ANNOGRATOR_H // ---------------------- annoGrator default methods ----------------------- struct annoRow *annoGratorIntegrate(struct annoGrator *self, struct annoRow *primaryRow, boolean *retRJFilterFailed); /* Given a single row from the primary source, get all overlapping rows from internal - * source, and produce joined output rows. If retRJFilterFailed is non-NULL and any - * overlapping row has a rightJoin filter failure (see annoFilter.h), - * set retRJFilterFailed and stop. */ + * source, and produce joined output rows. + * If retRJFilterFailed is non-NULL: + * - any overlapping row has a rightJoin filter failure (see annoFilter.h), or + * - overlap rule is agoMustOverlap and no rows overlap, or + * - overlap rule is agoMustNotOverlap and any overlapping row is found, + * then set retRJFilterFailed and stop. */ + +void annoGratorInit(struct annoGrator *self, struct annoStreamer *mySource); +/* Initialize an integrator of columns from mySource with (positions of) + * rows passed to integrate(). + * mySource becomes property of the annoGrator. */ struct annoGrator *annoGratorNew(struct annoStreamer *mySource); /* Make a new integrator of columns from mySource with (positions of) rows passed to integrate(). 
* mySource becomes property of the new annoGrator. */ void annoGratorSetQuery(struct annoStreamer *vSelf, struct annoGratorQuery *query); /* Set query (to be called only by annoGratorQuery which is created after streamers). */ void annoGratorClose(struct annoStreamer **pSelf); /* Free self (including mySource). */