31f492f7d887cacb8e1300c6e95dd2d16d08ea21 angie Mon Feb 11 10:07:26 2013 -0800 annoGrator:- Added annoGratorInit so annoGrator can be subclassed and extended; - Added enum annoGratorOverlap and setOverlapRule method to specify how to treat overlap between primary and internal rows: ignore it, require it, or forbid it, via integrate method's retRJFilterFailed. - Added setAutoSqlObject method to update both the outward-facing (streamer interface) asObj and internal state derived from it. annoGratorGpVar: - Added cdsOnly property to keep only variants that change CDS (via retRJFilterFailed). - The language of aggvGenRows implies that multiple effect rows may arise from one {variant x gene}. In case that happens, use slCat instead of slAddHead to keep all effect rows. - Added slReverse to make sure output rows are ordered by position. That required shuffling some lines in an expected-results file. - Renamed some variables because there are now 3 types of "self": streamer, grator, gpVar. diff --git src/lib/annoGrator.c src/lib/annoGrator.c index 5a19528..66a1770 100644 --- src/lib/annoGrator.c +++ src/lib/annoGrator.c @@ -92,60 +92,65 @@ { self->qTail->next = newRow; self->qTail = newRow; } if (cDifNewP > 0) // newRow->chrom comes after chrom; we're done for now break; } } } } struct annoRow *annoGratorIntegrate(struct annoGrator *self, struct annoRow *primaryRow, boolean *retRJFilterFailed) /* Given a single row from the primary source, get all overlapping rows from internal - * source, and produce joined output rows. If retRJFilterFailed is non-NULL and any - * overlapping row has a rightJoin filter failure (see annoFilter.h), - * set retRJFilterFailed and stop. */ + * source, and produce joined output rows. + * If retRJFilterFailed is non-NULL: + * - any overlapping row has a rightJoin filter failure (see annoFilter.h), or + * - overlap rule is agoMustOverlap and no rows overlap, or + * - overlap rule is agoMustNotOverlap and any overlapping row is found, + * then set retRJFilterFailed and stop. */ { struct annoRow *rowList = NULL; agCheckPrimarySorting(self, primaryRow); agTrimToStart(self, primaryRow->chrom, primaryRow->start); agFetchToEnd(self, primaryRow->chrom, primaryRow->end); boolean rjFailHard = (retRJFilterFailed != NULL); if (rjFailHard) *retRJFilterFailed = FALSE; struct annoRow *qRow; for (qRow = self->qHead; qRow != NULL; qRow = qRow->next) { if (qRow->start < primaryRow->end && qRow->end > primaryRow->start && sameString(qRow->chrom, primaryRow->chrom)) { slAddHead(&rowList, annoRowClone(qRow, self->mySource)); if (rjFailHard && qRow->rightJoinFail) { *retRJFilterFailed = TRUE; break; } } } slReverse(&rowList); // If no rows overlapped primary, and there is a right-join, !isExclude (i.e. isInclude) filter, // then we need to set retRJFilterFailed because the condition was not met to include // the primary item. -if (rowList == NULL && self->haveRJIncludeFilter) +if (retRJFilterFailed && + ((rowList == NULL && (self->haveRJIncludeFilter || self->overlapRule == agoMustOverlap)) || + (rowList != NULL && self->overlapRule == agoMustNotOverlap))) *retRJFilterFailed = TRUE; return rowList; } void annoGratorClose(struct annoStreamer **pSelf) /* Free self (including mySource). */ { if (pSelf == NULL) return; struct annoGrator *self = *(struct annoGrator **)pSelf; annoRowFreeList(&(self->qHead), self->mySource); self->mySource->close(&(self->mySource)); freeMem(self->prevPChrom); freez(pSelf); } @@ -189,34 +194,59 @@ /* Set genomic region for query, and reset internal state. */ { struct annoGrator *self = (struct annoGrator *)vSelf; self->mySource->setRegion((struct annoStreamer *)(self->mySource), chrom, rStart, rEnd); agReset(self); } void annoGratorSetQuery(struct annoStreamer *vSelf, struct annoGratorQuery *query) /* Set query (to be called only by annoGratorQuery which is created after streamers). */ { struct annoGrator *self = (struct annoGrator *)vSelf; self->streamer.query = query; self->mySource->setQuery((struct annoStreamer *)(self->mySource), query); } -struct annoGrator *annoGratorNew(struct annoStreamer *mySource) -/* Make a new integrator of columns from mySource with (positions of) rows passed to integrate(). - * mySource becomes property of the new annoGrator. */ +static void agSetAutoSqlObject(struct annoStreamer *sSelf, struct asObject *asObj) +/* Use new asObj and update internal state derived from asObj. */ +{ +struct annoGrator *gSelf = (struct annoGrator *)sSelf; +annoStreamerSetAutoSqlObject(sSelf, asObj); +gSelf->haveRJIncludeFilter = filtersHaveRJInclude(sSelf->filters); +} + +void agSetOverlapRule(struct annoGrator *self, enum annoGratorOverlap rule) +/* Tell annoGrator how to handle overlap of its rows with primary row. */ +{ +self->overlapRule = rule; +} + +void annoGratorInit(struct annoGrator *self, struct annoStreamer *mySource) +/* Initialize an integrator of columns from mySource with (positions of) + * rows passed to integrate(). + * mySource becomes property of the annoGrator. */ { -struct annoGrator *self; -AllocVar(self); struct annoStreamer *streamer = &(self->streamer); annoStreamerInit(streamer, mySource->getAutoSqlObject(mySource)); streamer->rowType = mySource->rowType; +streamer->setAutoSqlObject = agSetAutoSqlObject; streamer->setFilters = agSetFilters; streamer->setRegion = annoGratorSetRegion; streamer->setQuery = annoGratorSetQuery; streamer->nextRow = noNextRow; streamer->close = annoGratorClose; self->integrate = annoGratorIntegrate; +self->setOverlapRule = agSetOverlapRule; +self->overlapRule = agoNoConstraint; self->mySource = mySource; self->haveRJIncludeFilter = filtersHaveRJInclude(streamer->filters); +} + +struct annoGrator *annoGratorNew(struct annoStreamer *mySource) +/* Make a new integrator of columns from mySource with (positions of) rows passed to integrate(). + * mySource becomes property of the new annoGrator. */ +{ +struct annoGrator *self; +AllocVar(self); +annoGratorInit(self, mySource); return self; }