85be21057a058a0bdb12caac2bae553376442f5a angie Mon Apr 15 10:56:28 2013 -0700 Refactoring to remove dependencies on annoGratorQuery from streamers,grators and formatters. Instead, provide basic assembly info and explicitly pass streamers(/grators) along with the rows that they produced into formatters. ref #6152 diff --git src/lib/annoGratorQuery.c src/lib/annoGratorQuery.c index 2b4e986..2d783bf 100644 --- src/lib/annoGratorQuery.c +++ src/lib/annoGratorQuery.c @@ -1,154 +1,126 @@ /* annoGratorQuery -- framework for integrating genomic annotations from many sources */ #include "annoGratorQuery.h" #include "errabort.h" #include "obscure.h" -struct annoGratorQuery *annoGratorQueryNew(char *assemblyName, struct hash *chromSizes, - struct twoBitFile *tbf, +struct annoGratorQuery +/* Representation of a complex query: multiple sources, each with its own filters, + * output data and means of integration, aggregated and output by a formatter. */ + { + struct annoAssembly *assembly; // Genome assembly to which annotations belong + struct annoStreamer *primarySource; // Annotations to be integrated with other annos. + struct annoGrator *integrators; // Annotations & methods for integrating w/primary + struct annoFormatter *formatters; // Writers of output collected from primary & intg's + }; + +struct annoGratorQuery *annoGratorQueryNew(struct annoAssembly *assembly, struct annoStreamer *primarySource, struct annoGrator *integrators, struct annoFormatter *formatters) /* Create an annoGratorQuery from all of its components, and introduce components to each other. - * Either chromSizes or tbf may be NULL. integrators may be NULL. - * All other inputs must be non-NULL. */ + * integrators may be NULL. All other inputs must be non-NULL. */ { -if (assemblyName == NULL) - errAbort("annoGratorQueryNew: assemblyName can't be NULL"); -if (chromSizes == NULL && tbf == NULL) - errAbort("annoGratorQueryNew: chromSizes and tbf can't both be NULL"); +if (assembly == NULL) + errAbort("annoGratorQueryNew: assembly can't be NULL"); if (primarySource == NULL) errAbort("annoGratorQueryNew: primarySource can't be NULL"); if (formatters == NULL) errAbort("annoGratorQueryNew: formatters can't be NULL"); struct annoGratorQuery *query = NULL; AllocVar(query); -if (tbf != NULL) - { - if (chromSizes != NULL) - { - // Ensure that tbf and chromSizes are consistent. - struct hashEl *hel; - struct hashCookie cookie = hashFirst(chromSizes); - while ((hel = hashNext(&cookie)) != NULL) - { - char *chrom = hel->name; - int size = ptToInt(hel->val); - int tbfSize = twoBitSeqSize(tbf, chrom); - if (tbfSize != size) - errAbort("Inconsistent size for %s: %s has %d but chromSizes hash has %d", - chrom, tbf->fileName, tbfSize, size); - } - } - else - { - // Make our own chromSizes from tbf info. We will leak this but I don't expect - // many annoGratorQuery's in the same process. - chromSizes = hashNew(0); - struct slName *tbfSeqs = twoBitSeqNames(tbf->fileName), *seq; - for (seq = tbfSeqs; seq != NULL; seq = seq->next) - hashAddInt(chromSizes, seq->name, twoBitSeqSize(tbf, seq->name)); - query->csAllocdHere = TRUE; - } - } -query->assemblyName = cloneString(assemblyName); -query->chromSizes = chromSizes; -query->tbf = tbf; +query->assembly = assembly; query->primarySource = primarySource; query->integrators = integrators; query->formatters = formatters; -// Set streamers' and formatters' query pointer. -primarySource->setQuery(primarySource, query); -struct annoStreamer *grator = (struct annoStreamer *)(query->integrators); -for (; grator != NULL; grator = grator->next) - grator->setQuery(grator, query); struct annoFormatter *formatter; for (formatter = query->formatters; formatter != NULL; formatter = formatter->next) - formatter->initialize(formatter, query); + formatter->initialize(formatter, primarySource, (struct annoStreamer *)integrators); return query; } void annoGratorQuerySetRegion(struct annoGratorQuery *query, char *chrom, uint rStart, uint rEnd) /* Set genomic region for query; if chrom is NULL, position is whole genome. */ { if (chrom != NULL) { - uint chromSize = (uint)hashIntVal(query->chromSizes, chrom); + uint chromSize = annoAssemblySeqSize(query->assembly, chrom); if (rEnd < rStart) errAbort("annoGratorQuerySetRegion: rStart (%u) can't be greater than rEnd (%u)", rStart, rEnd); if (rEnd > chromSize) errAbort("annoGratorQuerySetRegion: rEnd (%u) can't be greater than chrom %s size (%u)", rEnd, chrom, chromSize); if (rEnd == 0) rEnd = chromSize; } // Alert all streamers that they should now send data from a possibly different region: query->primarySource->setRegion(query->primarySource, chrom, rStart, rEnd); struct annoStreamer *grator = (struct annoStreamer *)(query->integrators); for (; grator != NULL; grator = grator->next) grator->setRegion(grator, chrom, rStart, rEnd); -//#*** formatters should be told too, in case the info should go in the header, or if -//#*** they should clip output to search region.... } void annoGratorQueryExecute(struct annoGratorQuery *query) /* For each annoRow from query->primarySource, invoke integrators and pass their annoRows * to formatters. */ { struct annoStreamer *primarySrc = query->primarySource; -struct annoFormatter *formatter = NULL; +struct annoStreamRows *primaryData = annoStreamRowsNew(primarySrc); +struct annoStreamRows *gratorData = NULL; +int gratorCount = slCount(query->integrators); +if (gratorCount > 0) + { + struct annoStreamer *gratorStreamList = (struct annoStreamer *)query->integrators; + gratorData = annoStreamRowsNew(gratorStreamList); + } struct annoRow *primaryRow = NULL; struct lm *lm = lmInit(0); while ((primaryRow = primarySrc->nextRow(primarySrc, lm)) != NULL) { if (primaryRow->rightJoinFail) continue; - struct slRef *gratorRowList = NULL; + primaryData->rowList = primaryRow; boolean rjFilterFailed = FALSE; - struct annoStreamer *grator = (struct annoStreamer *)(query->integrators); - for (; grator != NULL; grator = grator->next) + int i; + for (i = 0; i < gratorCount; i++) { - struct annoGrator *realGrator = (struct annoGrator *)grator; - struct annoRow *gratorRows = realGrator->integrate(realGrator, primaryRow, - &rjFilterFailed, lm); - slAddHead(&gratorRowList, slRefNew(gratorRows)); + + struct annoGrator *grator = (struct annoGrator *)gratorData[i].streamer; + gratorData[i].rowList = grator->integrate(grator, primaryRow, &rjFilterFailed, lm); if (rjFilterFailed) break; } - slReverse(&gratorRowList); + struct annoFormatter *formatter = NULL; for (formatter = query->formatters; formatter != NULL; formatter = formatter->next) if (!rjFilterFailed) - formatter->formatOne(formatter, primaryRow, gratorRowList); - struct slRef *oneRowList = gratorRowList; + formatter->formatOne(formatter, primaryData, gratorData, gratorCount); lmCleanup(&lm); lm = lmInit(0); - slFreeList(&oneRowList); } +freez(&primaryData); +freez(&gratorData); lmCleanup(&lm); } void annoGratorQueryFree(struct annoGratorQuery **pQuery) /* Close and free all inputs and outputs; free self. */ { if (pQuery == NULL) return; struct annoGratorQuery *query = *pQuery; -freez(&(query->assemblyName)); -if (query->csAllocdHere) - hashFree(&(query->chromSizes)); query->primarySource->close(&(query->primarySource)); struct annoStreamer *grator = (struct annoStreamer *)(query->integrators), *nextGrator; for (; grator != NULL; grator = nextGrator) { nextGrator = grator->next; grator->close(&grator); } struct annoFormatter *formatter, *nextFormatter; for (formatter = query->formatters; formatter != NULL; formatter = nextFormatter) { nextFormatter = formatter->next; formatter->close(&formatter); } freez(pQuery); }