70668f5dd4d6bbcf079a1025649e17b9885335c7 angie Mon May 13 10:17:53 2013 -0700 Added 2 new args to annoStreamer:nextRow: minChrom and minEnd (which could also be called regionStart depending on point of view). Streamers may use those hints to skip over data that precedes minChrom and minEnd, to avoid the overhead of creating annoRows that annoGrators will then have to skip over. When primary data are sparse and grator data are very dense, this saves significant memory and user-cycles. Unfortunately mysql can still be the bottleneck for elapsed time. Room for improvement in annoStreamDb: when assembly has a reasonably small number of sequences (<1000), genome-wide queries could be internally broken down into per-seq queries; that would let us skip over chroms that precede minChrom. refs #6152 diff --git src/inc/annoStreamer.h src/inc/annoStreamer.h index 25f2fec..0b2836a 100644 --- src/inc/annoStreamer.h +++ src/inc/annoStreamer.h @@ -24,32 +24,35 @@ struct asObject *(*getAutoSqlObject)(struct annoStreamer *self); void (*setAutoSqlObject)(struct annoStreamer *self, struct asObject *asObj); /* Get and set autoSql representation (do not modify or free!) */ void (*setRegion)(struct annoStreamer *self, char *chrom, uint rStart, uint rEnd); /* Set genomic region for query; if chrom is NULL, region is whole genome. * This must be called on all annoGrator components in query, not a subset. */ char *(*getHeader)(struct annoStreamer *self); /* Get the file header as a string (possibly NULL, possibly multi-line). */ struct annoFilter *(*getFilters)(struct annoStreamer *self); void (*setFilters)(struct annoStreamer *self, struct annoFilter *newFilters); /* Get and set filters */ - struct annoRow *(*nextRow)(struct annoStreamer *self, struct lm *lm); - /* Get the next item from this source. Use localmem lm to store returned annoRow. */ + struct annoRow *(*nextRow)(struct annoStreamer *self, char *minChrom, uint minEnd, + struct lm *lm); + /* Get the next item from this source. 
If minChrom is non-NULL, optionally use + * that as a hint to skip items that precede {minChrom, minEnd}. + * Use localmem lm to store returned annoRow. */ void (*close)(struct annoStreamer **pSelf); /* Close connection to source and free self. */ // Public members -- callers are on the honor system to access these read-only. struct annoAssembly *assembly; // Genome assembly that provides coords for annotations struct asObject *asObj; // Annotation data definition char *name; // Short identifier, e.g. name of file or database table struct annoFilter *filters; // Filters to constrain output char *chrom; // Non-NULL if querying a particular region uint regionStart; // If chrom is non-NULL, region start coord uint regionEnd; // If chrom is non-NULL, region end coord boolean positionIsGenome; // True if doing a whole-genome query enum annoRowType rowType; // Type of annotations (words or wiggle data) int numCols; // For word-based annotations, number of words/columns