76b1ff207748ca1da3d929e5286ccb72bd0528ef angie Wed Jun 12 13:29:36 2013 -0700 Bugfixes and improvements suggested by Brooke in #6152 notes 36, 38, 42. refs #6152 diff --git src/hg/lib/annoStreamDb.c src/hg/lib/annoStreamDb.c index ce9fa2d..890b65c 100644 --- src/hg/lib/annoStreamDb.c +++ src/hg/lib/annoStreamDb.c @@ -246,30 +246,34 @@ } if (self->eof) return; int queryMaxItems = ASD_CHUNK_SIZE; if (self->useMaxOutRows && self->maxOutRows < queryMaxItems) queryMaxItems = self->maxOutRows; if (self->hasBin) { // Results will be in bin order, but we can restore chromStart order by // accumulating initial coarse-bin items and merge-sorting them with // subsequent finest-bin items which will be in chromStart order. resetMergeState(self); self->mergeBins = TRUE; self->qLm = lmInit(0); } +if (self->endFieldIndexName != NULL) + // Don't let mysql use a (chrom, chromEnd) index because that messes up + // sorting by chromStart. + dyStringPrintf(query, "IGNORE INDEX (%s) ", self->endFieldIndexName); if (sSelf->chrom != NULL) { uint start = sSelf->regionStart; if (minChrom) { if (differentString(minChrom, sSelf->chrom)) errAbort("annoStreamDb %s: nextRow minChrom='%s' but region chrom='%s'", sSelf->name, minChrom, sSelf->chrom); if (start < minEnd) start = minEnd; } if (self->doNextChunk && start < self->nextChunkStart) start = self->nextChunkStart; dyStringPrintf(query, "where %s = '%s' and ", self->chromField, sSelf->chrom); if (self->hasBin) @@ -293,36 +297,45 @@ self->queryChrom = self->queryChrom->next; self->doNextChunk = FALSE; } } if (self->queryChrom == NULL) self->eof = TRUE; else { char *chrom = self->queryChrom->name; int start = 0; if (minChrom != NULL && sameString(chrom, minChrom)) start = minEnd; if (self->doNextChunk && start < self->nextChunkStart) start = self->nextChunkStart; uint end = annoAssemblySeqSize(self->streamer.assembly, self->queryChrom->name); - dyStringPrintf(query, "where %s = '%s' and ", self->chromField, chrom); + dyStringPrintf(query, "where %s = '%s' ", self->chromField, chrom); + if (start > 0) + { + dyStringAppend(query, "and "); if (self->hasBin) hAddBinToQuery(start, end, query); - dyStringPrintf(query, "%s < %u and %s > %u limit %d", - self->startField, end, self->endField, start, queryMaxItems); + // region end is chromSize, so no need to constrain startField here: + dyStringPrintf(query, "%s > %u ", + self->endField, start); + } + dyStringPrintf(query, "limit %d", queryMaxItems); bufferRowsFromSqlQuery(self, query->string, queryMaxItems); + // If there happens to be no items on chrom, try again with the next chrom: + if (! self->eof && self->rowBuf.size == 0) + asdDoQueryChunking(self, minChrom, minEnd); } } dyStringFree(&query); } static char **nextRowFromBuffer(struct annoStreamDb *self, char *minChrom, uint minEnd) /* Instead of streaming directly from self->sr, we have buffered up the results * of a chunked query; return the head of that queue. */ { struct rowBuf *rowBuf = &self->rowBuf; if (rowBuf->ix > rowBuf->size) errAbort("annoStreamDb %s: rowBuf overflow (%d > %d)", self->streamer.name, rowBuf->ix, rowBuf->size); if (rowBuf->ix == rowBuf->size) // Last row in buffer -- we'll need another query to get subsequent rows (if any). @@ -555,30 +568,31 @@ { self->hasBin = 1; self->minFinestBin = binFromRange(0, 1); } if (self->hasBin && !sameString(asFirstColumnName, "bin")) self->omitBin = 1; if (!asdInitBed3Fields(self)) errAbort("annoStreamDbNew: can't figure out which fields of %s.%s to use as " "{chrom, chromStart, chromEnd}.", db, table); // When a table has an index on endField, sometimes the query optimizer uses it // and that ruins the sorting. Fortunately most tables don't anymore. self->endFieldIndexName = sqlTableIndexOnField(self->conn, self->table, self->endField); self->notSorted = FALSE; self->mergeBins = FALSE; self->maxOutRows = maxOutRows; +self->useMaxOutRows = (maxOutRows > 0); self->needQuery = TRUE; self->chromList = annoAssemblySeqNames(aa); if (slCount(self->chromList) > 1000) { // Assembly has many sequences (e.g. scaffold-based assembly) -- // don't break up into per-sequence queries. Take our chances // with mysql being unhappy about the sqlResult being open too long. self->doQuery = asdDoQuerySimple; self->nextRowRaw = nextRowFromSqlResult; } else { // All-chromosome assembly -- if table is large, perform a series of // chunked queries. self->doQuery = asdDoQueryChunking;