76b1ff207748ca1da3d929e5286ccb72bd0528ef
angie
  Wed Jun 12 13:29:36 2013 -0700
Bugfixes and improvements suggested by Brooke in #6152 notes 36, 38, 42.  refs #6152
diff --git src/hg/lib/annoStreamDb.c src/hg/lib/annoStreamDb.c
index ce9fa2d..890b65c 100644
--- src/hg/lib/annoStreamDb.c
+++ src/hg/lib/annoStreamDb.c
@@ -246,30 +246,34 @@
     }
 if (self->eof)
     return;
 int queryMaxItems = ASD_CHUNK_SIZE;
 if (self->useMaxOutRows && self->maxOutRows < queryMaxItems)
     queryMaxItems = self->maxOutRows;
 if (self->hasBin)
     {
     // Results will be in bin order, but we can restore chromStart order by
     // accumulating initial coarse-bin items and merge-sorting them with
     // subsequent finest-bin items which will be in chromStart order.
     resetMergeState(self);
     self->mergeBins = TRUE;
     self->qLm = lmInit(0);
     }
+if (self->endFieldIndexName != NULL)
+    // Don't let mysql use a (chrom, chromEnd) index because that messes up
+    // sorting by chromStart.
+    dyStringPrintf(query, "IGNORE INDEX (%s) ", self->endFieldIndexName);
 if (sSelf->chrom != NULL)
     {
     uint start = sSelf->regionStart;
     if (minChrom)
 	{
 	if (differentString(minChrom, sSelf->chrom))
 	    errAbort("annoStreamDb %s: nextRow minChrom='%s' but region chrom='%s'",
 		     sSelf->name, minChrom, sSelf->chrom);
 	if (start < minEnd)
 	    start = minEnd;
 	}
     if (self->doNextChunk && start < self->nextChunkStart)
 	start = self->nextChunkStart;
     dyStringPrintf(query, "where %s = '%s' and ", self->chromField, sSelf->chrom);
     if (self->hasBin)
@@ -293,36 +297,45 @@
 	    self->queryChrom = self->queryChrom->next;
 	    self->doNextChunk = FALSE;
 	    }
 	}
     if (self->queryChrom == NULL)
 	self->eof = TRUE;
     else
 	{
 	char *chrom = self->queryChrom->name;
 	int start = 0;
 	if (minChrom != NULL && sameString(chrom, minChrom))
 	    start = minEnd;
 	if (self->doNextChunk && start < self->nextChunkStart)
 	    start = self->nextChunkStart;
 	uint end = annoAssemblySeqSize(self->streamer.assembly, self->queryChrom->name);
-	dyStringPrintf(query, "where %s = '%s' and ", self->chromField, chrom);
+	dyStringPrintf(query, "where %s = '%s' ", self->chromField, chrom);
+	if (start > 0)
+	    {
+	    dyStringAppend(query, "and ");
 	if (self->hasBin)
 	    hAddBinToQuery(start, end, query);
-	dyStringPrintf(query, "%s < %u and %s > %u limit %d",
-		       self->startField, end, self->endField, start, queryMaxItems);
+	    // region end is chromSize, so no need to constrain startField here:
+	    dyStringPrintf(query, "%s > %u ",
+			   self->endField, start);
+	    }
+	dyStringPrintf(query, "limit %d", queryMaxItems);
 	bufferRowsFromSqlQuery(self, query->string, queryMaxItems);
+	// If there happen to be no items on this chrom, try again with the next chrom:
+	if (! self->eof && self->rowBuf.size == 0)
+	    asdDoQueryChunking(self, minChrom, minEnd);
 	}
     }
 dyStringFree(&query);
 }
 
 static char **nextRowFromBuffer(struct annoStreamDb *self, char *minChrom, uint minEnd)
 /* Instead of streaming directly from self->sr, we have buffered up the results
  * of a chunked query; return the head of that queue. */
 {
 struct rowBuf *rowBuf = &self->rowBuf;
 if (rowBuf->ix > rowBuf->size)
     errAbort("annoStreamDb %s: rowBuf overflow (%d > %d)", self->streamer.name,
 	     rowBuf->ix, rowBuf->size);
 if (rowBuf->ix == rowBuf->size)
     // Last row in buffer -- we'll need another query to get subsequent rows (if any).
@@ -555,30 +568,31 @@
     {
     self->hasBin = 1;
     self->minFinestBin = binFromRange(0, 1);
     }
 if (self->hasBin && !sameString(asFirstColumnName, "bin"))
     self->omitBin = 1;
 if (!asdInitBed3Fields(self))
     errAbort("annoStreamDbNew: can't figure out which fields of %s.%s to use as "
 	     "{chrom, chromStart, chromEnd}.", db, table);
 // When a table has an index on endField, sometimes the query optimizer uses it
 // and that ruins the sorting.  Fortunately most tables don't anymore.
 self->endFieldIndexName = sqlTableIndexOnField(self->conn, self->table, self->endField);
 self->notSorted = FALSE;
 self->mergeBins = FALSE;
 self->maxOutRows = maxOutRows;
+self->useMaxOutRows = (maxOutRows > 0);
 self->needQuery = TRUE;
 self->chromList = annoAssemblySeqNames(aa);
 if (slCount(self->chromList) > 1000)
     {
     // Assembly has many sequences (e.g. scaffold-based assembly) --
     // don't break up into per-sequence queries.  Take our chances
     // with mysql being unhappy about the sqlResult being open too long.
     self->doQuery = asdDoQuerySimple;
     self->nextRowRaw = nextRowFromSqlResult;
     }
 else
     {
     // All-chromosome assembly -- if table is large, perform a series of
     // chunked queries.
     self->doQuery = asdDoQueryChunking;