2b5eb866f050d964d8964ec5a84f7b63889cc6b1
angie
  Mon Jun 3 14:39:36 2013 -0700
New CGI, hgVai (Variant Annotation Integrator): simple checklist-styleUI by which user can select variants that they have uploaded; gene
predictions to identify which part of a gene, if any, is hit by each
variant; several additional sources of annotations/predictions e.g.
dbNSFP scores and conserved elements/scores; and several filters to
constrain output to the variants most likely to have a functional effect.
Along with the new CGI, there are various lib bugfixes and improvements,
a new hg/lib/tests/ testcase, and some test file changes to accomodate
data updates to both knownGene and the pg* tables in knownGene.
refs #6152

diff --git src/hg/lib/annoStreamDb.c src/hg/lib/annoStreamDb.c
index d84b1d3..68444c8 100644
--- src/hg/lib/annoStreamDb.c
+++ src/hg/lib/annoStreamDb.c
@@ -17,30 +17,31 @@
     char *startField;			// Name of chromStart-ish column in table
     char *endField;			// Name of chromEnd-ish column in table
     char *endFieldIndexName;		// SQL index on end field, if any (can mess up sorting)
     int chromIx;			// Index of chrom-ish col in autoSql or bin-less table
     int startIx;			// Index of chromStart-ish col in autoSql or bin-less table
     int endIx;				// Index of chromEnd-ish col in autoSql or bin-less table
     boolean notSorted;			// TRUE if table is not sorted (e.g. genbank-updated)
     boolean hasBin;			// 1 if SQL table's first column is bin
     boolean omitBin;			// 1 if table hasBin and autoSql doesn't have bin
     boolean mergeBins;			// TRUE if query results will be in bin order
     struct annoRow *bigItemQueue;	// If mergeBins, accumulate coarse-bin items here
     struct annoRow *smallItemQueue;	// Max 1 item for merge-sorting with bigItemQueue
     struct lm *qLm;			// localmem for merge-sorting queues
     int minFinestBin;			// Smallest bin number for finest bin level
     boolean gotFinestBin;		// Flag that it's time to merge-sort with bigItemQueue
+    int maxOutRows;			// Maximum number of rows we can output.
     };
 
 static void asdSetRegion(struct annoStreamer *vSelf, char *chrom, uint regionStart, uint regionEnd)
 /* Set region -- and free current sqlResult if there is one. */
 {
 annoStreamerSetRegion(vSelf, chrom, regionStart, regionEnd);
 struct annoStreamDb *self = (struct annoStreamDb *)vSelf;
 if (self->sr != NULL)
     sqlFreeResult(&(self->sr));
 }
 
 static void asdDoQuery(struct annoStreamDb *self, char *minChrom, uint minEnd)
 /* Return a sqlResult for a query on table items in position range. */
 // NOTE: it would be possible to implement filters at this level, as in hgTables.
 {
@@ -69,30 +70,32 @@
     dyStringPrintf(query, " where %s='%s'", self->chromField, streamer->chrom);
     int chromSize = annoAssemblySeqSize(streamer->assembly, streamer->chrom);
     if (streamer->regionStart != 0 || streamer->regionEnd != chromSize)
 	{
 	dyStringAppend(query, " and ");
 	if (self->hasBin)
 	    hAddBinToQuery(streamer->regionStart, streamer->regionEnd, query);
 	dyStringPrintf(query, "%s < %u and %s > %u", self->startField, streamer->regionEnd,
 		       self->endField, streamer->regionStart);
 	}
     if (self->notSorted)
 	dyStringPrintf(query, " order by %s", self->startField);
     }
 else if (self->notSorted)
     dyStringPrintf(query, " order by %s,%s", self->chromField, self->startField);
+if (self->maxOutRows > 0)
+    dyStringPrintf(query, " limit %d", self->maxOutRows);
 struct sqlResult *sr = sqlGetResult(self->conn, query->string);
 dyStringFree(&query);
 self->sr = sr;
 }
 
 static char **nextRowFiltered(struct annoStreamDb *self, boolean *retRightFail,
 			      char *minChrom, uint minEnd)
 /* Skip past any left-join failures until we get a right-join failure, a passing row,
  * or end of data.  Return row or NULL, and return right-join fail status via retRightFail. */
 {
 int numCols = self->streamer.numCols;
 char **row = sqlNextRow(self->sr);
 if (minChrom != NULL && row != NULL)
     {
     // Ignore rows that fall completely before (minChrom, minEnd) - save annoGrator's time
@@ -278,31 +281,31 @@
 struct sqlResult *sr = sqlGetResult(conn, query);
 char **row;
 while ((row = sqlNextRow(sr)) != NULL)
     {
     if (sameString(row[4], field))
 	{
 	indexName = cloneString(row[2]);
 	break;
 	}
     }
 sqlFreeResult(&sr);
 return indexName;
 }
 
 struct annoStreamer *annoStreamDbNew(char *db, char *table, struct annoAssembly *aa,
-				     struct asObject *asObj)
+				     struct asObject *asObj, int maxOutRows)
 /* Create an annoStreamer (subclass) object from a database table described by asObj. */
 {
 struct sqlConnection *conn = hAllocConn(db);
 if (!sqlTableExists(conn, table))
     errAbort("annoStreamDbNew: table '%s' doesn't exist in database '%s'", table, db);
 struct annoStreamDb *self = NULL;
 AllocVar(self);
 struct annoStreamer *streamer = &(self->streamer);
 int dbtLen = strlen(db) + strlen(table) + 2;
 char dbTable[dbtLen];
 safef(dbTable, dbtLen, "%s.%s", db, table);
 annoStreamerInit(streamer, aa, asObj, dbTable);
 streamer->rowType = arWords;
 streamer->setRegion = asdSetRegion;
 streamer->nextRow = asdNextRow;
@@ -313,17 +316,18 @@
 if (sqlFieldIndex(self->conn, self->table, "bin") == 0)
     {
     self->hasBin = 1;
     self->minFinestBin = binFromRange(0, 1);
     }
 if (self->hasBin && !sameString(asFirstColumnName, "bin"))
     self->omitBin = 1;
 if (!asdInitBed3Fields(self))
     errAbort("annoStreamDbNew: can't figure out which fields of %s.%s to use as "
 	     "{chrom, chromStart, chromEnd}.", db, table);
 // When a table has an index on endField, sometimes the query optimizer uses it
 // and that ruins the sorting.  Fortunately most tables don't anymore.
 self->endFieldIndexName = sqlTableIndexOnField(self->conn, self->table, self->endField);
 self->notSorted = FALSE;
 self->mergeBins = FALSE;
+self->maxOutRows = maxOutRows;
 return (struct annoStreamer *)self;
 }