2b5eb866f050d964d8964ec5a84f7b63889cc6b1 angie Mon Jun 3 14:39:36 2013 -0700 New CGI, hgVai (Variant Annotation Integrator): simple checklist-style UI by which user can select variants that they have uploaded; gene predictions to identify which part of a gene, if any, is hit by each variant; several additional sources of annotations/predictions e.g. dbNSFP scores and conserved elements/scores; and several filters to constrain output to the variants most likely to have a functional effect. Along with the new CGI, there are various lib bugfixes and improvements, a new hg/lib/tests/ testcase, and some test file changes to accommodate data updates to both knownGene and the pg* tables in knownGene. refs #6152 diff --git src/hg/lib/annoStreamDb.c src/hg/lib/annoStreamDb.c index d84b1d3..68444c8 100644 --- src/hg/lib/annoStreamDb.c +++ src/hg/lib/annoStreamDb.c @@ -17,30 +17,31 @@ char *startField; // Name of chromStart-ish column in table char *endField; // Name of chromEnd-ish column in table char *endFieldIndexName; // SQL index on end field, if any (can mess up sorting) int chromIx; // Index of chrom-ish col in autoSql or bin-less table int startIx; // Index of chromStart-ish col in autoSql or bin-less table int endIx; // Index of chromEnd-ish col in autoSql or bin-less table boolean notSorted; // TRUE if table is not sorted (e.g. genbank-updated) boolean hasBin; // 1 if SQL table's first column is bin boolean omitBin; // 1 if table hasBin and autoSql doesn't have bin boolean mergeBins; // TRUE if query results will be in bin order struct annoRow *bigItemQueue; // If mergeBins, accumulate coarse-bin items here struct annoRow *smallItemQueue; // Max 1 item for merge-sorting with bigItemQueue struct lm *qLm; // localmem for merge-sorting queues int minFinestBin; // Smallest bin number for finest bin level boolean gotFinestBin; // Flag that it's time to merge-sort with bigItemQueue + int maxOutRows; // Maximum number of rows we can output. 
}; static void asdSetRegion(struct annoStreamer *vSelf, char *chrom, uint regionStart, uint regionEnd) /* Set region -- and free current sqlResult if there is one. */ { annoStreamerSetRegion(vSelf, chrom, regionStart, regionEnd); struct annoStreamDb *self = (struct annoStreamDb *)vSelf; if (self->sr != NULL) sqlFreeResult(&(self->sr)); } static void asdDoQuery(struct annoStreamDb *self, char *minChrom, uint minEnd) /* Return a sqlResult for a query on table items in position range. */ // NOTE: it would be possible to implement filters at this level, as in hgTables. { @@ -69,30 +70,32 @@ dyStringPrintf(query, " where %s='%s'", self->chromField, streamer->chrom); int chromSize = annoAssemblySeqSize(streamer->assembly, streamer->chrom); if (streamer->regionStart != 0 || streamer->regionEnd != chromSize) { dyStringAppend(query, " and "); if (self->hasBin) hAddBinToQuery(streamer->regionStart, streamer->regionEnd, query); dyStringPrintf(query, "%s < %u and %s > %u", self->startField, streamer->regionEnd, self->endField, streamer->regionStart); } if (self->notSorted) dyStringPrintf(query, " order by %s", self->startField); } else if (self->notSorted) dyStringPrintf(query, " order by %s,%s", self->chromField, self->startField); +if (self->maxOutRows > 0) + dyStringPrintf(query, " limit %d", self->maxOutRows); struct sqlResult *sr = sqlGetResult(self->conn, query->string); dyStringFree(&query); self->sr = sr; } static char **nextRowFiltered(struct annoStreamDb *self, boolean *retRightFail, char *minChrom, uint minEnd) /* Skip past any left-join failures until we get a right-join failure, a passing row, * or end of data. Return row or NULL, and return right-join fail status via retRightFail. 
*/ { int numCols = self->streamer.numCols; char **row = sqlNextRow(self->sr); if (minChrom != NULL && row != NULL) { // Ignore rows that fall completely before (minChrom, minEnd) - save annoGrator's time @@ -278,31 +281,31 @@ struct sqlResult *sr = sqlGetResult(conn, query); char **row; while ((row = sqlNextRow(sr)) != NULL) { if (sameString(row[4], field)) { indexName = cloneString(row[2]); break; } } sqlFreeResult(&sr); return indexName; } struct annoStreamer *annoStreamDbNew(char *db, char *table, struct annoAssembly *aa, - struct asObject *asObj) + struct asObject *asObj, int maxOutRows) /* Create an annoStreamer (subclass) object from a database table described by asObj. */ { struct sqlConnection *conn = hAllocConn(db); if (!sqlTableExists(conn, table)) errAbort("annoStreamDbNew: table '%s' doesn't exist in database '%s'", table, db); struct annoStreamDb *self = NULL; AllocVar(self); struct annoStreamer *streamer = &(self->streamer); int dbtLen = strlen(db) + strlen(table) + 2; char dbTable[dbtLen]; safef(dbTable, dbtLen, "%s.%s", db, table); annoStreamerInit(streamer, aa, asObj, dbTable); streamer->rowType = arWords; streamer->setRegion = asdSetRegion; streamer->nextRow = asdNextRow; @@ -313,17 +316,18 @@ if (sqlFieldIndex(self->conn, self->table, "bin") == 0) { self->hasBin = 1; self->minFinestBin = binFromRange(0, 1); } if (self->hasBin && !sameString(asFirstColumnName, "bin")) self->omitBin = 1; if (!asdInitBed3Fields(self)) errAbort("annoStreamDbNew: can't figure out which fields of %s.%s to use as " "{chrom, chromStart, chromEnd}.", db, table); // When a table has an index on endField, sometimes the query optimizer uses it // and that ruins the sorting. Fortunately most tables don't anymore. self->endFieldIndexName = sqlTableIndexOnField(self->conn, self->table, self->endField); self->notSorted = FALSE; self->mergeBins = FALSE; +self->maxOutRows = maxOutRows; return (struct annoStreamer *)self; }