50bf71bc5c57303fb4d7225283f48cd13ba41bcf angie Wed Nov 18 23:20:57 2015 -0800 Change to interface of annoStreamDbNew: instead of making the caller pass in an asObject, the caller may now pass in a parsed JSON config object (or NULL). annoStreamDbNew will use this config object to determine whether related tables need to be joined with the track table, and produce its own asObj. It doesn't actually do that yet -- this is just an interface change that will enable it to do so. refs #15544 diff --git src/hg/lib/annoStreamDb.c src/hg/lib/annoStreamDb.c index 8348420..c278f8e 100644 --- src/hg/lib/annoStreamDb.c +++ src/hg/lib/annoStreamDb.c @@ -1,31 +1,33 @@ /* annoStreamDb -- subclass of annoStreamer for database tables */ /* Copyright (C) 2014 The Regents of the University of California * See README in this or parent directory for licensing information. */ #include "annoStreamDb.h" #include "annoGratorQuery.h" #include "binRange.h" +#include "hAnno.h" #include "hdb.h" #include "sqlNum.h" struct annoStreamDb { struct annoStreamer streamer; // Parent class members & methods // Private members - struct sqlConnection *conn; // Database connection (e.g. hg19 or customTrash) + char *db; // Database name (e.g. hg19 or customTrash) + struct sqlConnection *conn; // Database connection struct sqlResult *sr; // SQL query result from which we grab rows char *table; // Table name, must exist in database struct dyString *(*makeBaselineQuery)(struct annoStreamDb *self, boolean *retHasWhere); /* Provide baseline query, by default just 'select * from <table>'. * Override this to make a query with specific fields, joins etc. * If the returned query includes a join/where, set *retHasWhere to TRUE. 
*/ // These members enable us to extract coords from the otherwise unknown row: char *chromField; // Name of chrom-ish column in table char *startField; // Name of chromStart-ish column in table char *endField; // Name of chromEnd-ish column in table int chromIx; // Index of chrom-ish col in autoSql or bin-less table int startIx; // Index of chromStart-ish col in autoSql or bin-less table int endIx; // Index of chromEnd-ish col in autoSql or bin-less table @@ -672,50 +674,89 @@ // because of new rows at end, so we have to 'order by'. { return (sameString(table, "refGene") || sameString(table, "refFlat") || sameString(table, "xenoRefGene") || sameString(table, "xenoRefFlat") || sameString(table, "all_mrna") || sameString(table, "xenoMrna") || sameString(table, "all_est") || sameString(table, "xenoEst") || sameString(table, "refSeqAli") || sameString(table, "xenoRefSeqAli")); } static boolean isPubsTable(char *table) // Not absolutely every pubs* table is unsorted, but most of them are. { return startsWith("pubs", table); } +static struct asObject *asdParseConfig(struct annoStreamDb *self, struct jsonElement *configEl) +/* Extract the autoSql for self->table from the database. + * If configEl is not NULL, expect it to be a description of related tables and fields like this: + * config = { "relatedTables": [ { "table": "hg19.kgXref", + * "fields": ["geneSymbol", "description"] }, + * { "table": "hg19.knownCanonical", + * "fields": ["clusterId"] } + * ] } + * If so, unpack the [db.]tables and fields into self->tableFieldList and append autoSql + * column descriptions for each field to the autoSql object that describes our output. 
*/ +{ +//#*** TODO: hAnnoGetAutoSqlForDbTable should do its own split-table checking +char maybeSplitTable[HDB_MAX_TABLE_STRING]; +if (!hFindSplitTable(self->db, NULL, self->table, maybeSplitTable, NULL)) + errAbort("annoStreamDbNew: can't find table (or split table) for '%s.%s'", + self->db, self->table); +struct asObject *asObj = hAnnoGetAutoSqlForDbTable(self->db, maybeSplitTable, NULL, TRUE); +if (configEl != NULL) + { + uglyf("Implement me!\n"); + } +return asObj; +} + struct annoStreamer *annoStreamDbNew(char *db, char *table, struct annoAssembly *aa, - struct asObject *asObj, int maxOutRows) -/* Create an annoStreamer (subclass) object from a database table described by asObj. */ + int maxOutRows, struct jsonElement *configEl) +/* Create an annoStreamer (subclass) object from a database table. + * If configEl is NULL, then the streamer produces output from all fields + * (except bin, unless table's autoSql includes bin). + * Otherwise, configEl is a json object with a member 'relatedTables' that specifies + * related tables and fields to join with table, for example: + * config = { "relatedTables": [ { "table": "hg19.kgXref", + * "fields": ["geneSymbol", "description"] }, + * { "table": "hg19.knownCanonical", + * "fields": ["clusterId"] } + * ] } + * -- the streamer's autoSql will be constructed by appending autoSql column + * descriptions to the columns of table. + * Caller may free db, table, and configEl when done with them, but must keep the + * annoAssembly aa alive for the lifetime of the returned annoStreamer. 
*/ { struct sqlConnection *conn = hAllocConn(db); if (!sqlTableExists(conn, table)) errAbort("annoStreamDbNew: table '%s' doesn't exist in database '%s'", table, db); struct annoStreamDb *self = NULL; AllocVar(self); +self->conn = conn; +self->db = cloneString(db); +self->table = cloneString(table); +struct asObject *asObj = asdParseConfig(self, configEl); struct annoStreamer *streamer = &(self->streamer); int dbtLen = strlen(db) + strlen(table) + 2; -char dbTable[dbtLen]; -safef(dbTable, dbtLen, "%s.%s", db, table); -annoStreamerInit(streamer, aa, asObj, dbTable); +char streamerName[dbtLen]; +safef(streamerName, sizeof(streamerName), "%s.%s", db, table); +annoStreamerInit(streamer, aa, asObj, streamerName); streamer->rowType = arWords; streamer->setRegion = asdSetRegion; streamer->nextRow = asdNextRow; streamer->close = asdClose; -self->conn = conn; -self->table = cloneString(table); char *asFirstColumnName = streamer->asObj->columnList->name; if (sqlFieldIndex(self->conn, self->table, "bin") == 0) { self->hasBin = 1; self->minFinestBin = binFromRange(0, 1); } if (self->hasBin && !sameString(asFirstColumnName, "bin")) self->omitBin = 1; if (!asdInitBed3Fields(self)) errAbort("annoStreamDbNew: can't figure out which fields of %s.%s to use as " "{chrom, chromStart, chromEnd}.", db, table); self->makeBaselineQuery = asdMakeBaselineQuery; // When a table has an index on endField, sometimes the query optimizer uses it // and that ruins the sorting. Fortunately most tables don't anymore. self->endFieldIndexName = sqlTableIndexOnField(self->conn, self->table, self->endField);