50bf71bc5c57303fb4d7225283f48cd13ba41bcf
angie
  Wed Nov 18 23:20:57 2015 -0800
Change to interface of annoStreamDbNew: instead of making the caller
pass in an asObject, the caller may now pass in a parsed JSON config
object (or NULL).  annoStreamDbNew will use this config object to
determine whether related tables need to be joined with the track table,
and produce its own asObj.  It doesn't actually do that yet -- this
is just an interface change that will enable it to do so.
refs #15544

diff --git src/hg/lib/annoStreamDb.c src/hg/lib/annoStreamDb.c
index 8348420..c278f8e 100644
--- src/hg/lib/annoStreamDb.c
+++ src/hg/lib/annoStreamDb.c
@@ -1,31 +1,33 @@
 /* annoStreamDb -- subclass of annoStreamer for database tables */
 
 /* Copyright (C) 2014 The Regents of the University of California 
  * See README in this or parent directory for licensing information. */
 
 #include "annoStreamDb.h"
 #include "annoGratorQuery.h"
 #include "binRange.h"
+#include "hAnno.h"
 #include "hdb.h"
 #include "sqlNum.h"
 
 struct annoStreamDb
     {
     struct annoStreamer streamer;	// Parent class members & methods
     // Private members
-    struct sqlConnection *conn;		// Database connection (e.g. hg19 or customTrash)
+    char *db;                           // Database name (e.g. hg19 or customTrash)
+    struct sqlConnection *conn;	        // Database connection
     struct sqlResult *sr;		// SQL query result from which we grab rows
     char *table;			// Table name, must exist in database
 
     struct dyString *(*makeBaselineQuery)(struct annoStreamDb *self, boolean *retHasWhere);
     /* Provide baseline query, by default just 'select * from <table>'.
      * Override this to make a query with specific fields, joins etc.
      * If the returned query includes a join/where, set *retHasWhere to TRUE. */
 
     // These members enable us to extract coords from the otherwise unknown row:
     char *chromField;			// Name of chrom-ish column in table
     char *startField;			// Name of chromStart-ish column in table
     char *endField;			// Name of chromEnd-ish column in table
     int chromIx;			// Index of chrom-ish col in autoSql or bin-less table
     int startIx;			// Index of chromStart-ish col in autoSql or bin-less table
     int endIx;				// Index of chromEnd-ish col in autoSql or bin-less table
@@ -672,50 +674,89 @@
 // because of new rows at end, so we have to 'order by'.
 {
 return (sameString(table, "refGene") || sameString(table, "refFlat") ||
 	sameString(table, "xenoRefGene") || sameString(table, "xenoRefFlat") ||
 	sameString(table, "all_mrna") || sameString(table, "xenoMrna") ||
 	sameString(table, "all_est") || sameString(table, "xenoEst") ||
 	sameString(table, "refSeqAli") || sameString(table, "xenoRefSeqAli"));
 }
 
 static boolean isPubsTable(char *table)
 // Not absolutely every pubs* table is unsorted, but most of them are.
 {
 return startsWith("pubs", table);
 }
 
+static struct asObject *asdParseConfig(struct annoStreamDb *self, struct jsonElement *configEl)
+/* Return the autoSql object for self->table in self->db; errAbort if the table is not found.
+ * If configEl is not NULL, expect it to be a description of related tables and fields like this:
+ * config = { "relatedTables": [ { "table": "hg19.kgXref",
+ *                                 "fields": ["geneSymbol", "description"] },
+ *                               { "table": "hg19.knownCanonical",
+ *                                 "fields": ["clusterId"] }
+ *                             ] }
+ * Not implemented yet: a non-NULL configEl currently only calls uglyf.  The plan is to unpack
+ * the [db.]tables and fields and append autoSql column descriptions for each field to asObj. */
+{
+//#*** TODO: hAnnoGetAutoSqlForDbTable should do its own split-table checking
+char maybeSplitTable[HDB_MAX_TABLE_STRING];  // presumably filled in by hFindSplitTable below
+if (!hFindSplitTable(self->db, NULL, self->table, maybeSplitTable, NULL))
+    errAbort("annoStreamDbNew: can't find table (or split table) for '%s.%s'",
+             self->db, self->table);
+struct asObject *asObj = hAnnoGetAutoSqlForDbTable(self->db, maybeSplitTable, NULL, TRUE);
+if (configEl != NULL)
+    {
+    uglyf("Implement me!\n");
+    }
+return asObj;
+}
+
 struct annoStreamer *annoStreamDbNew(char *db, char *table, struct annoAssembly *aa,
-				     struct asObject *asObj, int maxOutRows)
-/* Create an annoStreamer (subclass) object from a database table described by asObj. */
+				     int maxOutRows, struct jsonElement *configEl)
+/* Create an annoStreamer (subclass) object from a database table.
+ * If configEl is NULL, then the streamer produces output from all fields
+ * (except bin, unless table's autoSql includes bin).
+ * Otherwise, configEl is a JSON object with a member 'relatedTables' that specifies
+ * related tables and fields to join with table, for example:
+ * config = { "relatedTables": [ { "table": "hg19.kgXref",
+ *                                 "fields": ["geneSymbol", "description"] },
+ *                               { "table": "hg19.knownCanonical",
+ *                                 "fields": ["clusterId"] }
+ *                             ] }
+ * -- the streamer's autoSql will be constructed by appending autoSql column
+ * descriptions to the columns of table.
+ * Caller may free db and table when done with them (both are cloned here), but must keep the
+ * annoAssembly aa alive for the lifetime of the returned annoStreamer. */
 {
 struct sqlConnection *conn = hAllocConn(db);
 if (!sqlTableExists(conn, table))
     errAbort("annoStreamDbNew: table '%s' doesn't exist in database '%s'", table, db);
 struct annoStreamDb *self = NULL;
 AllocVar(self);
+self->conn = conn;
+self->db = cloneString(db);
+self->table = cloneString(table);
+struct asObject *asObj = asdParseConfig(self, configEl);
 struct annoStreamer *streamer = &(self->streamer);
 int dbtLen = strlen(db) + strlen(table) + 2;
-char dbTable[dbtLen];
-safef(dbTable, dbtLen, "%s.%s", db, table);
-annoStreamerInit(streamer, aa, asObj, dbTable);
+char streamerName[dbtLen];
+safef(streamerName, sizeof(streamerName), "%s.%s", db, table);
+annoStreamerInit(streamer, aa, asObj, streamerName);
 streamer->rowType = arWords;
 streamer->setRegion = asdSetRegion;
 streamer->nextRow = asdNextRow;
 streamer->close = asdClose;
-self->conn = conn;
-self->table = cloneString(table);
 char *asFirstColumnName = streamer->asObj->columnList->name;
 if (sqlFieldIndex(self->conn, self->table, "bin") == 0)
     {
     self->hasBin = 1;
     self->minFinestBin = binFromRange(0, 1);
     }
 if (self->hasBin && !sameString(asFirstColumnName, "bin"))
     self->omitBin = 1;
 if (!asdInitBed3Fields(self))
     errAbort("annoStreamDbNew: can't figure out which fields of %s.%s to use as "
 	     "{chrom, chromStart, chromEnd}.", db, table);
 self->makeBaselineQuery = asdMakeBaselineQuery;
 // When a table has an index on endField, sometimes the query optimizer uses it
 // and that ruins the sorting.  Fortunately most tables don't anymore.
 self->endFieldIndexName = sqlTableIndexOnField(self->conn, self->table, self->endField);