50bf71bc5c57303fb4d7225283f48cd13ba41bcf
angie
  Wed Nov 18 23:20:57 2015 -0800
Change to interface of annoStreamDbNew: instead of making the caller
pass in an asObject, the caller may now pass in a parsed JSON config
object (or NULL).  annoStreamDbNew will use this config object to
determine whether related tables need to be joined with the track table,
and produce its own asObj.  It doesn't actually do that yet -- this
is just an interface change that will enable it to do so.
refs #15544

diff --git src/hg/lib/annoStreamDbKnownGene.c src/hg/lib/annoStreamDbKnownGene.c
index 5aaf6e4..729b64f 100644
--- src/hg/lib/annoStreamDbKnownGene.c
+++ src/hg/lib/annoStreamDbKnownGene.c
@@ -1,178 +1,178 @@
 /* annoStreamDbKnownGene -- knownGene with kgXref.geneSymbol added as an extra field */
 
 #include "annoStreamDbKnownGene.h"
 #include "annoStreamDb.h"
 #include "hdb.h"
 #include "sqlNum.h"
 
 /* autoSql text for the rows this streamer emits: the twelve knownGene fields
  * followed by geneSymbol, thirteen fields in all. */
 static char *askgAutoSqlString =
 "table knownGenePlusSymbol\n"
 "\"Fields of the knownGene table plus symbolic gene name from kgXref.geneSymbol\"\n"
 "   ("
 "    string  name;               \"Name of gene\"\n"
 "    string  chrom;              \"Reference sequence chromosome or scaffold\"\n"
 "    char[1] strand;             \"+ or - for strand\"\n"
 "    uint    txStart;            \"Transcription start position\"\n"
 "    uint    txEnd;              \"Transcription end position\"\n"
 "    uint    cdsStart;           \"Coding region start\"\n"
 "    uint    cdsEnd;             \"Coding region end\"\n"
 "    uint    exonCount;          \"Number of exons\"\n"
 "    uint[exonCount] exonStarts; \"Exon start positions\"\n"
 "    uint[exonCount] exonEnds;   \"Exon end positions\"\n"
 "    string  proteinID;          \"UniProt display ID for Known Genes,  UniProt accession or RefSeq protein ID for UCSC Genes\" \n"
 "    string  alignID;            \"Unique identifier for each (known gene, alignment position) pair\"\n"
 "    string geneSymbol;          \"HGNC gene symbol\"\n"
 "   )\n";
 
 // Number of fields declared in askgAutoSqlString above; keep the two in sync.
 #define KNOWNGENEPLUS_NUM_COLS 13
 
 struct annoStreamDbKnownGene
 /* annoStreamer subclass that emits knownGene rows with kgXref.geneSymbol appended. */
 {
     struct annoStreamer streamer;	// Parent class members & methods (external interface)
 					// Must remain the first member: this file casts between
 					// struct annoStreamer * and struct annoStreamDbKnownGene *.
     // Private members
     struct annoStreamer *mySource;	// Internal source of knownGene rows
     // Data from related table kgXref
     struct hash *geneSymbols;		// Maps kgXref.kgID to a cloned copy of kgXref.geneSymbol
 };
 
 struct asObject *annoStreamDbKnownGeneAsObj()
 /* Parse askgAutoSqlString and return the resulting autoSql object, which describes
  * the fields of a joining query on knownGene and kgXref.geneSymbol. */
 {
 struct asObject *plusAsObj = asParseText(askgAutoSqlString);
 return plusAsObj;
 }
 
 // It would be nice for this to go in a knownGene.[ch], but to avoid having to add
 // two new files, just add what we need here:
 // autoSql text for the plain knownGene table (twelve fields, no geneSymbol).
 static char *kgAutoSqlString =
 "table knownGene\n"
 "\"Genes based on RefSeq, GenBank, and UniProt.\"\n"
 "(\n"
 "    string  name;               \"Name of gene\"\n"
 "    string  chrom;              \"Reference sequence chromosome or scaffold\"\n"
 "    char[1] strand;             \"+ or - for strand\"\n"
 "    uint    txStart;            \"Transcription start position\"\n"
 "    uint    txEnd;              \"Transcription end position\"\n"
 "    uint    cdsStart;           \"Coding region start\"\n"
 "    uint    cdsEnd;             \"Coding region end\"\n"
 "    uint    exonCount;          \"Number of exons\"\n"
 "    uint[exonCount] exonStarts; \"Exon start positions\"\n"
 "    uint[exonCount] exonEnds;   \"Exon end positions\"\n"
 "    string  proteinID;          \"UniProt display ID for Known Genes,  UniProt accession or RefSeq protein ID for UCSC Genes\" \n"
 "    string  alignID;            \"Unique identifier for each (known gene, alignment position) pair\"\n"
 ")\n";
 
 struct asObject *knownGeneAsObj()
 /* Parse kgAutoSqlString and return the resulting autoSql object for knownGene. */
 {
 struct asObject *kgAsObj = asParseText(kgAutoSqlString);
 return kgAsObj;
 }
 
 // Number of fields declared in kgAutoSqlString (plain knownGene, without geneSymbol).
 #define KNOWNGENE_NUM_COLS 12
 
 static void askgSetAutoSqlObject(struct annoStreamer *self, struct asObject *asObj)
 /* setAutoSqlObject override: any attempt to replace the autoSql object is reported
  * through errAbort along with the streamer's name. */
 {
 errAbort("annoStreamDbKnownGene %s: can't change autoSqlObject.", self->name);
 }
 
 static void askgSetRegion(struct annoStreamer *sSelf, char *chrom, uint rStart, uint rEnd)
 /* Apply the region to this streamer first, then forward the same region to the
  * internal knownGene source. */
 {
 struct annoStreamDbKnownGene *kgSelf = (struct annoStreamDbKnownGene *)sSelf;
 annoStreamerSetRegion(sSelf, chrom, rStart, rEnd);
 struct annoStreamer *source = kgSelf->mySource;
 source->setRegion(source, chrom, rStart, rEnd);
 }
 
 static char *getGeneSymbol(struct annoStreamDbKnownGene *self, char *kgID, struct lm *lm)
 /* Return a copy, allocated from lm, of the value stored under kgID in the geneSymbols
  * hash; when the lookup yields NULL, a copy of the empty string is returned instead. */
 {
 char *found = hashFindVal(self->geneSymbols, kgID);
 return lmCloneString(lm, (found != NULL) ? found : "");
 }
 
 static void knownGeneToKnownGenePlus(struct annoStreamDbKnownGene *self,
                                      char **kgWords, char **kgpWords, struct lm *lm)
 /* Fill kgpWords with the first KNOWNGENE_NUM_COLS words of kgWords, then store the
  * gene symbol looked up for kgWords[0] in the slot that follows them. */
 {
 const int symbolIx = KNOWNGENE_NUM_COLS;
 CopyArray(kgWords, kgpWords, KNOWNGENE_NUM_COLS);
 kgpWords[symbolIx] = getGeneSymbol(self, kgWords[0], lm);
 }
 
 static struct annoRow *askgNextRow(struct annoStreamer *sSelf, char *minChrom, uint minEnd,
                                    struct lm *lm)
 /* nextRow override: fetch the next row from the internal knownGene source and return
  * a new row holding its words plus the geneSymbol column.  Returns NULL when the
  * internal source returns NULL. */
 {
 struct annoStreamDbKnownGene *self = (struct annoStreamDbKnownGene *)sSelf;
 struct annoStreamer *source = self->mySource;
 char **plusWords;
 lmAllocArray(lm, plusWords, KNOWNGENEPLUS_NUM_COLS);
 struct annoRow *srcRow = source->nextRow(source, minChrom, minEnd, lm);
 if (srcRow == NULL)
     return NULL;
 char **srcWords = srcRow->data;
 knownGeneToKnownGenePlus(self, srcWords, plusWords, lm);
 // rightJoinFail is always passed as FALSE here.
 return annoRowFromStringArray(srcRow->chrom, srcRow->start, srcRow->end, FALSE,
                               plusWords, KNOWNGENEPLUS_NUM_COLS, lm);
 }
 
 static void getGeneSymbols(struct annoStreamDbKnownGene *self, char *db)
 /* Read in kgXref's columns kgID and geneSymbol; hash ids to symbols for joining later.
  * Each hash value is a cloneString copy of geneSymbol.  self->geneSymbols must already
  * have been created by the caller. */
 {
 struct sqlConnection *conn = hAllocConn(db);
 struct dyString *query = sqlDyStringCreate("select kgID, geneSymbol from kgXref");
 struct sqlResult *sr = sqlGetResult(conn, query->string);
 char **row;
 while ((row = sqlNextRow(sr)) != NULL)
     hashAdd(self->geneSymbols, row[0], cloneString(row[1]));
 sqlFreeResult(&sr);
 // Release the query text now that the result has been consumed; it was previously leaked.
 dyStringFree(&query);
 hFreeConn(&conn);
 }
 
 static void askgClose(struct annoStreamer **pSSelf)
 /* Close internal annoStreamer for knownGene, free geneSymbols hash (and its values)
  * and free self via annoStreamerFree.  Does nothing when pSSelf or *pSSelf is NULL. */
 {
 // Guard *pSSelf as well as pSSelf, so an already-NULLed handle is not dereferenced below.
 if (pSSelf == NULL || *pSSelf == NULL)
     return;
 struct annoStreamDbKnownGene *self = (struct annoStreamDbKnownGene *)*pSSelf;
 self->mySource->close(&(self->mySource));
 freeHashAndVals(&self->geneSymbols);
 annoStreamerFree(pSSelf);
 }
 
 struct annoStreamer *annoStreamDbKnownGeneNew(char *db, struct annoAssembly *aa, int maxOutRows)
 /* Create an annoStreamer (subclass) object using two database tables:
  * knownGene: the UCSC Genes main track table
  * kgXref: the related table that contains the HGNC gene symbol that everyone wants to see
  * This streamer's rows are just like a plain annoStreamDb on knownGene, but with an
  * extra column at the end, 'geneSymbol', which is recognized as a gene symbol column due to
  * its use in refGene.
  * db, aa and maxOutRows are passed through to the internal annoStreamDb on knownGene;
  * db is also used to query kgXref.  The returned streamer's close method is askgClose.
  */
 {
 struct annoStreamDbKnownGene *self;
 AllocVar(self);
 struct annoStreamer *streamer = &(self->streamer);
 // Set up external streamer interface
 annoStreamerInit(streamer, aa, annoStreamDbKnownGeneAsObj(), "knownGene");
 streamer->rowType = arWords;
 // Get internal streamer for knownGene
 // (in the new annoStreamDbNew interface the final argument is the optional config; NULL here)
-self->mySource = annoStreamDbNew(db, "knownGene", aa, knownGeneAsObj(), maxOutRows);
+self->mySource = annoStreamDbNew(db, "knownGene", aa, maxOutRows, NULL);
 // Slurp in data from kgXref
 self->geneSymbols = hashNew(7);
 getGeneSymbols(self, db);
 // Override methods that need to pass through to internal source:
 streamer->setAutoSqlObject = askgSetAutoSqlObject;
 streamer->setRegion = askgSetRegion;
 streamer->nextRow = askgNextRow;
 streamer->close = askgClose;
 
 // streamer is the first member of self, so self can be returned as the parent type.
 return (struct annoStreamer *)self;
 }