834194a10ad846f3fd0bb2bc1eea5b0b5c029cd7
angie
  Wed Nov 18 23:38:39 2015 -0800
Interface change: removing setAutoSqlObject method from streamer and
exposing annoGratorSetAutoSqlObject because asObj should never be
externally imposed, but a streamer/grator may have a need to bootstrap
itself without an asObj and then install one.
refs #15544

diff --git src/hg/lib/annoStreamDbKnownGene.c src/hg/lib/annoStreamDbKnownGene.c
index 729b64f..bd528bb 100644
--- src/hg/lib/annoStreamDbKnownGene.c
+++ src/hg/lib/annoStreamDbKnownGene.c
@@ -1,178 +1,170 @@
 /* annoStreamDbKnownGene -- knownGene with kgXref.geneSymbol added as an extra field */
 
 #include "annoStreamDbKnownGene.h"
 #include "annoStreamDb.h"
 #include "hdb.h"
 #include "sqlNum.h"
 
 static char *askgAutoSqlString =
 "table knownGenePlusSymbol\n"
 "\"Fields of the knownGene table plus symbolic gene name from kgXref.geneSymbol\"\n"
 "   ("
 "    string  name;               \"Name of gene\"\n"
 "    string  chrom;              \"Reference sequence chromosome or scaffold\"\n"
 "    char[1] strand;             \"+ or - for strand\"\n"
 "    uint    txStart;            \"Transcription start position\"\n"
 "    uint    txEnd;              \"Transcription end position\"\n"
 "    uint    cdsStart;           \"Coding region start\"\n"
 "    uint    cdsEnd;             \"Coding region end\"\n"
 "    uint    exonCount;          \"Number of exons\"\n"
 "    uint[exonCount] exonStarts; \"Exon start positions\"\n"
 "    uint[exonCount] exonEnds;   \"Exon end positions\"\n"
 "    string  proteinID;          \"UniProt display ID for Known Genes,  UniProt accession or RefSeq protein ID for UCSC Genes\" \n"
 "    string  alignID;            \"Unique identifier for each (known gene, alignment position) pair\"\n"
 "    string geneSymbol;          \"HGNC gene symbol\"\n"
 "   )\n";
 
 #define KNOWNGENEPLUS_NUM_COLS 13
 
 struct annoStreamDbKnownGene
 {
     struct annoStreamer streamer;	// Parent class members & methods (external interface)
     // Private members
     struct annoStreamer *mySource;	// Internal source of knownGene rows
     // Data from related table kgXref
     struct hash *geneSymbols;
 };
 
 struct asObject *annoStreamDbKnownGeneAsObj()
 /* Return an autoSql object that describs fields of a joining query on knownGene and
  * kgXref.geneSymbol. */
 {
 return asParseText(askgAutoSqlString);
 }
 
 // It would be nice for this to go in a knownGene.[ch], but to avoid having to add
 // two new files, just add what we need here:
 static char *kgAutoSqlString =
 "table knownGene\n"
 "\"Genes based on RefSeq, GenBank, and UniProt.\"\n"
 "(\n"
 "    string  name;               \"Name of gene\"\n"
 "    string  chrom;              \"Reference sequence chromosome or scaffold\"\n"
 "    char[1] strand;             \"+ or - for strand\"\n"
 "    uint    txStart;            \"Transcription start position\"\n"
 "    uint    txEnd;              \"Transcription end position\"\n"
 "    uint    cdsStart;           \"Coding region start\"\n"
 "    uint    cdsEnd;             \"Coding region end\"\n"
 "    uint    exonCount;          \"Number of exons\"\n"
 "    uint[exonCount] exonStarts; \"Exon start positions\"\n"
 "    uint[exonCount] exonEnds;   \"Exon end positions\"\n"
 "    string  proteinID;          \"UniProt display ID for Known Genes,  UniProt accession or RefSeq protein ID for UCSC Genes\" \n"
 "    string  alignID;            \"Unique identifier for each (known gene, alignment position) pair\"\n"
 ")\n";
 
 struct asObject *knownGeneAsObj()
 /* Return an autoSql object for knownGene. */
 {
 return asParseText(kgAutoSqlString);
 }
 
 #define KNOWNGENE_NUM_COLS 12
 
-static void askgSetAutoSqlObject(struct annoStreamer *self, struct asObject *asObj)
-/* Abort if something external tries to change the autoSql object. */
-{
-errAbort("annoStreamDbKnownGene %s: can't change autoSqlObject.",
-	 ((struct annoStreamer *)self)->name);
-}
-
 static void askgSetRegion(struct annoStreamer *sSelf, char *chrom, uint rStart, uint rEnd)
 /* Pass setRegion down to internal source. */
 {
 annoStreamerSetRegion(sSelf, chrom, rStart, rEnd);
 struct annoStreamDbKnownGene *self = (struct annoStreamDbKnownGene *)sSelf;
 self->mySource->setRegion(self->mySource, chrom, rStart, rEnd);
 }
 
 static char *getGeneSymbol(struct annoStreamDbKnownGene *self, char *kgID, struct lm *lm)
 /* Look up kgID in our geneSymbols hash from kgXref. */
 {
 char *symbol = hashFindVal(self->geneSymbols, kgID);
 if (symbol == NULL)
     symbol = "";
 return lmCloneString(lm, symbol);
 }
 
 static void knownGeneToKnownGenePlus(struct annoStreamDbKnownGene *self,
                                      char **kgWords, char **kgpWords, struct lm *lm)
 /* Copy kgWords into kgpWords and add column geneSymbol. */
 {
 CopyArray(kgWords, kgpWords, KNOWNGENE_NUM_COLS);
 char *kgID = kgWords[0];
 kgpWords[KNOWNGENE_NUM_COLS] = getGeneSymbol(self, kgID, lm);
 }
 
 static struct annoRow *askgNextRow(struct annoStreamer *sSelf, char *minChrom, uint minEnd,
 				    struct lm *lm)
 /* Join kgXref.geneSymbol with row from knownGene track table. */
 {
 struct annoStreamDbKnownGene *self = (struct annoStreamDbKnownGene *)sSelf;
 char **kgpWords;
 lmAllocArray(lm, kgpWords, KNOWNGENEPLUS_NUM_COLS);
 struct annoRow *kgRow;
 boolean rightJoinFail = FALSE;
 kgRow = self->mySource->nextRow(self->mySource, minChrom, minEnd, lm);
 if (kgRow != NULL)
     {
     char **kgWords = kgRow->data;
     knownGeneToKnownGenePlus(self, kgWords, kgpWords, lm);
     return annoRowFromStringArray(kgRow->chrom, kgRow->start, kgRow->end, rightJoinFail,
 				  kgpWords, KNOWNGENEPLUS_NUM_COLS, lm);
     }
 else
     return NULL;
 }
 
 static void getGeneSymbols(struct annoStreamDbKnownGene *self, char *db)
 /* Read in kgXref's columns kgID and geneSymbol; hash ids to symbols for joining later. */
 {
 struct sqlConnection *conn = hAllocConn(db);
 struct dyString *query = sqlDyStringCreate("select kgID, geneSymbol from kgXref");
 struct sqlResult *sr = sqlGetResult(conn, query->string);
 char **row;
 while ((row = sqlNextRow(sr)) != NULL)
     hashAdd(self->geneSymbols, row[0], cloneString(row[1]));
 sqlFreeResult(&sr);
 hFreeConn(&conn);
 }
 
 static void askgClose(struct annoStreamer **pSSelf)
 /* Close internal annoStreamer for knownGene, free geneSymbols hash and close self. */
 {
 if (pSSelf == NULL)
     return;
 struct annoStreamDbKnownGene *self = *(struct annoStreamDbKnownGene **)pSSelf;
 self->mySource->close(&(self->mySource));
 freeHashAndVals(&self->geneSymbols);
 annoStreamerFree(pSSelf);
 }
 
 struct annoStreamer *annoStreamDbKnownGeneNew(char *db, struct annoAssembly *aa, int maxOutRows)
 /* Create an annoStreamer (subclass) object using two database tables:
  * knownGene: the UCSC Genes main track table
  * kgXref: the related table that contains the HGNC gene symbol that everyone wants to see
  * This streamer's rows are just like a plain annoStreamDb on knownGene, but with an
  * extra column at the end, 'geneSymbol', which is recognized as a gene symbol column due to
  * its use in refGene.
  */
 {
 struct annoStreamDbKnownGene *self;
 AllocVar(self);
 struct annoStreamer *streamer = &(self->streamer);
 // Set up external streamer interface
 annoStreamerInit(streamer, aa, annoStreamDbKnownGeneAsObj(), "knownGene");
 streamer->rowType = arWords;
 // Get internal streamer for knownGene
 self->mySource = annoStreamDbNew(db, "knownGene", aa, maxOutRows, NULL);
 // Slurp in data from kgXref
 self->geneSymbols = hashNew(7);
 getGeneSymbols(self, db);
 // Override methods that need to pass through to internal source:
-streamer->setAutoSqlObject = askgSetAutoSqlObject;
 streamer->setRegion = askgSetRegion;
 streamer->nextRow = askgNextRow;
 streamer->close = askgClose;
 
 return (struct annoStreamer *)self;
 }