83f07ccc670df11816b9c0acffb3bb35b8a89e04
angie
  Fri Sep 18 16:22:38 2015 -0700
Use annoStreamDb with config to add kgXref.geneSymbol to knownGene columns instead of annoStreamDbKnownGene.

diff --git src/hg/lib/annoStreamDbKnownGene.c src/hg/lib/annoStreamDbKnownGene.c
deleted file mode 100644
index bd528bb..0000000
--- src/hg/lib/annoStreamDbKnownGene.c
+++ /dev/null
@@ -1,170 +0,0 @@
-/* annoStreamDbKnownGene -- knownGene with kgXref.geneSymbol added as an extra field */
-
-#include "annoStreamDbKnownGene.h"
-#include "annoStreamDb.h"
-#include "hdb.h"
-#include "sqlNum.h"
-
-static char *askgAutoSqlString =
-"table knownGenePlusSymbol\n"
-"\"Fields of the knownGene table plus symbolic gene name from kgXref.geneSymbol\"\n"
-"   ("
-"    string  name;               \"Name of gene\"\n"
-"    string  chrom;              \"Reference sequence chromosome or scaffold\"\n"
-"    char[1] strand;             \"+ or - for strand\"\n"
-"    uint    txStart;            \"Transcription start position\"\n"
-"    uint    txEnd;              \"Transcription end position\"\n"
-"    uint    cdsStart;           \"Coding region start\"\n"
-"    uint    cdsEnd;             \"Coding region end\"\n"
-"    uint    exonCount;          \"Number of exons\"\n"
-"    uint[exonCount] exonStarts; \"Exon start positions\"\n"
-"    uint[exonCount] exonEnds;   \"Exon end positions\"\n"
-"    string  proteinID;          \"UniProt display ID for Known Genes,  UniProt accession or RefSeq protein ID for UCSC Genes\" \n"
-"    string  alignID;            \"Unique identifier for each (known gene, alignment position) pair\"\n"
-"    string geneSymbol;          \"HGNC gene symbol\"\n"
-"   )\n";
-
-#define KNOWNGENEPLUS_NUM_COLS 13
-
-struct annoStreamDbKnownGene
-{
-    struct annoStreamer streamer;	// Parent class members & methods (external interface)
-    // Private members
-    struct annoStreamer *mySource;	// Internal source of knownGene rows
-    // Data from related table kgXref
-    struct hash *geneSymbols;
-};
-
-struct asObject *annoStreamDbKnownGeneAsObj()
-/* Return an autoSql object that describs fields of a joining query on knownGene and
- * kgXref.geneSymbol. */
-{
-return asParseText(askgAutoSqlString);
-}
-
-// It would be nice for this to go in a knownGene.[ch], but to avoid having to add
-// two new files, just add what we need here:
-static char *kgAutoSqlString =
-"table knownGene\n"
-"\"Genes based on RefSeq, GenBank, and UniProt.\"\n"
-"(\n"
-"    string  name;               \"Name of gene\"\n"
-"    string  chrom;              \"Reference sequence chromosome or scaffold\"\n"
-"    char[1] strand;             \"+ or - for strand\"\n"
-"    uint    txStart;            \"Transcription start position\"\n"
-"    uint    txEnd;              \"Transcription end position\"\n"
-"    uint    cdsStart;           \"Coding region start\"\n"
-"    uint    cdsEnd;             \"Coding region end\"\n"
-"    uint    exonCount;          \"Number of exons\"\n"
-"    uint[exonCount] exonStarts; \"Exon start positions\"\n"
-"    uint[exonCount] exonEnds;   \"Exon end positions\"\n"
-"    string  proteinID;          \"UniProt display ID for Known Genes,  UniProt accession or RefSeq protein ID for UCSC Genes\" \n"
-"    string  alignID;            \"Unique identifier for each (known gene, alignment position) pair\"\n"
-")\n";
-
-struct asObject *knownGeneAsObj()
-/* Return an autoSql object for knownGene. */
-{
-return asParseText(kgAutoSqlString);
-}
-
-#define KNOWNGENE_NUM_COLS 12
-
-static void askgSetRegion(struct annoStreamer *sSelf, char *chrom, uint rStart, uint rEnd)
-/* Pass setRegion down to internal source. */
-{
-annoStreamerSetRegion(sSelf, chrom, rStart, rEnd);
-struct annoStreamDbKnownGene *self = (struct annoStreamDbKnownGene *)sSelf;
-self->mySource->setRegion(self->mySource, chrom, rStart, rEnd);
-}
-
-static char *getGeneSymbol(struct annoStreamDbKnownGene *self, char *kgID, struct lm *lm)
-/* Look up kgID in our geneSymbols hash from kgXref. */
-{
-char *symbol = hashFindVal(self->geneSymbols, kgID);
-if (symbol == NULL)
-    symbol = "";
-return lmCloneString(lm, symbol);
-}
-
-static void knownGeneToKnownGenePlus(struct annoStreamDbKnownGene *self,
-                                     char **kgWords, char **kgpWords, struct lm *lm)
-/* Copy kgWords into kgpWords and add column geneSymbol. */
-{
-CopyArray(kgWords, kgpWords, KNOWNGENE_NUM_COLS);
-char *kgID = kgWords[0];
-kgpWords[KNOWNGENE_NUM_COLS] = getGeneSymbol(self, kgID, lm);
-}
-
-static struct annoRow *askgNextRow(struct annoStreamer *sSelf, char *minChrom, uint minEnd,
-				    struct lm *lm)
-/* Join kgXref.geneSymbol with row from knownGene track table. */
-{
-struct annoStreamDbKnownGene *self = (struct annoStreamDbKnownGene *)sSelf;
-char **kgpWords;
-lmAllocArray(lm, kgpWords, KNOWNGENEPLUS_NUM_COLS);
-struct annoRow *kgRow;
-boolean rightJoinFail = FALSE;
-kgRow = self->mySource->nextRow(self->mySource, minChrom, minEnd, lm);
-if (kgRow != NULL)
-    {
-    char **kgWords = kgRow->data;
-    knownGeneToKnownGenePlus(self, kgWords, kgpWords, lm);
-    return annoRowFromStringArray(kgRow->chrom, kgRow->start, kgRow->end, rightJoinFail,
-				  kgpWords, KNOWNGENEPLUS_NUM_COLS, lm);
-    }
-else
-    return NULL;
-}
-
-static void getGeneSymbols(struct annoStreamDbKnownGene *self, char *db)
-/* Read in kgXref's columns kgID and geneSymbol; hash ids to symbols for joining later. */
-{
-struct sqlConnection *conn = hAllocConn(db);
-struct dyString *query = sqlDyStringCreate("select kgID, geneSymbol from kgXref");
-struct sqlResult *sr = sqlGetResult(conn, query->string);
-char **row;
-while ((row = sqlNextRow(sr)) != NULL)
-    hashAdd(self->geneSymbols, row[0], cloneString(row[1]));
-sqlFreeResult(&sr);
-hFreeConn(&conn);
-}
-
-static void askgClose(struct annoStreamer **pSSelf)
-/* Close internal annoStreamer for knownGene, free geneSymbols hash and close self. */
-{
-if (pSSelf == NULL)
-    return;
-struct annoStreamDbKnownGene *self = *(struct annoStreamDbKnownGene **)pSSelf;
-self->mySource->close(&(self->mySource));
-freeHashAndVals(&self->geneSymbols);
-annoStreamerFree(pSSelf);
-}
-
-struct annoStreamer *annoStreamDbKnownGeneNew(char *db, struct annoAssembly *aa, int maxOutRows)
-/* Create an annoStreamer (subclass) object using two database tables:
- * knownGene: the UCSC Genes main track table
- * kgXref: the related table that contains the HGNC gene symbol that everyone wants to see
- * This streamer's rows are just like a plain annoStreamDb on knownGene, but with an
- * extra column at the end, 'geneSymbol', which is recognized as a gene symbol column due to
- * its use in refGene.
- */
-{
-struct annoStreamDbKnownGene *self;
-AllocVar(self);
-struct annoStreamer *streamer = &(self->streamer);
-// Set up external streamer interface
-annoStreamerInit(streamer, aa, annoStreamDbKnownGeneAsObj(), "knownGene");
-streamer->rowType = arWords;
-// Get internal streamer for knownGene
-self->mySource = annoStreamDbNew(db, "knownGene", aa, maxOutRows, NULL);
-// Slurp in data from kgXref
-self->geneSymbols = hashNew(7);
-getGeneSymbols(self, db);
-// Override methods that need to pass through to internal source:
-streamer->setRegion = askgSetRegion;
-streamer->nextRow = askgNextRow;
-streamer->close = askgClose;
-
-return (struct annoStreamer *)self;
-}