13dddf36dc0f43a27a792519213d87939b51d99c
angie
  Wed Jul 25 16:23:50 2018 -0700
Revert "Use the new annoStreamDb params rightJoinDtf and rightJoinMainField to restrict ncbiRefSeqPsl rows to only those that appear in the selected genePred table (e.g. ncbiRefSeqCurated)."

This reverts commit dd285f81141e45dabf1caf0f90723810f2c6f8a1.

diff --git src/hg/lib/annoStreamDbPslPlus.c src/hg/lib/annoStreamDbPslPlus.c
index 0516169..9ec4612 100644
--- src/hg/lib/annoStreamDbPslPlus.c
+++ src/hg/lib/annoStreamDbPslPlus.c
@@ -1,244 +1,243 @@
 /* annoStreamDbPslPlus -- subclass of annoStreamer for joining PSL+CDS+seq database tables */
 
 /* Copyright (C) 2017 The Regents of the University of California 
  * See README in this or parent directory for licensing information. */
 
 #include "annoStreamDbPslPlus.h"
 #include "annoStreamDb.h"
 #include "hdb.h"
 
 /* autoSql text parsed by annoStreamDbPslPlusAsObj(): the 21 PSL columns (match .. tStarts)
  * followed by six more columns: cds, protAcc, name2, path, fileOffset and fileSize. */
 static char *pslPlusAutoSqlString =
 "table pslPlus"
 "\"transcript PSL, CDS and seq info\""
 "   ("
 "    uint    match;      \"Number of bases that match that aren't repeats\""
 "    uint    misMatch;   \"Number of bases that don't match\""
 "    uint    repMatch;   \"Number of bases that match but are part of repeats\""
 "    uint    nCount;       \"Number of 'N' bases\""
 "    uint    qNumInsert;   \"Number of inserts in query (transcript)\""
 "    int     qBaseInsert;  \"Number of bases inserted in query (transcript)\""
 "    uint    tNumInsert;   \"Number of inserts in target (chromosome/scaffold)\""
 "    int     tBaseInsert;  \"Number of bases inserted in target (chromosome/scaffold)\""
 "    char[2] strand;       \"+ or - for query strand (transcript to genome orientation)\""
 "    string  qName;        \"Transcript accession\""
 "    uint    qSize;        \"Transcript sequence size\""
 "    uint    qStart;       \"Alignment start position in query (transcript)\""
 "    uint    qEnd;         \"Alignment end position in query (transcript)\""
 "    string  tName;        \"Target (chromosome/scaffold) name\""
 "    uint    tSize;        \"Target (chromosome/scaffold) size\""
 "    uint    tStart;       \"Alignment start position in target\""
 "    uint    tEnd;         \"Alignment end position in target\""
 "    uint    blockCount;   \"Number of blocks in alignment\""
 "    uint[blockCount] blockSizes;  \"Size of each block\""
 "    uint[blockCount] qStarts;     \"Start of each block in query.\""
 "    uint[blockCount] tStarts;     \"Start of each block in target.\""
 "    string  cds;          \"CDS start and end in transcript (if applicable)\""
 "    string  protAcc;      \"Protein accession (if applicable)\""
 "    string  name2;        \"Gene symbolic name\""
 "    string  path;         \"Path to FASTA file containing transcript sequence\""
 "    uint    fileOffset;   \"Offset of transcript record in FASTA file\""
 "    uint    fileSize;     \"Number of bytes of transcript record in FASTA file\""
 "   )";
 
 /* Subclass of annoStreamer.  The parent struct is the first member, and the functions in this
  * file cast between struct annoStreamer * and struct annoStreamDbPslPlus *. */
 struct annoStreamDbPslPlus
     {
     struct annoStreamer streamer;	// Parent class members & methods (external interface)
     // Private members
     // gpTable is a clone of the name passed to annoStreamDbPslPlusNew; asdppClose frees it.
     char *gpTable;                      // Associated genePred (refGene, ncbiRefSeqCurated etc)
     // mySource is created by annoStreamDbNew on the PSL table and closed by asdppClose.
     struct annoStreamer *mySource;	// Internal source of PSL+CDS+seq info
     };
 
 // select p.*, c.cds, l.protAcc, l.name, e.path, s.file_offset, s.file_size 
 //   from (((ncbiRefSeqPsl p join ncbiRefSeqCurated n on p.qName = n.name)
 //          left join ncbiRefSeqCds c on p.qName = c.id)
 //         join ncbiRefSeqLink l on p.qName = l.mrnaAcc)
 //        left join (seqNcbiRefSeq s left join extNcbiRefSeq e on s.extFile = e.id) on p.qName = s.acc
 //   where p.tName = "chr1"
 //   order by p.tName, p.tStart
 //  limit 5;
 
 // JSON config template used when gpTable starts with "ncbiRefSeq".  annoStreamDbPslPlusNew
 // fills in the format placeholder(s) with dyStringCreate before parsing the result as JSON.
 // relatedTables names the extra fields requested from each table joined to the PSL table.
 static char *ncbiRefSeqConfigJsonFormat =
     "{ \"naForMissing\": false,"
-    "  \"rightJoinDtf\": \"%s.\%s.name\","
-    "  \"rightJoinMainField\": \"qName\","
+    "  \"rightJoinTable\": \"%s\","
     "  \"relatedTables\": [ { \"table\": \"ncbiRefSeqCds\","
     "                         \"fields\": [\"cds\"] },"
     "                       { \"table\": \"ncbiRefSeqLink\","
     "                         \"fields\": [\"protAcc\", \"name\"] },"
     "                       { \"table\": \"extNcbiRefSeq\","
     "                         \"fields\": [\"path\"] },"
     "                       { \"table\": \"seqNcbiRefSeq\","
     "                         \"fields\": [\"file_offset\", \"file_size\"] } ] }";
 
 //select p.*,c.name,l.protAcc,l.name,e.path,s.file_offset,s.file_size, i.version
 //from refSeqAli p
 //  join (hgFixed.gbCdnaInfo i
 //        left join hgFixed.cds c on i.cds = c.id) on i.acc = p.qName
 //       left join (hgFixed.gbSeq s
 //                  join hgFixed.gbExtFile e on e.id = s.gbExtFile) on s.acc = p.qName
 //       join hgFixed.refLink l on p.qName = l.mrnaAcc
 //  where p.tName = "chr1"
 //   order by p.tName, p.tStart
 //  limit 5;
 
 // JSON config used as-is (no format placeholders) when gpTable is "refGene" and the PSL table
 // is refSeqAli.  relatedTables names the extra fields requested from each hgFixed table.
 static char *refSeqAliConfigJson =
     "{ \"naForMissing\": false,"
     "  \"relatedTables\": [ { \"table\": \"hgFixed.cds\","
     "                         \"fields\": [\"name\"] },"
     "                       { \"table\": \"hgFixed.refLink\","
     "                         \"fields\": [\"protAcc\", \"name\"] },"
     "                       { \"table\": \"hgFixed.gbExtFile\","
     "                         \"fields\": [\"path\"] },"
     "                       { \"table\": \"hgFixed.gbSeq\","
     "                         \"fields\": [\"file_offset\", \"file_size\"] } ] }";
 
 struct asObject *annoStreamDbPslPlusAsObj()
 /* Parse the pslPlus autoSql text and return the resulting asObject, which describes the PSL
  * columns plus gene name, protein accession, CDS and sequence file info.
  * A configured annoStreamDbPslPlus instance may return additional columns after these, but
  * these columns will always be present. */
 {
 struct asObject *pslPlusAsObj = asParseText(pslPlusAutoSqlString);
 return pslPlusAsObj;
 }
 
 static void asdppSetRegion(struct annoStreamer *sSelf, char *chrom, uint rStart, uint rEnd)
 /* Apply the default annoStreamerSetRegion to this streamer, then pass the same region on to
  * the internal source so both agree on where to read. */
 {
 annoStreamerSetRegion(sSelf, chrom, rStart, rEnd);
 struct annoStreamer *source = ((struct annoStreamDbPslPlus *)sSelf)->mySource;
 source->setRegion(source, chrom, rStart, rEnd);
 }
 
 static struct annoRow *asdppNextRow(struct annoStreamer *sSelf, char *minChrom, uint minEnd,
                                     struct lm *lm)
 /* Return the next psl+ row.  Rows are pulled from the internal source until one either passes
  * sSelf->filters (or there are no filters), or fails them with the rightJoin flag set.
  * The result is a new annoRow built in lm from that source row's words and coordinates.
  * Returns NULL when the internal source has no more rows. */
 {
 struct annoStreamDbPslPlus *self = (struct annoStreamDbPslPlus *)sSelf;
 struct annoStreamer *source = self->mySource;
 // The words always come from the source row's data, so no scratch array is allocated here.
 char **ppWords = NULL;
 struct annoRow *ppRow;
 boolean rightJoinFail = FALSE;
 while ((ppRow = source->nextRow(source, minChrom, minEnd, lm)) != NULL)
     {
     ppWords = ppRow->data;
     // No filtering to do: just use this row.
     if (sSelf->filters == NULL)
         break;
     boolean fails = annoFilterRowFails(sSelf->filters, ppWords, sSelf->numCols,
                                        &rightJoinFail);
     // If this row passes the filters, or fails but is rightJoin, then we're done looking.
     if (!fails || rightJoinFail)
         break;
     }
 if (ppRow == NULL)
     return NULL;
 return annoRowFromStringArray(ppRow->chrom, ppRow->start, ppRow->end, rightJoinFail,
                               ppWords, sSelf->numCols, lm);
 }
 
 static void asdppClose(struct annoStreamer **pSSelf)
 /* Free up state: release the cloned gpTable name, close the internal source, then free the
  * streamer itself via annoStreamerFree.
  * Does nothing if pSSelf is NULL or points to a NULL streamer (e.g. already closed). */
 {
 if (pSSelf == NULL || *pSSelf == NULL)
     return;
 struct annoStreamDbPslPlus *self = *(struct annoStreamDbPslPlus **)pSSelf;
 freez(&self->gpTable);
 // The internal source is closed through its own close method before the parent is freed.
 self->mySource->close(&(self->mySource));
 annoStreamerFree(pSSelf);
 }
 
 static struct asColumn *asColumnClone(struct asColumn *colIn)
 /* Return a full clone of colIn, or NULL if colIn is NULL. */
 {
 if (colIn == NULL)
     return NULL;
 if (colIn->obType != NULL || colIn->index != NULL)
     errAbort("asColumnClone: support for obType and index not implemented");
 struct asColumn *colOut;
 AllocVar(colOut);
 colOut->name = cloneString(colIn->name);
 colOut->comment = cloneString(colIn->comment);
 colOut->lowType = colIn->lowType; // static struct in asParse.c
 colOut->obName = cloneString(colIn->obName);
 colOut->fixedSize = colIn->fixedSize;
 colOut->linkedSizeName = cloneString(colIn->linkedSizeName);
 colOut->linkedSize = asColumnClone(colIn->linkedSize);
 colOut->isSizeLink = colIn->isSizeLink;
 colOut->isList = colIn->isList;
 colOut->isArray = colIn->isArray;
 colOut->autoIncrement = colIn->autoIncrement;
 colOut->values = slNameCloneList(colIn->values);
 return colOut;
 }
 
 static void asObjAppendExtraColumns(struct asObject *asObjTarget, struct asObject *asObjSource)
 /* When asObjSource has more columns than asObjTarget, clone the source columns beyond the
  * target's column count and append them to the end of asObjTarget's column list.
  * errAborts if asObjTarget has no columns; does nothing if the source is not longer. */
 {
 int targetCount = slCount(asObjTarget->columnList);
 int sourceCount = slCount(asObjSource->columnList);
 if (targetCount < 1)
     errAbort("asObjAppendExtraColumns: support for empty target columnList not implemented");
 if (sourceCount <= targetCount)
     return;
 // Walk both lists in step until targetTail is the last column of the target.
 struct asColumn *targetTail = asObjTarget->columnList;
 struct asColumn *sourceCol = asObjSource->columnList;
 int stepsLeft;
 for (stepsLeft = targetCount - 1;  stepsLeft > 0;  stepsLeft--)
     {
     targetTail = targetTail->next;
     sourceCol = sourceCol->next;
     }
 // Every source column after that position is extra: clone it onto the target's tail.
 for (sourceCol = sourceCol->next;  sourceCol != NULL;  sourceCol = sourceCol->next)
     {
     targetTail->next = asColumnClone(sourceCol);
     targetTail = targetTail->next;
     }
 }
 
 struct annoStreamer *annoStreamDbPslPlusNew(struct annoAssembly *aa, char *gpTable, int maxOutRows,
                                             struct jsonElement *extraConfig)
 /* Create an annoStreamer (subclass) object that streams PSL, CDS and seqFile info.
  * gpTable is a genePred table that has associated PSL, CDS and sequence info
  * (i.e. refGene, ncbiRefSeq, ncbiRefSeqCurated or ncbiRefSeqPredicted).
  * errAborts if gpTable is neither "refGene" nor a name starting with "ncbiRefSeq".
  * extraConfig may be NULL; if given it is merged into the built-in JSON config and any extra
  * columns of the internal source are appended to the returned streamer's autoSql object. */
 {
 // Pick the PSL table and the JSON join config that go with the genePred table.
 char *pslTable = NULL, *configJson = NULL;
 if (sameString("refGene", gpTable))
     {
     pslTable = "refSeqAli";
     configJson = refSeqAliConfigJson;
     }
 else if (startsWith("ncbiRefSeq", gpTable))
     {
     pslTable = "ncbiRefSeqPsl";
     // NOTE(review): in this branch configJson is the string taken from a dyString and is not
     // freed anywhere in this function -- confirm whether that is an intentional small leak.
-    struct dyString *dy = dyStringCreate(ncbiRefSeqConfigJsonFormat, aa->name, gpTable);
+    struct dyString *dy = dyStringCreate(ncbiRefSeqConfigJsonFormat, gpTable);
     configJson = dyStringCannibalize(&dy);
     }
 else
     errAbort("annoStreamDbPslPlusNew: unrecognized table \"%s\"", gpTable);
 struct annoStreamDbPslPlus *self;
 AllocVar(self);
 // Get internal streamer for joining PSL with other tables.
 struct jsonElement *config = jsonParse(configJson);
 // Caller-supplied settings are merged into the built-in config before the source is created.
 if (extraConfig)
     jsonObjectMerge(config, extraConfig);
 self->mySource = annoStreamDbNew(aa->name, pslTable, aa, maxOutRows, config);
 struct asObject *asObj = annoStreamDbPslPlusAsObj();
 // With extraConfig the internal source may have more columns than pslPlus; append clones of
 // those so the external autoSql object has a column for each of them.
 if (extraConfig)
     asObjAppendExtraColumns(asObj, self->mySource->asObj);
 // Set up external streamer interface
 struct annoStreamer *streamer = &(self->streamer);
 annoStreamerInit(streamer, aa, asObj, pslTable);
 streamer->rowType = arWords;
 // Keep a private copy of the table name; asdppClose frees it.
 self->gpTable = cloneString(gpTable);
 // Override methods that need to pass through to internal source:
 streamer->setRegion = asdppSetRegion;
 streamer->nextRow = asdppNextRow;
 streamer->close = asdppClose;
 return (struct annoStreamer *)self;
 }