3f06ffc10df40e72dfa18a150eeca23fda6b41df angie Thu Feb 8 13:42:04 2018 -0800 annoStreamDbPslPlus internally used a json config to join in CDS and sequence with the underlying PSL -- so annoStreamDbPslPlusNew did not accept an incoming config, so hgVai's RefSeq Status option was ignored. Add jsonObjectMerge so that the config from hgVai can be merged with the internal config, and make sure that the extra columns from the internal annoStreamDb are added to annoStreamDbPslPlusNew's asObj. We can't just use the internal annoStreamDb's asObj because the field names for CDS etc. fields would differ for refSeqAli vs ncbiRefSeq and column-matching to recognize annoStreamDbPslPlus would fail. refs #20948 diff --git src/hg/lib/annoStreamDbPslPlus.c src/hg/lib/annoStreamDbPslPlus.c index 1ce1dba..4e1b9b6 100644 --- src/hg/lib/annoStreamDbPslPlus.c +++ src/hg/lib/annoStreamDbPslPlus.c @@ -81,106 +81,163 @@ // order by p.tName, p.tStart // limit 5; static char *refSeqAliConfigJson = "{ \"naForMissing\": false," " \"relatedTables\": [ { \"table\": \"hgFixed.cds\"," " \"fields\": [\"name\"] }," " { \"table\": \"hgFixed.refLink\"," " \"fields\": [\"protAcc\", \"name\"] }," " { \"table\": \"hgFixed.gbExtFile\"," " \"fields\": [\"path\"] }," " { \"table\": \"hgFixed.gbSeq\"," " \"fields\": [\"file_offset\", \"file_size\"] } ] }"; struct asObject *annoStreamDbPslPlusAsObj() -/* Return an autoSql object with PSL, gene name, protein acc, CDS and sequence file info fields. */ +/* Return an autoSql object with PSL, gene name, protein acc, CDS and sequence file info fields. + * An annoStreamDbPslPlus instance may return additional columns if configured, but + * these columns will always be present. */ { return asParseText(pslPlusAutoSqlString); } static void asdppSetRegion(struct annoStreamer *sSelf, char *chrom, uint rStart, uint rEnd) /* Pass setRegion down to internal source. 
*/ { annoStreamerSetRegion(sSelf, chrom, rStart, rEnd); struct annoStreamDbPslPlus *self = (struct annoStreamDbPslPlus *)sSelf; self->mySource->setRegion(self->mySource, chrom, rStart, rEnd); } static struct annoRow *asdppNextRow(struct annoStreamer *sSelf, char *minChrom, uint minEnd, struct lm *lm) /* Return next psl+ row. */ { struct annoStreamDbPslPlus *self = (struct annoStreamDbPslPlus *)sSelf; char **ppWords; -lmAllocArray(lm, ppWords, PSLPLUS_NUM_COLS); +lmAllocArray(lm, ppWords, sSelf->numCols); struct annoRow *ppRow; boolean rightJoinFail = FALSE; while ((ppRow = self->mySource->nextRow(self->mySource, minChrom, minEnd, lm)) != NULL) { ppWords = ppRow->data; // If there are filters, apply them to this row's words; otherwise just use this row. if (sSelf->filters) { - boolean fails = annoFilterRowFails(sSelf->filters, ppWords, PSLPLUS_NUM_COLS, + boolean fails = annoFilterRowFails(sSelf->filters, ppWords, sSelf->numCols, &rightJoinFail); // If this row passes the filter, or fails but is rightJoin, then we're done looking. if (!fails || rightJoinFail) break; } else // no filtering to do, just use this row break; } if (ppRow != NULL) return annoRowFromStringArray(ppRow->chrom, ppRow->start, ppRow->end, rightJoinFail, - ppWords, PSLPLUS_NUM_COLS, lm); + ppWords, sSelf->numCols, lm); else return NULL; } static void asdppClose(struct annoStreamer **pSSelf) /* Free up state. */ { if (pSSelf == NULL) return; struct annoStreamDbPslPlus *self = *(struct annoStreamDbPslPlus **)pSSelf; freez(&self->gpTable); self->mySource->close(&(self->mySource)); annoStreamerFree(pSSelf); } -struct annoStreamer *annoStreamDbPslPlusNew(struct annoAssembly *aa, char *gpTable, int maxOutRows) +static struct asColumn *asColumnClone(struct asColumn *colIn) +/* Return a full clone of colIn, or NULL if colIn is NULL. 
*/ +{ +if (colIn == NULL) + return NULL; +if (colIn->obType != NULL || colIn->index != NULL) + errAbort("asColumnClone: support for obType and index not implemented"); +struct asColumn *colOut; +AllocVar(colOut); +colOut->name = cloneString(colIn->name); +colOut->comment = cloneString(colIn->comment); +colOut->lowType = colIn->lowType; // static struct in asParse.c +colOut->obName = cloneString(colIn->obName); +colOut->fixedSize = colIn->fixedSize; +colOut->linkedSizeName = cloneString(colIn->linkedSizeName); +colOut->linkedSize = asColumnClone(colIn->linkedSize); +colOut->isSizeLink = colIn->isSizeLink; +colOut->isList = colIn->isList; +colOut->isArray = colIn->isArray; +colOut->autoIncrement = colIn->autoIncrement; +colOut->values = slNameCloneList(colIn->values); +return colOut; +} + +static void asObjAppendExtraColumns(struct asObject *asObjTarget, struct asObject *asObjSource) +/* If asObjSource has more columns than asObjTarget then clone and append those additional columns + * to asObjTarget. */ +{ +int tColCount = slCount(asObjTarget->columnList); +int sColCount = slCount(asObjSource->columnList); +if (tColCount < 1) + errAbort("asObjAppendExtraColumns: support for empty target columnList not implemented"); +if (sColCount > tColCount) + { + struct asColumn *tCol = asObjTarget->columnList, *sCol = asObjSource->columnList; + int i; + for (i = 0; i < tColCount-1; i++) + { + tCol = tCol->next; + sCol = sCol->next; + } + while (sCol->next != NULL) + { + tCol->next = asColumnClone(sCol->next); + tCol = tCol->next; + sCol = sCol->next; + } + } +} + +struct annoStreamer *annoStreamDbPslPlusNew(struct annoAssembly *aa, char *gpTable, int maxOutRows, + struct jsonElement *extraConfig) /* Create an annoStreamer (subclass) object that streams PSL, CDS and seqFile info. * gpTable is a genePred table that has associated PSL, CDS and sequence info * (i.e. refGene, ncbiRefSeq, ncbiRefSeqCurated or ncbiRefSeqPredicted). 
*/ { char *pslTable = NULL, *configJson = NULL; if (sameString("refGene", gpTable)) { pslTable = "refSeqAli"; configJson = refSeqAliConfigJson; } else if (startsWith("ncbiRefSeq", gpTable)) { pslTable = "ncbiRefSeqPsl"; struct dyString *dy = dyStringCreate(ncbiRefSeqConfigJsonFormat, aa->name, gpTable); configJson = dyStringCannibalize(&dy); } else errAbort("annoStreamDbPslPlusNew: unrecognized table \"%s\"", gpTable); struct annoStreamDbPslPlus *self; AllocVar(self); -struct annoStreamer *streamer = &(self->streamer); +// Get internal streamer for joining PSL with other tables. +struct jsonElement *config = jsonParse(configJson); +jsonObjectMerge(config, extraConfig); +self->mySource = annoStreamDbNew(aa->name, pslTable, aa, maxOutRows, config); +struct asObject *asObj = annoStreamDbPslPlusAsObj(); +if (extraConfig) + asObjAppendExtraColumns(asObj, self->mySource->asObj); // Set up external streamer interface -annoStreamerInit(streamer, aa, annoStreamDbPslPlusAsObj(), pslTable); +struct annoStreamer *streamer = &(self->streamer); +annoStreamerInit(streamer, aa, asObj, pslTable); streamer->rowType = arWords; self->gpTable = cloneString(gpTable); -// Get internal streamer for joining PSL with other tables. -struct jsonElement *configEl = jsonParse(configJson); -self->mySource = annoStreamDbNew(aa->name, pslTable, aa, maxOutRows, configEl); // Override methods that need to pass through to internal source: streamer->setRegion = asdppSetRegion; streamer->nextRow = asdppNextRow; streamer->close = asdppClose; return (struct annoStreamer *)self; }