dd285f81141e45dabf1caf0f90723810f2c6f8a1 angie Fri Jan 12 14:50:04 2018 -0800 Use the new annoStreamDb params rightJoinDtf and rightJoinMainField to restrict ncbiRefSeqPsl rows to only those that appear in the selected genePred table (e.g. ncbiRefSeqCurated). diff --git src/hg/lib/annoStreamDbPslPlus.c src/hg/lib/annoStreamDbPslPlus.c index 2e633e4..1ce1dba 100644 --- src/hg/lib/annoStreamDbPslPlus.c +++ src/hg/lib/annoStreamDbPslPlus.c @@ -47,31 +47,32 @@ char *gpTable; // Associated genePred (refGene, ncbiRefSeqCurated etc) struct annoStreamer *mySource; // Internal source of PSL+CDS+seq info }; // select p.*, c.cds, l.protAcc, l.name, e.path, s.file_offset, s.file_size // from (((ncbiRefSeqPsl p join ncbiRefSeqCurated n on p.qName = n.name) // left join ncbiRefSeqCds c on p.qName = c.id) // join ncbiRefSeqLink l on p.qName = l.mrnaAcc) // left join (seqNcbiRefSeq s left join extNcbiRefSeq e on s.extFile = e.id) on p.qName = s.acc // where p.tName = "chr1" // order by p.tName, p.tStart // limit 5; static char *ncbiRefSeqConfigJsonFormat = "{ \"naForMissing\": false," - " \"rightJoinTable\": \"%s\"," + " \"rightJoinDtf\": \"%s.%s.name\"," + " \"rightJoinMainField\": \"qName\"," " \"relatedTables\": [ { \"table\": \"ncbiRefSeqCds\"," " \"fields\": [\"cds\"] }," " { \"table\": \"ncbiRefSeqLink\"," " \"fields\": [\"protAcc\", \"name\"] }," " { \"table\": \"extNcbiRefSeq\"," " \"fields\": [\"path\"] }," " { \"table\": \"seqNcbiRefSeq\"," " \"fields\": [\"file_offset\", \"file_size\"] } ] }"; //select p.*,c.name,l.protAcc,l.name,e.path,s.file_offset,s.file_size, i.version //from refSeqAli p // join (hgFixed.gbCdnaInfo i // left join hgFixed.cds c on i.cds = c.id) on i.acc = p.qName // left join (hgFixed.gbSeq s // join hgFixed.gbExtFile e on e.id = s.gbExtFile) on s.acc = p.qName @@ -150,31 +151,31 @@ struct annoStreamer *annoStreamDbPslPlusNew(struct annoAssembly *aa, char *gpTable, int maxOutRows) /* Create an annoStreamer (subclass) object that streams PSL, CDS and 
seqFile info. * gpTable is a genePred table that has associated PSL, CDS and sequence info * (i.e. refGene, ncbiRefSeq, ncbiRefSeqCurated or ncbiRefSeqPredicted). */ { char *pslTable = NULL, *configJson = NULL; if (sameString("refGene", gpTable)) { pslTable = "refSeqAli"; configJson = refSeqAliConfigJson; } else if (startsWith("ncbiRefSeq", gpTable)) { pslTable = "ncbiRefSeqPsl"; - struct dyString *dy = dyStringCreate(ncbiRefSeqConfigJsonFormat, gpTable); + struct dyString *dy = dyStringCreate(ncbiRefSeqConfigJsonFormat, aa->name, gpTable); configJson = dyStringCannibalize(&dy); } else errAbort("annoStreamDbPslPlusNew: unrecognized table \"%s\"", gpTable); struct annoStreamDbPslPlus *self; AllocVar(self); struct annoStreamer *streamer = &(self->streamer); // Set up external streamer interface annoStreamerInit(streamer, aa, annoStreamDbPslPlusAsObj(), pslTable); streamer->rowType = arWords; self->gpTable = cloneString(gpTable); // Get internal streamer for joining PSL with other tables. struct jsonElement *configEl = jsonParse(configJson); self->mySource = annoStreamDbNew(aa->name, pslTable, aa, maxOutRows, configEl); // Override methods that need to pass through to internal source: