869708731727a455813ab88e67d46be978ab77b0 angie Fri Jan 12 14:48:37 2018 -0800 Added new config parameters rightJoinDtf and rightJoinMainField to give the option of limiting rows to those that join with some other table. The motivation for this is wanting to get only the rows of ncbiRefSeqPsl that join with ncbiRefSeqCurated. diff --git src/hg/lib/annoStreamDb.c src/hg/lib/annoStreamDb.c index f7a2dcc..eb62ee3 100644 --- src/hg/lib/annoStreamDb.c +++ src/hg/lib/annoStreamDb.c @@ -55,30 +55,32 @@ boolean needQuery; // TRUE when we haven't yet queried, or need to query again boolean doNextChunk; // TRUE if rowBuf ends before end of chrom/region uint nextChunkStart; // Start coord for next chunk of rows to query // Info for joining in related tables/fields struct joinerDtf *mainTableDtfList; // Fields from the main table to include in output struct joinerDtf *relatedDtfList; // Fields from related tables to include in output struct joiner *joiner; // Parsed all.joiner schema struct joinMixer *joinMixer; // Plan for joining related tables using sql and/or hash // (NULL if no joining is necessary) uint sqlRowSize; // Number of columns from sql query (may include related) uint bigRowSize; // Number of columns from sql + joinMixer->hashJoins boolean hasLeftJoin; // If we have to use 'left join' we'll have to 'order by'. boolean naForMissing; // If true, insert "n/a" for missing related table values // to match hgTables. + struct joinerDtf *rightJoinDtf; // If non-null, join with this table to limit output rows + char *rightJoinMainField; // Field of main table corresponding to rightJoinDtf struct rowBuf // Temporary storage for rows from chunked query { struct lm *lm; // storage for rows char ***buf; // array of pointers to rows int size; // number of rows int ix; // offset in buffer, [0..size] } rowBuf; char **(*nextRowRaw)(struct annoStreamDb *self); // Depending on query style, use either sqlNextRow or temporary row storage to get next row. // This may return NULL but set self->needQuery; asdNextRow watches for that. void (*doQuery)(struct annoStreamDb *self, char *minChrom, uint minEnd); @@ -270,30 +272,38 @@ } static boolean appendTableList(struct annoStreamDb *self, struct dyString *query) /* Append SQL table list to query, including tables used for output, filtering and joining. */ { boolean hasLeftJoin = FALSE; if (self->joinMixer == NULL || self->joinMixer->sqlRouteList == NULL) appendOneTable(self, NULL, query); else { // Use both a and b of the first pair and only b of each subsequent pair appendOneTable(self, self->joinMixer->sqlRouteList->a, query); appendJoin(self, self->joinMixer->sqlRouteList, query); hasLeftJoin = TRUE; } +if (self->rightJoinDtf) + { + dyStringAppend(query, " join "); + appendOneTable(self, self->rightJoinDtf, query); + char rjField[PATH_LEN]; + joinerDtfToSqlFieldString(self->rightJoinDtf, self->db, rjField, sizeof(rjField)); + dyStringPrintf(query, " on %s = %s.%s", rjField, self->table, self->rightJoinMainField); + } return hasLeftJoin; } // libify? static struct joinerDtf *joinerDtfCloneList(struct joinerDtf *listIn) /* Return a list with cloned items of listIn. */ { struct joinerDtf *listOut = NULL, *item; for (item = listIn; item != NULL; item = item->next) slAddHead(&listOut, joinerDtfClone(item)); slReverse(&listOut); return listOut; } static char *joinerFilePath() @@ -1194,30 +1204,45 @@ struct jsonElement *fieldEl = fieldRef->val; char *tfField = jsonStringVal(fieldEl, "field"); slAddHead(&self->relatedDtfList, joinerDtfNew(tfDb, tfTable, tfField)); } } } if (self->relatedDtfList) { slReverse(&self->relatedDtfList); asObj = asdAutoSqlFromTableFields(self, asObj); } } struct jsonElement *naForMissingEl = hashFindVal(config, "naForMissing"); if (naForMissingEl != NULL) self->naForMissing = jsonBooleanVal(naForMissingEl, "naForMissing"); + struct jsonElement *rightJoinDtfEl = hashFindVal(config, "rightJoinDtf"); + if (rightJoinDtfEl != NULL) + { + char *rjd = jsonStringVal(rightJoinDtfEl, "rightJoinDtf"); + if (isNotEmpty(rjd)) + { + self->rightJoinDtf = joinerDtfFromDottedTriple(rjd); + struct jsonElement *rjMainFieldEl = hashFindVal(config, "rightJoinMainField"); + if (! rjMainFieldEl) + errAbort("annoStreamDb (%s): rightJoinMainField must be provided " + "along with rightJoinDtf (%s)", + self->table, rjd); + self->rightJoinMainField = jsonStringVal(rjMainFieldEl, "rightJoinMainField"); + } + } } return asObj; } static char *sqlExplain(struct sqlConnection *conn, char *query) /* For now, just turn the values back into a multi-line "#"-comment string. */ { char *trimmedQuery = query; if (startsWith(NOSQLINJ, trimmedQuery)) trimmedQuery = trimmedQuery + strlen(NOSQLINJ); struct dyString *dy = dyStringCreate("# Output of 'explain %s':\n", trimmedQuery); char explainQuery[PATH_LEN*8]; safef(explainQuery, sizeof(explainQuery), NOSQLINJ "explain %s", trimmedQuery); struct sqlResult *sr = sqlGetResult(conn, explainQuery); struct slName *fieldList = sqlResultFieldList(sr);