c54488382199f754b2226109da2995eb16dff19b angie Thu Sep 28 10:06:31 2017 -0700 Call joinerPairListToTree on sqlRouteList and use tree structure to make more efficient joins. Also use 'full' keyword to do joins instead of left joins (more efficient). diff --git src/hg/lib/annoStreamDb.c src/hg/lib/annoStreamDb.c index 67bdc9d..f7a2dcc 100644 --- src/hg/lib/annoStreamDb.c +++ src/hg/lib/annoStreamDb.c @@ -226,56 +226,74 @@ joinerDtfToSqlTableString(dt, self->db, dbTable, sizeof(dbTable)); dyStringAppend(query, dbTable); ignoreEndIndexIfNecessary(self, dbTable, query); } INLINE void splitOrDtfToSqlField(struct annoStreamDb *self, struct joinerDtf *dtf, char *fieldName, size_t fieldNameSize) /* Write [db].table.field into fieldName, where table may be split. */ { if (useSplitTable(self, dtf)) safef(fieldName, fieldNameSize, "%s.%s", self->table, dtf->field); else joinerDtfToSqlFieldString(dtf, self->db, fieldName, fieldNameSize); } -static boolean appendTableList(struct annoStreamDb *self, struct dyString *query) -/* Append SQL table list to query, including tables used for output, filtering and joining. */ -{ -boolean hasLeftJoin = FALSE; -if (self->joinMixer == NULL || self->joinMixer->sqlRouteList == NULL) - appendOneTable(self, NULL, query); -else +static void appendJoin(struct annoStreamDb *self, struct joinerPair *routeList, + struct dyString *query) +/* Append join statement(s) for a possibly tree-structured routeList. */ { - // Use both a and b of the first pair and only b of each subsequent pair - appendOneTable(self, self->joinMixer->sqlRouteList->a, query); struct joinerPair *jp; - for (jp = self->joinMixer->sqlRouteList; jp != NULL; jp = jp->next) +for (jp = routeList; jp != NULL; jp = jp->next) + { + struct joinerField *jfB = joinerSetFindField(jp->identifier, jp->b); + if (! jfB->full) + dyStringAppend(query, " left"); + dyStringAppend(query, " join "); + if (jp->child) { - dyStringAppend(query, " left join "); + dyStringAppendC(query, '('); + appendOneTable(self, jp->child->a, query); + appendJoin(self, jp->child, query); + dyStringAppendC(query, ')'); + } + else appendOneTable(self, jp->b, query); char fieldA[PATH_LEN], fieldB[PATH_LEN]; splitOrDtfToSqlField(self, jp->a, fieldA, sizeof(fieldA)); splitOrDtfToSqlField(self, jp->b, fieldB, sizeof(fieldB)); struct joinerField *jfA = joinerSetFindField(jp->identifier, jp->a); if (sameOk(jfA->separator, ",")) dyStringPrintf(query, " on find_in_set(%s, %s)", fieldB, fieldA); else dyStringPrintf(query, " on %s = %s", fieldA, fieldB); - hasLeftJoin = TRUE; } } + +static boolean appendTableList(struct annoStreamDb *self, struct dyString *query) +/* Append SQL table list to query, including tables used for output, filtering and joining. */ +{ +boolean hasLeftJoin = FALSE; +if (self->joinMixer == NULL || self->joinMixer->sqlRouteList == NULL) + appendOneTable(self, NULL, query); +else + { + // Use both a and b of the first pair and only b of each subsequent pair + appendOneTable(self, self->joinMixer->sqlRouteList->a, query); + appendJoin(self, self->joinMixer->sqlRouteList, query); + hasLeftJoin = TRUE; + } return hasLeftJoin; } // libify? static struct joinerDtf *joinerDtfCloneList(struct joinerDtf *listIn) /* Return a list with cloned items of listIn. */ { struct joinerDtf *listOut = NULL, *item; for (item = listIn; item != NULL; item = item->next) slAddHead(&listOut, joinerDtfClone(item)); slReverse(&listOut); return listOut; } static char *joinerFilePath() @@ -289,30 +307,31 @@ } static void asdInitBaselineQuery(struct annoStreamDb *self) /* Build a dy SQL query with no position constraints (select ... from ...) * possibly including joins and filters if specified (where ...). */ { if (self->relatedDtfList) { struct joinerDtf *outputFieldList = slCat(joinerDtfCloneList(self->mainTableDtfList), joinerDtfCloneList(self->relatedDtfList)); if (self->joiner == NULL) self->joiner = joinerRead(joinerFilePath()); int expectedRows = sqlRowCount(self->conn, self->table); self->joinMixer = joinMixerNew(self->joiner, self->db, self->table, outputFieldList, expectedRows, self->naForMissing); + joinerPairListToTree(self->joinMixer->sqlRouteList); self->sqlRowSize = slCount(self->joinMixer->sqlFieldList); self->bigRowSize = self->joinMixer->bigRowSize; joinerDtfFreeList(&outputFieldList); } else { self->sqlRowSize = slCount(self->mainTableDtfList); self->bigRowSize = self->sqlRowSize; } } static void asdUpdateBaselineQuery(struct annoStreamDb *self) /* Build a dy SQL query with no position constraints (select ... from ...) * possibly including joins and filters if specified (where ...), using the current splitTable. */ {