78fd01e7d81e12b132a1a9d2238015c67e25475f angie Thu Sep 28 10:02:48 2017 -0700 Added option to structure joinerPair routeList as a tree using joinerPairListToTree() and new child pointer, so that if one pair's b is the same table as another pair's a, the second pair becomes the child of the first pair. This enables us to generate more efficient SQL joins. diff --git src/hg/lib/joiner.c src/hg/lib/joiner.c index 3231c55..d3dae0c 100644 --- src/hg/lib/joiner.c +++ src/hg/lib/joiner.c @@ -1054,64 +1054,81 @@ } void joinerDtfFreeList(struct joinerDtf **pList) /* Free up memory associated with list of joinerDtfs. */ { struct joinerDtf *el, *next; for (el = *pList; el != NULL; el = next) { next = el->next; joinerDtfFree(&el); } *pList = NULL; } +/* Forward declaration for mutual recursion: */ +void joinerPairFreeList(struct joinerPair **pList); +/* Free up memory associated with list of joinerPairs. */ + void joinerPairFree(struct joinerPair **pJp) /* Free up memory associated with joiner pair. */ { struct joinerPair *jp = *pJp; if (jp != NULL) { joinerDtfFree(&jp->a); joinerDtfFree(&jp->b); + if (jp->child != NULL) + joinerPairFreeList(&jp->child); freez(pJp); } } void joinerPairFreeList(struct joinerPair **pList) /* Free up memory associated with list of joinerPairs. */ { struct joinerPair *el, *next; for (el = *pList; el != NULL; el = next) { next = el->next; joinerPairFree(&el); } *pList = NULL; } -void joinerPairDump(struct joinerPair *jpList, FILE *out) +static void rJoinerPairDump(struct joinerPair *jpList, FILE *out, int level) /* Write out joiner pair list to file mostly for debugging. */ { struct joinerPair *jp; for (jp = jpList; jp != NULL; jp = jp->next) + { + fprintf(out, "%*s", level*2, ""); fprintf(out, "%s.%s.%s (via %s) %s.%s.%s\n", jp->a->database, jp->a->table, jp->a->field, jp->identifier->name, jp->b->database, jp->b->table, jp->b->field); + if (jp->child != NULL) + rJoinerPairDump(jp->child, out, level+1); + } +} + +void joinerPairDump(struct joinerPair *jpList, FILE *out) +/* Write out joiner pair list to file mostly for debugging. */ +{ +rJoinerPairDump(jpList, out, 0); } static struct joinerField *joinerSetIncludesTable(struct joinerSet *js, char *database, char *table) /* If joiner set includes database and table, return the associated field. */ { struct joinerField *jf; for (jf = js->fieldList; jf != NULL; jf = jf->next) { if (sameString(table, jf->table) && slNameInList(jf->dbList, database)) return jf; } return NULL; } @@ -1443,30 +1460,70 @@ if (first->next == NULL) return NULL; for (dtf = first->next; dtf != NULL; dtf = dtf->next) { if (!inRoute(fullRoute, dtf) && !joinerDtfSameTable(first, dtf)) { pairRoute = joinerFindRoute(joiner, first, dtf); fullRoute = slCat(fullRoute, pairRoute); } } joinerPairRemoveDupes(&fullRoute); return fullRoute; } +static boolean joinerDtfSameDt(struct joinerDtf *a, struct joinerDtf *b) +/* Return true if a and b have same database and table (ignore field). */ +{ +return (a == b || + (a && b && sameString(a->database, b->database) && sameString(a->table, b->table))); +} + +void joinerPairListToTree(struct joinerPair *routeList) +/* Convert a linear routeList (only next used, not child) to a tree structure in which + * pairs like {X,Y} and {Y,Z} (first b == second a) are connected using child instead of next. */ +{ +if (routeList == NULL) + return; +struct hash *bHash = hashNew(0); +struct joinerPair *pair; +for (pair = routeList; pair != NULL; pair = pair->next) + { + if (pair->child) + errAbort("joinerPairListToTree: found non-NULL child. Call this only on linear list."); + char bdt[4096]; + safef(bdt, sizeof(bdt), "%s.%s", pair->b->database, pair->b->table); + hashAdd(bHash, bdt, pair); + } +for (pair = routeList; pair != NULL; ) + { + struct joinerPair *nextPair = pair->next; + if (nextPair && !joinerDtfSameDt(pair->a, nextPair->a)) + { + char nextAdt[4096]; + safef(nextAdt, sizeof(nextAdt), "%s.%s", nextPair->a->database, nextPair->a->table); + struct joinerPair *parent = hashMustFindVal(bHash, nextAdt); + pair->next = nextPair->next; + slAddTail(&parent->child, nextPair); + } + else + pair = pair->next; + } +hashFree(&bHash); +} + char *joinerFieldChopKey(struct joinerField *jf, char *key) /* If jf includes chopBefore and/or chopAfter, apply those to key and return a starting * offset in key, which may be modified. */ { struct slName *n; for (n = jf->chopBefore; n != NULL; n = n->next) { if (startsWith(n->name, key)) { key += strlen(n->name); break; } } for (n = jf->chopAfter; n!=NULL; n = n->next) {