d8be7e6e88c0712589dac72c1f023c84028a0d1c angie Wed Feb 7 11:49:17 2018 -0800 To prevent joinerRoute from searching over all databases for routes from some genome db to hgFixed metadata tables, add a new optional arg to joinerRelate to apply joinerExclusiveCheck to the genome db in addition to the current table's database. This reduces the time to find the route hg19.refSeqAli --> hgFixed.gbCdnaInfo --> hgFixed.cds from ~1.5s to ~0.1s. It also keeps the related-table selection in hgTables and hgIntegrator from showing >5000 related tables when starting with a genome db table and adding hgFixed.gbCdnaInfo. refs #20949 diff --git src/hg/lib/joiner.c src/hg/lib/joiner.c index d3dae0c..9c5ed55 100644 --- src/hg/lib/joiner.c +++ src/hg/lib/joiner.c @@ -1246,59 +1246,62 @@ * exclusivity hash. Return TRUE if join can happen between * these two databases. */ { struct hash *exHash; if (sameString(aDatabase, bDatabase)) return TRUE; for (exHash = joiner->exclusiveSets; exHash != NULL; exHash = exHash->next) { if (hashLookup(exHash, aDatabase) && hashLookup(exHash, bDatabase)) return FALSE; } return TRUE; } struct joinerPair *joinerRelate(struct joiner *joiner, char *database, - char *table) + char *table, char *exclusiveDb) /* Get list of all ways to link table in given database to other tables, - * possibly in other databases. */ + * possibly in other databases. + * If exclusiveDb is not NULL then apply joinerExclusiveCheck to it in addition to database. */ { struct joinerSet *js, *jsChain; struct joinerField *jf, *jfBase; struct joinerPair *jpList = NULL, *jp; struct slRef *chainList, *chainEl; /* Return list of self, children, and parents (but not siblings) */ #ifdef SCREWS_UP_SPLITS if (!tableExists(database, table, NULL)) errAbort("%s.%s - table doesn't exist", database, table); #endif for (js = joiner->jsList; js != NULL; js = js->next) { if ((jfBase = joinerSetIncludesTable(js, database, table)) != NULL) { chainList = joinerSetInheritanceChain(js); for (chainEl = chainList; chainEl != NULL; chainEl = chainEl->next) { jsChain = chainEl->val; for (jf = jsChain->fieldList; jf != NULL; jf = jf->next) { struct slName *db; for (db = jf->dbList; db != NULL; db = db->next) { - if (joinerExclusiveCheck(joiner, database, db->name)) + if (joinerExclusiveCheck(joiner, database, db->name) && + (isEmpty(exclusiveDb) || + joinerExclusiveCheck(joiner, exclusiveDb, db->name))) { if (!sameString(database, db->name) || !sameString(table, jf->table)) { if (tableExists(db->name, jf->table, jf->splitPrefix)) { jp = joinerToField(database, jfBase, db->name, jf, jsChain); slAddHead(&jpList, jp); } } } } } @@ -1333,77 +1336,77 @@ { int i; for (i=0; i<count; ++i) { if (identifier == array[i]) { return TRUE; } } return FALSE; } static struct joinerSet *identifierStack[4]; /* Help keep search from looping. */ static struct joinerPair *rFindRoute(struct joiner *joiner, - struct joinerDtf *a, struct joinerDtf *b, int level, int maxLevel) + struct joinerDtf *a, struct joinerDtf *b, char *exclusiveDb, int level, int maxLevel) /* Find a path that connects the two fields if possible. Do limited * recursion. */ { struct joinerPair *jpList, *jp; struct joinerPair *path = NULL; -jpList = joinerRelate(joiner, a->database, a->table); +jpList = joinerRelate(joiner, a->database, a->table, exclusiveDb); for (jp = jpList; jp != NULL; jp = jp->next) { if (joinerDtfSameTable(jp->b, b)) { path = joinerPairClone(jp); break; } } if (path == NULL && level < maxLevel) { for (jp = jpList; jp != NULL; jp = jp->next) { identifierStack[level] = jp->identifier; if (!identifierInArray(jp->identifier, identifierStack, level)) { identifierStack[level] = jp->identifier; - path = rFindRoute(joiner, jp->b, b, level+1, maxLevel); + path = rFindRoute(joiner, jp->b, b, exclusiveDb, level+1, maxLevel); if (path != NULL) { struct joinerPair *jpClone = joinerPairClone(jp); slAddHead(&path, jpClone); break; } } } } joinerPairFreeList(&jpList); return path; } struct joinerPair *joinerFindRoute(struct joiner *joiner, struct joinerDtf *a, struct joinerDtf *b) /* Find route between a and b. Note the field element of a and b * are unused. */ { int i; struct joinerPair *jpList = NULL; for (i=1; i<ArraySize(identifierStack); ++i) { - jpList = rFindRoute(joiner, a, b, 0, i); + jpList = rFindRoute(joiner, a, b, a->database, 0, i); if (jpList != NULL) break; } return jpList; } boolean joinerDtfAllSameTable(struct joinerDtf *fieldList) /* Return TRUE if all joinerPairs refer to same table. */ { struct joinerDtf *first = fieldList, *field; if (first == NULL) return TRUE; for (field = first->next; field != NULL; field = field->next) if (!joinerDtfSameTable(first, field))