4801840ccde60df9698ce24f249392afac8fc55d braney Thu Mar 24 13:23:07 2022 -0700 fix problem getting table dumps from hgTables on custom tracks on assembly hubs diff --git src/hg/lib/joiner.c src/hg/lib/joiner.c index 21d0e30..d1915d5 100644 --- src/hg/lib/joiner.c +++ src/hg/lib/joiner.c @@ -1,1582 +1,1584 @@ /* Copyright (C) 2014 The Regents of the University of California * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ /* joiner - information about what fields in what tables * in what databases can fruitfully be related together * or joined. Another way of looking at it is this * defines identifiers shared across tables. This also * defines what tables depend on what other tables * through dependency attributes and statements. * * The main routines you'll want to use here are * joinerRead - to read in a joiner file * joinerRelate - to get list of possible joins given a table. */ #include "common.h" #include "linefile.h" #include "hash.h" #include "dystring.h" #include "obscure.h" #include "jksql.h" #include "hdb.h" #include "joiner.h" static void joinerFieldFree(struct joinerField **pJf) /* Free up memory associated with joinerField. */ { struct joinerField *jf = *pJf; if (jf != NULL) { slFreeList(&jf->dbList); freeMem(jf->table); freeMem(jf->field); slFreeList(&jf->chopBefore); slFreeList(&jf->chopAfter); freeMem(jf->separator); freeMem(jf->splitPrefix); slFreeList(&jf->exclude); freez(pJf); } } static void joinerFieldFreeList(struct joinerField **pList) /* Free up memory associated with list of joinerFields. */ { struct joinerField *el, *next; for (el = *pList; el != NULL; el = next) { next = el->next; joinerFieldFree(&el); } *pList = NULL; } static void joinerSetFree(struct joinerSet **pJs) /* Free up memory associated with joinerSet. */ { struct joinerSet *js = *pJs; if (js != NULL) { freeMem(js->name); joinerFieldFreeList(&js->fieldList); freeMem(js->typeOf); slFreeList(&js->children); freeMem(js->external); freeMem(js->description); freez(pJs); } } static void joinerSetFreeList(struct joinerSet **pList) /* Free up memory associated with list of joinerSets. */ { struct joinerSet *el, *next; for (el = *pList; el != NULL; el = next) { next = el->next; joinerSetFree(&el); } *pList = NULL; } static void joinerTableFree(struct joinerTable **pTable) /* Free up memory associated with joinerTable. */ { struct joinerTable *table = *pTable; if (table != NULL) { slFreeList(&table->dbList); freeMem(table->table); freez(pTable); } } static void joinerTableFreeList(struct joinerTable **pList) /* Free up memory associated with list of joinerTables. */ { struct joinerTable *el, *next; for (el = *pList; el != NULL; el = next) { next = el->next; joinerTableFree(&el); } *pList = NULL; } static void joinerDependencyFree(struct joinerDependency **pDep) /* Free up memory associated with joinerDependency. */ { struct joinerDependency *dep = *pDep; if (dep != NULL) { joinerTableFree(&dep->table); joinerTableFreeList(&dep->dependsOnList); freez(pDep); } } static void joinerDependencyFreeList(struct joinerDependency **pList) /* Free up memory associated with list of joinerDependencys. */ { struct joinerDependency *el, *next; for (el = *pList; el != NULL; el = next) { next = el->next; joinerDependencyFree(&el); } *pList = NULL; } static void joinerIgnoreFree(struct joinerIgnore **pIg) /* Free up memory associated with joinerIgnore. */ { struct joinerIgnore *ig = *pIg; if (ig != NULL) { slFreeList(&ig->dbList); slFreeList(&ig->tableList); freez(pIg); } } static void joinerIgnoreFreeList(struct joinerIgnore **pList) /* Free up memory associated with list of joinerIgnores. */ { struct joinerIgnore *el, *next; for (el = *pList; el != NULL; el = next) { next = el->next; joinerIgnoreFree(&el); } *pList = NULL; } void joinerFree(struct joiner **pJoiner) /* Free up memory associated with joiner */ { struct joiner *joiner = *pJoiner; if (joiner != NULL) { freeMem(joiner->fileName); joinerSetFreeList(&joiner->jsList); freeHashAndVals(&joiner->symHash); hashFreeList(&joiner->exclusiveSets); freeHash(&joiner->databasesChecked); freeHash(&joiner->databasesIgnored); joinerDependencyFreeList(&joiner->dependencyList); joinerIgnoreFreeList(&joiner->tablesIgnored); freez(pJoiner); } } static boolean nextSubTok(struct lineFile *lf, char **pLine, char **retTok, int *retSize) /* Return next token for substitution purposes. Return FALSE * when no more left. */ { char *start, *s, c; s = start = *retTok = *pLine; c = *s; if (c == 0) return FALSE; if (isspace(c)) { while (isspace(*(++s))) ; } else if (isalnum(c)) { while (isalnum(*(++s))) ; } else if (c == '$') { if (s[1] == '{') { s += 1; for (;;) { c = *(++s); if (c == '}') { ++s; break; } if (c == 0) /* Arguably could warn/abort here. */ { errAbort("End of line in ${var} line %d of %s", lf->lineIx, lf->fileName); } } } else { while (isalnum(*(++s))) ; } } else { ++s; } *pLine = s; *retSize = s - start; return TRUE; } static char *subThroughHash(struct lineFile *lf, struct hash *hash, struct dyString *dy, char *s) /* Return string that has any variables in string-valued hash looked up. * The result is put in the passed in dyString, and also returned. */ { char *tok; int size; dyStringClear(dy); while (nextSubTok(lf, &s, &tok, &size)) { if (tok[0] == '$') { char tokBuf[256], *val; /* Extract 'var' out of '$var' or '${var}' into tokBuf*/ tok += 1; size -= 1; if (tok[0] == '{') { tok += 1; size -= 2; } if (size >= sizeof(tokBuf)) errAbort("Variable name too long line %d of %s", lf->lineIx, lf->fileName); memcpy(tokBuf, tok, size); tokBuf[size] = 0; /* Do substitution. */ val = hashFindVal(hash, tokBuf); if (val == NULL) errAbort("$%s not defined line %d of %s", tokBuf, lf->lineIx, lf->fileName); dyStringAppend(dy, val); } else { dyStringAppendN(dy, tok, size); } } return dy->string; } static char *nextSubbedLine(struct lineFile *lf, struct hash *hash, struct dyString *dy) /* Return next line after string substitutions. This removes comments too. */ { char *line, *s; for (;;) { if (!lineFileNext(lf, &line, NULL)) return NULL; s = strchr(line, '#'); if (s == NULL) /* No sharp, it's a real line. */ break; else { if (skipLeadingSpaces(line) != s) { *s = 0; /* Terminate line at sharp. */ break; } /* Eat line if starts with sharp */ } } return subThroughHash(lf, hash, dy, line); } static void unspecifiedVar(struct lineFile *lf, char *var) /* Complain about variable that needs to be followed by = but isn't */ { errAbort("%s must be followed by = line %d of %s", var, lf->lineIx, lf->fileName); } static char *cloneSpecified(struct lineFile *lf, char *var, char *val) /* Make sure that val exists, and return clone of it. */ { if (val == NULL) unspecifiedVar(lf, var); return cloneString(val); } static struct hash *makeNotHash(struct slName *list) /* Return hash of elements in list if any that are * starting with '!' */ { struct slName *el; struct hash *hash = NULL; for (el = list; el != NULL; el = el->next) { if (el->name[0] == '!') { if (hash == NULL) hash = hashNew(8); hashAdd(hash, el->name+1, NULL); } } return hash; } static struct slName *parseDatabaseList(struct lineFile *lf, char *s) /* Parse out comma-separated list of databases, with * possible !db's. */ { struct slName *list, *el; struct hash *notHash; /* Get comma-separated list. */ list = slNameListFromComma(s); if (list == NULL) errAbort("Empty database name line %d of %s", lf->lineIx, lf->fileName); /* Remove !'s */ notHash = makeNotHash(list); if (notHash != NULL) { struct slName *newList = NULL, *next; for (el = list; el != NULL; el = next) { next = el->next; if (el->name[0] != '!' && !hashLookup(notHash, el->name)) { slAddHead(&newList, el); } else { freeMem(el); } } hashFree(¬Hash); slReverse(&newList); list = newList; } return list; } static struct joinerSet *parseIdentifierSet(struct lineFile *lf, char *line, struct hash *symHash, struct dyString *dyBuf) /* Parse out one joiner record - keep going until blank line or * end of file. */ { struct joinerSet *js; struct joinerField *jf; char *word, *e; char *parts[3]; int partCount; /* Parse through first line - first word is name. */ word = nextWord(&line); if (word == NULL || strchr(word, '=') != NULL) errAbort("joiner without name line %d of %s\n", lf->lineIx, lf->fileName); AllocVar(js); js->name = cloneString(word); js->lineIx = lf->lineIx; while ((word = nextWord(&line)) != NULL) { char *e = strchr(word, '='); if (e != NULL) *e++ = 0; if (sameString(word, "typeOf")) { js->typeOf = cloneSpecified(lf, word, e); } else if (sameString(word, "external")) { js->external = cloneSpecified(lf, word, e); } else if (sameString(word, "fuzzy")) { js->isFuzzy = TRUE; } else if (sameString(word, "dependency")) { js->isDependency = TRUE; } else { errAbort("Unknown attribute %s line %d of %s", word, lf->lineIx, lf->fileName); } } /* Parse second line, make sure it is quoted, and save as description. */ line = nextSubbedLine(lf, symHash, dyBuf); if (line == NULL) lineFileUnexpectedEnd(lf); line = trimSpaces(line); if (line[0] != '"' || lastChar(line) != '"') errAbort("Expecting quoted line, line %d of %s\n", lf->lineIx, lf->fileName); line[strlen(line)-1] = 0; js->description = cloneString(line+1); /* Go through subsequent lines. */ while ((line = nextSubbedLine(lf, symHash, dyBuf)) != NULL) { /* Keep grabbing until we get a blank line. */ line = skipLeadingSpaces(line); if (line[0] == 0) break; /* First word in line should be database.tabe.field. */ word = nextWord(&line); partCount = chopString(word, ".", parts, ArraySize(parts)); if (partCount != 3) errAbort("Expecting database.table.field line %d of %s", lf->lineIx, lf->fileName); /* Allocate struct and save table and field. */ AllocVar(jf); jf->lineIx = lf->lineIx; jf->table = cloneString(parts[1]); jf->field = cloneString(parts[2]); if (js->fieldList == NULL && !js->isFuzzy) { jf->isPrimary = TRUE; jf->unique = TRUE; jf->full = TRUE; } jf->minCheck = 1.0; slAddHead(&js->fieldList, jf); /* Database may be a comma-separated list. Parse it here. */ jf->dbList = parseDatabaseList(lf, parts[0]); /* Look for other fields in subsequent space-separated words. */ while ((word = nextWord(&line)) != NULL) { if ((e = strchr(word, '=')) != NULL) *e++ = 0; if (sameString("comma", word)) { jf->separator = cloneString(","); } else if (sameString("separator", word)) { jf->separator = cloneSpecified(lf, word, e); } else if (sameString("chopBefore", word)) { if (e == NULL) unspecifiedVar(lf, word); slNameStore(&jf->chopBefore, e); } else if (sameString("chopAfter", word)) { if (e == NULL) unspecifiedVar(lf, word); slNameStore(&jf->chopAfter, e); } else if (sameString("indexOf", word)) { jf->indexOf = TRUE; } else if (sameString("dupeOk", word)) { if (!jf->isPrimary) warn("dupeOk outsite primary key line %d of %s", lf->lineIx, lf->fileName); jf->unique = FALSE; } else if (sameString("minCheck", word)) { if (e == NULL) unspecifiedVar(lf, word); jf->minCheck = atof(e); } else if (sameString("unique", word)) { jf->unique = TRUE; } else if (sameString("full", word)) { jf->full = TRUE; } else if (sameString("splitPrefix", word)) { jf->splitPrefix = cloneSpecified(lf, word, e); } else if (sameString("splitSuffix", word)) { jf->splitSuffix = cloneSpecified(lf, word, e); } else if (sameString("exclude", word)) { if (e == NULL) unspecifiedVar(lf, word); slNameStore(&jf->exclude, e); } else { errAbort("Unrecognized attribute %s line %d of %s", word, lf->lineIx, lf->fileName); } } if (jf->indexOf && jf->separator == NULL) errAbort("indexOf without comma or separator line %d of %s", lf->lineIx, lf->fileName); if (jf->isPrimary && jf->separator != NULL) errAbort("Error line %d of %s\n" "Primary key can't be a list (comma or separator)." , lf->lineIx, lf->fileName); } slReverse(&js->fieldList); return js; } static struct joinerIgnore *parseTablesIgnored(struct lineFile *lf, char *line, struct hash *symHash, struct dyString *dyBuf) /* Parse out one tables ignored record - keep going until blank line or * end of file. */ { struct joinerIgnore *ig; struct slName *table; AllocVar(ig); ig->dbList = parseDatabaseList(lf, trimSpaces(line)); while ((line = nextSubbedLine(lf, symHash, dyBuf)) != NULL) { /* Keep grabbing until we get a blank line. */ line = skipLeadingSpaces(line); if (line[0] == 0) break; table = slNameNew(trimSpaces(line)); slAddHead(&ig->tableList, table); } slReverse(&ig->tableList); return ig; } static struct joinerTable *parseTableSpec(struct lineFile *lf, char *spec) /* Parse out table from spec. Spec is form db1,db2...,dbN.table. */ { struct joinerTable *table; char *dotPos = strchr(spec, '.'); if (dotPos == NULL) errAbort("Need . in table specification line %d of %s", lf->lineIx, lf->fileName); AllocVar(table); *dotPos++ = 0; table->table = cloneString(dotPos); table->dbList = parseDatabaseList(lf, spec); if (table->dbList == NULL) errAbort("Need at least one database before . in table spec line %d of %s", lf->lineIx, lf->fileName); return table; } static void dependencySyntaxErr(struct lineFile *lf) /* Explain dependency syntax and exit. */ { errAbort("Expecting at least two table specifiers after dependency line %d of %s", lf->lineIx, lf->fileName); } static struct joinerDependency *parseDependency(struct lineFile *lf, char *line) /* Parse out dependency - just a space separated list of table specs, with * the first one having a special meaning. */ { struct joinerDependency *dep; struct joinerTable *table; int count = 0; char *word; AllocVar(dep); dep->lineIx = lf->lineIx; word = nextWord(&line); if (word == NULL) dependencySyntaxErr(lf); dep->table = parseTableSpec(lf, word); while ((word = nextWord(&line)) != NULL) { table = parseTableSpec(lf, word); slAddHead(&dep->dependsOnList, table); ++count; } if (count < 1) dependencySyntaxErr(lf); slReverse(&dep->dependsOnList); return dep; } static struct joinerType *parseType(struct lineFile *lf, char *line, struct hash *symHash, struct dyString *dyBuf) /* Parse one type record. */ { struct joinerType *type; AllocVar(type); type->name = cloneString(trimSpaces(line)); while ((line = nextSubbedLine(lf, symHash, dyBuf)) != NULL) { struct joinerTable *table; line = trimSpaces(line); if (line[0] == 0) break; table = parseTableSpec(lf, line); slAddHead(&type->tableList, table); } slReverse(&type->tableList); return type; } static void addDatabasesToHash(struct hash *hash, char *s, struct lineFile *lf) /* Add contents of comma-separated list to hash. */ { struct slName *el, *list = parseDatabaseList(lf, trimSpaces(s)); for (el = list; el != NULL; el = el->next) hashAdd(hash, el->name, NULL); slFreeList(&list); } static struct joiner *joinerParsePassOne(char *fileName) /* Do first pass parsing of joiner file and return list of * joinerSets. */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line, *word; struct dyString *dyBuf = dyStringNew(0); struct joiner *joiner; struct joinerSet *js; AllocVar(joiner); joiner->fileName = cloneString(fileName); joiner->symHash = newHash(9); joiner->databasesChecked = newHash(8); joiner->databasesIgnored = newHash(8); while ((line = nextSubbedLine(lf, joiner->symHash, dyBuf)) != NULL) { if ((word = nextWord(&line)) != NULL) { if (sameString("set", word)) { char *var, *val; var = nextWord(&line); if (var == NULL) errAbort("set what line %d of %s", lf->lineIx, lf->fileName); val = trimSpaces(line); if (isEmpty(val)) errAbort("Set with no value line %d of %s", lf->lineIx, lf->fileName); hashAdd(joiner->symHash, var, cloneString(val)); } else if (sameString("identifier", word)) { js = parseIdentifierSet(lf, line, joiner->symHash, dyBuf); if (js != NULL) slAddHead(&joiner->jsList, js); } else if (sameString("exclusiveSet", word)) { struct hash *exHash = newHash(8); addDatabasesToHash(exHash, line, lf); slAddHead(&joiner->exclusiveSets, exHash); } else if (sameString("databasesChecked", word)) { addDatabasesToHash(joiner->databasesChecked, line, lf); } else if (sameString("databasesIgnored", word)) { addDatabasesToHash(joiner->databasesIgnored, line, lf); } else if (sameString("tablesIgnored", word)) { struct joinerIgnore *ig; ig = parseTablesIgnored(lf, line, joiner->symHash, dyBuf); slAddHead(&joiner->tablesIgnored, ig); } else if (sameString("dependency", word)) { struct joinerDependency *dep; dep = parseDependency(lf, line); slAddHead(&joiner->dependencyList, dep); } else if (sameString("type", word)) { struct joinerType *type; type = parseType(lf, line, joiner->symHash, dyBuf); slAddHead(&joiner->typeList, type); } else { errAbort("unrecognized '%s' line %d of %s", word, lf->lineIx, lf->fileName); } } } lineFileClose(&lf); dyStringFree(&dyBuf); slReverse(&joiner->jsList); slReverse(&joiner->tablesIgnored); slReverse(&joiner->typeList); return joiner; } static void joinerExpand(struct joiner *joiner) /* Expand joiners that have [] in them. */ { struct joinerSet *js, *nextJs, *newJs, *newList = NULL; for (js=joiner->jsList; js != NULL; js = nextJs) { char *startBracket, *endBracket; nextJs = js->next; if ((startBracket = strchr(js->name, '[')) != NULL) { char *dbStart,*dbEnd; char *dbCommaList; struct joinerField *jf, *newJf; struct dyString *dy = dyStringNew(0); endBracket = strchr(startBracket, ']'); if (endBracket == NULL) errAbort("[ without ] line %d of %s", js->lineIx, joiner->fileName); dbCommaList = cloneStringZ(startBracket+1, endBracket - startBracket - 1); dbStart = dbCommaList; while (dbStart != NULL) { /* Parse out comma-separated list. */ dbEnd = strchr(dbStart, ','); if (dbEnd != NULL) { *dbEnd++ = 0; if (dbEnd[0] == 0) dbEnd = NULL; } if (dbStart[0] == 0) errAbort("Empty element in comma separated list line %d of %s", js->lineIx, joiner->fileName); /* Make up name for new joiner. */ dyStringClear(dy); dyStringAppendN(dy, js->name, startBracket-js->name); dyStringAppend(dy, dbStart); dyStringAppend(dy, endBracket+1); /* Allocate new joiner and fill in most data elements. */ AllocVar(newJs); newJs->name = cloneString(dy->string); newJs->typeOf = cloneString(js->typeOf); newJs->external = cloneString(js->external); newJs->description = cloneString(js->description); newJs->isFuzzy = js->isFuzzy; newJs->lineIx = js->lineIx; newJs->isDependency = js->isDependency; /* Fill in new joiner fieldList */ for (jf = js->fieldList; jf != NULL; jf = jf->next) { char *bs = NULL, *be = NULL; /* Allocate vars and do basic fields. */ AllocVar(newJf); newJf->dbList = slNameCloneList(jf->dbList); newJf->field = cloneString(jf->field); newJf->chopBefore = slNameCloneList(jf->chopBefore); newJf->chopAfter = slNameCloneList(jf->chopBefore); newJf->separator = cloneString(jf->separator); newJf->indexOf = jf->indexOf; newJf->isPrimary = jf->isPrimary; newJf->unique = jf->unique; newJf->full = jf->full; newJf->minCheck = jf->minCheck; newJf->splitPrefix = cloneString(jf->splitPrefix); newJf->exclude = slNameCloneList(jf->exclude); /* Do substituted table field. */ if ((bs = strchr(jf->table, '[')) != NULL) be = strchr(bs, ']'); if (bs == NULL || be == NULL) errAbort("Missing [] in field '%s' line %d of %s", jf->table, jf->lineIx, joiner->fileName); dyStringClear(dy); dyStringAppendN(dy, jf->table, bs - jf->table); dyStringAppend(dy, dbStart); dyStringAppend(dy, be+1); newJf->table = cloneString(dy->string); slAddHead(&newJs->fieldList, newJf); } newJs->expanded = TRUE; slReverse(&newJs->fieldList); slAddHead(&newList, newJs); dbStart = dbEnd; } dyStringFree(&dy); freez(&dbCommaList); joinerSetFree(&js); } else { slAddHead(&newList, js); } } slReverse(&newList); joiner->jsList = newList; } static void joinerLinkParents(struct joiner *joiner) /* Go through and link together parents and children. */ { struct joinerSet *js, *parent; struct hash *hash = newHash(0); for (js = joiner->jsList; js != NULL; js = js->next) { if (hashLookup(hash, js->name)) errAbort("Duplicate joiner %s line %d of %s", js->name, js->lineIx, joiner->fileName); hashAdd(hash, js->name, js); } for (js = joiner->jsList; js != NULL; js = js->next) { char *typeOf = js->typeOf; if (typeOf != NULL) { js->parent = hashFindVal(hash, typeOf); if (js->parent == NULL) errAbort("%s not define line %d of %s", typeOf, js->lineIx, joiner->fileName); refAdd(&js->parent->children, js); } } for (js = joiner->jsList; js != NULL; js = js->next) { for (parent = js->parent; parent != NULL; parent = parent->parent) { if (parent == js) errAbort("Circular typeOf dependency on joiner %s line %d of %s", js->name, js->lineIx, joiner->fileName); } slReverse(&js->children); } } static void checkIgnoreBalance(struct joiner *joiner) /* Check that databases in fields are in the checked list. * Check that there is no overlap between checked and ignored * list. */ { struct joinerSet *js; struct joinerField *jf; struct slName *db; struct hashEl *ignoreList, *ignore; /* Check that there is no overlap between databases ignored * and databases checked. */ ignoreList = hashElListHash(joiner->databasesIgnored); for (ignore=ignoreList; ignore != NULL; ignore = ignore->next) { if (hashLookup(joiner->databasesChecked, ignore->name)) errAbort("%s is in both databasesChecked and databasesIgnored", ignore->name); } slFreeList(&ignoreList); /* Check that all databases mentioned in fields are in * databasesChecked. */ for (js = joiner->jsList; js != NULL; js = js->next) { for (jf = js->fieldList; jf != NULL; jf = jf->next) { for (db = jf->dbList; db != NULL; db = db->next) { if (!hashLookup(joiner->databasesChecked, db->name)) { errAbort("database %s line %d of %s is not in databasesChecked", db->name, jf->lineIx, joiner->fileName); } } } } } static struct joinerTable *tableFromField(struct joinerField *jf) /* Extract database/table info out of field. */ { struct joinerTable *table; AllocVar(table); table->table = cloneString(jf->table); table->dbList = slNameCloneList(jf->dbList); return table; } static void addDependenciesFromIdentifiers(struct joiner *joiner) /* Add dependencies that are specified through dependency * attribute on identifier line. */ { struct joinerSet *js; for (js = joiner->jsList; js != NULL; js = js->next) { if (js->isDependency) { struct joinerField *primary = js->fieldList, *jf; for (jf = primary->next; jf != NULL; jf = jf->next) { struct joinerDependency *dep; AllocVar(dep); dep->lineIx = jf->lineIx; dep->table = tableFromField(jf); dep->dependsOnList = tableFromField(primary); slAddHead(&joiner->dependencyList, dep); } } } } struct joiner *joinerRead(char *fileName) /* Read in a .joiner file. */ { struct joiner *joiner = joinerParsePassOne(fileName); joinerExpand(joiner); joinerLinkParents(joiner); checkIgnoreBalance(joiner); addDependenciesFromIdentifiers(joiner); slReverse(&joiner->dependencyList); return joiner; } struct joinerDtf *joinerDtfNew(char *database, char *table, char *field) /* Create new joinerDtf. */ { struct joinerDtf *dtf; AllocVar(dtf); dtf->database = cloneString(database); dtf->table = cloneString(table); dtf->field = cloneString(field); return dtf; } struct joinerDtf *joinerDtfClone(struct joinerDtf *dtf) /* Return duplicate (deep copy) of joinerDtf. */ { return dtf ? joinerDtfNew(dtf->database, dtf->table, dtf->field) : NULL; } static void notTriple(char *s) /* Complain that s is not in dotted triple format. */ { errAbort("%s not a dotted triple", s); } struct joinerDtf *joinerDtfFromDottedTriple(char *triple) /* Get joinerDtf from something in db.table.field format. */ { char *s, *e; struct joinerDtf *dtf; AllocVar(dtf); s = triple; e = strstr(s, ".hub_"); if (e == NULL) + e = strstr(s, ".ct_"); +if (e == NULL) e = strchr(s, '.'); if (e == NULL) notTriple(triple); dtf->database = cloneStringZ(s, e-s); s = e+1; e = strrchr(s, '.'); if (e == NULL) notTriple(triple); dtf->table = cloneStringZ(s, e-s); dtf->field = cloneString(e+1); return dtf; } boolean joinerDtfSame(struct joinerDtf *dtfA, struct joinerDtf *dtfB) /* Return TRUE if both are NULL or if both have same db, table and field. */ { if (dtfA == NULL && dtfB == NULL) return TRUE; else if (dtfA != NULL && dtfB != NULL) return (sameString(dtfA->database, dtfB->database) && sameString(dtfA->table, dtfB->table) && sameString(dtfA->field, dtfB->field)); return FALSE; } struct joinerDtf *joinerDtfFind(struct joinerDtf *dtfList, struct joinerDtf *dtf) /* Return the first element of dtfList that is joinerDtfSame as dtf, or NULL if no such. */ { struct joinerDtf *el; for (el = dtfList; el != NULL; el = el->next) if (joinerDtfSame(el, dtf)) return el; return NULL; } void joinerDtfToSqlFieldString(struct joinerDtf *dtf, char *db, char *buf, size_t bufSize) /* If dtf->database is different from db (or db is NULL), write database.table.field info buf, * otherwise just table.field. */ { if (differentString(dtf->database, db)) safef(buf, bufSize, "%s.%s.%s", dtf->database, dtf->table, dtf->field); else safef(buf, bufSize, "%s.%s", dtf->table, dtf->field); } void joinerDtfToSqlTableString(struct joinerDtf *dtf, char *db, char *buf, size_t bufSize) /* If dtf->database is different from db (or db is NULL), write database.table info buf, * otherwise just table. */ { if (db == NULL || differentString(dtf->database, db)) safef(buf, bufSize, "%s.%s", dtf->database, dtf->table); else safef(buf, bufSize, "%s", dtf->table); } void joinerDtfFree(struct joinerDtf **pDtf) /* Free up resources associated with joinerDtf. */ { struct joinerDtf *dtf = *pDtf; if (dtf != NULL) { freeMem(dtf->database); freeMem(dtf->table); freeMem(dtf->field); freez(pDtf); } } void joinerDtfFreeList(struct joinerDtf **pList) /* Free up memory associated with list of joinerDtfs. */ { struct joinerDtf *el, *next; for (el = *pList; el != NULL; el = next) { next = el->next; joinerDtfFree(&el); } *pList = NULL; } /* Forward declaration for mutual recursion: */ void joinerPairFreeList(struct joinerPair **pList); /* Free up memory associated with list of joinerPairs. */ void joinerPairFree(struct joinerPair **pJp) /* Free up memory associated with joiner pair. */ { struct joinerPair *jp = *pJp; if (jp != NULL) { joinerDtfFree(&jp->a); joinerDtfFree(&jp->b); if (jp->child != NULL) joinerPairFreeList(&jp->child); freez(pJp); } } void joinerPairFreeList(struct joinerPair **pList) /* Free up memory associated with list of joinerPairs. */ { struct joinerPair *el, *next; for (el = *pList; el != NULL; el = next) { next = el->next; joinerPairFree(&el); } *pList = NULL; } static void rJoinerPairDump(struct joinerPair *jpList, FILE *out, int level) /* Write out joiner pair list to file mostly for debugging. */ { struct joinerPair *jp; for (jp = jpList; jp != NULL; jp = jp->next) { fprintf(out, "%*s", level*2, ""); fprintf(out, "%s.%s.%s (via %s) %s.%s.%s\n", jp->a->database, jp->a->table, jp->a->field, jp->identifier->name, jp->b->database, jp->b->table, jp->b->field); if (jp->child != NULL) rJoinerPairDump(jp->child, out, level+1); } } void joinerPairDump(struct joinerPair *jpList, FILE *out) /* Write out joiner pair list to file mostly for debugging. */ { rJoinerPairDump(jpList, out, 0); } static struct joinerField *joinerSetIncludesTable(struct joinerSet *js, char *database, char *table) /* If joiner set includes database and table, return the associated field. */ { struct joinerField *jf; for (jf = js->fieldList; jf != NULL; jf = jf->next) { if (sameString(table, jf->table) && slNameInList(jf->dbList, database)) return jf; } return NULL; } static char *skipPrefix(char *prefix, char *string) /* If string starts with prefix, return a pointer into string just past the prefix; * otherwise just return string. Do not free result. */ { if (startsWith(prefix, string)) return string + strlen(prefix); else return string; } static boolean wildExists(char *database, char *table) /* Do a SQL wildcard search for table in database. */ { boolean exists = FALSE; struct sqlConnection *conn = hAllocConnMaybe(database); if (conn != NULL) exists = sqlTableWildExists(conn, table); hFreeConn(&conn); return exists; } static boolean tableExists(char *database, char *table, char *splitPrefix) /* Return TRUE if database and table exist. If splitPrefix is given, * check for existence with and without it. */ { char t2[1024]; if (isNotEmpty(splitPrefix)) safef(t2, sizeof(t2), "%s%s", splitPrefix, table); else safef(t2, sizeof(t2), "%s", table); boolean hasSqlWildcard = (strchr(t2, '%') || strchr(skipPrefix("all_", t2), '_')); boolean exists = hasSqlWildcard ? wildExists(database, t2) : hTableExists(database, t2); if (!exists && isNotEmpty(splitPrefix)) { hasSqlWildcard = (strchr(table, '%') || strchr(table, '_')); exists = hasSqlWildcard ? wildExists(database, table) : hTableExists(database, table); } return exists; } static void addChildren(struct joinerSet *js, struct slRef **pList) /* Recursively add children to list. */ { struct slRef *childRef; for (childRef = js->children; childRef != NULL; childRef = childRef->next) { struct joinerSet *child = childRef->val; refAdd(pList, child); addChildren(child, pList); } } struct slRef *joinerSetInheritanceChain(struct joinerSet *js) /* Return list of self, children, and parents (but not siblings). * slFreeList result when done. */ { struct slRef *list = NULL; struct joinerSet *parent; /* Add self and parents. */ for (parent = js; parent != NULL; parent = parent->parent) refAdd(&list, parent); addChildren(js, &list); slReverse(&list); return list; } struct joinerField *joinerSetFindField(struct joinerSet *js, struct joinerDtf *dtf) /* Find field in set if any that matches dtf */ { struct slRef *chain = joinerSetInheritanceChain(js), *link; struct joinerField *jf, *ret = NULL; for (link = chain; link != NULL; link = link->next) { js = link->val; for (jf = js->fieldList; jf != NULL; jf = jf->next) { if (sameString(dtf->table, jf->table) && sameString(dtf->field, jf->field)) { if (slNameInList(jf->dbList, dtf->database)) { ret = jf; break; } } } if (ret != NULL) break; } slFreeList(&chain); return ret; } static struct joinerPair *joinerToField( char *aDatabase, struct joinerField *aJf, char *bDatabase, struct joinerField *bJf, struct joinerSet *identifier) /* Construct joiner pair linking from a to b. */ { struct joinerPair *jp; AllocVar(jp); jp->a = joinerDtfNew(aDatabase, aJf->table, aJf->field); jp->b = joinerDtfNew(bDatabase, bJf->table, bJf->field); jp->identifier = identifier; return jp; } boolean joinerExclusiveCheck(struct joiner *joiner, char *aDatabase, char *bDatabase) /* Check that aDatabase and bDatabase are not in the same * exclusivity hash. Return TRUE if join can happen between * these two databases. */ { struct hash *exHash; if (sameString(aDatabase, bDatabase)) return TRUE; for (exHash = joiner->exclusiveSets; exHash != NULL; exHash = exHash->next) { if (hashLookup(exHash, aDatabase) && hashLookup(exHash, bDatabase)) return FALSE; } return TRUE; } struct joinerPair *joinerRelate(struct joiner *joiner, char *database, char *table, char *exclusiveDb) /* Get list of all ways to link table in given database to other tables, * possibly in other databases. * If exclusiveDb is not NULL then apply joinerExclusiveCheck to it in addition to database. */ { struct joinerSet *js, *jsChain; struct joinerField *jf, *jfBase; struct joinerPair *jpList = NULL, *jp; struct slRef *chainList, *chainEl; /* Return list of self, children, and parents (but not siblings) */ #ifdef SCREWS_UP_SPLITS if (!tableExists(database, table, NULL)) errAbort("%s.%s - table doesn't exist", database, table); #endif for (js = joiner->jsList; js != NULL; js = js->next) { if ((jfBase = joinerSetIncludesTable(js, database, table)) != NULL) { chainList = joinerSetInheritanceChain(js); for (chainEl = chainList; chainEl != NULL; chainEl = chainEl->next) { jsChain = chainEl->val; for (jf = jsChain->fieldList; jf != NULL; jf = jf->next) { struct slName *db; for (db = jf->dbList; db != NULL; db = db->next) { if (joinerExclusiveCheck(joiner, database, db->name) && (isEmpty(exclusiveDb) || joinerExclusiveCheck(joiner, exclusiveDb, db->name))) { if (!sameString(database, db->name) || !sameString(table, jf->table)) { if (tableExists(db->name, jf->table, jf->splitPrefix)) { jp = joinerToField(database, jfBase, db->name, jf, jsChain); slAddHead(&jpList, jp); } } } } } } slFreeList(&chainList); } } slReverse(&jpList); return jpList; } static struct joinerPair *joinerPairClone(struct joinerPair *jp) /* Return duplicate (deep copy) of joinerPair. */ { struct joinerPair *dupe; AllocVar(dupe); dupe->a = joinerDtfClone(jp->a); dupe->b = joinerDtfClone(jp->b); dupe->identifier = jp->identifier; return dupe; } boolean joinerDtfSameTable(struct joinerDtf *a, struct joinerDtf *b) /* Return TRUE if they are in the same database and table. */ { return sameString(a->database, b->database) && sameString(a->table, b->table); } static boolean identifierInArray(struct joinerSet *identifier, struct joinerSet **array, int count) /* Return TRUE if identifier is in array. */ { int i; for (i=0; idatabase, a->table, exclusiveDb); for (jp = jpList; jp != NULL; jp = jp->next) { if (joinerDtfSameTable(jp->b, b)) { path = joinerPairClone(jp); break; } } if (path == NULL && level < maxLevel) { for (jp = jpList; jp != NULL; jp = jp->next) { identifierStack[level] = jp->identifier; if (!identifierInArray(jp->identifier, identifierStack, level)) { identifierStack[level] = jp->identifier; path = rFindRoute(joiner, jp->b, b, exclusiveDb, level+1, maxLevel); if (path != NULL) { struct joinerPair *jpClone = joinerPairClone(jp); slAddHead(&path, jpClone); break; } } } } joinerPairFreeList(&jpList); return path; } struct joinerPair *joinerFindRoute(struct joiner *joiner, struct joinerDtf *a, struct joinerDtf *b) /* Find route between a and b. Note the field element of a and b * are unused. */ { int i; struct joinerPair *jpList = NULL; for (i=1; idatabase, 0, i); if (jpList != NULL) break; } return jpList; } boolean joinerDtfAllSameTable(struct joinerDtf *fieldList) /* Return TRUE if all joinerPairs refer to same table. */ { struct joinerDtf *first = fieldList, *field; if (first == NULL) return TRUE; for (field = first->next; field != NULL; field = field->next) if (!joinerDtfSameTable(first, field)) return FALSE; return TRUE; } static boolean inRoute(struct joinerPair *routeList, struct joinerDtf *dtf) /* Return TRUE if table in dtf is already in route. */ { struct joinerPair *jp; for (jp = routeList; jp != NULL; jp = jp->next) { if (joinerDtfSameTable(dtf, jp->a) || joinerDtfSameTable(dtf, jp->b)) return TRUE; } return FALSE; } static void joinerPairRemoveDupes(struct joinerPair **pRouteList) /* Remove duplicate entries in route list. */ { struct hash *uniqHash = newHash(0); struct joinerPair *newList = NULL, *jp, *next; char buf[256]; for (jp = *pRouteList; jp != NULL; jp = next) { next = jp->next; safef(buf, sizeof(buf), "%s.%s->%s.%s", jp->a->database, jp->a->table, jp->b->database, jp->b->table); if (hashLookup(uniqHash, buf)) { joinerPairFree(&jp); } else { hashAdd(uniqHash, buf, NULL); slAddHead(&newList, jp); } } slReverse(&newList); *pRouteList = newList; } struct joinerPair *joinerFindRouteThroughAll(struct joiner *joiner, struct joinerDtf *tableList) /* Return route that gets to all tables in fieldList. Note that * the field element of the items in tableList can be NULL. */ { struct joinerPair *fullRoute = NULL, *pairRoute = NULL; struct joinerDtf *first = tableList, *dtf; if (first->next == NULL) return NULL; for (dtf = first->next; dtf != NULL; dtf = dtf->next) { if (!inRoute(fullRoute, dtf) && !joinerDtfSameTable(first, dtf)) { pairRoute = joinerFindRoute(joiner, first, dtf); fullRoute = slCat(fullRoute, pairRoute); } } joinerPairRemoveDupes(&fullRoute); return fullRoute; } static boolean joinerDtfSameDt(struct joinerDtf *a, struct joinerDtf *b) /* Return true if a and b have same database and table (ignore field). */ { return (a == b || (a && b && sameString(a->database, b->database) && sameString(a->table, b->table))); } void joinerPairListToTree(struct joinerPair *routeList) /* Convert a linear routeList (only next used, not child) to a tree structure in which * pairs like {X,Y} and {Y,Z} (first b == second a) are connected using child instead of next. */ { if (routeList == NULL) return; struct hash *bHash = hashNew(0); struct joinerPair *pair; for (pair = routeList; pair != NULL; pair = pair->next) { if (pair->child) errAbort("joinerPairListToTree: found non-NULL child. Call this only on linear list."); char bdt[4096]; safef(bdt, sizeof(bdt), "%s.%s", pair->b->database, pair->b->table); hashAdd(bHash, bdt, pair); } for (pair = routeList; pair != NULL; ) { struct joinerPair *nextPair = pair->next; if (nextPair && !joinerDtfSameDt(pair->a, nextPair->a)) { char nextAdt[4096]; safef(nextAdt, sizeof(nextAdt), "%s.%s", nextPair->a->database, nextPair->a->table); struct joinerPair *parent = hashMustFindVal(bHash, nextAdt); pair->next = nextPair->next; slAddTail(&parent->child, nextPair); } else pair = pair->next; } hashFree(&bHash); } char *joinerFieldChopKey(struct joinerField *jf, char *key) /* If jf includes chopBefore and/or chopAfter, apply those to key and return a starting * offset in key, which may be modified. */ { struct slName *n; for (n = jf->chopBefore; n != NULL; n = n->next) { if (startsWith(n->name, key)) { key += strlen(n->name); break; } } for (n = jf->chopAfter; n!=NULL; n = n->next) { char *e = strstr(key, n->name); if (e != NULL) { *e = 0; break; } } return key; } void joinerFieldIterateKey(struct joinerField *jf, void(*callback)(void *context, char *key), void *context, char *key) /* Process key according to jf -- if jf->separator, may result in list of processed keys -- * and invoke callback with each nonempty processed key and context. */ { if (isNotEmpty(jf->separator) || jf->chopBefore || jf->chopAfter) { int len = strlen(key); char keyClone[len+1]; safencpy(keyClone, sizeof(keyClone), key, len); if (isEmpty(jf->separator)) { // No separator; just chop. char *choppedKey = joinerFieldChopKey(jf, keyClone); if (isNotEmpty(choppedKey)) callback(context, choppedKey); } else { // Scan for separator; chop each separated key. char *s = keyClone, *e; char sep = jf->separator[0]; while (isNotEmpty(s)) { e = strchr(s, sep); if (e != NULL) *e++ = 0; s = joinerFieldChopKey(jf, s); if (s[0] != 0) callback(context, s); s = e; } } } else if (isNotEmpty(key)) // Just use the plain old key. callback(context, key); }