4898794edd81be5285ea6e544acbedeaeb31bf78 max Tue Nov 23 08:10:57 2021 -0800 Fixing pointers to README file for license in all source code files. refs #27614 diff --git src/hg/cgilib/hashJoin.c src/hg/cgilib/hashJoin.c index 06ca10b..0994857 100644 --- src/hg/cgilib/hashJoin.c +++ src/hg/cgilib/hashJoin.c @@ -1,312 +1,312 @@ /* hashJoin - join one or more columns of a hashed database table to an externally provided * char **row that contains the key and empty slot(s) for the column value(s) */ /* Copyright (C) 2015 The Regents of the University of California - * See README in this or parent directory for licensing information. */ + * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "hashJoin.h" #include "hdb.h" #include "obscure.h" struct hashJoin // Implements table join as a hash lookup: the key is taken from some column of an externally // provided row, and one or more values are retrieved and then stored in specified columns // of the external row. If a key has more than one set of matching columns, then each // column's values are glommed into a comma-separated list for that column in the external row. { struct hashJoin *next; struct hash *hash; // Hash some kind of key to char **row of column values uint extRowKeyIx; // Index of hash key to take from external row uint valCount; // Number of columns in hash value rows uint *extRowValIxs; // Index of each hash value column to store in external row struct dyString **colValues; // Accumulators for hash value columns -- multiple // results from hash lookup become comma-sep strings struct lm *lm; // For storing hash values, misc strings & arrays struct joinerField *jfA; // If non-NULL, its separator, chopBefore and chopAfter // are applied to each key accessed by hashJoinOneRow. struct joinerField *jfB; // If non-NULL, its chopBefore and chopAfter // are applied to each key passed to hashJoinAddMapping. char *db; // Database from which to load hash char *table; // Table from which to load hash char *query; // SQL query to execute when loading hash boolean loaded; // TRUE when table contents have been loaded into hash boolean naForMissing; // If TRUE, then output "n/a" when there's no match }; struct hashJoin *hashJoinNew(struct joinerDtf *keyDtf, uint extRowKeyIx, struct joinerDtf *valDtfs, uint *extRowValIxs, struct joinerField *jfA, struct joinerField *jfB, boolean naForMissing) /* Return a new hashJoin. extRowKeyIx is the index in an external row of the key * to use in the join. extRowValIxs[valCount] contains each hash val column's index * into an external row. jfA and jfB are optional; if given, then jfA's separator, * chopBefore and chopAfter will be applied to each key retrieved from the external row * and jfB's separator, chopBefore and chopAfter will be applied to each hash key. * If naForMissing is TRUE then the result columns will contain "n/a" when there is * no match in the hash. */ { struct hashJoin *self; AllocVar(self); self->extRowKeyIx = extRowKeyIx; int valCount = slCount(valDtfs); self->valCount = valCount; // Save some inner-loop tests if no separating or chopping will be required: if (jfA && (jfA->separator || jfA->chopBefore || jfA->chopAfter)) self->jfA = jfA; if (jfB && (jfB->separator || jfB->chopBefore || jfB->chopAfter)) self->jfB = jfB; self->lm = lmInit(0); lmAllocArray(self->lm, self->extRowValIxs, valCount); CopyArray(extRowValIxs, self->extRowValIxs, valCount); lmAllocArray(self->lm, self->colValues, valCount); int i; for (i = 0; i < valCount; i++) self->colValues[i] = dyStringNew(0); self->db = lmCloneString(self->lm, keyDtf->database); self->table = lmCloneString(self->lm, keyDtf->table); struct dyString *query = sqlDyStringCreate("select %s", keyDtf->field); struct joinerDtf *dtf; for (dtf = valDtfs; dtf != NULL; dtf = dtf->next) { if (differentString(dtf->database, self->db) || differentString(dtf->table, self->table)) errAbort("hashJoinNew: inconsistent key field (%s.%s.%s) and value field (%s.%s.%s)", keyDtf->database, keyDtf->table, keyDtf->field, dtf->database, dtf->table, dtf->field); dyStringAppendC(query, ','); dyStringAppend(query, dtf->field); } dyStringPrintf(query, " from %s", self->table); self->query = dyStringCannibalize(&query); self->naForMissing = naForMissing; return self; } struct hashJoin *hashJoinNext(struct hashJoin *el) /* Get the next hashJoin in a list of hashJoins. */ { return el->next; } struct hjAddOneContext // joinerFieldIterateKey context for use by hashJoinAddOne { struct hash *hash; char **clonedValues; }; static void hashJoinAddOne(void *context, char *key) /* Add values from context to hash from context for key. * This is a callback for joinerFieldIterateKey; context is struct hjAddOneContext *. */ { struct hjAddOneContext *ctx = context; hashAdd(ctx->hash, key, ctx->clonedValues); } static void hashJoinLoad(struct hashJoin *self) /* Load table contents into hash. */ { if (self->loaded) errAbort("hashJoinLoad: loaded flag already set"); struct sqlConnection *conn = hAllocConn(self->db); int rowCount = sqlRowCount(conn, self->table); int hashSize = min(digitsBaseTwo(rowCount), hashMaxSize); self->hash = hashNew(hashSize); char **row; struct sqlResult *sr = sqlGetResult(conn, self->query); while ((row = sqlNextRow(sr)) != NULL) { char **clonedValues = lmCloneRow(self->lm, row+1, self->valCount); struct hjAddOneContext context = { self->hash, clonedValues }; // If necessary, process key according to self->jfA. if (self->jfB) joinerFieldIterateKey(self->jfB, hashJoinAddOne, &context, row[0]); else hashAdd(self->hash, row[0], clonedValues); } self->loaded = TRUE; hFreeConn(&conn); } struct hjKeyContext { struct hashJoin *self; boolean includeEmpties; boolean matchCount; }; static void hashJoinOneKey(void *context, char *key) /* Look up some processed key in hash and accumulate results for each column. * This is a callback for joinerFieldIterateKey; context is struct hashJoin *. */ { struct hjKeyContext *ctx = context; struct hashJoin *self = ctx->self; struct hashEl *helFirst = hashLookup(self->hash, key); // hgTables accumulates multiple match values with slAddHead so they are // printed in reverse. Use arrays to accumulate multiple matched rows; we'll step // through them backwards in hashJoinGlomMultipleMatches to match hgTables' order. int helMaxCount = slCount(helFirst); char **matchRows[helMaxCount]; struct hashEl *hel; int matchIx; for (matchIx = 0, hel = helFirst; hel != NULL; hel = hashLookupNext(hel), matchIx++) { char **row = hel->val; matchRows[matchIx] = row; } int matchCount = matchIx; ctx->matchCount += matchCount; // When there are multiple matches, hgTables includes empty vals and prints a comma after each item. boolean includeEmpties = ctx->includeEmpties || (matchCount > 1); // Step through matchRows in reverse order to match hgTables. for (matchIx = matchCount - 1; matchIx >= 0; matchIx--) { char **row = matchRows[matchIx]; int valIx; for (valIx = 0; valIx < self->valCount; valIx++) { char *val = row[valIx]; if (isNotEmpty(val) || includeEmpties) { // Skip over adjacent duplicate values struct dyString *colDy = self->colValues[valIx]; int colDyLen = dyStringLen(colDy); boolean isDup = FALSE; if (matchIx < matchCount - 1) { char **prevRow = matchRows[matchIx+1]; char *prevVal = (prevRow == NULL) ? NULL : prevRow[valIx]; isDup = sameOk(val, prevVal); } else // If there's no previous row to compare to from this key, but colDy already // ends with the same value, consider this a duplicate: isDup = colDyLen > 0 && endsWithWordComma(colDy->string, val); if (! isDup) { if (includeEmpties) { if (isNotEmpty(val)) dyStringAppend(colDy, val); dyStringAppendC(colDy, ','); } else { if (colDyLen > 0) dyStringAppendC(colDy, ','); dyStringAppend(colDy, val); } } } } } } static void hashJoinChopCommaKey(struct hjKeyContext *context, struct joinerField *jfA, char *key) /* Chop key by comma, regardless of jfA->separator; for each item, apply jfA's chopBefore * and chopAfter if applicable, and try to join the result. */ { context->includeEmpties = TRUE; int len = strlen(key); char keyClone[len+1]; safencpy(keyClone, sizeof(keyClone), key, len); char *s = keyClone, *e; while (isNotEmpty(s)) { e = strchr(s, ','); if (e != NULL) *e++ = 0; if (jfA) s = joinerFieldChopKey(jfA, s); if (s[0] != 0) hashJoinOneKey(context, s); s = e; } } void hashJoinOneRow(struct hashJoin *self, char **extRow) /* Look up some column of extRow in hash and place result(s) in other columns of extRow. * Don't call this again until done with extRow -- column value storage is reused. */ { if (!self->loaded) hashJoinLoad(self); char *key = extRow[self->extRowKeyIx]; if (isNotEmpty(key)) { // Clear accumulators uint i; for (i = 0; i < self->valCount; i++) dyStringClear(self->colValues[i]); // If necessary, process key according to self->jfA. Look up key(s) and accumulate results. struct joinerField *jfA = self->jfA; struct hjKeyContext context = { self, FALSE, FALSE }; if (jfA) { context.includeEmpties = TRUE; joinerFieldIterateKey(jfA, hashJoinOneKey, &context, key); } else hashJoinOneKey(&context, key); // In case we're processing comma-glommed results from some other hash join -- // if there were no results, but the key contains commas and wasn't already comma-chopped // by joinerFieldIterateKey, try comma-chopping it and looking up the pieces. if (context.matchCount == 0 && ! (jfA && sameOk(jfA->separator, ",")) && strchr(key, ',')) { hashJoinChopCommaKey(&context, jfA, key); } // When includeEmpties is set, we assume we're going to have multiple outputs. // However, there might be only one match among multiple keys. If so, remove trailing commas. if (context.includeEmpties && context.matchCount == 1) { int valIx; for (valIx = 0; valIx < self->valCount; valIx++) { struct dyString *colDy = self->colValues[valIx]; char *end = colDy->string + dyStringLen(colDy) - 1; if (*end == ',') *end = '\0'; } } // Set the external row result columns to point to accumulated values. for (i = 0; i < self->valCount; i++) { struct dyString *colDy = self->colValues[i]; if (self->naForMissing && context.matchCount == 0) dyStringAppend(colDy, "n/a"); uint extRowValIx = self->extRowValIxs[i]; extRow[extRowValIx] = colDy->string; } } } void hashJoinFree(struct hashJoin **pSelf) /* Free hashJoin (if necessary). */ { if (pSelf == NULL || *pSelf == NULL) return; struct hashJoin *self = *pSelf; hashFree(&self->hash); uint i; for (i = 0; i < self->valCount; i++) dyStringFree(&self->colValues[i]); freeMem(self->query); lmCleanup(&self->lm); freez(pSelf); } void hashJoinFreeList(struct hashJoin **pList) /* Free a list of hashJoins. */ { if (pList == NULL || *pList == NULL) return; struct hashJoin *el = *pList; while (el != NULL) { struct hashJoin *elNext = el->next; hashJoinFree(&el); el = elNext; } *pList = NULL; }