533112afe2a2005e80cdb1f82904ea65032d4302 braney Sat Oct 2 11:37:34 2021 -0700 split hg/lib into two separate libaries, one only used by the cgis diff --git src/hg/lib/hashJoin.c src/hg/lib/hashJoin.c deleted file mode 100644 index 06ca10b..0000000 --- src/hg/lib/hashJoin.c +++ /dev/null @@ -1,312 +0,0 @@ -/* hashJoin - join one or more columns of a hashed database table to an externally provided - * char **row that contains the key and empty slot(s) for the column value(s) */ - -/* Copyright (C) 2015 The Regents of the University of California - * See README in this or parent directory for licensing information. */ - -#include "hashJoin.h" -#include "hdb.h" -#include "obscure.h" - -struct hashJoin -// Implements table join as a hash lookup: the key is taken from some column of an externally -// provided row, and one or more values are retrieved and then stored in specified columns -// of the external row. If a key has more than one set of matching columns, then each -// column's values are glommed into a comma-separated list for that column in the external row. -{ - struct hashJoin *next; - struct hash *hash; // Hash some kind of key to char **row of column values - uint extRowKeyIx; // Index of hash key to take from external row - uint valCount; // Number of columns in hash value rows - uint *extRowValIxs; // Index of each hash value column to store in external row - struct dyString **colValues; // Accumulators for hash value columns -- multiple - // results from hash lookup become comma-sep strings - struct lm *lm; // For storing hash values, misc strings & arrays - struct joinerField *jfA; // If non-NULL, its separator, chopBefore and chopAfter - // are applied to each key accessed by hashJoinOneRow. - struct joinerField *jfB; // If non-NULL, its chopBefore and chopAfter - // are applied to each key passed to hashJoinAddMapping. - char *db; // Database from which to load hash - char *table; // Table from which to load hash - char *query; // SQL query to execute when loading hash - boolean loaded; // TRUE when table contents have been loaded into hash - boolean naForMissing; // If TRUE, then output "n/a" when there's no match -}; - -struct hashJoin *hashJoinNew(struct joinerDtf *keyDtf, uint extRowKeyIx, - struct joinerDtf *valDtfs, uint *extRowValIxs, - struct joinerField *jfA, struct joinerField *jfB, - boolean naForMissing) -/* Return a new hashJoin. extRowKeyIx is the index in an external row of the key - * to use in the join. extRowValIxs[valCount] contains each hash val column's index - * into an external row. jfA and jfB are optional; if given, then jfA's separator, - * chopBefore and chopAfter will be applied to each key retrieved from the external row - * and jfB's separator, chopBefore and chopAfter will be applied to each hash key. - * If naForMissing is TRUE then the result columns will contain "n/a" when there is - * no match in the hash. */ -{ -struct hashJoin *self; -AllocVar(self); -self->extRowKeyIx = extRowKeyIx; -int valCount = slCount(valDtfs); -self->valCount = valCount; -// Save some inner-loop tests if no separating or chopping will be required: -if (jfA && (jfA->separator || jfA->chopBefore || jfA->chopAfter)) - self->jfA = jfA; -if (jfB && (jfB->separator || jfB->chopBefore || jfB->chopAfter)) - self->jfB = jfB; -self->lm = lmInit(0); -lmAllocArray(self->lm, self->extRowValIxs, valCount); -CopyArray(extRowValIxs, self->extRowValIxs, valCount); -lmAllocArray(self->lm, self->colValues, valCount); -int i; -for (i = 0; i < valCount; i++) - self->colValues[i] = dyStringNew(0); -self->db = lmCloneString(self->lm, keyDtf->database); -self->table = lmCloneString(self->lm, keyDtf->table); -struct dyString *query = sqlDyStringCreate("select %s", keyDtf->field); -struct joinerDtf *dtf; -for (dtf = valDtfs; dtf != NULL; dtf = dtf->next) - { - if (differentString(dtf->database, self->db) || - differentString(dtf->table, self->table)) - errAbort("hashJoinNew: inconsistent key field (%s.%s.%s) and value field (%s.%s.%s)", - keyDtf->database, keyDtf->table, keyDtf->field, - dtf->database, dtf->table, dtf->field); - dyStringAppendC(query, ','); - dyStringAppend(query, dtf->field); - } -dyStringPrintf(query, " from %s", self->table); -self->query = dyStringCannibalize(&query); -self->naForMissing = naForMissing; -return self; -} - -struct hashJoin *hashJoinNext(struct hashJoin *el) -/* Get the next hashJoin in a list of hashJoins. */ -{ -return el->next; -} - -struct hjAddOneContext -// joinerFieldIterateKey context for use by hashJoinAddOne -{ - struct hash *hash; - char **clonedValues; -}; - -static void hashJoinAddOne(void *context, char *key) -/* Add values from context to hash from context for key. - * This is a callback for joinerFieldIterateKey; context is struct hjAddOneContext *. */ -{ -struct hjAddOneContext *ctx = context; -hashAdd(ctx->hash, key, ctx->clonedValues); -} - -static void hashJoinLoad(struct hashJoin *self) -/* Load table contents into hash. */ -{ -if (self->loaded) - errAbort("hashJoinLoad: loaded flag already set"); -struct sqlConnection *conn = hAllocConn(self->db); -int rowCount = sqlRowCount(conn, self->table); -int hashSize = min(digitsBaseTwo(rowCount), hashMaxSize); -self->hash = hashNew(hashSize); -char **row; -struct sqlResult *sr = sqlGetResult(conn, self->query); -while ((row = sqlNextRow(sr)) != NULL) - { - char **clonedValues = lmCloneRow(self->lm, row+1, self->valCount); - struct hjAddOneContext context = { self->hash, clonedValues }; - // If necessary, process key according to self->jfA. - if (self->jfB) - joinerFieldIterateKey(self->jfB, hashJoinAddOne, &context, row[0]); - else - hashAdd(self->hash, row[0], clonedValues); - } -self->loaded = TRUE; -hFreeConn(&conn); -} - -struct hjKeyContext -{ - struct hashJoin *self; - boolean includeEmpties; - boolean matchCount; -}; - -static void hashJoinOneKey(void *context, char *key) -/* Look up some processed key in hash and accumulate results for each column. - * This is a callback for joinerFieldIterateKey; context is struct hashJoin *. */ -{ -struct hjKeyContext *ctx = context; -struct hashJoin *self = ctx->self; -struct hashEl *helFirst = hashLookup(self->hash, key); -// hgTables accumulates multiple match values with slAddHead so they are -// printed in reverse. Use arrays to accumulate multiple matched rows; we'll step -// through them backwards in hashJoinGlomMultipleMatches to match hgTables' order. -int helMaxCount = slCount(helFirst); -char **matchRows[helMaxCount]; -struct hashEl *hel; -int matchIx; -for (matchIx = 0, hel = helFirst; hel != NULL; hel = hashLookupNext(hel), matchIx++) - { - char **row = hel->val; - matchRows[matchIx] = row; - } -int matchCount = matchIx; -ctx->matchCount += matchCount; -// When there are multiple matches, hgTables includes empty vals and prints a comma after each item. -boolean includeEmpties = ctx->includeEmpties || (matchCount > 1); -// Step through matchRows in reverse order to match hgTables. -for (matchIx = matchCount - 1; matchIx >= 0; matchIx--) - { - char **row = matchRows[matchIx]; - int valIx; - for (valIx = 0; valIx < self->valCount; valIx++) - { - char *val = row[valIx]; - if (isNotEmpty(val) || includeEmpties) - { - // Skip over adjacent duplicate values - struct dyString *colDy = self->colValues[valIx]; - int colDyLen = dyStringLen(colDy); - boolean isDup = FALSE; - if (matchIx < matchCount - 1) - { - char **prevRow = matchRows[matchIx+1]; - char *prevVal = (prevRow == NULL) ? NULL : prevRow[valIx]; - isDup = sameOk(val, prevVal); - } - else - // If there's no previous row to compare to from this key, but colDy already - // ends with the same value, consider this a duplicate: - isDup = colDyLen > 0 && endsWithWordComma(colDy->string, val); - if (! isDup) - { - if (includeEmpties) - { - if (isNotEmpty(val)) - dyStringAppend(colDy, val); - dyStringAppendC(colDy, ','); - } - else - { - if (colDyLen > 0) - dyStringAppendC(colDy, ','); - dyStringAppend(colDy, val); - } - } - } - } - } -} - -static void hashJoinChopCommaKey(struct hjKeyContext *context, struct joinerField *jfA, char *key) -/* Chop key by comma, regardless of jfA->separator; for each item, apply jfA's chopBefore - * and chopAfter if applicable, and try to join the result. */ -{ -context->includeEmpties = TRUE; -int len = strlen(key); -char keyClone[len+1]; -safencpy(keyClone, sizeof(keyClone), key, len); -char *s = keyClone, *e; -while (isNotEmpty(s)) - { - e = strchr(s, ','); - if (e != NULL) - *e++ = 0; - if (jfA) - s = joinerFieldChopKey(jfA, s); - if (s[0] != 0) - hashJoinOneKey(context, s); - s = e; - } -} - -void hashJoinOneRow(struct hashJoin *self, char **extRow) -/* Look up some column of extRow in hash and place result(s) in other columns of extRow. - * Don't call this again until done with extRow -- column value storage is reused. */ -{ -if (!self->loaded) - hashJoinLoad(self); -char *key = extRow[self->extRowKeyIx]; -if (isNotEmpty(key)) - { - // Clear accumulators - uint i; - for (i = 0; i < self->valCount; i++) - dyStringClear(self->colValues[i]); - // If necessary, process key according to self->jfA. Look up key(s) and accumulate results. - struct joinerField *jfA = self->jfA; - struct hjKeyContext context = { self, FALSE, FALSE }; - if (jfA) - { - context.includeEmpties = TRUE; - joinerFieldIterateKey(jfA, hashJoinOneKey, &context, key); - } - else - hashJoinOneKey(&context, key); - // In case we're processing comma-glommed results from some other hash join -- - // if there were no results, but the key contains commas and wasn't already comma-chopped - // by joinerFieldIterateKey, try comma-chopping it and looking up the pieces. - if (context.matchCount == 0 && - ! (jfA && sameOk(jfA->separator, ",")) && - strchr(key, ',')) - { - hashJoinChopCommaKey(&context, jfA, key); - } - // When includeEmpties is set, we assume we're going to have multiple outputs. - // However, there might be only one match among multiple keys. If so, remove trailing commas. - if (context.includeEmpties && context.matchCount == 1) - { - int valIx; - for (valIx = 0; valIx < self->valCount; valIx++) - { - struct dyString *colDy = self->colValues[valIx]; - char *end = colDy->string + dyStringLen(colDy) - 1; - if (*end == ',') - *end = '\0'; - } - } - // Set the external row result columns to point to accumulated values. - for (i = 0; i < self->valCount; i++) - { - struct dyString *colDy = self->colValues[i]; - if (self->naForMissing && context.matchCount == 0) - dyStringAppend(colDy, "n/a"); - uint extRowValIx = self->extRowValIxs[i]; - extRow[extRowValIx] = colDy->string; - } - } -} - -void hashJoinFree(struct hashJoin **pSelf) -/* Free hashJoin (if necessary). */ -{ -if (pSelf == NULL || *pSelf == NULL) - return; -struct hashJoin *self = *pSelf; -hashFree(&self->hash); -uint i; -for (i = 0; i < self->valCount; i++) - dyStringFree(&self->colValues[i]); -freeMem(self->query); -lmCleanup(&self->lm); -freez(pSelf); -} - -void hashJoinFreeList(struct hashJoin **pList) -/* Free a list of hashJoins. */ -{ -if (pList == NULL || *pList == NULL) - return; -struct hashJoin *el = *pList; -while (el != NULL) - { - struct hashJoin *elNext = el->next; - hashJoinFree(&el); - el = elNext; - } -*pList = NULL; -}