src/hg/lib/hashJoin.c 533112afe2a2005e80cdb1f82904ea65032d4302

533112afe2a2005e80cdb1f82904ea65032d4302
braney
  Sat Oct 2 11:37:34 2021 -0700
split hg/lib into two separate libraries, one only used by the cgis

diff --git src/hg/lib/hashJoin.c src/hg/lib/hashJoin.c
deleted file mode 100644
index 06ca10b..0000000
--- src/hg/lib/hashJoin.c
+++ /dev/null
@@ -1,312 +0,0 @@
-/* hashJoin - join one or more columns of a hashed database table to an externally provided
- * char **row that contains the key and empty slot(s) for the column value(s) */
-
-/* Copyright (C) 2015 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
-
-#include "hashJoin.h"
-#include "hdb.h"
-#include "obscure.h"
-
-struct hashJoin
-// A table join performed as a hash lookup.  One column of a caller-supplied row provides the
-// key; the value column(s) found for that key are stored in other columns of the same row.
-// When a key matches more than one hash row, each output column receives a comma-separated
-// list of the matching values.
-{
-    // Next hashJoin in a list
-    struct hashJoin *next;
-    // Maps a key string to a char **row of value columns; filled in by hashJoinLoad
-    struct hash *hash;
-    // Which column of the external row holds the lookup key
-    uint extRowKeyIx;
-    // How many value columns each hash row has
-    uint valCount;
-    // For each value column, the external row column that receives it
-    uint *extRowValIxs;
-    // One accumulator per value column; several matches for a key are joined
-    // into one comma-separated string here
-    struct dyString **colValues;
-    // Local memory pool holding hash value rows, cloned strings and arrays
-    struct lm *lm;
-    // If non-NULL, its separator, chopBefore and chopAfter are applied to each
-    // key that hashJoinOneRow takes from the external row
-    struct joinerField *jfA;
-    // If non-NULL, hashJoinLoad hands it to joinerFieldIterateKey to process
-    // each key read from the table before it is added to the hash
-    struct joinerField *jfB;
-    // Database that the hash is loaded from
-    char *db;
-    // Table that the hash is loaded from
-    char *table;
-    // SQL query that hashJoinLoad runs to fetch key and value columns
-    char *query;
-    // TRUE once the table contents have been loaded into hash
-    boolean loaded;
-    // If TRUE, output columns get "n/a" when the key has no match
-    boolean naForMissing;
-};
-
-struct hashJoin *hashJoinNew(struct joinerDtf *keyDtf, uint extRowKeyIx,
-                             struct joinerDtf *valDtfs, uint *extRowValIxs,
-                             struct joinerField *jfA, struct joinerField *jfB,
-                             boolean naForMissing)
-/* Allocate and return a hashJoin.  keyDtf names the database.table.field that supplies hash
- * keys and valDtfs is the list of value fields; every value field must be in the same
- * database and table as keyDtf or this errAborts.  extRowKeyIx is the external row column
- * that holds the lookup key and extRowValIxs[i] is the external row column that receives
- * value column i.  jfA and jfB are optional and are kept only when they specify a
- * separator, chopBefore or chopAfter.  naForMissing selects "n/a" output for keys with no
- * match.  Only the SQL query is built here; the table is not read until the first
- * hashJoinOneRow call. */
-{
-struct hashJoin *hj;
-AllocVar(hj);
-hj->extRowKeyIx = extRowKeyIx;
-hj->naForMissing = naForMissing;
-int valColCount = slCount(valDtfs);
-hj->valCount = valColCount;
-// A joinerField with no separator and nothing to chop needs no per-key processing, so it is
-// recorded only when it has work to do; otherwise the member is left as AllocVar set it.
-if (jfA != NULL && (jfA->separator || jfA->chopBefore || jfA->chopAfter))
-    hj->jfA = jfA;
-if (jfB != NULL && (jfB->separator || jfB->chopBefore || jfB->chopAfter))
-    hj->jfB = jfB;
-// Arrays and cloned strings come out of the hashJoin's own local memory pool.
-hj->lm = lmInit(0);
-lmAllocArray(hj->lm, hj->extRowValIxs, valColCount);
-CopyArray(extRowValIxs, hj->extRowValIxs, valColCount);
-lmAllocArray(hj->lm, hj->colValues, valColCount);
-int colIx = 0;
-while (colIx < valColCount)
-    {
-    hj->colValues[colIx] = dyStringNew(0);
-    colIx++;
-    }
-hj->db = lmCloneString(hj->lm, keyDtf->database);
-hj->table = lmCloneString(hj->lm, keyDtf->table);
-// Assemble "select <key field>,<value field>,... from <table>".
-struct dyString *sql = sqlDyStringCreate("select %s", keyDtf->field);
-struct joinerDtf *valDtf = valDtfs;
-while (valDtf != NULL)
-    {
-    if (differentString(valDtf->database, hj->db) ||
-        differentString(valDtf->table, hj->table))
-        {
-        errAbort("hashJoinNew: inconsistent key field (%s.%s.%s) and value field (%s.%s.%s)",
-                 keyDtf->database, keyDtf->table, keyDtf->field,
-                 valDtf->database, valDtf->table, valDtf->field);
-        }
-    dyStringAppendC(sql, ',');
-    dyStringAppend(sql, valDtf->field);
-    valDtf = valDtf->next;
-    }
-dyStringPrintf(sql, " from %s", hj->table);
-hj->query = dyStringCannibalize(&sql);
-return hj;
-}
-
-struct hashJoin *hashJoinNext(struct hashJoin *el)
-/* Return el's successor in a list of hashJoins, i.e. el->next. */
-{
-struct hashJoin *following = el->next;
-return following;
-}
-
-struct hjAddOneContext
-// Context handed through joinerFieldIterateKey to the hashJoinAddOne callback.
-// Field order matters: hashJoinLoad fills this struct with a positional initializer.
-{
-    // Hash that receives the new entry
-    struct hash *hash;
-    // Value row to store under each key passed to the callback
-    char **clonedValues;
-};
-
-static void hashJoinAddOne(void *context, char *key)
-/* Callback for joinerFieldIterateKey; context is a struct hjAddOneContext *.
- * Stores the context's cloned value row in the context's hash under key. */
-{
-struct hjAddOneContext *addCtx = context;
-struct hash *targetHash = addCtx->hash;
-char **valueRow = addCtx->clonedValues;
-hashAdd(targetHash, key, valueRow);
-}
-
-static void hashJoinLoad(struct hashJoin *self)
-/* Load table contents into hash: run self->query on a connection to self->db and, for each
- * result row, store a clone of columns 1..valCount (allocated in self->lm) in self->hash
- * under the key from column 0.  When self->jfB is set, the key is first processed by
- * joinerFieldIterateKey.  errAborts if self->loaded is already set; sets it when done. */
-{
-if (self->loaded)
-    errAbort("hashJoinLoad: loaded flag already set");
-struct sqlConnection *conn = hAllocConn(self->db);
-// Size the hash from the table's row count, capped at hashMaxSize.
-int rowCount = sqlRowCount(conn, self->table);
-int hashSize = min(digitsBaseTwo(rowCount), hashMaxSize);
-self->hash = hashNew(hashSize);
-char **row;
-struct sqlResult *sr = sqlGetResult(conn, self->query);
-while ((row = sqlNextRow(sr)) != NULL)
-    {
-    // row[0] is the key; the remaining valCount columns are the values to keep.
-    char **clonedValues = lmCloneRow(self->lm, row+1, self->valCount);
-    struct hjAddOneContext context = { self->hash, clonedValues };
-    // If necessary, process key according to self->jfB; joinerFieldIterateKey calls
-    // hashJoinAddOne for the key(s) it produces.
-    if (self->jfB)
-        joinerFieldIterateKey(self->jfB, hashJoinAddOne, &context, row[0]);
-    else
-        hashAdd(self->hash, row[0], clonedValues);
-    }
-// Release the result set before handing the connection back; previously it was leaked.
-sqlFreeResult(&sr);
-self->loaded = TRUE;
-hFreeConn(&conn);
-}
-
-struct hjKeyContext
-// Context for hashJoinOneKey, which is called directly and as a joinerFieldIterateKey callback.
-// Field order matters: hashJoinOneRow fills this struct with a positional initializer.
-{
-    struct hashJoin *self;      // hashJoin whose hash is searched and whose colValues accumulate
-    boolean includeEmpties;     // If TRUE, empty values are kept and each item gets a trailing comma
-    int matchCount;             // Running total of matched hash rows; a counter, not a flag, so it
-                                // is an int (hashJoinOneKey adds to it, hashJoinOneRow tests 0 and 1)
-};
-
-static void hashJoinOneKey(void *context, char *key)
-/* Look up some processed key in hash and accumulate results for each column.
- * This is a callback for joinerFieldIterateKey; context is struct hjKeyContext *.
- * The number of hash rows found for key is added to ctx->matchCount and their values are
- * appended to self->colValues, skipping adjacent duplicate values. */
-{
-struct hjKeyContext *ctx = context;
-struct hashJoin *self = ctx->self;
-struct hashEl *helFirst = hashLookup(self->hash, key);
-// No match: there is nothing to count or accumulate.  Returning here also keeps matchRows
-// below from being declared with zero elements, which is undefined behavior for a
-// variable-length array.
-if (helFirst == NULL)
-    return;
-// hgTables accumulates multiple match values with slAddHead so they are
-// printed in reverse.  Use arrays to accumulate multiple matched rows; we'll step
-// through them backwards below to match hgTables' order.
-// helMaxCount is only used as an upper bound for sizing the array; the actual number
-// of matches is counted by the hashLookupNext loop.
-int helMaxCount = slCount(helFirst);
-char **matchRows[helMaxCount];
-struct hashEl *hel;
-int matchIx;
-for (matchIx = 0, hel = helFirst;  hel != NULL;  hel = hashLookupNext(hel), matchIx++)
-    {
-    char **row = hel->val;
-    matchRows[matchIx] = row;
-    }
-int matchCount = matchIx;
-ctx->matchCount += matchCount;
-// When there are multiple matches, hgTables includes empty vals and prints a comma after each item.
-boolean includeEmpties = ctx->includeEmpties || (matchCount > 1);
-// Step through matchRows in reverse order to match hgTables.
-for (matchIx = matchCount - 1;  matchIx >= 0;  matchIx--)
-    {
-    char **row = matchRows[matchIx];
-    int valIx;
-    for (valIx = 0;  valIx < self->valCount;  valIx++)
-        {
-        char *val = row[valIx];
-        if (isNotEmpty(val) || includeEmpties)
-            {
-            // Skip over adjacent duplicate values
-            struct dyString *colDy = self->colValues[valIx];
-            int colDyLen = dyStringLen(colDy);
-            boolean isDup = FALSE;
-            if (matchIx < matchCount - 1)
-                {
-                // Going backwards, the row handled just before this one is matchIx+1.
-                char **prevRow = matchRows[matchIx+1];
-                char *prevVal = (prevRow == NULL) ? NULL : prevRow[valIx];
-                isDup = sameOk(val, prevVal);
-                }
-            else
-                // If there's no previous row to compare to from this key, but colDy already
-                // ends with the same value, consider this a duplicate:
-                isDup = colDyLen > 0 && endsWithWordComma(colDy->string, val);
-            if (! isDup)
-                {
-                if (includeEmpties)
-                    {
-                    // Comma-terminated items; an empty value contributes just the comma.
-                    if (isNotEmpty(val))
-                        dyStringAppend(colDy, val);
-                    dyStringAppendC(colDy, ',');
-                    }
-                else
-                    {
-                    // Comma-separated items; no trailing comma.
-                    if (colDyLen > 0)
-                        dyStringAppendC(colDy, ',');
-                    dyStringAppend(colDy, val);
-                    }
-                }
-            }
-        }
-    }
-}
-
-static void hashJoinChopCommaKey(struct hjKeyContext *context, struct joinerField *jfA, char *key)
-/* Split a copy of key at commas, whatever jfA->separator may be.  Each piece is passed
- * through joinerFieldChopKey when jfA is given, and each non-empty result is looked up with
- * hashJoinOneKey.  Sets context->includeEmpties to TRUE.  Splitting stops at the first
- * empty piece. */
-{
-context->includeEmpties = TRUE;
-// Work on a stack copy of key, because splitting writes terminators into the string.
-int keyLen = strlen(key);
-char scratch[keyLen+1];
-safencpy(scratch, sizeof(scratch), key, keyLen);
-char *item, *rest;
-for (item = scratch;  isNotEmpty(item);  item = rest)
-    {
-    // Terminate this piece at the next comma and remember where the following piece starts.
-    rest = strchr(item, ',');
-    if (rest != NULL)
-        {
-        *rest = 0;
-        rest++;
-        }
-    if (jfA)
-        item = joinerFieldChopKey(jfA, item);
-    if (item[0] != 0)
-        hashJoinOneKey(context, item);
-    }
-}
-
-void hashJoinOneRow(struct hashJoin *self, char **extRow)
-/* Join one external row: take the key from extRow[self->extRowKeyIx], look it up in the hash
- * (loading the hash first if that has not happened yet) and point the value columns of
- * extRow at the accumulated results.  If the key is empty, extRow is left untouched.
- * The result strings live in self's reusable accumulators, so don't call this again until
- * done with extRow. */
-{
-if (!self->loaded)
-    hashJoinLoad(self);
-char *key = extRow[self->extRowKeyIx];
-if (!isNotEmpty(key))
-    return;
-// Start from empty accumulators.
-uint colIx;
-for (colIx = 0;  colIx < self->valCount;  colIx++)
-    dyStringClear(self->colValues[colIx]);
-// Look up the key, processed by jfA when there is one; includeEmpties starts out TRUE
-// exactly when jfA is in use, and matchCount starts at zero.
-struct joinerField *jfA = self->jfA;
-struct hjKeyContext keyCtx = { self, (jfA != NULL), 0 };
-if (jfA != NULL)
-    joinerFieldIterateKey(jfA, hashJoinOneKey, &keyCtx, key);
-else
-    hashJoinOneKey(&keyCtx, key);
-// The key may itself be a comma-glommed result of some other hash join.  If nothing matched,
-// the key contains a comma, and joinerFieldIterateKey did not already split on commas,
-// retry with the comma-separated pieces.
-boolean alreadyCommaChopped = (jfA != NULL) && sameOk(jfA->separator, ",");
-if (keyCtx.matchCount == 0 && !alreadyCommaChopped && strchr(key, ',') != NULL)
-    hashJoinChopCommaKey(&keyCtx, jfA, key);
-// includeEmpties anticipates several outputs, each followed by a comma.  If there turned
-// out to be exactly one match, the trailing comma is removed.
-boolean stripTrailingComma = keyCtx.includeEmpties && keyCtx.matchCount == 1;
-// With no match at all, optionally report "n/a".
-boolean fillNa = self->naForMissing && keyCtx.matchCount == 0;
-for (colIx = 0;  colIx < self->valCount;  colIx++)
-    {
-    struct dyString *colDy = self->colValues[colIx];
-    if (stripTrailingComma)
-        {
-        char *last = colDy->string + dyStringLen(colDy) - 1;
-        if (*last == ',')
-            *last = '\0';
-        }
-    if (fillNa)
-        dyStringAppend(colDy, "n/a");
-    // Point the external row's result column at the accumulated value.
-    extRow[self->extRowValIxs[colIx]] = colDy->string;
-    }
-}
-
-void hashJoinFree(struct hashJoin **pSelf)
-/* Release everything a hashJoin owns -- hash, per-column accumulators, query string and
- * local memory pool -- and then the hashJoin itself via freez(pSelf).
- * Does nothing when pSelf or *pSelf is NULL. */
-{
-struct hashJoin *hj = (pSelf != NULL) ? *pSelf : NULL;
-if (hj == NULL)
-    return;
-hashFree(&hj->hash);
-// Each accumulator is freed on its own; the array that holds them was allocated from
-// hj->lm and goes away with lmCleanup below.
-uint colIx = 0;
-while (colIx < hj->valCount)
-    {
-    dyStringFree(&hj->colValues[colIx]);
-    colIx++;
-    }
-freeMem(hj->query);
-lmCleanup(&hj->lm);
-freez(pSelf);
-}
-
-void hashJoinFreeList(struct hashJoin **pList)
-/* Free every hashJoin in the list at *pList with hashJoinFree, then set *pList to NULL.
- * Does nothing when pList or *pList is NULL. */
-{
-if (pList == NULL)
-    return;
-if (*pList == NULL)
-    return;
-struct hashJoin *hj, *hjNext;
-for (hj = *pList;  hj != NULL;  hj = hjNext)
-    {
-    // Save the link first: hashJoinFree releases hj.
-    hjNext = hj->next;
-    hashJoinFree(&hj);
-    }
-*pList = NULL;
-}