44ccfacbe3a3d4b300f80d48651c77837a4b571e
galt
  Tue Apr 26 11:12:02 2022 -0700
SQL INJECTION Prevention Version 2 - this improves our methods by making subclauses of SQL that get passed around be both easy and correct to use. The way that was achieved was by getting rid of the obscure and not well used functions sqlSafefFrag and sqlDyStringPrintfFrag and replacing them with the plain versions of those functions, since these are not needed anymore. The new version checks for NOSQLINJ in unquoted %-s which is used to include SQL clauses, and will give an error the NOSQLINJ clause is not present, and this will automatically require the correct behavior by developers. sqlDyStringPrint is a very useful function, however because it was not enforced, users could use various other dyString functions and they operated without any awareness or checking for SQL correct use. Now those dyString functions are prohibited and it will produce an error if you try to use a dyString function on a SQL string, which is simply detected by the presence of the NOSQLINJ prefix.

diff --git src/hg/cgilib/hashJoin.c src/hg/cgilib/hashJoin.c
index 0994857..7a92ee9 100644
--- src/hg/cgilib/hashJoin.c
+++ src/hg/cgilib/hashJoin.c
@@ -1,312 +1,314 @@
 /* hashJoin - join one or more columns of a hashed database table to an externally provided
  * char **row that contains the key and empty slot(s) for the column value(s) */
 
 /* Copyright (C) 2015 The Regents of the University of California 
  * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 
 #include "hashJoin.h"
 #include "hdb.h"
 #include "obscure.h"
 
 struct hashJoin
 // Implements table join as a hash lookup: the key is taken from some column of an externally
 // provided row, and one or more values are retrieved and then stored in specified columns
 // of the external row.  If a key has more than one set of matching columns, then each
 // column's values are glommed into a comma-separated list for that column in the external row.
 {
     struct hashJoin *next;
     struct hash *hash;			// Hash some kind of key to char **row of column values
     uint extRowKeyIx;			// Index of hash key to take from external row
     uint valCount;			// Number of columns in hash value rows
     uint *extRowValIxs;			// Index of each hash value column to store in external row
     struct dyString **colValues;	// Accumulators for hash value columns -- multiple
 					// results from hash lookup become comma-sep strings
     struct lm *lm;			// For storing hash values, misc strings & arrays
     struct joinerField *jfA;		// If non-NULL, its separator, chopBefore and chopAfter
 					// are applied to each key accessed by hashJoinOneRow.
     struct joinerField *jfB;		// If non-NULL, its chopBefore and chopAfter
 					// are applied to each key passed to hashJoinAddMapping.
     char *db;				// Database from which to load hash
     char *table;			// Table from which to load hash
     char *query;			// SQL query to execute when loading hash
     boolean loaded;			// TRUE when table contents have been loaded into hash
     boolean naForMissing;		// If TRUE, then output "n/a" when there's no match
 };
 
 struct hashJoin *hashJoinNew(struct joinerDtf *keyDtf, uint extRowKeyIx,
                              struct joinerDtf *valDtfs, uint *extRowValIxs,
                              struct joinerField *jfA, struct joinerField *jfB,
                              boolean naForMissing)
 /* Return a new hashJoin.  extRowKeyIx is the index in an external row of the key
  * to use in the join.  extRowValIxs[valCount] contains each hash val column's index
  * into an external row.  jfA and jfB are optional; if given, then jfA's separator,
  * chopBefore and chopAfter will be applied to each key retrieved from the external row
  * and jfB's separator, chopBefore and chopAfter will be applied to each hash key.
  * If naForMissing is TRUE then the result columns will contain "n/a" when there is
  * no match in the hash. */
 {
 struct hashJoin *self;
 AllocVar(self);
 self->extRowKeyIx = extRowKeyIx;
 int valCount = slCount(valDtfs);
 self->valCount = valCount;
 // Save some inner-loop tests if no separating or chopping will be required:
 if (jfA && (jfA->separator || jfA->chopBefore || jfA->chopAfter))
     self->jfA = jfA;
 if (jfB && (jfB->separator || jfB->chopBefore || jfB->chopAfter))
     self->jfB = jfB;
 self->lm = lmInit(0);
 lmAllocArray(self->lm, self->extRowValIxs, valCount);
 CopyArray(extRowValIxs, self->extRowValIxs, valCount);
 lmAllocArray(self->lm, self->colValues, valCount);
 int i;
 for (i = 0;  i < valCount;  i++)
     self->colValues[i] = dyStringNew(0);
 self->db = lmCloneString(self->lm, keyDtf->database);
 self->table = lmCloneString(self->lm, keyDtf->table);
 struct dyString *query = sqlDyStringCreate("select %s", keyDtf->field);
 struct joinerDtf *dtf;
 for (dtf = valDtfs;  dtf != NULL;  dtf = dtf->next)
     {
     if (differentString(dtf->database, self->db) ||
         differentString(dtf->table, self->table))
         errAbort("hashJoinNew: inconsistent key field (%s.%s.%s) and value field (%s.%s.%s)",
                  keyDtf->database, keyDtf->table, keyDtf->field,
                  dtf->database, dtf->table, dtf->field);
-    dyStringAppendC(query, ',');
-    dyStringAppend(query, dtf->field);
+    sqlDyStringPrintf(query, ",");
+    sqlDyStringPrintf(query, "%s", dtf->field);
     }
-dyStringPrintf(query, " from %s", self->table);
+sqlDyStringPrintf(query, " from %s", self->table);
 self->query = dyStringCannibalize(&query);
 self->naForMissing = naForMissing;
 return self;
 }
 
 struct hashJoin *hashJoinNext(struct hashJoin *el)
 /* Get the next hashJoin in a list of hashJoins. */
 {
 return el->next;
 }
 
 struct hjAddOneContext
 // joinerFieldIterateKey context for use by hashJoinAddOne
 {
     struct hash *hash;
     char **clonedValues;
 };
 
 static void hashJoinAddOne(void *context, char *key)
 /* Add values from context to hash from context for key.
  * This is a callback for joinerFieldIterateKey; context is struct hjAddOneContext *. */
 {
 struct hjAddOneContext *ctx = context;
 hashAdd(ctx->hash, key, ctx->clonedValues);
 }
 
 static void hashJoinLoad(struct hashJoin *self)
 /* Load table contents into hash. */
 {
 if (self->loaded)
     errAbort("hashJoinLoad: loaded flag already set");
 struct sqlConnection *conn = hAllocConn(self->db);
-int rowCount = sqlRowCount(conn, self->table);
+char queryTblSafe[1024];
+sqlSafef(queryTblSafe, sizeof queryTblSafe, "%s", self->table);
+int rowCount = sqlRowCount(conn, queryTblSafe);
 int hashSize = min(digitsBaseTwo(rowCount), hashMaxSize);
 self->hash = hashNew(hashSize);
 char **row;
 struct sqlResult *sr = sqlGetResult(conn, self->query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     char **clonedValues = lmCloneRow(self->lm, row+1, self->valCount);
     struct hjAddOneContext context = { self->hash, clonedValues };
     // If necessary, process key according to self->jfA.
     if (self->jfB)
         joinerFieldIterateKey(self->jfB, hashJoinAddOne, &context, row[0]);
     else
         hashAdd(self->hash, row[0], clonedValues);
     }
 self->loaded = TRUE;
 hFreeConn(&conn);
 }
 
 struct hjKeyContext
 {
     struct hashJoin *self;
     boolean includeEmpties;
     boolean matchCount;
 };
 
 static void hashJoinOneKey(void *context, char *key)
 /* Look up some processed key in hash and accumulate results for each column.
  * This is a callback for joinerFieldIterateKey; context is struct hashJoin *. */
 {
 struct hjKeyContext *ctx = context;
 struct hashJoin *self = ctx->self;
 struct hashEl *helFirst = hashLookup(self->hash, key);
 // hgTables accumulates multiple match values with slAddHead so they are
 // printed in reverse.  Use arrays to accumulate multiple matched rows; we'll step
 // through them backwards in hashJoinGlomMultipleMatches to match hgTables' order.
 int helMaxCount = slCount(helFirst);
 char **matchRows[helMaxCount];
 struct hashEl *hel;
 int matchIx;
 for (matchIx = 0, hel = helFirst;  hel != NULL;  hel = hashLookupNext(hel), matchIx++)
     {
     char **row = hel->val;
     matchRows[matchIx] = row;
     }
 int matchCount = matchIx;
 ctx->matchCount += matchCount;
 // When there are multiple matches, hgTables includes empty vals and prints a comma after each item.
 boolean includeEmpties = ctx->includeEmpties || (matchCount > 1);
 // Step through matchRows in reverse order to match hgTables.
 for (matchIx = matchCount - 1;  matchIx >= 0;  matchIx--)
     {
     char **row = matchRows[matchIx];
     int valIx;
     for (valIx = 0;  valIx < self->valCount;  valIx++)
         {
         char *val = row[valIx];
         if (isNotEmpty(val) || includeEmpties)
             {
             // Skip over adjacent duplicate values
             struct dyString *colDy = self->colValues[valIx];
             int colDyLen = dyStringLen(colDy);
             boolean isDup = FALSE;
             if (matchIx < matchCount - 1)
                 {
                 char **prevRow = matchRows[matchIx+1];
                 char *prevVal = (prevRow == NULL) ? NULL : prevRow[valIx];
                 isDup = sameOk(val, prevVal);
                 }
             else
                 // If there's no previous row to compare to from this key, but colDy already
                 // ends with the same value, consider this a duplicate:
                 isDup = colDyLen > 0 && endsWithWordComma(colDy->string, val);
             if (! isDup)
                 {
                 if (includeEmpties)
                     {
                     if (isNotEmpty(val))
                         dyStringAppend(colDy, val);
                     dyStringAppendC(colDy, ',');
                     }
                 else
                     {
                     if (colDyLen > 0)
 			dyStringAppendC(colDy, ',');
 		    dyStringAppend(colDy, val);
                     }
                 }
             }
         }
     }
 }
 
 static void hashJoinChopCommaKey(struct hjKeyContext *context, struct joinerField *jfA, char *key)
 /* Chop key by comma, regardless of jfA->separator; for each item, apply jfA's chopBefore
  * and chopAfter if applicable, and try to join the result. */
 {
 context->includeEmpties = TRUE;
 int len = strlen(key);
 char keyClone[len+1];
 safencpy(keyClone, sizeof(keyClone), key, len);
 char *s = keyClone, *e;
 while (isNotEmpty(s))
     {
     e = strchr(s, ',');
     if (e != NULL)
         *e++ = 0;
     if (jfA)
         s = joinerFieldChopKey(jfA, s);
     if (s[0] != 0)
         hashJoinOneKey(context, s);
     s = e;
     }
 }
 
 void hashJoinOneRow(struct hashJoin *self, char **extRow)
 /* Look up some column of extRow in hash and place result(s) in other columns of extRow.
  * Don't call this again until done with extRow -- column value storage is reused. */
 {
 if (!self->loaded)
     hashJoinLoad(self);
 char *key = extRow[self->extRowKeyIx];
 if (isNotEmpty(key))
     {
     // Clear accumulators
     uint i;
     for (i = 0;  i < self->valCount;  i++)
         dyStringClear(self->colValues[i]);
     // If necessary, process key according to self->jfA. Look up key(s) and accumulate results.
     struct joinerField *jfA = self->jfA;
     struct hjKeyContext context = { self, FALSE, FALSE };
     if (jfA)
         {
         context.includeEmpties = TRUE;
         joinerFieldIterateKey(jfA, hashJoinOneKey, &context, key);
         }
     else
         hashJoinOneKey(&context, key);
     // In case we're processing comma-glommed results from some other hash join --
     // if there were no results, but the key contains commas and wasn't already comma-chopped
     // by joinerFieldIterateKey, try comma-chopping it and looking up the pieces.
     if (context.matchCount == 0 &&
         ! (jfA && sameOk(jfA->separator, ",")) &&
         strchr(key, ','))
         {
         hashJoinChopCommaKey(&context, jfA, key);
         }
     // When includeEmpties is set, we assume we're going to have multiple outputs.
     // However, there might be only one match among multiple keys.  If so, remove trailing commas.
     if (context.includeEmpties && context.matchCount == 1)
         {
         int valIx;
         for (valIx = 0;  valIx < self->valCount;  valIx++)
             {
             struct dyString *colDy = self->colValues[valIx];
             char *end = colDy->string + dyStringLen(colDy) - 1;
             if (*end == ',')
                 *end = '\0';
             }
         }
     // Set the external row result columns to point to accumulated values.
     for (i = 0;  i < self->valCount;  i++)
         {
         struct dyString *colDy = self->colValues[i];
         if (self->naForMissing && context.matchCount == 0)
             dyStringAppend(colDy, "n/a");
         uint extRowValIx = self->extRowValIxs[i];
         extRow[extRowValIx] = colDy->string;
         }
     }
 }
 
 void hashJoinFree(struct hashJoin **pSelf)
 /* Free hashJoin (if necessary). */
 {
 if (pSelf == NULL || *pSelf == NULL)
     return;
 struct hashJoin *self = *pSelf;
 hashFree(&self->hash);
 uint i;
 for (i = 0;  i < self->valCount;  i++)
     dyStringFree(&self->colValues[i]);
 freeMem(self->query);
 lmCleanup(&self->lm);
 freez(pSelf);
 }
 
 void hashJoinFreeList(struct hashJoin **pList)
 /* Free a list of hashJoins. */
 {
 if (pList == NULL || *pList == NULL)
     return;
 struct hashJoin *el = *pList;
 while (el != NULL)
     {
     struct hashJoin *elNext = el->next;
     hashJoinFree(&el);
     el = elNext;
     }
 *pList = NULL;
 }