44ccfacbe3a3d4b300f80d48651c77837a4b571e
galt
  Tue Apr 26 11:12:02 2022 -0700
SQL INJECTION Prevention Version 2 - this improves our methods by making subclauses of SQL that get passed around be both easy and correct to use. The way that was achieved was by getting rid of the obscure and not well used functions sqlSafefFrag and sqlDyStringPrintfFrag and replacing them with the plain versions of those functions, since these are not needed anymore. The new version checks for NOSQLINJ in unquoted %-s which is used to include SQL clauses, and will give an error the NOSQLINJ clause is not present, and this will automatically require the correct behavior by developers. sqlDyStringPrint is a very useful function, however because it was not enforced, users could use various other dyString functions and they operated without any awareness or checking for SQL correct use. Now those dyString functions are prohibited and it will produce an error if you try to use a dyString function on a SQL string, which is simply detected by the presence of the NOSQLINJ prefix.

diff --git src/hg/hgTables/identifiers.c src/hg/hgTables/identifiers.c
index 703b6d7..150fd3e 100644
--- src/hg/hgTables/identifiers.c
+++ src/hg/hgTables/identifiers.c
@@ -249,31 +249,31 @@
 static void addPrimaryIdsToHash(struct sqlConnection *conn, struct hash *hash,
 				char *idField, struct slName *tableList,
 				struct lm *lm, char *extraWhere)
 /* For each table in tableList, query all idField values and add to hash,
  * id -> uppercased id for case-insensitive matching. */
 {
 struct slName *table;
 struct sqlResult *sr;
 char **row;
 struct dyString *query = dyStringNew(0);
 for (table = tableList;  table != NULL;  table = table->next)
     {
     dyStringClear(query);
     sqlDyStringPrintf(query, "select %s from %s", idField, table->name);
     if (extraWhere != NULL)
-	dyStringPrintf(query, " where %s", extraWhere);
+	sqlDyStringPrintf(query, " where %s %-s", idField, extraWhere);
     sr = sqlGetResult(conn, query->string);
     while ((row = sqlNextRow(sr)) != NULL)
 	{
 	if (isNotEmpty(row[0]))
 	    {
 	    char *origCase = lmCloneString(lm, row[0]);
 	    touppers(row[0]);
 	    hashAdd(hash, row[0], origCase);
 	    }
 	}
     sqlFreeResult(&sr);
     }
 }
 
 static void addXrefIdsToHash(struct sqlConnection *conn, struct hash *hash,
@@ -285,35 +285,40 @@
  * Ignore self (alias = id) mappings -- we already got those above. */
 {
 struct sqlResult *sr;
 char **row;
 struct dyString *query = dyStringNew(0);
 if (sameString(xrefTable, curTable))
     sqlDyStringPrintf(query, "select %s,%s from %s", aliasField, xrefIdField, xrefTable);
 else
     /* Get only the aliases for items actually in curTable.idField: */
     sqlDyStringPrintf(query,
 	  "select %s.%s,%s.%s from %s,%s where %s.%s = %s.%s",
 	  xrefTable, aliasField, xrefTable, xrefIdField,
 	  xrefTable, curTable,
 	  xrefTable, xrefIdField, curTable, idField);
 if (extraWhere != NULL)
+    {
     // extraWhere begins w/ID field of curTable=xrefTable.  Skip that field name and
     // use "xrefTable.aliasField" with the IN (...) condition that follows:
-    sqlDyStringPrintf(query, " %s %s.%s %-s",
-		   (sameString(xrefTable, curTable) ? "where" : "and"),
-		   xrefTable, aliasField, skipToSpaces(extraWhere));
+    if (sameString(xrefTable, curTable)) 
+	sqlDyStringPrintf(query, " where "); 
+    else
+	sqlDyStringPrintf(query, " and ");
+    sqlDyStringPrintf(query, "%s.%s %-s",
+		   xrefTable, aliasField, extraWhere);
+    }
 sr = sqlGetResult(conn, query->string);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     if (sameString(row[0], row[1]))
 	continue;
     touppers(row[0]);
     hashAdd(hash, row[0], lmCloneString(lm, row[1]));
     }
 sqlFreeResult(&sr);
 }
 
 static struct hash *getAllPossibleIds(struct sqlConnection *conn,
 				      struct lm *lm, char *idField, char *extraWhere)
 /* If curTable is a custom track or bigFile, return NULL.  Otherwise,
  * make a hash of all identifiers in curTable (and alias tables if specified)
@@ -339,47 +344,47 @@
 else
     tableList = hSplitTableNames(database, curTable);
 if (idField != NULL)
     addPrimaryIdsToHash(alternateConn, matchHash, idField, tableList, lm, extraWhere);
 getXrefInfo(alternateConn, &xrefTable, &xrefIdField, &aliasField);
 if (xrefTable != NULL)
     {
     addXrefIdsToHash(alternateConn, matchHash, idField,
 		     xrefTable, xrefIdField, aliasField, lm, extraWhere);
     }
 if (sameWord(curTable, WIKI_TRACK_TABLE))
     wikiDisconnect(&alternateConn);
 return matchHash;
 }
 
-static char *slNameToInExpression(char *field, struct slName *allTerms)
+static char *slNameToInExpression(struct slName *allTerms)
 /* Given an slName list, return a SQL "field IN ('term1', 'term2', ...)" expression
  * to be used in a WHERE clause. */
 {
 struct dyString *dy = dyStringNew(0);
-sqlDyStringPrintfFrag(dy, "%s in (", field);
+sqlDyStringPrintf(dy, " in (");
 boolean first = TRUE;
 struct slName *term;
 for (term = allTerms;  term != NULL;  term = term->next)
     {
     if (first)
 	first = FALSE;
     else
-	dyStringAppend(dy, ", ");
+	sqlDyStringPrintf(dy, ", ");
     sqlDyStringPrintf(dy, "'%s'", term->name);
     }
-dyStringAppend(dy, ")");
+sqlDyStringPrintf(dy, ")");
 return dyStringCannibalize(&dy);
 }
 
 #define MAX_IDTEXT (64 * 1024)
 #define DEFAULT_MAX_IDS_IN_WHERE 10000
 
 
 void doPastedIdentifiers(struct sqlConnection *conn)
 /* Process submit in paste identifiers page. */
 {
 char *idText = trimSpaces(cartString(cart, hgtaPastedIdentifiers));
 htmlOpen("Table Browser (Input Identifiers)");
 if (isNotEmpty(idText))
     {
     /* Write terms to temp file, checking whether they have matches, and
@@ -413,31 +418,31 @@
     struct slName *allTerms = NULL, *term;
     while (lineFileNext(lf, &line, NULL))
 	{
 	while ((word = nextWord(&line)) != NULL)
 	    {
 	    term = slNameNew(word);
 	    slAddHead(&allTerms, term);
 	    totalTerms++;
 	    }
 	}
     slReverse(&allTerms);
     lineFileClose(&lf);
     char *extraWhere = NULL;
     int maxIdsInWhere = cartUsualInt(cart, "hgt_maxIdsInWhere", DEFAULT_MAX_IDS_IN_WHERE);
     if (totalTerms > 0 && totalTerms <= maxIdsInWhere)
-	extraWhere = slNameToInExpression(idField, allTerms);
+	extraWhere = slNameToInExpression(allTerms);
 
     struct lm *lm = lmInit(0);
     struct hash *matchHash = getAllPossibleIds(conn, lm, idField, extraWhere);
     trashDirFile(&tn, "hgtData", "identifiers", ".key");
     f = mustOpen(tn.forCgi, "w");
     for (term = allTerms;  term != NULL;  term = term->next)
 	{
 	struct slName *matchList = NULL, *match;
 	if (matchHash == NULL)
 	    {
 	    matchList = slNameNew(term->name);
 	    }
 	else
 	    {
 	    /* Support multiple alias->id mappings: */
@@ -576,43 +581,43 @@
     lineFileClose(&lf);
     return hash;
     }
 }
 
 char *identifierWhereClause(char *idField, struct hash *idHash)
 /* If the number of pasted IDs is reasonably low, return a where-clause component for the IDs. */
 {
 if (idHash == NULL || idField == NULL)
     return NULL;
 int numIds = hashNumEntries(idHash);
 int maxIdsInWhere = cartUsualInt(cart, "hgt_maxIdsInWhere", DEFAULT_MAX_IDS_IN_WHERE);
 if (numIds > 0 && numIds <= maxIdsInWhere)
     {
     struct dyString *dy = dyStringNew(16 * numIds);
-    dyStringPrintf(dy, "%s in (", idField);
+    sqlDyStringPrintf(dy, "%s in (", idField);
     struct hashCookie hc = hashFirst(idHash);
     boolean first = TRUE;
     char *id;
     while ((id = hashNextName(&hc)) != NULL)
 	{
 	if (first)
 	    first = FALSE;
 	else
-	    dyStringAppend(dy, ", ");
-	dyStringPrintf(dy, "'%s'", id);
+	    sqlDyStringPrintf(dy, ", ");
+	sqlDyStringPrintf(dy, "'%s'", id);
 	}
-    dyStringAppend(dy, ")");
+    sqlDyStringPrintf(dy, ")");
     return dyStringCannibalize(&dy);
     }
 return NULL;
 }
 
 void doClearPasteIdentifierText(struct sqlConnection *conn)
 /* Respond to clear within paste identifier page. */
 {
 cartRemove(cart, hgtaPastedIdentifiers);
 doPasteIdentifiers(conn);
 }
 
 void doClearIdentifiers(struct sqlConnection *conn)
 /* Respond to clear identifiers button. */
 {