44ccfacbe3a3d4b300f80d48651c77837a4b571e
galt
  Tue Apr 26 11:12:02 2022 -0700
SQL INJECTION Prevention Version 2 - this improves our methods by making subclauses of SQL that get passed around be both easy and correct to use. The way that was achieved was by getting rid of the obscure and not well used functions sqlSafefFrag and sqlDyStringPrintfFrag and replacing them with the plain versions of those functions, since these are not needed anymore. The new version checks for NOSQLINJ in unquoted %-s which is used to include SQL clauses, and will give an error the NOSQLINJ clause is not present, and this will automatically require the correct behavior by developers. sqlDyStringPrint is a very useful function, however because it was not enforced, users could use various other dyString functions and they operated without any awareness or checking for SQL correct use. Now those dyString functions are prohibited and it will produce an error if you try to use a dyString function on a SQL string, which is simply detected by the presence of the NOSQLINJ prefix.

diff --git src/hg/sqlToXml/sqlToXml.c src/hg/sqlToXml/sqlToXml.c
index 259bbb7..a53edeb 100644
--- src/hg/sqlToXml/sqlToXml.c
+++ src/hg/sqlToXml/sqlToXml.c
@@ -1,510 +1,511 @@
 /* sqlToXml - Given a database, .as file, .joiner file, and a sql select 
  * statement, dump out results as XML. */
 /* This file is copyright 2005 Jim Kent, but license is hereby
  * granted for all use - public, private or commercial. */
 
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "dystring.h"
 #include "options.h"
 #include "obscure.h"
 #include "jksql.h"
 
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "sqlToXml - dump out all or part of a relational database to XML, guided\n"
   "by a dump specification.  See sqlToXml.doc for additional information.\n"
   "usage:\n"
   "   sqlToXml database dumpSpec.od output.xml\n"
   "options:\n"
   "   -topTag=name - Give the top level XML tag the given name.  By\n"
   "               default it will be the same as the database name.\n"
   "   -query=file.sql - Instead of dumping whole database, just dump those\n"
   "                  records matching SQL select statement in file.sql.\n"
   "                  This statement should be of the form:\n"
   "           select * from table where ...\n"
   "                   or\n"
   "           select table.* from table,otherTables where ...\n"
   "                   Where the table is the same as the table in the first\n"
   "                   line of dumpSpec.\n"
   "   -tab=N - number of spaces betweeen tabs in xml.dumpSpec - by default it's 8.\n"
   "            (It may be best just to avoid tabs in that file though.)\n"
   "   -maxList=N - This will limit any lists in the output to no more than\n"
   "                size N.  This is mostly just for testing.\n"
   );
 }
 
 int tabSize = 8;
 int maxList = 0;
 char *topTag = NULL;
 
 static struct optionSpec options[] = {
    {"topTag", OPTION_STRING},
    {"query", OPTION_STRING},
    {"tab", OPTION_INT},
    {"maxList", OPTION_INT},
    {NULL, 0},
 };
 
 struct dyString *escaper;
 
 struct specTree 
 /* Holds dump specification tree. */
    {
    struct specTree *next;	/* Sibling */
    struct specTree *children;
    char *field;		/* Field in table to link on. 
                          * At top level is master table name instead.  */
    int fieldIx;		/* Index of field in table. */
    char *target;	/* Target table.field  */
    char *targetTable;	/* Table part of target */
    char *targetField;	/* Field part of target */
    char *outName;	/* Name to use for output, defaults to table name. */
    bool needsQuote;	/* Does join need value in quotes? */
    bool hideTable;	/* If true hide table (but not children) */
    bool hideField;	/* If true hide field. */
    bool textField;	/* IF true this is text field. */
    bool allInAttributes; /* If true all info is in attributes. */
    bool gotChildTags;    /* If true has child tags. */
    };
 
 #ifdef DEBUG
 static void dumpSpecTree(FILE *f, struct specTree *tree, int level)
 {
 struct specTree *child;
 spaceOut(f, level+2);
 fprintf(f, "f %s, fIx %d, target %s, hideTable %d, hideField %d, textField %d, allInI %d\n",
 	tree->field, tree->fieldIx, tree->target, tree->hideTable, tree->hideField, 
 	tree->textField, tree->allInAttributes);
 for (child = tree->children; child != NULL; child = child->next)
     dumpSpecTree(f, child, level+1);
 } 
 #endif /* DEBUG */
 
 struct specTree *specTreeTarget(struct specTree *parent, char *field)
 /* Look through all children for matching field. */
 {
 struct specTree *tree;
 for (tree = parent->children; tree != NULL; tree = tree->next)
     if (sameString(tree->field, field))
 	break;
 return tree;
 }
 
 struct typedField
 /* A field - name and type */
     {
     struct typedField *next;
     char *name;	/* Field name. */
     char type;  /* Value is " or space. */
     };
 
 int countIndent(char *s, int size)
 /* Count the number of spaces and tabs. */
 {
 int count = 0;
 int i;
 char c;
 for (i=0; i<size; ++i)
     {
     c = s[i];
     if (c == ' ')
         count += 1;
     else if (c == '\t')
         {
 	int nextTab = (count + tabSize)/tabSize * tabSize;
 	count = nextTab;
 	}
     else
         internalErr();
     }
 return count;
 }
 
 struct typedField *findField(struct typedField *list, char *name)
 /* Return field of given name, or NULL if none. */
 {
 struct typedField *field;
 for (field = list; field != NULL; field = field->next)
     if (sameString(field->name, name))
         break;
 return field;
 }
 
 
 boolean isStringField(struct hash *tableHash, char *tableDotField)
 /* Return true if given field is a string type field, needing quotations
  * in sql. */
 {
 char *dot = strchr(tableDotField, '.');
 if (dot != NULL)
     {
     struct typedField *field, *fieldList;
     *dot = 0;
     fieldList = hashMustFindVal(tableHash, tableDotField);
     field = findField(fieldList, dot+1);
     *dot = '.';
     if (field == NULL)
         errAbort("No field %s", tableDotField);
     return field->type == '"';
     }
 return FALSE;
 }
 
 void rSpecTreeLoad(struct lineFile *lf, 
 	struct specTree *parent, struct hash *tableHash)
 /* Load all children of parent. */
 {
 char *line, *pastSpace;
 int indent, curIndent;
 struct specTree *branch = NULL;
 struct typedField *tf, *tfList = hashMustFindVal(tableHash, parent->targetTable);
 if (!lineFileNextReal(lf, &line))
     return;
 pastSpace = skipLeadingSpaces(line);
 curIndent = countIndent(line, pastSpace - line);
 lineFileReuse(lf);
 for (;;)
     {
     if (!lineFileNextReal(lf, &line))
         break;
     pastSpace = skipLeadingSpaces(line);
     indent = countIndent(line, pastSpace - line);
     if (indent > curIndent)
 	{
 	lineFileReuse(lf);
 	rSpecTreeLoad(lf, branch, tableHash);
 	}
     else if (indent < curIndent)
 	{
 	lineFileReuse(lf);
 	break;
 	}
     else
 	{
 	char *field = nextWord(&pastSpace);
 	char *target = nextWord(&pastSpace);
 	char *extra = nextWord(&pastSpace);
 	char *dot, targetTable[256], *targetField;
 	if (target == NULL)
 	   errAbort("Missing target line %d of %s", lf->lineIx, lf->fileName);
 	AllocVar(branch);
 	branch->allInAttributes = TRUE;
 	if (field[0] != '.')
 	    errAbort("Field must start with . line %d of %s", 
 	    	lf->lineIx, lf->fileName);
 	field += 1;
 	branch->field = cloneString(field);
 	branch->target = cloneString(target);
 	dot = strchr(target, '.');
 	if (dot != NULL)
 	    {
 	    /* Parse out tableName and fieldName. */
 	    targetField = dot+1;
 	    *dot = 0;
 	    safef(targetTable, sizeof(targetTable), "%s", target);
 	    *dot = '.';
 	    branch->targetTable = cloneString(targetTable);
 	    branch->targetField = cloneString(targetField);
 
 	    /* Get info on current field. */
 	    tf = findField(tfList, field);
 	    if (tf == NULL)
 		errAbort("Field %s.%s doesn't exists",  parent->targetTable, field);
 	    branch->needsQuote = (tf->type == '"');
 	    branch->fieldIx = slIxFromElement(tfList, tf);
 
 	    /* See if it is followed by 'hide' */
 	    if (extra != NULL && sameString(extra, "hide"))
 	        {
 		extra = nextWord(&pastSpace);
 		branch->hideTable = TRUE;
 		}
 
 	    parent->allInAttributes = FALSE;
 	    parent->gotChildTags = TRUE;
 	    }
 	else
 	    {
 	    if (sameString(target, "hide"))
 		{
 		branch->hideField = TRUE;
 		}
 	    else if (sameString(target, "text"))
 	        {
 		tf = findField(tfList, field);
 		branch->fieldIx = slIxFromElement(tfList, tf);
 		branch->textField = TRUE;
 		parent->allInAttributes = FALSE;
 		}
 	    else
 	        {
 		errAbort("Don't recognize %s line %d of %s", 
 			target, lf->lineIx, lf->fileName);
 		}
 	    }
 	if (extra != NULL)
 	   {
 	   if (sameString(extra, "as"))
 	       {
 	       char *outName = nextWord(&pastSpace);
 	       if (outName == NULL)
 	           errAbort("Expecting something after 'as' line %d of %s.",
 		   	lf->lineIx, lf->fileName);
 	       branch->outName = cloneString(outName);
 	       extra = nextWord(&pastSpace);
 	       }
 	   }
 	if (extra != NULL)
 	    errAbort("There's extra stuff I don't understand line %d of %s.",
 		    lf->lineIx, lf->fileName);
 	if (branch->outName == NULL)
 	    branch->outName = cloneString(branch->targetTable);
 	slAddTail(&parent->children, branch);
 	}
     }
 }
 
 struct specTree *specTreeLoad(char *fileName, struct hash *tableHash)
 /* Load up a spec-tree from a file. */
 {
 struct specTree *root = NULL;
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 char *line;
 char *table, *as, *outName;
 
 AllocVar(root);
 if (!lineFileNextReal(lf, &line))
     lineFileUnexpectedEnd(lf);
 line = trimSpaces(line);
 table = nextWord(&line);
 as = nextWord(&line);
 outName = nextWord(&line);
 root->targetTable = cloneString(table);
 if (as != NULL)
    {
    if (outName == NULL)
        errAbort("Expecting something after 'as' line %d of %s", 
        		lf->lineIx, lf->fileName);
    root->outName = cloneString(outName);
    }
 else
    root->outName = cloneString(root->targetTable);
 rSpecTreeLoad(lf, root, tableHash);
 
 lineFileClose(&lf);
 return root;
 }
 
 struct hash *tablesAndFields(struct sqlConnection *conn)
 /* Get hash of all tables.  Hash is keyed by table name.
  * Hash values are lists of typedField.   */
 {
 struct hash *hash = hashNew(0);
 struct slName *table, *tableList = sqlListTables(conn);
 for (table = tableList; table != NULL; table = table->next)
     {
     char query[256], **row;
     struct sqlResult *sr;
     struct typedField *fieldList = NULL, *field;
     sqlSafef(query, sizeof(query), "describe %s", table->name);
     sr = sqlGetResult(conn, query);
     while ((row = sqlNextRow(sr)) != NULL)
         {
 	char *name = row[0];
 	char *type = row[1];
 	char code = '"';
 	if ( startsWith("int", type)  
 	  || startsWith("float", type)
 	  || startsWith("tinyint", type)
 	  || startsWith("smallint", type))
 	    code = ' ';
 	AllocVar(field);
 	field->name = cloneString(name);
 	field->type = code;
 	slAddHead(&fieldList, field);
 	}
     slReverse(&fieldList);
     hashAdd(hash, table->name, fieldList);
     }
 return hash;
 }
 
 void escToXmlString(char *in, char *out)
 /* Put in the escapes xml needs inside of a string */
 {
 char c;
 while ((c = *in++) != 0)
     {
     if (c == '&')
 	{
         strcpy(out, "&amp;");
 	out += 5;
 	}
     else if (c == '"')
         {
 	strcpy(out, "&quot;");
 	out += 6;
 	}
     else
         *out++ = c;
     }
 *out = 0;
 }
 
 void rSqlToXml(struct sqlConnCache *cc, char *database, char *table, char *entryField,
 	char *query, struct hash *tableHash, struct specTree *tree,
 	FILE *f, int depth)
 /* Recursively output XML */
 {
 struct sqlConnection *conn = sqlConnCacheAlloc(cc, database);
 struct sqlResult *sr;
 char **row;
 struct typedField *col, *colList = hashMustFindVal(tableHash, table);
 
 verbose(2, "%s\n", query);
 sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     int rowIx = 0;
 
     /* Print non-joining columns as attributes. */
     if (!tree->hideTable)
 	{
 	spaceOut(f, depth*2);
 	fprintf(f, "<%s", tree->outName);
 	}
     for (col = colList; col != NULL; col = col->next, rowIx+=1)
         {
 	char *val = row[rowIx];
 	struct specTree *branch = specTreeTarget(tree, col->name);
 	if (branch == NULL && !sameString(col->name, entryField))
 	    {
 	    int len = strlen(val);
 	    dyStringBumpBufSize(escaper, len*5);
 	    escToXmlString(val, escaper->string);
 	    if (!tree->hideTable)
 		fprintf(f, " %s=\"%s\"", col->name, escaper->string);
 	    }
 	}
 
     /* If no subobjects, then close up tag, else proceed to subobjects. */
     if (tree->allInAttributes)
 	{
 	if (!tree->hideTable)
 	    fprintf(f, "/>\n");
 	}
     else
         {
 	struct specTree *branch;
 	if (!tree->hideTable)
 	    fprintf(f, ">");
 	rowIx = 0;
 	for (branch = tree->children; branch != NULL; branch = branch->next)
 	    {
 	    if (sameString(branch->target, "text"))
 	        {
 		char *val = row[branch->fieldIx];
 		int len = strlen(val);
 		dyStringBumpBufSize(escaper, len*5);
 		escToXmlString(val, escaper->string);
 		fprintf(f, "%s", escaper->string);
 		}
 	    }
 	if (tree->gotChildTags)
 	    if (!tree->hideTable)
 		fprintf(f, "\n");
 	for (branch = tree->children; branch != NULL; branch = branch->next)
 	    {
 	    char *target = branch->target;
 	    if (sameString(target, "text"))
 	        ;
 	    else if (!sameString(target, "hide"))
 		{
 		struct dyString *sql = dyStringNew(0);
 		int newDepth = depth;
 		if (!tree->hideTable)
 		    newDepth += 1;
 		sqlDyStringPrintf(sql, "select * from %s where %s = ",
 			branch->targetTable, target);
 		if (branch->needsQuote)
-		    dyStringPrintf(sql, "\"%s\"", row[branch->fieldIx]);
+		    sqlDyStringPrintf(sql, "'%s'", row[branch->fieldIx]);
 		else
-		    dyStringPrintf(sql, "%s", row[branch->fieldIx]);
+		    sqlDyStringPrintf(sql, "%s", row[branch->fieldIx]);
 		if (maxList != 0)
-		    dyStringPrintf(sql, " limit %d", maxList);
+		    sqlDyStringPrintf(sql, " limit %d", maxList);
 		rSqlToXml(cc, database, branch->targetTable, branch->targetField, 
 			sql->string, tableHash, branch, f, newDepth);
 		dyStringFree(&sql);
 		}
 	    }
 	if (!tree->hideTable)
 	    {
 	    if (tree->gotChildTags)
 		spaceOut(f, depth*2);
 	    fprintf(f, "</%s>\n", tree->outName);
 	    }
 	}
     }
 sqlFreeResult(&sr);
 sqlConnCacheDealloc(cc, &conn);
 }
 
 
 void sqlToXml(char *database, char *dumpSpec, char *outputXml)
 /* sqlToXml - Given a database, .as file, .joiner file, and a sql select 
  * statement, dump out results as XML. */
 {
 struct sqlConnCache *cc = sqlConnCacheNew();
 struct sqlConnection *conn = sqlConnCacheAlloc(cc, database);
 struct hash *tableHash = tablesAndFields(conn);
 struct specTree *tree = specTreeLoad(dumpSpec, tableHash);
 FILE *f = mustOpen(outputXml, "w");
 char *topTag = optionVal("topTag", database);
 char *table = tree->targetTable;
 struct dyString *sql = dyStringNew(0);
 
 if (optionExists("query"))
     {
     char *queryFile = optionVal("query", NULL);
     char *query;
     readInGulp(queryFile, &query, NULL);
     if (!stringIn(table, query))
 	errAbort("No mention of table %s in %s.", table, queryFile);
-    dyStringAppend(sql, query);
+    // Trust reading sql from disk file.
+    sqlDyStringPrintf(sql, query, NULL);
     }
 else
     sqlDyStringPrintf(sql, "select * from %s", table);
 
 if (maxList > 0)
-    dyStringPrintf(sql, " limit %d", maxList);
+    sqlDyStringPrintf(sql, " limit %d", maxList);
 
 if (!sqlTableExists(conn, table))
     errAbort("No table %s in %s", table, database);
 sqlConnCacheDealloc(cc, &conn);
 
 verbose(1, "%d tables in %s\n",
 	tableHash->elCount,  database);
 
 escaper = dyStringNew(0);
 fprintf(f, "<%s>\n", topTag);
 rSqlToXml(cc, database, table, "", sql->string, tableHash, tree, f, 1);
 fprintf(f, "</%s>\n", topTag);
 carefulClose(&f);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 if (argc != 4)
     usage();
 tabSize = optionInt("tab", tabSize);
 maxList = optionInt("maxList", maxList);
 sqlToXml(argv[1], argv[2], argv[3]);
 return 0;
 }