b8023f3df3e17b6f60875f88935e60c232d64d16
galt
  Fri Sep 16 22:40:13 2016 -0700
refs #177282. Fixes the textarea XSS vulnerability in cheapcgi.c that was visible in hgVai. Adds functions for escaped output of large but indeterminate size, appending to a dyString or writing to a file stream in fprintf style.

diff --git src/lib/htmshell.c src/lib/htmshell.c
index 9c43c48..a8996b5 100644
--- src/lib/htmshell.c
+++ src/lib/htmshell.c
@@ -35,31 +35,31 @@
 
 void htmlNoEscape()
 {
 NoEscape = TRUE;
 }
 
 void htmlDoEscape()
 {
 NoEscape = FALSE;
 }
 
 void htmlVaEncodeErrorText(char *format, va_list args)
 /* Write an error message encoded against XSS. */
 {
 char warning[1024];
-int sz = vaHtmlSafefNoAbort(warning, sizeof(warning), format, args, TRUE);
+int sz = vaHtmlSafefNoAbort(warning, sizeof(warning), format, args, TRUE, FALSE);
 if (sz < 0)
     {
     safecpy(warning, sizeof(warning), "Low level error in htmlSafef. See error logs for details.");
     vfprintf(stderr, format, args);
     fprintf(stderr, "\n");
     fflush(stderr);
     }
 fprintf(stdout, "%s\n", warning);
 }
 
 
 void htmlVaParagraph(char *line, va_list args)
 /* Print a line in it's own paragraph. */
 {
 fputs("<P>", stdout);
@@ -509,31 +509,31 @@
             "if(endOfPage.lastIndexOf('-- ERROR --') > 0) { history.back(); }"
           "}\n"); // Note OK button goes to prev page when this page is interrupted by the error.
 fprintf(f,"window.onunload = function(){}; // Trick to avoid FF back button issue.\n");
 fprintf(f,"</script>\n");
 }
 
 void htmlVaWarn(char *format, va_list args)
 /* Write an error message. */
 {
 va_list argscp;
 va_copy(argscp, args);
 htmlWarnBoxSetup(stdout); // sets up the warnBox if it hasn't already been done.
 char warning[1024];
 
 // html-encode arguments to fight XSS
-int sz = vaHtmlSafefNoAbort(warning, sizeof(warning), format, args, TRUE);
+int sz = vaHtmlSafefNoAbort(warning, sizeof(warning), format, args, TRUE, FALSE);
 if (sz < 0)
     {
     safecpy(warning, sizeof(warning), "Low level error in htmlSafef. See error logs for details.");
     }
 
 // Replace newlines with BR tag
 char *warningBR = htmlWarnEncode(warning); 
 
 // Javascript-encode the entire message because it is
 // going to appear as a javascript string literal
 // as it gets appended to the warnList html.
 // JS-encoding here both allows us to use any character in the message
 // and keeps js-encodings in events like onmouseover="stuff %s|js| stuff" secure.
 char *jsEncodedMessage = javascriptEncode (warningBR); 
 freeMem(warningBR);
@@ -925,132 +925,135 @@
 
 /* Include an HTML file in a CGI.
  *   The file path is relative to the web server document root */
 void htmlIncludeWebFile(char *file)
 {
 char path[256];
 char *docRoot = "/usr/local/apache/htdocs";
 
 safef(path, sizeof path, "%s/%s", docRoot, file);
 htmlIncludeFile(path);
 }
 
 
 /* ===== HTML printf-style escaping functions ===== */
 
-int htmlSafefAbort(boolean noAbort, char *format, ...)
+int htmlSafefAbort(boolean noAbort, int errCode, char *format, ...)
 /* handle noAbort stderror logging and errAbort */
 {
 va_list args;
 va_start(args, format);
 if (noAbort)
     {
     vfprintf(stderr, format, args);
     fprintf(stderr, "\n");
     fflush(stderr);
     }
 else
     {
     vaErrAbort(format, args);
     }
 va_end(args);
-return -1;
+return errCode;
 }
 
 
 
 #define htmlSafefPunc 0x01  // using char 1 as special char to denote strings needing escaping
 enum htmlSafefEncoding {dummyzero, none, html, js, css, attr, url};
 
-int htmlEscapeAllStrings(char *buffer, char *s, int bufSize, boolean noAbort)
+int htmlEscapeAllStrings(char *buffer, char *s, int bufSize, boolean noAbort, boolean noWarnOverflow)
 /* Escape all strings. *
  * Returns final size not including terminating 0. 
  * User needs to pre-allocate enough space that escape functions will never run out of space.
  * This function should be efficient on statements with many strings to be escaped. */
 {
 char *sOrig = s;
 int sz = 0;
 int remainder = bufSize;
 boolean done = FALSE;
 while (1)
     {
     char *start = strchr(s, htmlSafefPunc);
     char *end = NULL;
     if (start)
 	{
     	end = strchr(start+1, htmlSafefPunc); // skip over punc mark
 	if (!end)
 	    {
-	    return htmlSafefAbort(noAbort, "Unexpected error in htmlEscapeAllStrings. s=[%s]", sOrig);
+	    return htmlSafefAbort(noAbort, -2, "Unexpected error in htmlEscapeAllStrings. s=[%s]", sOrig);
 	    }
 	}
     else
 	{
 	// just copy remainder of the input string to output
     	start = strchr(s, 0); // find end of string
 	done = TRUE;	
 	}
     // move any non-escaped part
     int moveSize = start - s;
     if (moveSize > remainder)
 	{
-	return htmlSafefAbort(noAbort, "Buffer too small in htmlEscapeAllStrings. s=[%s] bufSize = %d", sOrig, bufSize);
+	if (noWarnOverflow) return -1; // speed
+	return htmlSafefAbort(noAbort, -1, "Buffer too small in htmlEscapeAllStrings. s=[%s] bufSize = %d", sOrig, bufSize);
 	}
     memmove(buffer, s, moveSize);
     buffer += moveSize;
     sz += moveSize;
     remainder -= moveSize;
     if (done)
 	{
 	if (remainder < 1)
 	    {
-	    return htmlSafefAbort(noAbort, "Buffer too small for terminating zero in htmlEscapeAllStrings. s=[%s] bufSize = %d", sOrig, bufSize);
+	    if (noWarnOverflow) return -1; // speed
+	    return htmlSafefAbort(noAbort, -1, "Buffer too small for terminating zero in htmlEscapeAllStrings. s=[%s] bufSize = %d", sOrig, bufSize);
 	    }
 	--remainder;
 	*buffer++ = 0;  // terminating 0
 	// do not include term 0 in sz count;
 	break;
 	}
     // escape the quoted part
     s = start + 1;
     *end = 0;  // mark end of "input" string, replacing htmlSafefPunc. input string is temporary anyway.
 
     int escSize;
     char enc = *(end+1);
     if (enc == (enum htmlSafefEncoding) html)
 	{
 	escSize = htmlEncodeTextExtended(s,buffer,remainder);
 	}
     else if (enc == (enum htmlSafefEncoding) js)
 	escSize = javascriptEncodeTextExtended(s,buffer,remainder);
     else if (enc == (enum htmlSafefEncoding) css)
 	escSize = cssEncodeTextExtended(s,buffer,remainder);
     else if (enc == (enum htmlSafefEncoding) attr)
 	escSize = attrEncodeTextExtended(s,buffer,remainder);
     else if (enc == (enum htmlSafefEncoding) url)
 	{
 	escSize = urlEncodeTextExtended(s,buffer,remainder);
 	}
     else 
 	{
-	return htmlSafefAbort(noAbort, "Unexpected error in htmlEscapeAllStrings. (enum htmlSafefEncoding)=%c", *(end+1));
+	return htmlSafefAbort(noAbort, -2, "Unexpected error in htmlEscapeAllStrings. (enum htmlSafefEncoding)=%c", *(end+1));
 	}
     *end = htmlSafefPunc;  // restore mark, helps error message
 	
     if (escSize < 0)
 	{
-	return htmlSafefAbort(noAbort, "Buffer too small for escaping in htmlEscapeAllStrings. s=[%s] bufSize = %d", sOrig, bufSize);
+	if (noWarnOverflow) return -1; // speed
+	return htmlSafefAbort(noAbort, -1, "Buffer too small for escaping in htmlEscapeAllStrings. s=[%s] bufSize = %d", sOrig, bufSize);
 	}
 
     buffer += escSize;
     sz += escSize;
     remainder -= escSize;
     s = end + 2; // skip past htmlSafefPunc and htmlSafefEncoding (encoding type)
     }
 return sz;
 }
 
 char htmlSpecifierToEncoding(char *format, int *pI, boolean noAbort)
 /* translate specifier to encoding type */
 {
 int i = *pI + 1;
 int cnt =  0;
@@ -1084,43 +1087,44 @@
     }
 if (sameString(spec,"js"))
     enc  = (enum htmlSafefEncoding) js;
 else if (sameString(spec,"css"))
     enc = (enum htmlSafefEncoding) css;
 else if (sameString(spec,"attr"))
     enc = (enum htmlSafefEncoding) attr;
 else if (sameString(spec,"url"))
     enc = (enum htmlSafefEncoding) url;
 else if (sameString(spec,""))
     enc = (enum htmlSafefEncoding) html;
 else if (sameString(spec,"none"))
     enc = (enum htmlSafefEncoding) none;
 else
     {
-    htmlSafefAbort(noAbort, "Unknown spec [%s] in format string [%s].", spec, format);
+    htmlSafefAbort(noAbort, -2, "Unknown spec [%s] in format string [%s].", spec, format);
     return 0;
     }
 
 *pI = i - 1;
 return enc;
 }
 
 
-int vaHtmlSafefNoAbort(char* buffer, int bufSize, char *format, va_list args, boolean noAbort)
+int vaHtmlSafefNoAbort(char* buffer, int bufSize, char *format, va_list args, boolean noAbort, boolean noWarnOverflow)
 /* VarArgs Format string to buffer, vsprintf style, only with buffer overflow
  * checking.  The resulting string is always terminated with zero byte.
  * Automatically escapes string values.
+ * Returns count of bytes written or -1 for overflow or -2 for other errors.
  * This function should be efficient on statements with many strings to be escaped. */
 {
 int formatLen = strlen(format);
 
 char *newFormat = NULL;
 int newFormatSize = 2*formatLen + 1;
 newFormat = needMem(newFormatSize);
 char *nf = newFormat;
 char *lastPct = NULL;
 int escStringsCount = 0;
 
 char c = 0;
 int i = 0;
 boolean inPct = FALSE;
 while (i < formatLen)
@@ -1136,91 +1140,167 @@
 	inPct = FALSE;
     else if (inPct) 
         {
 	if (c == 'l')
 	    { // used to handle 'l' long
 	    }
 	else if (strchr("diuoxXeEfFgGpcs",c))
 	    {
 	    inPct = FALSE;
 	    // we finally have the expected format
 	    // finally, the string we care about!
 	    if (c == 's')
 		{
 		char enc = htmlSpecifierToEncoding(format, &i, noAbort);
 		if (enc == 0)
-		    return -1;
+		    return -2;
 		if (enc != (enum htmlSafefEncoding) none) // Not a Pre-escaped String
 		    {
 		    // go back and insert htmlSafefPunc before the leading % char saved in lastPct
 		    // move the accumulated %s descriptor
 		    memmove(lastPct+1, lastPct, nf - lastPct); // this is typically very small, src and dest overlap.
 		    ++nf;
 		    *lastPct = htmlSafefPunc;
 		    *nf++ = htmlSafefPunc;
 		    *nf++ = enc;
 		    ++escStringsCount;
 		    }
 		}
 	    }
 	else if (strchr("+-.1234567890",c))
 	    {
 	    // Do nothing.
 	    }
 	else
 	    {
-	    return htmlSafefAbort(noAbort, "String format not understood in vaHtmlSafef: %s", format);
+	    return htmlSafefAbort(noAbort, -2, "String format not understood in vaHtmlSafef: %s", format);
 	    }
 	}
     ++i;	    
     }
 
 int sz = 0; 
 boolean overflow = FALSE;
 if (escStringsCount > 0)
     {
     int tempSize = bufSize + 3*escStringsCount;  // allow for temporary escPunc chars + spectype-char
     char *tempBuf = needMem(tempSize);
     sz = vsnprintf(tempBuf, tempSize, newFormat, args);
     /* note that some versions return -1 if too small */
     if (sz != -1 && sz + 1 <= tempSize)
 	{
-	sz = htmlEscapeAllStrings(buffer, tempBuf, bufSize, noAbort);
+	sz = htmlEscapeAllStrings(buffer, tempBuf, bufSize, noAbort, noWarnOverflow);
 	}
     else
 	overflow = TRUE;
     freeMem(tempBuf);
     }
 else
     {
     sz = vsnprintf(buffer, bufSize, newFormat, args);
     /* note that some version return -1 if too small */
     if ((sz < 0) || (sz >= bufSize))
 	overflow = TRUE;
     }
 if (overflow)
     {
     buffer[bufSize-1] = (char) 0;
-    htmlSafefAbort(noAbort, "buffer overflow, size %d, format: %s", bufSize, format);
+    if (!noWarnOverflow)
+	htmlSafefAbort(noAbort, -1, "buffer overflow, size %d, format: %s", bufSize, format);
     sz = -1;
     }
 
 freeMem(newFormat);
 va_end(args);
 
 return sz;
 
 }
 
 int htmlSafef(char* buffer, int bufSize, char *format, ...)
 /* Format string to buffer, vsprintf style, only with buffer overflow
  * checking.  The resulting string is always terminated with zero byte. 
  * Escapes string parameters. */
 {
 int sz;
 va_list args;
 va_start(args, format);
-sz = vaHtmlSafefNoAbort(buffer, bufSize, format, args, TRUE);
+sz = vaHtmlSafefNoAbort(buffer, bufSize, format, args, FALSE, FALSE);
 va_end(args);
 return sz;
 }
 
+
+void vaHtmlDyStringPrintf(struct dyString *ds, char *format, va_list args)
+/* va_list version of htmlDyStringPrintf: append printf-style formatted text to ds.
+ * Strings are escaped according to format type (handled by vaHtmlSafefNoAbort). */
+{
+/* Attempt to format the string in the space currently left in ds.  If there
+ * is not enough room, increase the buffer size and try again until it fits. */
+int avail, sz;
+while (TRUE)
+    {
+    va_list argscp;
+    va_copy(argscp, args);  // each attempt consumes its own copy so args can be reused on retry
+    avail = ds->bufSize - ds->stringSize;
+    if (avail <= 0)
+        {
+        /* Don't pass a zero sized buffer to vaHtmlSafefNoAbort, which hands the
+         * buffer and size on to vsnprintf; grow the dyString first. */
+        dyStringBumpBufSize(ds, ds->bufSize+ds->bufSize);
+        avail = ds->bufSize - ds->stringSize;
+        }
+    sz = vaHtmlSafefNoAbort(ds->string + ds->stringSize, avail, format, argscp, FALSE, TRUE);
+    va_end(argscp);
+    /* negative sz: the text did not fit (-1 = overflow; its warning is suppressed by noWarnOverflow=TRUE) */
+    if ((sz < 0) || (sz >= avail))
+	{
+        dyStringBumpBufSize(ds, ds->bufSize+ds->bufSize);
+	}
+    else
+        {
+        ds->stringSize += sz;  // sz excludes the terminating zero
+        break;
+        }
+    }
+}
+
+void htmlDyStringPrintf(struct dyString *ds, char *format, ...)
+/* Printf-style append to dyString; variadic front end to vaHtmlDyStringPrintf.
+ * Strings are escaped according to format type. */
+{
+va_list args;
+va_start(args, format);
+vaHtmlDyStringPrintf(ds, format, args);
+va_end(args);
+}
+
+void vaHtmlFprintf(FILE *f, char *format, va_list args)
+/* va_list fprintf using html encoding types: formats into a temporary dyString, then writes it to f */
+{
+struct dyString *ds = newDyString(1024);
+vaHtmlDyStringPrintf(ds, format, args);
+fputs(ds->string, f);  // does not append newline
+freeDyString(&ds);
+}
+
+
+void htmlFprintf(FILE *f, char *format, ...)
+/* fprintf to f using html encoding types; variadic front end to vaHtmlFprintf */
+{
+va_list args;
+va_start(args, format);
+vaHtmlFprintf(f, format, args);
+va_end(args);
+}
+
+
+void htmlPrintf(char *format, ...)
+/* printf to stdout using html encoding types; variadic front end to vaHtmlFprintf */
+{
+va_list args;
+va_start(args, format);
+vaHtmlFprintf(stdout, format, args);
+va_end(args);
+}
+
+