src/lib/htmshell.c 89cacc8b69a80a7e2787aaf9082782a83352d789

89cacc8b69a80a7e2787aaf9082782a83352d789
galt
  Tue Oct 4 14:34:03 2016 -0700
refs #18175. more elegant code for the hex decoder for attr css js url. Thanks Angie.

diff --git src/lib/htmshell.c src/lib/htmshell.c
index 9bfe1ad..7539218 100644
--- src/lib/htmshell.c
+++ src/lib/htmshell.c
@@ -289,39 +289,40 @@
 
 
 char *htmlEncode(char *s)
 /* Returns a cloned string with quotes replaced by html codes.
    Changes ',",\n and >,<,& to code equivalents.
    This differs from cgiEncode as it handles text that will
    be displayed in an html page or tooltip style title.  */
 {
 int size = htmlEncodeTextSize(s);
 char *out = needMem(size+1);
 htmlEncodeTextExtended(s, out, size+1);
 return out;
 }
 
 int nonAlphaNumericHexEncodeText(char *s, char *out, int outSize, 
-   char *prefix, char *postfix, int encodedSize)
+   char *prefix, char *postfix)
 /* For html tag attributes, it replaces non-alphanumeric characters
  * with <prefix>HH<postfix> hex codes to fight XSS.
  * out result must be large enough to receive the encoded string.
  * Returns size of encoded string or -1 if output larger than outSize. 
  * To just get the final encoded size, pass in NULL for out and 0 for outSize. 
  * To output without checking sizes, pass in non-NULL for out and 0 for outSize. 
  */
 {
+int encodedSize = strlen(prefix) + 2 + strlen(postfix);
 int total = 0;
 char c = 0;
 do
     {
     c=*s++;
     int size = 1;
     if (!isalnum(c)) // alpha-numeric
 	{
 	size = encodedSize;
 	}
     if (c == 0)
 	size = 1;    // do not encode the terminating 0
     if (out)
 	{
 	if (outSize > 0 && (total+size+1) > outSize) // 1 for terminator
@@ -339,150 +340,94 @@
 		*out++ = x;
 	    char h1 = (c >> 4 ) + 0x30; if (h1 > 0x39) h1 += 7;
 	    *out++ = h1;
 	    char h2 = (c & 0xF) + 0x30; if (h2 > 0x39) h2 += 7;
 	    *out++ = h2;
 	    pf = postfix;
 	    while ((x = *pf++) != 0)
 		*out++ = x;
 	    }
 	}
     total += size;
     } while (c != 0);
 return total - 1; // do not count terminating 0
 }
 
+static boolean decodeOneHexChar(char c, char *h)
+/* Return true if c is a hex char; *h is shifted left 4 bits and c's value is added to it. */
+{
+*h = *h << 4;
+if (c >= '0' && c <= '9')
+    *h += (c - '0');	    
+else if (c >= 'A' && c <= 'F')
+    *h += (c - 'A' + 10);	    
+else if (c >= 'a' && c <= 'f')
+    *h += (c - 'a' + 10);
+else
+    return FALSE;
+return TRUE;
+}
+
+static boolean decodeTwoHexChars(char *s, char *h)
+/* Return true if s begins with two hex chars, decoding them into the single byte *h. */
+{
+*h = 0;
+if (decodeOneHexChar(*s++, h)
+&& (decodeOneHexChar(*s  , h)))
+    return TRUE;
+return FALSE;
+}
 
 void nonAlphaNumericHexDecodeText(char *s, char *prefix, char *postfix)
 /* For html tag attributes, it decodes non-alphanumeric characters
  * with <prefix>HH<postfix> hex codes.
  * Decoding happens in-place, changing the input string s.
  * prefix must not be empty string or null, but postfix can be empty string.
  * Because the decoded string is always equal to or shorter than the input string,
  * the decoding is just done in-place modifying the input string.
  * Accepts upper and lower case values in entities.
  */
 {
-char c = 0;
-char *d = s;  // where are we decoding to right nowA
+char *d = s;  // where are we decoding to right now
 int pfxLen = strlen(prefix);
-int pfxMatch = 0;
 int postLen = strlen(postfix);
-int postMatch = 0;
-int state = 0;  // 0=copy 1=prefix
-                // 2=hex-started 3=hex-completed 
-                // 4=postfix 5=postfix
-                // 5 = failed to match, abandon fantasy. append from s2 to s onto e2. set e to e2.
-                //  and set state to 0.
-char *s2 = NULL; // save s when prefix started
-char *d2 = NULL; // save d when prefix started.
-char de = 0;
-do
-    {
-    c=*s++;
-    if (state == 0) // default string
+while (isNotEmpty(s))
     {
-	if (tolower(c) == prefix[0])
+    char h;
+    if (startsWithNoCase(prefix, s) &&
+        decodeTwoHexChars(s+pfxLen, &h) &&
+        startsWithNoCase(postfix, s+pfxLen+2))
         {
-	    state = 1;
-	    pfxMatch = 0;
-	    s2 = s - 1;  // back up to real start of s without ++
-	    d2 = d;
+        *d++ = h;
+        s += pfxLen + 2 + postLen;
         }
     else
-	    {
-	    *d++ = c;  // copy string
-	    }
-	}
-
-    if (state == 1)
-	{
-	if (tolower(c) == prefix[pfxMatch])
-	    {
-	    ++pfxMatch;
-	    if (pfxMatch == pfxLen)
-		{
-		state = 2;
-		de = 0;
-		}
-	    }
-	else
-	    {
-	    state = 5; // mismatch in prefix, abandon
-	    }
+        *d++ = *s++;
     }
-    else if (state == 2 || state == 3)
-	{  // expecting 2 hex chars
-	if (state == 3)
-	    de *= 16;
-	++state;
-	if (c >= '0' && c <= '9')
-	    de += (c - '0');	    
-	else if (c >= 'A' && c <= 'F')
-	    de += (c - 'A' + 10);	    
-	else if (c >= 'a' && c <= 'f')
-	    de += (c - 'a' + 10);	    
-	else
-	    {
-	    state = 5; // not hex chars, abandon to another state.
-	    }
-	if (state == 4)
-	    {
-	    *d++ = de;
-	    postMatch = 0;
-	    if (postMatch == postLen) // bale out without consuming
-		{
-		state = 0;
-		}
-	    }
-	}	
-    else if (state == 4)
-	{
-	if (tolower(c) == postfix[postMatch])
-	    {
-	    ++postMatch;
-	    if (postMatch == postLen)
-		{
-		state = 0;
-		}
-	    }
-	else
-	    {
-	    state = 5;
-	    }
-	}
-
-    if (state == 5) // false match did not complete, just advance one character
-	{
-	s = s2;
-        d = d2;
-	*d++ = c = *s++;  // consume one character to avoid infinite loop.
-	state = 0;	
-	}
-    } while (c != 0);
+*d = 0;
 }
 
 int attrEncodeTextExtended(char *s, char *out, int outSize)
 /* For html tag attribute values, it replaces non-alphanumeric characters
  * with html entities &#xHH; to fight XSS.
  * out result must be large enough to receive the encoded string.
  * Returns size of encoded string or -1 if output larger than outSize. 
  * To just get the final encoded size, pass in NULL for out and 0 for outSize. 
  * To output without checking sizes, pass in non-NULL for out and 0 for outSize. 
  */
 {
-return nonAlphaNumericHexEncodeText(s, out, outSize, "&#x", ";", 6);
+return nonAlphaNumericHexEncodeText(s, out, outSize, "&#x", ";");
 }
 
 int attrEncodeTextSize(char *s)
 /* Returns what the encoded size will be after replacing characters with escape codes. */
 {
 return attrEncodeTextExtended(s, NULL, 0);
 }
 
 char *attributeEncode(char *s)
 /* Returns a cloned string with non-alphanumeric characters replaced by escape codes. */
 {
 int size = attrEncodeTextSize(s);
 char *out = needMem(size+1);
 attrEncodeTextExtended(s, out, size+1);
 return out;
@@ -493,31 +438,31 @@
 {
 return nonAlphaNumericHexDecodeText(s, "&#x", ";");
 }
 
 
 
 int cssEncodeTextExtended(char *s, char *out, int outSize)
 /* For CSS values, it replaces non-alphanumeric characters with "\HH " to fight XSS.
  * (Yes, the trailing space is critical.)
  * out result must be large enough to receive the encoded string.
  * Returns size of encoded string or -1 if output larger than outSize. 
  * To just get the final encoded size, pass in NULL for out and 0 for outSize. 
  * To output without checking sizes, pass in non-NULL for out and 0 for outSize. 
  */
 {
-return nonAlphaNumericHexEncodeText(s, out, outSize, "\\", " ", 4);
+return nonAlphaNumericHexEncodeText(s, out, outSize, "\\", " ");
 }
 
 int cssEncodeTextSize(char *s)
 /* Returns what the encoded size will be after replacing characters with escape codes. */
 {
 return cssEncodeTextExtended(s, NULL, 0);
 }
 
 char *cssEncode(char *s)
 /* Returns a cloned string with non-alphanumeric characters replaced by escape codes. */
 {
 int size = cssEncodeTextSize(s);
 char *out = needMem(size+1);
 cssEncodeTextExtended(s, out, size+1);
 return out;
@@ -528,31 +473,31 @@
  * (Yes, the trailing space is critical.) */
 {
 return nonAlphaNumericHexDecodeText(s, "\\", " ");
 }
 
 
 
 int javascriptEncodeTextExtended(char *s, char *out, int outSize)
 /* For javascript string values, it replaces non-alphanumeric characters with "\xHH" to fight XSS.
  * out result must be large enough to receive the encoded string.
  * Returns size of encoded string or -1 if output larger than outSize. 
  * To just get the final encoded size, pass in NULL for out and 0 for outSize. 
  * To output without checking sizes, pass in non-NULL for out and 0 for outSize. 
  */
 {
-return nonAlphaNumericHexEncodeText(s, out, outSize, "\\x", "", 4);
+return nonAlphaNumericHexEncodeText(s, out, outSize, "\\x", "");
 }
 
 int javascriptEncodeTextSize(char *s)
 /* Returns what the encoded size will be after replacing characters with escape codes. */
 {
 return javascriptEncodeTextExtended(s, NULL, 0);
 }
 
 char *javascriptEncode(char *s)
 /* Returns a cloned string with non-alphanumeric characters replaced by escape codes. */
 {
 int size = javascriptEncodeTextSize(s);
 char *out = needMem(size+1);
 javascriptEncodeTextExtended(s, out, size+1);
 return out;
@@ -561,31 +506,31 @@
 void jsDecode(char *s)
 /* For JS string values decode "\xHH" */
 {
 return nonAlphaNumericHexDecodeText(s, "\\x", "");
 }
 
 
 int urlEncodeTextExtended(char *s, char *out, int outSize)
 /* For URL parameter values, it replaces non-alphanumeric characters with "%HH" to fight XSS.
  * out result must be large enough to receive the encoded string.
  * Returns size of encoded string or -1 if output larger than outSize. 
  * To just get the final encoded size, pass in NULL for out and 0 for outSize. 
  * To output without checking sizes, pass in non-NULL for out and 0 for outSize. 
  */
 {
-return nonAlphaNumericHexEncodeText(s, out, outSize, "%", "", 3);
+return nonAlphaNumericHexEncodeText(s, out, outSize, "%", "");
 }
 
 int urlEncodeTextSize(char *s)
 /* Returns what the encoded size will be after replacing characters with escape codes. */
 {
 return urlEncodeTextExtended(s, NULL, 0);
 }
 
 char *urlEncode(char *s)
 /* Returns a cloned string with non-alphanumeric characters replaced by escape codes. */
 {
 int size = urlEncodeTextSize(s);
 char *out = needMem(size+1);
 urlEncodeTextExtended(s, out, size+1);
 return out;