89cacc8b69a80a7e2787aaf9082782a83352d789 galt Tue Oct 4 14:34:03 2016 -0700 refs #18175. more elegant code for the hex decoder for attr css js url. Thanks Angie. diff --git src/lib/htmshell.c src/lib/htmshell.c index 9bfe1ad..7539218 100644 --- src/lib/htmshell.c +++ src/lib/htmshell.c @@ -289,39 +289,40 @@ char *htmlEncode(char *s) /* Returns a cloned string with quotes replaced by html codes. Changes ',",\n and >,<,& to code equivalents. This differs from cgiEncode as it handles text that will be displayed in an html page or tooltip style title. */ { int size = htmlEncodeTextSize(s); char *out = needMem(size+1); htmlEncodeTextExtended(s, out, size+1); return out; } int nonAlphaNumericHexEncodeText(char *s, char *out, int outSize, - char *prefix, char *postfix, int encodedSize) + char *prefix, char *postfix) /* For html tag attributes, it replaces non-alphanumeric characters * with <prefix>HH<postfix> hex codes to fight XSS. * out result must be large enough to receive the encoded string. * Returns size of encoded string or -1 if output larger than outSize. * To just get the final encoded size, pass in NULL for out and 0 for outSize. * To output without checking sizes, pass in non-NULL for out and 0 for outSize. */ { +int encodedSize = strlen(prefix) + 2 + strlen(postfix); int total = 0; char c = 0; do { c=*s++; int size = 1; if (!isalnum(c)) // alpha-numeric { size = encodedSize; } if (c == 0) size = 1; // do not encode the terminating 0 if (out) { if (outSize > 0 && (total+size+1) > outSize) // 1 for terminator @@ -339,150 +340,94 @@ *out++ = x; char h1 = (c >> 4 ) + 0x30; if (h1 > 0x39) h1 += 7; *out++ = h1; char h2 = (c & 0xF) + 0x30; if (h2 > 0x39) h2 += 7; *out++ = h2; pf = postfix; while ((x = *pf++) != 0) *out++ = x; } } total += size; } while (c != 0); return total - 1; // do not count terminating 0 } +static boolean decodeOneHexChar(char c, char *h) +/* Return true if c is a hex char and decode it to h. 
*/ +{ +*h = *h << 4; +if (c >= '0' && c <= '9') + *h += (c - '0'); +else if (c >= 'A' && c <= 'F') + *h += (c - 'A' + 10); +else if (c >= 'a' && c <= 'f') + *h += (c - 'a' + 10); +else + return FALSE; +return TRUE; +} + +static boolean decodeTwoHexChars(char *s, char *h) +/* Return TRUE if s begins with two hex digits, storing the decoded byte in *h; on FALSE, *h is not a usable value. */ +{ +*h = 0; +if (decodeOneHexChar(*s++, h) +&& (decodeOneHexChar(*s , h))) + return TRUE; +return FALSE; +} void nonAlphaNumericHexDecodeText(char *s, char *prefix, char *postfix) /* For html tag attributes, it decodes non-alphanumeric characters * with <prefix>HH<postfix> hex codes. * Decoding happens in-place, changing the input string s. * prefix must not be empty string or null, but postfix can be empty string. * Because the decoded string is always equal to or shorter than the input string, * the decoding is just done in-place modifying the input string. * Accepts upper and lower case values in entities. */ { -char c = 0; -char *d = s; // where are we decoding to right nowA +char *d = s; // where are we decoding to right now int pfxLen = strlen(prefix); -int pfxMatch = 0; int postLen = strlen(postfix); -int postMatch = 0; -int state = 0; // 0=copy 1=prefix - // 2=hex-started 3=hex-completed - // 4=postfix 5=postfix - // 5 = failed to match, abandon fantasy. append from s2 to s onto e2. set e to e2. - // and set state to 0. -char *s2 = NULL; // save s when prefix started -char *d2 = NULL; // save d when prefix started. 
-char de = 0; -do - { - c=*s++; - if (state == 0) // default string +while (isNotEmpty(s)) { - if (tolower(c) == prefix[0]) + char h; + if (startsWithNoCase(prefix, s) && + decodeTwoHexChars(s+pfxLen, &h) && + startsWithNoCase(postfix, s+pfxLen+2)) { - state = 1; - pfxMatch = 0; - s2 = s - 1; // back up to real start of s without ++ - d2 = d; + *d++ = h; + s += pfxLen + 2 + postLen; } else - { - *d++ = c; // copy string - } - } - - if (state == 1) - { - if (tolower(c) == prefix[pfxMatch]) - { - ++pfxMatch; - if (pfxMatch == pfxLen) - { - state = 2; - de = 0; - } - } - else - { - state = 5; // mismatch in prefix, abandon - } + *d++ = *s++; } - else if (state == 2 || state == 3) - { // expecting 2 hex chars - if (state == 3) - de *= 16; - ++state; - if (c >= '0' && c <= '9') - de += (c - '0'); - else if (c >= 'A' && c <= 'F') - de += (c - 'A' + 10); - else if (c >= 'a' && c <= 'f') - de += (c - 'a' + 10); - else - { - state = 5; // not hex chars, abandon to another state. - } - if (state == 4) - { - *d++ = de; - postMatch = 0; - if (postMatch == postLen) // bale out without consuming - { - state = 0; - } - } - } - else if (state == 4) - { - if (tolower(c) == postfix[postMatch]) - { - ++postMatch; - if (postMatch == postLen) - { - state = 0; - } - } - else - { - state = 5; - } - } - - if (state == 5) // false match did not complete, just advance one character - { - s = s2; - d = d2; - *d++ = c = *s++; // consume one character to avoid infinite loop. - state = 0; - } - } while (c != 0); +*d = 0; } int attrEncodeTextExtended(char *s, char *out, int outSize) /* For html tag attribute values, it replaces non-alphanumeric characters * with html entities &#xHH; to fight XSS. * out result must be large enough to receive the encoded string. * Returns size of encoded string or -1 if output larger than outSize. * To just get the final encoded size, pass in NULL for out and 0 for outSize. * To output without checking sizes, pass in non-NULL for out and 0 for outSize. 
*/ { -return nonAlphaNumericHexEncodeText(s, out, outSize, "&#x", ";", 6); +return nonAlphaNumericHexEncodeText(s, out, outSize, "&#x", ";"); } int attrEncodeTextSize(char *s) /* Returns what the encoded size will be after replacing characters with escape codes. */ { return attrEncodeTextExtended(s, NULL, 0); } char *attributeEncode(char *s) /* Returns a cloned string with non-alphanumeric characters replaced by escape codes. */ { int size = attrEncodeTextSize(s); char *out = needMem(size+1); attrEncodeTextExtended(s, out, size+1); return out; @@ -493,31 +438,31 @@ { return nonAlphaNumericHexDecodeText(s, "&#x", ";"); } int cssEncodeTextExtended(char *s, char *out, int outSize) /* For CSS values, it replaces non-alphanumeric characters with "\HH " to fight XSS. * (Yes, the trailing space is critical.) * out result must be large enough to receive the encoded string. * Returns size of encoded string or -1 if output larger than outSize. * To just get the final encoded size, pass in NULL for out and 0 for outSize. * To output without checking sizes, pass in non-NULL for out and 0 for outSize. */ { -return nonAlphaNumericHexEncodeText(s, out, outSize, "\\", " ", 4); +return nonAlphaNumericHexEncodeText(s, out, outSize, "\\", " "); } int cssEncodeTextSize(char *s) /* Returns what the encoded size will be after replacing characters with escape codes. */ { return cssEncodeTextExtended(s, NULL, 0); } char *cssEncode(char *s) /* Returns a cloned string with non-alphanumeric characters replaced by escape codes. */ { int size = cssEncodeTextSize(s); char *out = needMem(size+1); cssEncodeTextExtended(s, out, size+1); return out; @@ -528,31 +473,31 @@ * (Yes, the trailing space is critical.) */ { return nonAlphaNumericHexDecodeText(s, "\\", " "); } int javascriptEncodeTextExtended(char *s, char *out, int outSize) /* For javascript string values, it replaces non-alphanumeric characters with "\xHH" to fight XSS. * out result must be large enough to receive the encoded string. 
* Returns size of encoded string or -1 if output larger than outSize. * To just get the final encoded size, pass in NULL for out and 0 for outSize. * To output without checking sizes, pass in non-NULL for out and 0 for outSize. */ { -return nonAlphaNumericHexEncodeText(s, out, outSize, "\\x", "", 4); +return nonAlphaNumericHexEncodeText(s, out, outSize, "\\x", ""); } int javascriptEncodeTextSize(char *s) /* Returns what the encoded size will be after replacing characters with escape codes. */ { return javascriptEncodeTextExtended(s, NULL, 0); } char *javascriptEncode(char *s) /* Returns a cloned string with non-alphanumeric characters replaced by escape codes. */ { int size = javascriptEncodeTextSize(s); char *out = needMem(size+1); javascriptEncodeTextExtended(s, out, size+1); return out; @@ -561,31 +506,31 @@ void jsDecode(char *s) /* For JS string values decode "\xHH" */ { return nonAlphaNumericHexDecodeText(s, "\\x", ""); } int urlEncodeTextExtended(char *s, char *out, int outSize) /* For URL parameter values, it replaces non-alphanumeric characters with "%HH" to fight XSS. * out result must be large enough to receive the encoded string. * Returns size of encoded string or -1 if output larger than outSize. * To just get the final encoded size, pass in NULL for out and 0 for outSize. * To output without checking sizes, pass in non-NULL for out and 0 for outSize. */ { -return nonAlphaNumericHexEncodeText(s, out, outSize, "%", "", 3); +return nonAlphaNumericHexEncodeText(s, out, outSize, "%", ""); } int urlEncodeTextSize(char *s) /* Returns what the encoded size will be after replacing characters with escape codes. */ { return urlEncodeTextExtended(s, NULL, 0); } char *urlEncode(char *s) /* Returns a cloned string with non-alphanumeric characters replaced by escape codes. */ { int size = urlEncodeTextSize(s); char *out = needMem(size+1); urlEncodeTextExtended(s, out, size+1); return out;