ea3a67989847a2b731be271d112feab992a5eb9a galt Wed Sep 28 10:08:47 2016 -0700 Fixes hgNearTest robot by having htmlPage.c call attributeDecode on attribute values since we are now encoding them against XSS. refs #18077. hgNearTest was crashing right away with the error Couldn't get main form on orgPage. diff --git src/lib/htmshell.c src/lib/htmshell.c index a8996b5..9afc9cd 100644 --- src/lib/htmshell.c +++ src/lib/htmshell.c @@ -282,31 +282,31 @@ } char *htmlEncode(char *s) /* Returns a cloned string with quotes replaced by html codes. Changes ',",\n and >,<,& to code equivalents. This differs from cgiEncode as it handles text that will be displayed in an html page or tooltip style title. */ { int size = htmlEncodeTextSize(s); char *out = needMem(size+1); htmlEncodeTextExtended(s, out, size+1); return out; } -int nonAlphaNumericHexEncodeTextExtended(char *s, char *out, int outSize, +int nonAlphaNumericHexEncodeText(char *s, char *out, int outSize, char *prefix, char *postfix, int encodedSize) /* For html tag attributes, it replaces non-alphanumeric characters * with <prefix>HH<postfix> hex codes to fight XSS. * out result must be large enough to receive the encoded string. * Returns size of encoded string or -1 if output larger than outSize. * To just get the final encoded size, pass in NULL for out and 0 for outSize. * To output without checking sizes, pass in non-NULL for out and 0 for outSize. */ { int total = 0; char c = 0; do { c=*s++; int size = 1; @@ -334,139 +334,275 @@ char h1 = (c >> 4 ) + 0x30; if (h1 > 0x39) h1 += 7; *out++ = h1; char h2 = (c & 0xF) + 0x30; if (h2 > 0x39) h2 += 7; *out++ = h2; pf = postfix; while ((x = *pf++) != 0) *out++ = x; } } total += size; } while (c != 0); return total - 1; // do not count terminating 0 } +void nonAlphaNumericHexDecodeText(char *s, char *prefix, char *postfix) +/* For html tag attributes, it decodes non-alphanumeric characters + * with <prefix>HH<postfix> hex codes. 
+ * Decoding happens in-place, changing the input string s. + * prefix must not be empty string or null, but postfix can be empty string. + * Because the decoded string is always equal to or shorter than the input string, + * the decoding is just done in-place modifying the input string. + * Accepts upper and lower case values in entities. + */ +{ +char c = 0; +char *d = s; // where are we decoding to right now +int pfxLen = strlen(prefix); +int pfxMatch = 0; +int postLen = strlen(postfix); +int postMatch = 0; +int state = 0; // 0=copy 1=prefix + // 2=hex-started 3=hex-completed + // 4=postfix 5=failed + // 5 = failed to match, abandon the attempt: rewind s to s2 and d to d2, copy one character, + // and set state to 0. +char *s2 = NULL; // save s when prefix started +char *d2 = NULL; // save d when prefix started. +char de = 0; +do + { + c=*s++; + if (state == 0) // default string + { + if (tolower(c) == prefix[0]) + { + state = 1; + pfxMatch = 0; + s2 = s - 1; // back up to real start of s without ++ + d2 = d; + } + else + { + *d++ = c; // copy string + } + } + + if (state == 1) + { + if (tolower(c) == prefix[pfxMatch]) + { + ++pfxMatch; + if (pfxMatch == pfxLen) + { + state = 2; + de = 0; + } + } + else + { + state = 5; // mismatch in prefix, abandon + } + } + else if (state == 2 || state == 3) + { // expecting 2 hex chars + if (state == 3) + de *= 16; + ++state; + if (c >= '0' && c <= '9') + de += (c - '0'); + else if (c >= 'A' && c <= 'F') + de += (c - 'A' + 10); + else if (c >= 'a' && c <= 'f') + de += (c - 'a' + 10); + else + { + state = 5; // not hex chars, abandon to another state. 
+ } + if (state == 4) + { + *d++ = de; + postMatch = 0; + if (postMatch == postLen) // bale out without consuming + { + state = 0; + } + } + } + else if (state == 4) + { + if (tolower(c) == postfix[postMatch]) + { + ++postMatch; + if (postMatch == postLen) + { + state = 0; + } + } + else + { + state = 5; + } + } + + if (state == 5) // false match did not complete, just advance one character + { + s = s2; + d = d2; + *d++ = c = *s++; // consume one character to avoid infinite loop. + state = 0; + } + } while (c != 0); +} + int attrEncodeTextExtended(char *s, char *out, int outSize) -/* For html tag attributes, it replaces non-alphanumeric characters +/* For html tag attribute values, it replaces non-alphanumeric characters * with html entities &#xHH; to fight XSS. * out result must be large enough to receive the encoded string. * Returns size of encoded string or -1 if output larger than outSize. * To just get the final encoded size, pass in NULL for out and 0 for outSize. * To output without checking sizes, pass in non-NULL for out and 0 for outSize. */ { -return nonAlphaNumericHexEncodeTextExtended(s, out, outSize, "&#x", ";", 6); +return nonAlphaNumericHexEncodeText(s, out, outSize, "&#x", ";", 6); } int attrEncodeTextSize(char *s) /* Returns what the encoded size will be after replacing characters with escape codes. */ { return attrEncodeTextExtended(s, NULL, 0); } char *attributeEncode(char *s) /* Returns a cloned string with non-alphanumeric characters replaced by escape codes. */ { int size = attrEncodeTextSize(s); char *out = needMem(size+1); attrEncodeTextExtended(s, out, size+1); return out; } +void attributeDecode(char *s) +/* For html tag attribute values decode html entities &#xHH; */ +{ +return nonAlphaNumericHexDecodeText(s, "&#x", ";"); +} + + int cssEncodeTextExtended(char *s, char *out, int outSize) -/* For CSS, it replaces non-alphanumeric characters with "\HH " to fight XSS. 
+/* For CSS values, it replaces non-alphanumeric characters with "\HH " to fight XSS. * (Yes, the trailing space is critical.) * out result must be large enough to receive the encoded string. * Returns size of encoded string or -1 if output larger than outSize. * To just get the final encoded size, pass in NULL for out and 0 for outSize. * To output without checking sizes, pass in non-NULL for out and 0 for outSize. */ { -return nonAlphaNumericHexEncodeTextExtended(s, out, outSize, "\\", " ", 4); +return nonAlphaNumericHexEncodeText(s, out, outSize, "\\", " ", 4); } int cssEncodeTextSize(char *s) /* Returns what the encoded size will be after replacing characters with escape codes. */ { return cssEncodeTextExtended(s, NULL, 0); } char *cssEncode(char *s) /* Returns a cloned string with non-alphanumeric characters replaced by escape codes. */ { int size = cssEncodeTextSize(s); char *out = needMem(size+1); cssEncodeTextExtended(s, out, size+1); return out; } +void cssDecode(char *s) +/* For CSS values decode "\HH " + * (Yes, the trailing space is critical.) */ +{ +return nonAlphaNumericHexDecodeText(s, "\\", " "); +} + + int javascriptEncodeTextExtended(char *s, char *out, int outSize) -/* For javascript, it replaces non-alphanumeric characters with "\xHH" to fight XSS. +/* For javascript string values, it replaces non-alphanumeric characters with "\xHH" to fight XSS. * out result must be large enough to receive the encoded string. * Returns size of encoded string or -1 if output larger than outSize. * To just get the final encoded size, pass in NULL for out and 0 for outSize. * To output without checking sizes, pass in non-NULL for out and 0 for outSize. */ { -return nonAlphaNumericHexEncodeTextExtended(s, out, outSize, "\\x", "", 4); +return nonAlphaNumericHexEncodeText(s, out, outSize, "\\x", "", 4); } int javascriptEncodeTextSize(char *s) /* Returns what the encoded size will be after replacing characters with escape codes. 
*/ { return javascriptEncodeTextExtended(s, NULL, 0); } char *javascriptEncode(char *s) /* Returns a cloned string with non-alphanumeric characters replaced by escape codes. */ { int size = javascriptEncodeTextSize(s); char *out = needMem(size+1); javascriptEncodeTextExtended(s, out, size+1); return out; } +void jsDecode(char *s) +/* For JS string values decode "\xHH" */ +{ +return nonAlphaNumericHexDecodeText(s, "\\x", ""); +} + int urlEncodeTextExtended(char *s, char *out, int outSize) -/* For URL parameters, it replaces non-alphanumeric characters with "%HH" to fight XSS. +/* For URL parameter values, it replaces non-alphanumeric characters with "%HH" to fight XSS. * out result must be large enough to receive the encoded string. * Returns size of encoded string or -1 if output larger than outSize. * To just get the final encoded size, pass in NULL for out and 0 for outSize. * To output without checking sizes, pass in non-NULL for out and 0 for outSize. */ { -return nonAlphaNumericHexEncodeTextExtended(s, out, outSize, "%", "", 3); +return nonAlphaNumericHexEncodeText(s, out, outSize, "%", "", 3); } int urlEncodeTextSize(char *s) /* Returns what the encoded size will be after replacing characters with escape codes. */ { return urlEncodeTextExtended(s, NULL, 0); } char *urlEncode(char *s) /* Returns a cloned string with non-alphanumeric characters replaced by escape codes. */ { int size = urlEncodeTextSize(s); char *out = needMem(size+1); urlEncodeTextExtended(s, out, size+1); return out; } +void urlDecode(char *s) +/* For URL parameter values decode "%HH" */ +{ +return nonAlphaNumericHexDecodeText(s, "%", ""); +} + char *htmlWarnStartPattern() /* Return starting pattern for warning message. */ { return "<!-- HGERROR-START -->\n"; } char *htmlWarnEndPattern() /* Return ending pattern for warning message. */ { return "<!-- HGERROR-END -->\n"; }