9367c4abbf200b833b19c5fe77a198c41a48c809 galt Fri Feb 24 18:29:08 2017 -0800 Adding new function strictTagNestCheck to htmlCheck. CSP testing turned up CGI webpage output that was not even being parsed correctly or the same by various browsers because of reversed end-tags. htmlCheck should be able to find these problems too. Also added the pipe and comma chars to the chars allowed in urls by htmlPage.c diff --git src/lib/htmlPage.c src/lib/htmlPage.c index 8cce92f..a4a300a 100644 --- src/lib/htmlPage.c +++ src/lib/htmlPage.c @@ -1655,31 +1655,50 @@ checkNest(page, nesters[i], startTag, endTag); } static char *bodyNesters[] = /* Nesting tags that appear in body. */ { "ADDRESS", "DIV", "H1", "H2", "H3", "H4", "H5", "H6", "ACRONYM", "BLOCKQUOTE", "CITE", "CODE", "DEL", "DFN" "DIR", "DL", "MENU", "OL", "UL", "CAPTION", "TABLE", "A", "MAP", "OBJECT", "FORM", "DIV", "SCRIPT", "SVG" }; static char *headNesters[] = /* Nesting tags that appear in header. */ { - "TITLE", "DIV", "SCRIPT" + "TITLE", "SCRIPT" +}; + +static char *singleTons[] = +/* Tags which do not have closing tags. */ +{ +"AREA", +"BASE", +"BR", +"COL", +"COMMAND", +"EMBED", +"FRAME", // not in html5 +"HR", +"IMG", +"INPUT", +"LINK", +"META", +"PARAM", +"SOURCE" }; static struct htmlTag *validateBody(struct htmlPage *page, struct htmlTag *startTag) /* Go through tags from current position (just past <BODY>) * up to and including </BODY> and check some things. */ { struct htmlTag *tag, *endTag = NULL; /* First search for end tag. 
*/ for (tag = startTag; tag != NULL; tag = tag->next) { if (sameWord(tag->name, "/BODY")) { endTag = tag; break; @@ -1720,30 +1739,32 @@ okChars['/'] = 1; okChars['%'] = 1; okChars['.'] = 1; okChars[';'] = 1; okChars[':'] = 1; okChars['_'] = 1; okChars['&'] = 1; okChars['+'] = 1; okChars['('] = 1; okChars[')'] = 1; okChars['$'] = 1; okChars['!'] = 1; okChars['*'] = 1; okChars['@'] = 1; okChars['\''] = 1; // apparently the apostrophe itself is ok +okChars['|'] = 1; // apparently the google uses pipe char +okChars[','] = 1; // apparently the google uses comma char okChars['#'] = 1; // URI fragment, typically an anchor return okChars; } static void validateCgiUrl(char *url) /* Make sure URL follows basic CGI encoding rules. */ { if (startsWith("http:", url) || startsWith("https:", url)) { static char *okChars = NULL; UBYTE c, *s; if (okChars == NULL) okChars = urlOkChars(); url = strchr(url, '?'); if (url != NULL) @@ -1854,15 +1875,66 @@ } if (!gotTitle) warn("No title in <HEAD>"); validateNestingTags(page, page->tags, tag, headNesters, ArraySize(headNesters)); tag = tag->next; } if ((tag = nextTagOfTypeInList(tag, "BODY")) == NULL) errAbort("<BODY> tag does not follow <HTML> tag"); tag = validateBody(page, tag->next); if (tag == NULL || !sameWord(tag->name, "/HTML")) errAbort("Missing </HTML>"); validateCgiUrls(page); } } +void htmlPageStrictTagNestCheck(struct htmlPage *page) +/* Do strict tag nesting check. Aborts if there is a problem. */ +{ +struct htmlTag *tag; +/* To simplify things upper case all tag names. */ +for (tag = page->tags; tag != NULL; tag = tag->next) + touppers(tag->name); + +/* Add singleton tags to hash. 
*/ +struct hash *hash = hashNew(8); +int i; +int nesterCount=ArraySize(singleTons); +for (i=0; i<nesterCount; ++i) + hashAdd(hash, singleTons[i], NULL); + +struct slName *tagStack = NULL; +for (tag = page->tags; tag != NULL; tag = tag->next) + { + if (startsWith("/", tag->name)) + { + if (hashLookup(hash, tag->name+1)) + tagAbort(page, tag, "Tag %s closing tag not allowed for singleton tags.", tag->name); + if (!sameString("P", tag->name+1)) + { + if (!tagStack) + tagAbort(page, tag, "No tags still left on stack. Closing tag %s has no corresponding open tag.", tag->name); + struct slName *top = slPopHead(&tagStack); + // flush LI tags still on stack when /UL or /OL encountered + // since the missing /LI tags are usually tolerated. + while ((sameString(tag->name, "/UL") || sameString(tag->name, "/OL")) && sameString(top->name,"LI")) + { + tagWarn(page, tag, "Closing tag %s found. LI tag on stack. Missing /LI tag. Please fix. Continuing.", tag->name); + top = slPopHead(&tagStack); + } + if (!sameString(top->name,tag->name+1)) + { + tagAbort(page, tag, "Closing tag %s found, tag %s at top of stack.", tag->name, top->name); + } + } + } + else + { + if (!hashLookup(hash, tag->name) && !sameString("P", tag->name)) + { + slAddHead(&tagStack, slNameNew(tag->name)); + } + } + } +if (tagStack) + errAbort("Some tags still left on stack. Open tag %s missing its closing tag.", tagStack->name); +}