596fceb9da320bd456bbd4cda8571d4a62cc329f kent Mon Oct 31 12:03:18 2016 -0700 Relaxing some validations to accommodate newer web pages. diff --git src/lib/htmlPage.c src/lib/htmlPage.c index b1449c2..69fc65f 100644 --- src/lib/htmlPage.c +++ src/lib/htmlPage.c @@ -1679,35 +1679,37 @@ for (tag = startTag; tag != NULL; tag = tag->next) { if (sameWord(tag->name, "/BODY")) { endTag = tag; break; } } if (endTag == NULL) errAbort("Missing </BODY>"); validateTables(page, startTag, endTag); checkTagIsInside(page, "DIR MENU OL UL", "LI", startTag, endTag); checkTagIsInside(page, "DL", "DD DT", startTag, endTag); checkTagIsInside(page, "COLGROUP TABLE", "COL", startTag, endTag); checkTagIsInside(page, "MAP", "AREA", startTag, endTag); +#ifdef OLD /* These days input type controls allowed outside forms because of javascript */ checkTagIsInside(page, "FORM SCRIPT", "INPUT BUTTON /BUTTON OPTION SELECT /SELECT TEXTAREA /TEXTAREA" "FIELDSET /FIELDSET" , startTag, endTag); +#endif /* OLD */ validateNestingTags(page, startTag, endTag, bodyNesters, ArraySize(bodyNesters)); return endTag->next; } static char *urlOkChars() /* Return array character indexed array that has * 1 for characters that are ok in URLs and 0 * elsewhere. */ { char *okChars; int c; AllocArray(okChars, 256); for (c=0; c<256; ++c) if (isalnum(c)) okChars[c] = 1; @@ -1757,30 +1759,40 @@ } static void validateCgiUrls(struct htmlPage *page) /* Make sure URLs in page follow basic CGI encoding rules. */ { struct htmlForm *form; struct slName *linkList = htmlPageLinks(page), *link; for (form = page->forms; form != NULL; form = form->next) validateCgiUrl(form->action); for (link = linkList; link != NULL; link = link->next) validateCgiUrl(link->name); slFreeList(&linkList); } +static struct htmlTag *nextTagOfTypeInList(struct htmlTag *tagList, char *type) +/* Return next tag of given type in list or NULL if none. */ +{ +struct htmlTag *tag; +for (tag = tagList; tag != NULL; tag = tag->next) + if (sameString(tag->name, type)) + return tag; +return NULL; +} + static int countTagsOfType(struct htmlTag *tagList, char *type) /* Count number of tags of given type. */ { struct htmlTag *tag; int count = 0; for (tag = tagList; tag != NULL; tag = tag->next) if (sameString(tag->name, type)) ++count; return count; } static void checkExactlyOne(struct htmlTag *tagList, char *type) /* Check there is exactly one of tag in list. */ { int count = countTagsOfType(tagList, type); @@ -1810,44 +1822,43 @@ if (page->header != NULL) contentType = hashFindVal(page->header, "Content-Type:"); if (contentType == NULL || startsWith("text/html", contentType)) { /* To simplify things upper case all tag names. */ for (tag = page->tags; tag != NULL; tag = tag->next) touppers(tag->name); checkExactlyOne(page->tags, "BODY"); /* Validate header, and make a suggestion or two */ if ((tag = page->tags) == NULL) errAbort("No tags"); if (!sameWord(tag->name, "HTML")) errAbort("Doesn't start with <HTML> tag"); - tag = tag->next; - if (tag == NULL || !sameWord(tag->name, "HEAD")) - warn("<HEAD> tag does not immediately follow <HTML> tag"); + if ((tag = nextTagOfTypeInList(tag->next, "HEAD")) == NULL) + warn("No <HEAD> tag after <HTML> tag"); else { for (;;) { tag = tag->next; if (tag == NULL) errAbort("Missing </HEAD>"); if (sameWord(tag->name, "TITLE")) gotTitle = TRUE; if (sameWord(tag->name, "/HEAD")) break; } if (!gotTitle) warn("No title in <HEAD>"); validateNestingTags(page, page->tags, tag, headNesters, ArraySize(headNesters)); tag = tag->next; } - if (tag == NULL || !sameWord(tag->name, "BODY")) + if ((tag = nextTagOfTypeInList(tag, "BODY")) == NULL) errAbort("<BODY> tag does not follow <HTML> tag"); tag = validateBody(page, tag->next); if (tag == NULL || !sameWord(tag->name, "/HTML")) errAbort("Missing </HTML>"); validateCgiUrls(page); } }