f01299b4b6f140d82fb6ad08576b50823b47ff40 galt Thu May 16 22:08:12 2019 -0700 Fixes a bug that happens if a space appears before the = after an attribute name. Also strips newlines from quoted attribute values, which the standard permits in the input but which are not actually part of the attribute value (found in a very long URL in cite.html). The validate function now also checks for two more illegal conditions: a space directly before the tag name (after < or </), and attributes in a closing tag. diff --git src/lib/htmlPage.c src/lib/htmlPage.c index 437d1d6..33fb36e 100644 --- src/lib/htmlPage.c +++ src/lib/htmlPage.c @@ -579,32 +579,30 @@ tag->end += 1; break; } /* Get name - everything up to equals. */ e = s; for (;;) { c = *e; if (c == '=') break; else if (c == '>') break; else if (c == 0) break; - else if (isspace(c)) - break; e += 1; } if (c == 0) { warn("End of file in tag"); break; } name = s; *e++ = 0; eraseTrailingSpaces(name); if (c == '>') { val = ""; gotEnd = TRUE; tag->end = html + (e - dupe); @@ -632,33 +630,39 @@ *e++ = 0; tag->end = html + (e - dupe); break; } else if (isspace(c)) { *e++ = 0; break; } else if (c == 0) break; ++e; } } } + AllocVar(att); att->name = cloneString(name); att->val = cloneString(val); + // The html standard allows us to break quoted attributes into multiple lines using newlines, + // but they are not part of the tag value itself, so + // Strip \n and \r chars from value (att->val); + stripChar(att->val, '\n'); + stripChar(att->val, '\r'); attributeDecode(att->val); slAddTail(&tag->attributes, att); s = e; if (gotEnd) break; } } } } slReverse(&tagList); return tagList; } static struct htmlFormVar *findOrMakeVar(struct htmlPage *page, char *name, struct hash *hash, struct htmlTag *tag, struct htmlFormVar **pVarList) @@ -1697,31 +1701,34 @@ "BR", "COL", "COMMAND", "EMBED", "FRAME", // not in html5 "HR", "IMG", "INPUT", "LINK", "META", "PARAM", "SOURCE" }; static char *selfClosers[] = -/* Tags which can be optionally self-closing in html5 or SVG. 
*/ +/* Tags which can be optionally self-closing in html5 or SVG. + * Note that a space is required BEFORE the /> which provides disambiguation, + * e.g. We do not know if the trailing slash is part of SRC URL: <img src=http://domain.com/image.jpg/> + */ { "CIRCLE", // SVG "ELLIPSE", // SVG "LINE", // SVG "PATH", // SVG "POLYGON", // SVG "POLYLINE", // SVG "RECT" // SVG }; static struct htmlTag *validateBody(struct htmlPage *page, struct htmlTag *startTag) /* Go through tags from current position (just past <BODY>) * up to and including </BODY> and check some things. */ { struct htmlTag *tag, *endTag = NULL; @@ -1867,31 +1874,43 @@ void htmlPageValidateOrAbort(struct htmlPage *page) /* Do some basic validations. Aborts if there is a problem. */ { struct htmlTag *tag; boolean gotTitle = FALSE; char *contentType = NULL; if (page == NULL) errAbort("Can't validate NULL page"); if (page->header != NULL) contentType = hashFindVal(page->header, "Content-Type:"); if (contentType == NULL || startsWith("text/html", contentType)) { /* To simplify things upper case all tag names. 
*/ for (tag = page->tags; tag != NULL; tag = tag->next) + { touppers(tag->name); + if (isEmpty(tag->name)) // causes a blank tag + tagAbort(page, tag, "Space not allowed between opening bracket < and tag name"); + if (startsWith("/", tag->name)) + { + if (sameString(tag->name,"/")) // causes a blank close tag + tagAbort(page, tag, "Space not allowed between opening bracket </ and closing tag name"); + if (tag->attributes) + tagAbort(page, tag, "Attributes are not allowed in closing tag: [%s]", tag->name); + } + } + checkExactlyOne(page->tags, "BODY"); /* Validate header, and make a suggestion or two */ if ((tag = page->tags) == NULL) errAbort("No tags"); if (!sameWord(tag->name, "HTML")) errAbort("Doesn't start with <HTML> tag"); struct htmlTag *headTag = nextTagOfTypeInList(tag->next, "HEAD"); if (headTag == NULL) warn("No <HEAD> tag after <HTML> tag"); else { tag = headTag; for (;;) @@ -1930,32 +1949,38 @@ struct hash *singleTonHash = hashNew(8); int i; int count=ArraySize(singleTons); for (i=0; i<count; ++i) hashAdd(singleTonHash, singleTons[i], NULL); /* Add selfCloser tags to hash. */ struct hash *selfCloserHash = hashNew(8); count=ArraySize(selfClosers); for (i=0; i<count; ++i) hashAdd(selfCloserHash, selfClosers[i], NULL); struct slName *tagStack = NULL; for (tag = page->tags; tag != NULL; tag = tag->next) { + if (isEmpty(tag->name)) // causes a blank tag + tagAbort(page, tag, "Space not allowed between opening bracket < and tag name"); if (startsWith("/", tag->name)) { + if (sameString(tag->name,"/")) // causes a blank close tag + tagAbort(page, tag, "Space not allowed between opening bracket </ and closing tag name"); + if (tag->attributes) + tagAbort(page, tag, "Attributes are not allowed in closing tag: [%s]", tag->name); if (hashLookup(singleTonHash, tag->name+1)) tagAbort(page, tag, "Tag %s closing tag not allowed for singleton tags.", tag->name); if (!sameString("P", tag->name+1)) { if (!tagStack) tagAbort(page, tag, "No tags still left on stack. 
Closing tag %s has no corresponding open tag.", tag->name); struct slName *top = slPopHead(&tagStack); // flush LI tags still on stack when /UL or /OL encountered // since the missing /LI tags are usually tolerated. while ((sameString(tag->name, "/UL") || sameString(tag->name, "/OL")) && sameString(top->name,"LI")) { tagWarn(page, tag, "Closing tag %s found. LI tag on stack. Missing /LI tag. Please fix. Continuing.", tag->name); top = slPopHead(&tagStack); } if (!sameString(top->name,tag->name+1))