f01299b4b6f140d82fb6ad08576b50823b47ff40
galt
Thu May 16 22:08:12 2019 -0700
Fixes a bug that happens if a space appears between the attribute name and the = sign. Also strips newlines out of quoted attribute values: the standard allows them in the input, but they are not actually part of the attribute value (found in a very long URL in cite.html). The validate function now also checks for two more illegal conditions, one of which is a space directly before the tag name.
diff --git src/lib/htmlPage.c src/lib/htmlPage.c
index 437d1d6..33fb36e 100644
--- src/lib/htmlPage.c
+++ src/lib/htmlPage.c
@@ -579,32 +579,30 @@
tag->end += 1;
break;
}
/* Get name - everything up to equals. */
e = s;
for (;;)
{
c = *e;
if (c == '=')
break;
else if (c == '>')
break;
else if (c == 0)
break;
- else if (isspace(c))
- break;
e += 1;
}
if (c == 0)
{
warn("End of file in tag");
break;
}
name = s;
*e++ = 0;
eraseTrailingSpaces(name);
if (c == '>')
{
val = "";
gotEnd = TRUE;
tag->end = html + (e - dupe);
@@ -632,33 +630,39 @@
*e++ = 0;
tag->end = html + (e - dupe);
break;
}
else if (isspace(c))
{
*e++ = 0;
break;
}
else if (c == 0)
break;
++e;
}
}
}
+
AllocVar(att);
att->name = cloneString(name);
att->val = cloneString(val);
+ // The html standard allows quoted attribute values to be broken across multiple lines,
+ // but the newlines are not part of the attribute value itself,
+ // so strip \n and \r chars from the value (att->val).
+ stripChar(att->val, '\n');
+ stripChar(att->val, '\r');
attributeDecode(att->val);
slAddTail(&tag->attributes, att);
s = e;
if (gotEnd)
break;
}
}
}
}
slReverse(&tagList);
return tagList;
}
static struct htmlFormVar *findOrMakeVar(struct htmlPage *page, char *name,
struct hash *hash, struct htmlTag *tag, struct htmlFormVar **pVarList)
@@ -1697,31 +1701,34 @@
"BR",
"COL",
"COMMAND",
"EMBED",
"FRAME", // not in html5
"HR",
"IMG",
"INPUT",
"LINK",
"META",
"PARAM",
"SOURCE"
};
static char *selfClosers[] =
-/* Tags which can be optionally self-closing in html5 or SVG. */
+/* Tags which can be optionally self-closing in html5 or SVG.
+ * Note that a space is required BEFORE the /> to disambiguate it: without the space
+ * we do not know if the trailing slash is part of e.g. a SRC URL.
+ */
{
"CIRCLE", // SVG
"ELLIPSE", // SVG
"LINE", // SVG
"PATH", // SVG
"POLYGON", // SVG
"POLYLINE", // SVG
"RECT" // SVG
};
static struct htmlTag *validateBody(struct htmlPage *page, struct htmlTag *startTag)
/* Go through tags from current position (just past