596fceb9da320bd456bbd4cda8571d4a62cc329f
kent
  Mon Oct 31 12:03:18 2016 -0700
Relaxing some validations to accommodate newer web pages.

diff --git src/lib/htmlPage.c src/lib/htmlPage.c
index b1449c2..69fc65f 100644
--- src/lib/htmlPage.c
+++ src/lib/htmlPage.c
@@ -1679,35 +1679,37 @@
 for (tag = startTag; tag != NULL; tag = tag->next)
     {
     if (sameWord(tag->name, "/BODY"))
         {
 	endTag = tag;
 	break;
 	}
     }
 if (endTag == NULL)
     errAbort("Missing </BODY>");
 validateTables(page, startTag, endTag);
 checkTagIsInside(page, "DIR MENU OL UL", "LI", startTag, endTag);
 checkTagIsInside(page, "DL", "DD DT", startTag, endTag);
 checkTagIsInside(page, "COLGROUP TABLE", "COL", startTag, endTag);
 checkTagIsInside(page, "MAP", "AREA", startTag, endTag);
+#ifdef OLD   /* These days input type controls allowed outside forms because of javascript */
 checkTagIsInside(page, "FORM SCRIPT", 
 	"INPUT BUTTON /BUTTON OPTION SELECT /SELECT TEXTAREA /TEXTAREA"
 	"FIELDSET /FIELDSET"
 	, 
 	startTag, endTag);
+#endif /* OLD */
 validateNestingTags(page, startTag, endTag, bodyNesters, ArraySize(bodyNesters));
 return endTag->next;
 }
 
 static char *urlOkChars()
 /* Return array character indexed array that has
  * 1 for characters that are ok in URLs and 0
  * elsewhere. */
 {
 char *okChars;
 int c;
 AllocArray(okChars, 256);
 for (c=0; c<256; ++c)
     if (isalnum(c))
         okChars[c] = 1;
@@ -1757,30 +1759,40 @@
 }
 
 static void validateCgiUrls(struct htmlPage *page)
 /* Make sure URLs in page follow basic CGI encoding rules. */
 {
 struct htmlForm *form;
 struct slName *linkList = htmlPageLinks(page), *link;
 
 for (form = page->forms; form != NULL; form = form->next)
     validateCgiUrl(form->action);
 for (link = linkList; link != NULL; link = link->next)
     validateCgiUrl(link->name);
 slFreeList(&linkList);
 }
 
+static struct htmlTag *nextTagOfTypeInList(struct htmlTag *tagList, char *type)
+/* Walk the list from tagList (inclusive) and return the first tag whose name
+ * matches type per sameString; NULL if the list ends without a match. */
+{
+struct htmlTag *cur = tagList;
+while (cur != NULL && !sameString(cur->name, type))
+    cur = cur->next;
+return cur;
+}
+
 static int countTagsOfType(struct htmlTag *tagList, char *type)
 /* Count number of tags of given type. */
 {
 struct htmlTag *tag;
 int count = 0;
 for (tag = tagList; tag != NULL; tag = tag->next)
     if (sameString(tag->name, type))
         ++count;
 return count;
 }
 
 static void checkExactlyOne(struct htmlTag *tagList, char *type)
 /* Check there is exactly one of tag in list. */
 {
 int count = countTagsOfType(tagList, type);
@@ -1810,44 +1822,43 @@
 if (page->header != NULL)
     contentType = hashFindVal(page->header, "Content-Type:");
 if (contentType == NULL || startsWith("text/html", contentType))
     {
     /* To simplify things upper case all tag names. */
     for (tag = page->tags; tag != NULL; tag = tag->next)
 	touppers(tag->name);
 
     checkExactlyOne(page->tags, "BODY");
 
     /* Validate header, and make a suggestion or two */
     if ((tag = page->tags) == NULL)
 	errAbort("No tags");
     if (!sameWord(tag->name, "HTML"))
 	errAbort("Doesn't start with <HTML> tag");
-    tag = tag->next;
-    if (tag == NULL || !sameWord(tag->name, "HEAD"))
-	warn("<HEAD> tag does not immediately follow <HTML> tag");
+    if ((tag = nextTagOfTypeInList(tag->next, "HEAD")) == NULL)
+        warn("No <HEAD> tag after <HTML> tag");
     else
 	{
 	for (;;)
 	    {
 	    tag = tag->next;
 	    if (tag == NULL)
 		errAbort("Missing </HEAD>");
 	    if (sameWord(tag->name, "TITLE"))
 		gotTitle = TRUE;
 	    if (sameWord(tag->name, "/HEAD"))
 		break;
 	    }
 	if (!gotTitle)
 	    warn("No title in <HEAD>");
 	validateNestingTags(page, page->tags, tag, headNesters, ArraySize(headNesters));
 	tag = tag->next;
 	}
-    if (tag == NULL || !sameWord(tag->name, "BODY"))
+    if ((tag = nextTagOfTypeInList(tag, "BODY")) == NULL)
 	errAbort("<BODY> tag does not follow <HTML> tag");
     tag = validateBody(page, tag->next);
     if (tag == NULL || !sameWord(tag->name, "/HTML"))
 	errAbort("Missing </HTML>");
     validateCgiUrls(page);
     }
 }