char *htmlTextStripJavascriptCssAndTags(char *s)
/* Returns a newly allocated copy of s with inline javascript (<script ...> through
 * </script>), inline css (<style ...> through </style>), and all other html tags
 * removed.  Each removed element or tag is replaced by a single space so that the
 * text on either side of it does not run together.
 *
 * Returns NULL if s is NULL.  If a tag or a script/style element is opened but
 * never closed, everything from that opening '<' to the end of s is dropped.
 *
 * The result is allocated with needMem(); the caller owns it
 * (presumably released with freeMem() -- confirm against the library's conventions).
 *
 * NOTE(review): the script/style detection is a case-insensitive prefix match on
 * "<script" / "<style", so any tag whose name merely begins with those letters is
 * treated the same way and is skipped up to the next </script> / </style>. */
{
if (s == NULL)
    return NULL;

/* The output can never be longer than the input: every removed tag is at least
 * two characters ("<>") and is replaced by one space.  The +1 leaves room for the
 * terminating NUL, which the input-length-only allocation did not. */
char *scrubbed = needMem(strlen(s) + 1);
char *from = s;
char *to = scrubbed;

while (*from != '\0')
    {
    if (startsWithNoCase("<script", from))
        {
        /* Skip the whole element, body included, up to and including </script>. */
        from++;
        while (*from != '\0' && !startsWithNoCase("</script>", from))
            from++;
        if (*from == '\0')  // The last open tag was never closed!
            break;
        from += strlen("</script>");
        *to++ = ' ';
        }
    else if (startsWithNoCase("<style", from))
        {
        /* Skip the whole element, body included, up to and including </style>. */
        from++;
        while (*from != '\0' && !startsWithNoCase("</style>", from))
            from++;
        if (*from == '\0')  // The last open tag was never closed!
            break;
        from += strlen("</style>");
        *to++ = ' ';
        }
    else if (*from == '<')
        {
        /* Any other tag: skip through the closing '>'. */
        from++;
        while (*from != '\0' && *from != '>')
            from++;
        if (*from == '\0')  // The last open tag was never closed!
            break;
        from++;
        *to++ = ' ';
        }
    else
        *to++ = *from++;
    }

/* Terminate explicitly rather than depending on the allocator zero-filling. */
*to = '\0';
return scrubbed;
}