fa22e76fd69504695f343f51fdd8368050063ed6 galt Wed May 15 23:57:53 2019 -0700 adding -withSrc option to htmlCheck commands getLinks checkLinks checkLinks2 checkLocalLinks checkLocalLinks2 diff --git src/utils/htmlCheck/htmlCheck.c src/utils/htmlCheck/htmlCheck.c index f46266c..b4b8beb 100644 --- src/utils/htmlCheck/htmlCheck.c +++ src/utils/htmlCheck/htmlCheck.c @@ -33,65 +33,74 @@ " getVars - print the form variables to stdout\n" " getLinks - print links\n" " getTags - print out just the tags\n" " checkLinks - check links in page\n" " checkLinks2 - check links in page and all subpages in same host\n" " (Just one level of recursion)\n" " checkLocalLinks - check local links in page\n" " checkLocalLinks2 - check local links in page and connected local pages\n" " (Just one level of recursion)\n" " submit - submit first form in page if any using 'GET' method\n" " validate - do some basic validations including TABLE/TR/TD nesting\n" " strictTagNestCheck - check tags are correctly nested\n" "options:\n" " cookies=cookie.txt - Cookies is a two column file\n" " containing \n" + " withSrc - causes the get and checkLinks commands to also include SRC= links.\n" "note: url will need to be in quotes if it contains an ampersand or question mark." ); } +boolean withSrc = FALSE; /* Set in main from the -withSrc option. When TRUE, getLinks and checkRecursiveLinks append the htmlPageSrcLinks list to the regular link list. */ + static struct optionSpec options[] = { + {"withSrc", OPTION_BOOLEAN}, {"cookies", OPTION_STRING}, {NULL, 0}, }; void checkOk(char *fullText) /* Parse the status out of the first line of fullText with htmlStatusParse. Abort through noWarnAbort() if no status is returned, or with an error message carrying the status code if that code is not 200. */ { struct htmlStatus *status = htmlStatusParse(&fullText); if (status == NULL) noWarnAbort(); if (status->status != 200) errAbort("Status code %d", status->status); } void getHeader(char *html) /* Print the header lines of html to stdout, each followed by CR LF. Stops at the first empty line or when htmlNextCrLfLine has no more lines to return. */ { char *line; while ((line = htmlNextCrLfLine(&html)) != NULL) { if (line == NULL || line[0] == 0) break; printf("%s\r\n", line); } } void getLinks(struct htmlPage *page) /* Print every link of page to stdout, one per line. When withSrc is set, the list from htmlPageSrcLinks is appended after the regular links and printed as well. 
*/ { struct slName *link, *linkList = htmlPageLinks(page); +if (withSrc) + { + struct slName *srcLinkList = htmlPageSrcLinks(page); + linkList = slCat(linkList, srcLinkList); + } for (link = linkList; link != NULL; link = link->next) { printf("%s\n", link->name); } } void htmlPrintForms(struct htmlPage *page, FILE *f) /* Print each form in page->forms to f using htmlFormPrint. */ { struct htmlForm *form; for (form = page->forms; form != NULL; form = form->next) htmlFormPrint(form, f); } void getVars(struct htmlPage *page) @@ -217,30 +226,36 @@ if (status == 0) /* Always true except after long jump. */ { struct dyString *dy = netSlurpUrl(url); retVal = dyStringCannibalize(&dy); } popAbortHandler(); return retVal; } void checkRecursiveLinks(struct hash *uniqHash, struct htmlPage *page, int depth, boolean justLocal) /* Check links recursively up to depth. When withSrc is set, the list from htmlPageSrcLinks is appended to the links being checked. A link starting with # is looked up as a named anchor within page and a warning is issued if it is not found. */ { struct slName *linkList = htmlPageLinks(page), *link; +if (withSrc) + { + struct slName *srcLinkList = htmlPageSrcLinks(page); + linkList = slCat(linkList, srcLinkList); + } + for (link = linkList; link != NULL; link = link->next) { if (link->name[0] == '#') { if (findNamedAnchor(page, link->name+1) == NULL) { warn("%s%s doesn't exist", page->url, link->name); } } else { char *url = htmlExpandUrl(page->url, link->name); if (url != NULL) { boolean isLocal = sameHost(page->url, url); @@ -314,30 +329,40 @@ cookie->value = cloneString(line); slAddHead(&list, cookie); } lineFileClose(&lf); slReverse(&list); return list; } void htmlCheck(char *command, char *url, char *cookieFile) /* Read url. Switch on command and dispatch to appropriate routine. Aborts before reading anything if withSrc is set and command is not one of getLinks, checkLinks, checkLinks2, checkLocalLinks or checkLocalLinks2. Cookies are read from cookieFile when it is not NULL. A url without a scheme separator is loaded with readInGulp, any other url is fetched with htmlSlurpWithCookies. 
*/ { char *fullText; struct htmlCookie *cookies = NULL; boolean isLocal = (stringIn("://", url) == NULL); +if (withSrc) + { + if (!(sameString(command, "getLinks") + || sameString(command, "checkLinks") + || sameString(command, "checkLinks2") + || sameString(command, "checkLocalLinks") + || sameString(command, "checkLocalLinks2"))) + errAbort("-withSrc can only be used with these commands: getLinks, checkLinks, checkLinks2, checkLocalLinks, checkLocalLinks2"); + } + if (cookieFile != NULL) cookies = readCookies(cookieFile); if (isLocal) readInGulp(url, &fullText, NULL); else fullText = htmlSlurpWithCookies(url, cookies); if (sameString(command, "getAll")) mustWrite(stdout, fullText, strlen(fullText)); else if (sameString(command, "ok")) checkOk(fullText); else if (sameString(command, "getHeader")) getHeader(fullText); else /* Do everything that requires full parsing. */ { struct htmlPage *page = NULL; @@ -378,19 +403,20 @@ else if (sameString(command, "checkLocalLinks2")) checkLinks(page, 2, TRUE); else errAbort("Unrecognized command %s", command); htmlPageFree(&page); } } int main(int argc, char *argv[]) /* Process command line. After option parsing exactly two positional arguments (command and url) must remain, otherwise usage() is called. Records the -withSrc flag in the withSrc global before dispatching to htmlCheck. */ { pushCarefulMemHandler(400000000); optionInit(&argc, argv, options); if (argc != 3) usage(); +withSrc = optionExists("withSrc"); htmlCheck(argv[1], argv[2], optionVal("cookies",NULL)); carefulCheckHeap(); return 0; }