df65a42b7feebadc0c1270a5ffa82417e4e6030f kent Mon May 14 09:03:18 2012 -0700 Making it do a GET rather than a HEAD call when checking links, to avoid 405 errors on some sites.

diff --git src/utils/htmlCheck/htmlCheck.c src/utils/htmlCheck/htmlCheck.c
index 1e121d0..e177e9f 100644
--- src/utils/htmlCheck/htmlCheck.c
+++ src/utils/htmlCheck/htmlCheck.c
@@ -207,56 +207,57 @@
 /* Grab url. If there's a problem report error and return NULL */
 {
 int status;
 char *retVal = NULL;
 pushAbortHandler(recoverAbort);
 status = setjmp(recoverJumpBuf);
 if (status == 0)    /* Always true except after long jump. */
     {
     struct dyString *dy = netSlurpUrl(url);
     retVal = dyStringCannibalize(&dy);
     }
 popAbortHandler();
 return retVal;
 }
+
 void checkRecursiveLinks(struct hash *uniqHash, struct htmlPage *page, int depth, boolean justLocal)
 /* Check links recursively up to depth. */
 {
 struct slName *linkList = htmlPageLinks(page), *link;
 for (link = linkList; link != NULL; link = link->next)
     {
     if (link->name[0] == '#')
         {
         if (findNamedAnchor(page, link->name+1) == NULL)
             {
             warn("%s%s doesn't exist", page->url, link->name);
             }
         }
     else
         {
         char *url = htmlExpandUrl(page->url, link->name);
         if (url != NULL)
             {
             boolean isLocal = sameHost(page->url, url);
             if (isLocal || !justLocal)
                 {
                 if (!hashLookup(uniqHash, url))
                     {
                     struct hash *headerHash = newHash(8);
-                    int status = netUrlHead(url, headerHash);
+                    int status = netUrlHeadExt(url, "GET", headerHash);
                     hashAdd(uniqHash, url, NULL);
                     if (status != 200 && status != 302 && status != 301)
                         warn("%d from %s", status, url);
                     else
                         {
                         if (depth > 1 && isLocal)
                             {
                             char *contentType = hashFindValUpperCase(headerHash, "Content-Type:");
                             if (contentType != NULL && startsWith("text/html", contentType))
                                 {
                                 char *fullText = slurpUrl(url);
                                 if (fullText != NULL)
                                     {
                                     struct htmlPage *newPage = htmlPageParse(url, fullText);
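
For reference, the sketch below shows the same GET-based header probe exercised in isolation with the kent net library. It is a minimal illustration, not part of this commit: the helper name probeLink is hypothetical, and it assumes common.h, hash.h, and net.h from the kent source tree are on the include path.

/* Minimal sketch, not part of the patch: probe one link with a GET request
 * the way the patched checkRecursiveLinks() now does.  probeLink is a
 * hypothetical helper, not something in htmlCheck.c. */
#include "common.h"
#include "hash.h"
#include "net.h"

static void probeLink(char *url)
/* Fetch headers for url with a GET request and report the result. */
{
struct hash *headerHash = newHash(8);
/* GET instead of HEAD: some servers answer HEAD with 405 Method Not Allowed. */
int status = netUrlHeadExt(url, "GET", headerHash);
if (status != 200 && status != 301 && status != 302)
    warn("%d from %s", status, url);
else
    {
    char *contentType = hashFindValUpperCase(headerHash, "Content-Type:");
    if (contentType != NULL)
        printf("%s -> %d (%s)\n", url, status, contentType);
    }
}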