fa22e76fd69504695f343f51fdd8368050063ed6
galt
  Wed May 15 23:57:53 2019 -0700
Adding -withSrc option to htmlCheck commands getLinks, checkLinks, checkLinks2, checkLocalLinks, and checkLocalLinks2.

diff --git src/utils/htmlCheck/htmlCheck.c src/utils/htmlCheck/htmlCheck.c
index f46266c..b4b8beb 100644
--- src/utils/htmlCheck/htmlCheck.c
+++ src/utils/htmlCheck/htmlCheck.c
@@ -33,65 +33,74 @@
   "   getVars - print the form variables to stdout\n"
   "   getLinks - print links\n"
   "   getTags - print out just the tags\n"
   "   checkLinks - check links in page\n"
   "   checkLinks2 - check links in page and all subpages in same host\n"
   "             (Just one level of recursion)\n"
   "   checkLocalLinks - check local links in page\n"
   "   checkLocalLinks2 - check local links in page and connected local pages\n"
   "             (Just one level of recursion)\n"
   "   submit - submit first form in page if any using 'GET' method\n"
   "   validate - do some basic validations including TABLE/TR/TD nesting\n"
   "   strictTagNestCheck - check tags are correctly nested\n"
   "options:\n"
   "   cookies=cookie.txt - Cookies is a two column file\n"
   "           containing <cookieName><space><value><newLine>\n"
+  "   withSrc - causes the get and checkLinks commands to also include SRC= links.\n"
   "note: url will need to be in quotes if it contains an ampersand or question mark."
   );
 }
 
+boolean withSrc = FALSE;
+
 static struct optionSpec options[] = {
+   {"withSrc", OPTION_BOOLEAN},
    {"cookies", OPTION_STRING},
    {NULL, 0},
 };
 
 void checkOk(char *fullText)
 /* Parse out first line and check it's ok. */
 {
 struct htmlStatus *status = htmlStatusParse(&fullText);
 if (status == NULL)
     noWarnAbort();
 if (status->status != 200)
     errAbort("Status code %d", status->status);
 }
 
 void getHeader(char *html)
 /* Parse out and print header. */
 {
 char *line;
 while ((line = htmlNextCrLfLine(&html)) != NULL)
     {
     if (line == NULL || line[0] == 0)
 	break;
     printf("%s\r\n", line);
     }
 }
 
 void getLinks(struct htmlPage *page)
 /* Print out all links. */
 {
 struct slName *link, *linkList = htmlPageLinks(page);
+if (withSrc)
+    {
+    struct slName *srcLinkList = htmlPageSrcLinks(page);
+    linkList = slCat(linkList, srcLinkList);
+    }
 for (link = linkList; link != NULL; link = link->next)
     {
     printf("%s\n", link->name);
     }
 }
 
 void htmlPrintForms(struct htmlPage *page, FILE *f)
 /* Print out all forms. */
 {
 struct htmlForm *form;
 for (form = page->forms; form != NULL; form = form->next)
     htmlFormPrint(form, f);
 }
 
 void getVars(struct htmlPage *page)
@@ -217,30 +226,36 @@
 if (status == 0)    /* Always true except after long jump. */
     {
     struct dyString *dy = netSlurpUrl(url);
     retVal = dyStringCannibalize(&dy);
     }
 popAbortHandler();
 return retVal;
 }
 
 
 void checkRecursiveLinks(struct hash *uniqHash, struct htmlPage *page, 
 	int depth, boolean justLocal)
 /* Check links recursively up to depth. */
 {
 struct slName *linkList = htmlPageLinks(page), *link;
+if (withSrc)
+    {
+    struct slName *srcLinkList = htmlPageSrcLinks(page);
+    linkList = slCat(linkList, srcLinkList);
+    }
+
 for (link = linkList; link != NULL; link = link->next)
     {
     if (link->name[0] == '#')
         {
 	if (findNamedAnchor(page, link->name+1) == NULL)
 	    {
 	    warn("%s%s doesn't exist", page->url, link->name);
 	    }
 	}
     else
 	{
 	char *url = htmlExpandUrl(page->url, link->name);
 	if (url != NULL)
 	    {
 	    boolean isLocal = sameHost(page->url, url);
@@ -314,30 +329,40 @@
     cookie->value = cloneString(line);
     slAddHead(&list, cookie);
     }
 lineFileClose(&lf);
 slReverse(&list);
 return list;
 }
 
 void htmlCheck(char *command, char *url, char *cookieFile)
 /* Read url. Switch on command and dispatch to appropriate routine. */
 {
 char *fullText;
 struct htmlCookie *cookies = NULL;
 boolean isLocal = (stringIn("://", url) == NULL);
 
+if (withSrc)
+    {
+    if (!(sameString(command, "getLinks")
+       || sameString(command, "checkLinks")
+       || sameString(command, "checkLinks2")
+       || sameString(command, "checkLocalLinks")
+       || sameString(command, "checkLocalLinks2")))
+    errAbort("-withSrc can only be used with these commands: getLinks, checkLinks, checkLinks2, checkLocalLinks, checkLocalLinks2");
+    }
+
 if (cookieFile != NULL)
     cookies = readCookies(cookieFile);
 if (isLocal)
     readInGulp(url, &fullText, NULL);
 else
     fullText = htmlSlurpWithCookies(url, cookies);
 if (sameString(command, "getAll"))
     mustWrite(stdout, fullText, strlen(fullText));
 else if (sameString(command, "ok"))
     checkOk(fullText);
 else if (sameString(command, "getHeader"))
     getHeader(fullText);
 else /* Do everything that requires full parsing. */
     {
     struct htmlPage *page = NULL;
@@ -378,19 +403,20 @@
     else if (sameString(command, "checkLocalLinks2"))
         checkLinks(page, 2, TRUE);
     else
 	errAbort("Unrecognized command %s", command);
     htmlPageFree(&page);
     }
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 pushCarefulMemHandler(400000000);
 optionInit(&argc, argv, options);
 if (argc != 3)
     usage();
+withSrc = optionExists("withSrc");
 htmlCheck(argv[1], argv[2], optionVal("cookies",NULL));
 carefulCheckHeap();
 return 0;
 }