9ae100f68581af8c1a38dab93954173ef2f76254
max
  Tue Jun 24 01:32:21 2025 -0700
captcha user-agent exceptions are defined in hg.conf, refs #35790

diff --git src/hg/lib/cart.c src/hg/lib/cart.c
index 52dd27dd04b..7b5c6214570 100644
--- src/hg/lib/cart.c
+++ src/hg/lib/cart.c
@@ -1537,63 +1537,74 @@
          "   },\n"
        "});\n"
        "}\n", cfSiteKey);
     puts("</script>");
     puts("</head><body>");
     puts("<style>body, h1, h2, h3, h4, h5, h6  { font-family: Helvetica, Arial, sans-serif; }</style>\n");
     puts("<h4>The Genome Browser is protecting itself from bots. This will just take a few seconds.</h4>");
     puts("<small>If you are a bot and were made for a research project, please contact us by email.</small>");
     puts("<script src='https://challenges.cloudflare.com/turnstile/v0/api.js?onload=showWidget' async defer></script>");
     puts("<div id='myWidget'></div>");
     puts("</body></html>");
     sqlCleanupAll(); // we are wondering about hanging connections, so just in case, close them.
     exit(0);
 }
 
+static boolean isUserAgentException(void)
+/* Return TRUE if the HTTP user-agent matches one of the regular expressions
+ * configured under the "noCaptchaAgent." prefix in hg.conf, else FALSE. */
+{
+char *agent = cgiUserAgent();
+if (!agent)
+    return FALSE;
+
+// each value returned for the "noCaptchaAgent." prefix is used as one pattern
+struct slName *excStrs = cfgValsWithPrefix("noCaptchaAgent.");
+if (!excStrs)
+    return FALSE;
+
+for (struct slName *sl = excStrs;  sl != NULL;  sl = sl->next)
+    {
+    if (regexMatch(agent, sl->name))
+        return TRUE;
+    }
+return FALSE;
+}
+
 void forceUserIdOrCaptcha(struct cart* cart, char *userId, boolean userIdFound, boolean fromCommandLine)
-/* print captcha is user did not sent a valid hguid cookie or a valid cloudflare token. Always allow rtracklayer. */
+/* Print captcha if user did not send a valid hguid cookie or a valid
+ * cloudflare token. Allow certain IPs and user-agents. */
 {
 if (fromCommandLine || isEmpty(cfgOption(CLOUDFLARESITEKEY)))
     return;
 
 // no captcha for our own QA scripts running on a server with our IP address
 if (botException())
     return;
 
-// let rtracklayer user agent pass, but allow us to remove this exception in case the bots discover it one day
-if ( (!cfgOption("blockRtracklayer") && sameOk(cgiUserAgent(), "rtracklayer")) || 
-        (isNotEmpty(cgiUserAgent()) && startsWith("IGV", cgiUserAgent())) )
-    return;
-
-// QA can add a user agent after release, in case someone complains that their library is blocked
-char *okUserAgent = cfgOption("okUserAgent");
-if (okUserAgent && sameOk(cgiUserAgent(), okUserAgent))
+if (isUserAgentException()) // no captcha for user-agents listed as exceptions in hg.conf
     return;
 
 // Do not show a captcha if we have a valid cookie 
-// but for debugging, it's nice to be force the captcha to come up
+// but for debugging, it's nice to be able to force the captcha via the "captcha" CGI argument
 if (userId && userIdFound && !cgiOptionalString("captcha"))
     return;
 
-// This is a hack to let all AJAX requests pass without cookies, no needed anymore.
-// It's a hack because the header can be set by any curl script to get around
-// the captcha. 
-//if (sameOk(getenv("HTTP_X_REQUESTED_WITH"), "XMLHttpRequest"))
-    //return;
-
+// When the captcha is solved, our JS code does a full page-reload, no AJAX. That saves us one round-trip.
+// After the reload, the new page URL has the captcha token in the URL argument list, so now we need to validate it
+// and remove it from the cart
 char *token = cgiOptionalString("token");
-
 if (token && isValidToken(token))
 {
     cartRemove(cart, "token");
     return;
 }
 
 printCaptcha();
 }
 
 void cartRemove(struct cart *cart, char *var);
 
 struct cart *cartNew(char *userId, char *sessionId,
                      char **exclude, struct hash *oldVars)
 /* Load up cart from user & session id's.  Exclude is a null-terminated list of
  * strings to not include */