9ae100f68581af8c1a38dab93954173ef2f76254 max Tue Jun 24 01:32:21 2025 -0700 captcha user-agent exceptions are defined in hg.conf, refs #35790 diff --git src/hg/lib/cart.c src/hg/lib/cart.c index 52dd27dd04b..7b5c6214570 100644 --- src/hg/lib/cart.c +++ src/hg/lib/cart.c @@ -1537,63 +1537,74 @@ " },\n" "});\n" "}\n", cfSiteKey); puts(""); puts(""); puts("\n"); puts("

The Genome Browser is protecting itself from bots. This will just take a few seconds.

"); puts("If you are a bot and were made for a research project, please contact us by email."); puts(""); puts("
"); puts(""); sqlCleanupAll(); // we are wondering about hanging connections, so just in case, close them. exit(0); } +static boolean isUserAgentException() +/* return true if HTTP user-agent is in list of exceptions in hg.conf */ +{ +char *agent = cgiUserAgent(); +if (!agent) + return FALSE; + +struct slName *excStrs = cfgValsWithPrefix("noCaptchaAgent."); +if (!excStrs) + return FALSE; + +struct excReStr; +for (struct slName *sl = excStrs; sl != NULL; sl = sl->next) + { + if (regexMatch(agent, sl->name)) + return TRUE; + } + +return FALSE; +} + void forceUserIdOrCaptcha(struct cart* cart, char *userId, boolean userIdFound, boolean fromCommandLine) -/* print captcha is user did not sent a valid hguid cookie or a valid cloudflare token. Always allow rtracklayer. */ +/* print captcha is user did not sent a valid hguid cookie or a valid + * cloudflare token. Allow certain IPs and user-agents. */ { if (fromCommandLine || isEmpty(cfgOption(CLOUDFLARESITEKEY))) return; // no captcha for our own QA scripts running on a server with our IP address if (botException()) return; -// let rtracklayer user agent pass, but allow us to remove this exception in case the bots discover it one day -if ( (!cfgOption("blockRtracklayer") && sameOk(cgiUserAgent(), "rtracklayer")) || - (isNotEmpty(cgiUserAgent()) && startsWith("IGV", cgiUserAgent())) ) - return; - -// QA can add a user agent after release, in case someone complains that their library is blocked -char *okUserAgent = cfgOption("okUserAgent"); -if (okUserAgent && sameOk(cgiUserAgent(), okUserAgent)) +if (isUserAgentException()) return; // Do not show a captcha if we have a valid cookie -// but for debugging, it's nice to be force the captcha to come up +// but for debugging, it's nice to be able to force the captcha if (userId && userIdFound && !cgiOptionalString("captcha")) return; -// This is a hack to let all AJAX requests pass without cookies, no needed anymore. -// It's a hack because the header can be set by any curl script to get around -// the captcha. -//if (sameOk(getenv("HTTP_X_REQUESTED_WITH"), "XMLHttpRequest")) - //return; - +// when the captcha is solved, our JS code does a full page-reload, no AJAX. That saves us one round-trip. +// After the reload, the new page URL has the captcha token in the URL argument list, so now we need to validate it +// and remove it from the cart char *token = cgiOptionalString("token"); - if (token && isValidToken(token)) { cartRemove(cart, "token"); return; } printCaptcha(); } void cartRemove(struct cart *cart, char *var); struct cart *cartNew(char *userId, char *sessionId, char **exclude, struct hash *oldVars) /* Load up cart from user & session id's. Exclude is a null-terminated list of * strings to not include */