43d1009151fe8dfff680d387ed6772491b17d51f max Tue May 27 16:59:30 2025 -0700 adding cloudflare bot protection, without extranous files, refs #35790 diff --git src/hg/lib/cart.c src/hg/lib/cart.c index 1a769655b32..471adae1e1e 100644 --- src/hg/lib/cart.c +++ src/hg/lib/cart.c @@ -26,30 +26,32 @@ #endif /* GBROWSE */ #include "hgMaf.h" #include "hui.h" #include "geoMirror.h" #include "hubConnect.h" #include "trackHub.h" #include "cgiApoptosis.h" #include "customComposite.h" #include "regexHelper.h" #include "windowsToAscii.h" #include "jsonWrite.h" #include "verbose.h" #include "genark.h" #include "quickLift.h" +#include <curl/curl.h> + static char *sessionVar = "hgsid"; /* Name of cgi variable session is stored in. */ static char *positionCgiName = "position"; DbConnector cartDefaultConnector = hConnectCart; DbDisconnect cartDefaultDisconnector = hDisconnectCart; static boolean cartDidContentType = FALSE; struct slPair *httpHeaders = NULL; // A list of headers to output before the content-type static void hashUpdateDynamicVal(struct hash *hash, char *name, void *val) /* Val is a dynamically allocated (freeMem-able) entity to put * in hash. Override existing hash item with that name if any. * Otherwise make new hash item. 
*/ { struct hashEl *hel = hashLookup(hash, name); @@ -1438,64 +1440,195 @@ else { char *url = genarkUrl(db); if (url != NULL) { cartSetString(cart, "genome", db); cartAddString(cart, "hubUrl", url); cartRemove(cart, "db"); } else if (!hDbIsActive(db)) errAbort("Can not find database '%s'", db); } } +// ------ libify this in the next release ---- +// +struct curlString { + char *ptr; + size_t len; +}; +void init_string(struct curlString *s) { + s->len = 0; + s->ptr = malloc(1); + s->ptr[0] = '\0'; +} + +size_t writefunc(void *ptr, size_t size, size_t nmemb, void *userData) { + struct curlString *s = (struct curlString *)userData; + size_t new_len = s->len + size * nmemb; + s->ptr = realloc(s->ptr, new_len + 1); + memcpy(s->ptr + s->len, ptr, size * nmemb); + s->ptr[new_len] = '\0'; + s->len = new_len; + return size * nmemb; +} + +char* curlPostUrl(char *url, char *data) +/* post data to URL and return as string. Must be freed. */ +{ +CURL *curl = curl_easy_init(); +if (!curl) + errAbort("Cannot init curl library"); + +struct curlString response; +init_string(&response); + +curl_easy_setopt(curl, CURLOPT_URL, url); +curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data); +curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writefunc); +curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response); +curl_easy_perform(curl); +curl_easy_cleanup(curl); + +char *resp = cloneString(response.ptr); +free(response.ptr); +return resp; +} + +boolean isValidToken(char *token) +/* send https req to cloudflare, check if the token that we got from the captcha is really the one made by cloudflare */ +{ + char *url = "https://challenges.cloudflare.com/turnstile/v0/siteverify"; + char *secret = cfgVal("cloudFlareSecretKey"); + if (!secret) + errAbort("'cloudFlareSecretKey' must be set in hg.conf if cloudflare is activated in hg.conf"); + + char data[3000]; // cloudflare token is at most 2000 bytes + safef(data, sizeof(data), "secret=%s&response=%s", secret, token); + char *reply = curlPostUrl(url, data); 
+ + boolean res = startsWith("{\"success\":true", reply); + freez(&reply); + return res; +} + +#define CLOUDFLARESITEKEY "cloudFlareSiteKey" + +void printCaptcha() +/* print an html page that shows the captcha and on success, reloads the page with the token added as token=x */ +{ + char *cfSiteKey = cfgVal(CLOUDFLARESITEKEY); + if (!cfSiteKey) + return; + + puts("Content-Type:text/html"); + puts("\n"); + puts(""); + puts(""); + puts(""); + puts("\n"); + puts("

The Genome Browser is protecting itself from bots. This will just take a few seconds.

"); + puts("If you are a bot and were made for a research project, please contact us by email."); + puts(""); + puts("
"); + puts(""); + sqlCleanupAll(); // we are wondering about hanging connections, so just in case, close them. + exit(0); +} + +void forceUserIdOrCaptcha(struct cart* cart, char *userId, boolean userIdFound, boolean fromCommandLine) +/* print captcha if user did not send a valid hguid cookie or a valid cloudflare token. Always allow rtracklayer unless the blockRtracklayer config option is set. */ +{ +if (fromCommandLine || !cfgOption(CLOUDFLARESITEKEY)) + return; + +if (!cfgOption("blockRtracklayer") && sameOk(cgiUserAgent(), "rtracklayer")) + return; + +// so QA can add a user agent after release, in case someone complains +char *okUserAgent = cfgOption("okUserAgent"); +if (okUserAgent && sameOk(cgiUserAgent(), okUserAgent)) + return; + +if (userId && userIdFound) + return; + +char *token = cgiOptionalString("token"); + +if (token && isValidToken(token)) +{ + cartRemove(cart, "token"); + return; +} + +printCaptcha(); +} + +void cartRemove(struct cart *cart, char *var); + struct cart *cartNew(char *userId, char *sessionId, char **exclude, struct hash *oldVars) /* Load up cart from user & session id's. 
Exclude is a null-terminated list of * strings to not include */ { cgiApoptosisSetup(); if (cfgOptionBooleanDefault("showEarlyErrors", TRUE)) errAbortSetDoContentType(TRUE); if (cfgOptionBooleanDefault("suppressVeryEarlyErrors", FALSE)) htmlSuppressErrors(); setUdcCacheDir(); netSetTimeoutErrorMsg("A connection timeout means that either the server is offline or its firewall, the UCSC firewall or any router between the two blocks the connection."); - struct cart *cart; struct sqlConnection *conn = cartDefaultConnector(); char *ex; boolean userIdFound = FALSE, sessionIdFound = FALSE; AllocVar(cart); cart->hash = newHash(12); cart->exclude = newHash(7); cart->userId = userId; cart->sessionId = sessionId; cart->userInfo = loadDb(conn, userDbTable(), userId, &userIdFound); + cart->sessionInfo = loadDb(conn, sessionDbTable(), sessionId, &sessionIdFound); -boolean fromCli = cgiWasSpoofed(); +boolean fromCli = cgiWasSpoofed(); // QA runs our CGIs from the command line and we debug from there -if (sessionIdFound && !userIdFound && !fromCli && cfgOptionBooleanDefault("cartTrace", FALSE)) - fprintf(stderr, "HGSID_WITHOUT_COOKIE\n"); +forceUserIdOrCaptcha(cart, userId, userIdFound, fromCli); +// we rely on the cookie being validated, so if we reset a cookie, do this after the captcha +if ( cgiOptionalString("ignoreCookie") != NULL ) + cart->userInfo = loadDb(conn, userDbTable(), NULL, &userIdFound); + +// Leaving this in the code temporarily, until June 2025 release. 
if (!fromCli && ((sessionId && !sessionIdFound) || !sessionId) && (!userId || !userIdFound) && cfgOptionBooleanDefault("punishInvalidHgsid", FALSE)) { fprintf(stderr, "HGSID_WAIT no sessionId and no cookie: 5 seconds penalty"); sleep(5); if (sessionId && !sessionIdFound) { fprintf(stderr, "HGSID_WAIT2 sessionId sent but invalid: 10 seconds penalty"); sleep(10); } } if (sessionIdFound) @@ -2510,35 +2643,31 @@ char *noProxy = cfgOption("noProxy"); if (noProxy) setenv("no_proxy", noProxy, TRUE); char *logProxy = cfgOption("logProxy"); if (logProxy) setenv("log_proxy", logProxy, TRUE); /* noSqlInj settings so they are accessible in src/lib too */ char *noSqlInj_level = cfgOption("noSqlInj.level"); if (noSqlInj_level) setenv("noSqlInj_level", noSqlInj_level, TRUE); char *noSqlInj_dumpStack = cfgOption("noSqlInj.dumpStack"); if (noSqlInj_dumpStack) setenv("noSqlInj_dumpStack", noSqlInj_dumpStack, TRUE); - -// if ignoreCookie is on the URL, don't check for cookies -char *hguid = NULL; -if ( cgiOptionalString("ignoreCookie") == NULL ) - hguid = getCookieId(cookieName); +char *hguid = getCookieId(cookieName); // if _dumpToLog is on the URL, we can exit early with whatever // message we are trying to write to the stderr/error_log char *logMsg = NULL; if ( (logMsg = cgiOptionalString("_dumpToLog")) != NULL) { cartJsonStart(); fprintf(stderr, "%s", logMsg); cartJsonEnd(NULL); exit(0); } char *hgsid = getSessionId(); struct cart *cart = cartNew(hguid, hgsid, exclude, oldVars); cartExclude(cart, sessionVar);