0bad11bffc60ba7d250b33495d2f392d1964826b jcasper Tue May 20 08:39:04 2025 -0700 Initial bottleneck change to validate hgsids without database access, refs #35763 diff --git src/hg/lib/botDelay.c src/hg/lib/botDelay.c index d9bea151b67..51dce3f79e4 100644 --- src/hg/lib/botDelay.c +++ src/hg/lib/botDelay.c @@ -2,30 +2,31 @@ * for a little bit if IP address looks like it is * being just too demanding. */ /* Copyright (C) 2014 The Regents of the University of California * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "net.h" #include "portable.h" #include "hgConfig.h" #include "cheapcgi.h" #include "hui.h" #include "hCommon.h" #include "botDelay.h" #include "jsonWrite.h" +#include "regexHelper.h" #define defaultDelayFrac 1.0 /* standard penalty for most CGIs */ #define defaultWarnMs 10000 /* warning at 10 to 20 second delay */ #define defaultExitMs 20000 /* error 429 Too Many Requests after 20+ second delay */ int botDelayWarnMs = 0; /* global so the previously used value can be retrieved */ int botDelayTime(char *host, int port, char *botCheckString) /* Figure out suggested delay time for ip address in * milliseconds. */ { int sd = netMustConnect(host, port); char buf[256]; netSendString(sd, botCheckString); netRecieveString(sd, buf); @@ -90,49 +91,69 @@ , ip, asctime(localtime(&now)), millis); } static char *getCookieUser() /* get user from hguid cookie */ { char *user = NULL; char *centralCookie = hUserCookie(); if (centralCookie) user = findCookieData(centralCookie); return user; } + +boolean isValidHgsidForEarlyBotCheck(char *raw_hgsid) +/* We want to use the hgsid from the CGI parameters, but sometimes requests come in with bogus strings that + * need to be ignored. We don't want to run this against the database just yet, but we can at least check + * the format. */ +{ +char hgsid[1024]; +// Just in case it's egregiously large, we only need the first part to decide if it's valid. +safencpy(hgsid, sizeof(hgsid), raw_hgsid, 50); +if (regexMatch(hgsid, "^[0-9][0-9]*_[a-zA-Z0-9]{28}$")) + return TRUE; +return FALSE; +} + + char *getBotCheckString(char *ip, double fraction) /* compose "user.ip fraction" string for bot check */ { char *user = getCookieUser(); char *botCheckString = needMem(256); boolean useNew = cfgOptionBooleanDefault("newBotDelay", FALSE); if (useNew) { - char *hgsid = cgiOptionalString("hgsid"); if (user) safef(botCheckString, 256, "uid%s %f", user, fraction); else { - if (hgsid) + char *hgsid = cgiOptionalString("hgsid"); + if (hgsid && isValidHgsidForEarlyBotCheck(hgsid)) safef(botCheckString, 256, "sid%s %f", hgsid, fraction); else + { + if (hgsid) + // We were given an invalid hgsid - penalize this source in case of abuse + fraction *= 5; safef(botCheckString, 256, "%s %f", ip, fraction); } } + } else { if (user) safef(botCheckString, 256, "%s.%s %f", user, ip, fraction); else safef(botCheckString, 256, "%s %f", ip, fraction); } return botCheckString; } boolean botException() /* check if the remote ip address is on the exceptions list */ { char *exceptIps = cfgOption("bottleneck.except"); if (exceptIps)