a47f57d008f2db28a7b140ece87cbcfb8e8a5c0c hiram Thu Sep 12 14:18:19 2019 -0700 expand earlyBotCheck to manage two types of outputs, recognize defaults, and adding hgGateway to the game refs #23217 diff --git src/hg/lib/botDelay.c src/hg/lib/botDelay.c index 5304629..9b7d1a9 100644 --- src/hg/lib/botDelay.c +++ src/hg/lib/botDelay.c @@ -1,30 +1,31 @@ /* botDelay.c - contact bottleneck server and sleep * for a little bit if IP address looks like it is * being just too demanding. */ /* Copyright (C) 2014 The Regents of the University of California * See README in this or parent directory for licensing information. */ #include "common.h" #include "net.h" #include "portable.h" #include "hgConfig.h" #include "cheapcgi.h" #include "hui.h" #include "hCommon.h" #include "botDelay.h" +#include "jsonWrite.h" #define defaultDelayFrac 1.0 /* standard penalty for most CGIs */ #define defaultWarnMs 10000 /* warning at 10 to 20 second delay */ #define defaultExitMs 20000 /* error 429 Too Many Requests after 20+ second delay */ int botDelayTime(char *host, int port, char *botCheckString) /* Figure out suggested delay time for ip address in * milliseconds. */ { int sd = netMustConnect(host, port); char buf[256]; netSendString(sd, botCheckString); netRecieveString(sd, buf); close(sd); return atoi(buf); @@ -208,89 +209,135 @@ int delay = 0; if (host != NULL && port != NULL && ip != NULL) { char *botCheckString = getBotCheckString(ip, fraction); delay = botDelayTime(host, atoi(port), botCheckString); freeMem(botCheckString); } return delay; } #define err429 429 #define err429Msg "Too Many Requests" int botDelayMillis = 0; -static void hogExit(char *cgiName, long enteredMainTime) +static void jsonHogExit(char *cgiExitName, long enteredMainTime, char *hogHost, + int retryAfterSeconds) +/* err429 Too Many Requests to be returned as JSON data */ +{ +puts("Content-Type:application/json"); +printf("Status: %d %s\n", err429, err429Msg); +if (retryAfterSeconds > 0) + printf("Retry-After: %d", retryAfterSeconds); +puts("\n"); /* blank line between header and body */ + +struct jsonWrite *jw = jsonWriteNew(); +jsonWriteObjectStart(jw, NULL); +jsonWriteString(jw, "error", err429Msg); +jsonWriteNumber(jw, "statusCode", err429); + +char msg[1024]; + +safef(msg, sizeof(msg), "Your host, %s, has been sending too many requests " + "lately and is unfairly loading our site, impacting performance for " + "other users. Please contact genome@soe.ucsc.edu to ask that your site " + "be reenabled. Also, please consider downloading sequence and/or " + "annotations in bulk -- see http://genome.ucsc.edu/downloads.html.", + hogHost); + +jsonWriteString(jw, "statusMessage", msg); +if (retryAfterSeconds > 0) + jsonWriteNumber(jw, "retryAfterSeconds", retryAfterSeconds); + +jsonWriteObjectEnd(jw); + +puts(jw->dy->string); +} + +static void hogExit(char *cgiName, long enteredMainTime, char *exitType, + int retryAfterSeconds) /* earlyBotCheck requests exit before CGI has done any output or * setups of any kind. HTML output has not yet started. */ { char *hogHost = getenv("REMOTE_ADDR"); char cgiExitName[1024]; safef(cgiExitName, ArraySize(cgiExitName), "%s hogExit", cgiName); +if (sameOk("json", exitType)) + jsonHogExit(cgiExitName, enteredMainTime, hogHost, retryAfterSeconds); +else + { + puts("Content-Type:text/html"); printf("Status: %d %s\n", err429, err429Msg); -puts("Retry-After: 30"); -puts("\n"); + if (retryAfterSeconds > 0) + printf("Retry-After: %d", retryAfterSeconds); + puts("\n"); /* blank line between header and body */ puts("<!DOCTYPE HTML 4.01 Transitional>\n"); puts("<html lang='en'>"); puts("<head>"); puts("<meta charset=\"utf-8\">"); printf("<title>Status %d: %s</title></head>\n", err429, err429Msg); printf("<body><h1>Status %d: %s</h1><p>\n", err429, err429Msg); time_t now = time(NULL); printf("There is an exceedingly high volume of traffic coming from your " "site (IP address %s) as of %s (California time). It looks like " "a web robot is launching queries quickly, and not even waiting for " "the results of one query to finish before launching another query. " "<b>We cannot service requests from your IP address under</b> these " "conditions. (code %d) " "To use the genome browser functionality from a Unix command line, " "please read <a href='http://genome.ucsc.edu/FAQ/FAQdownloads.html#download36'>our FAQ</a> on this topic. " "For further help on how to access our data from a command line, " "or if " "you think this delay is being imposed unfairly, please contact genome-www@soe.ucsc.edu." ,hogHost, asctime(localtime(&now)), botDelayMillis); puts("</body></html>"); + } cgiExitTime(cgiExitName, enteredMainTime); exit(0); } /* static void hogExit() */ - -boolean earlyBotCheck(long enteredMainTime, char *cgiName, double delayFrac, int warnMs, int exitMs) +boolean earlyBotCheck(long enteredMainTime, char *cgiName, double delayFrac, int warnMs, int exitMs, char *exitType) /* similar to botDelayCgi but for use before the CGI has started any * output or setup the cart of done any MySQL operations. The boolean * return is used later in the CGI after it has done all its setups and * started output so it can issue the warning. Pass in delayFrac 0.0 - * to use the default 1.0 + * to use the default 1.0, pass in 0 for warnMs and exitMs to use defaults, + * and exitType is either 'html' or 'json' to do that type of exit output in + * the case of hogExit(); */ { boolean issueWarning = FALSE; if (botException()) /* don't do this if caller is on the exception list */ return issueWarning; if (delayFrac < 0.000001) /* passed in zero, use default */ delayFrac = defaultDelayFrac; if (warnMs < 1) /* passed in zero, use default */ warnMs = defaultWarnMs; if (exitMs < 1) /* passed in zero, use default */ exitMs = defaultExitMs; botDelayMillis = hgBotDelayTimeFrac(delayFrac); if (botDelayMillis > 0) { - sleep1000(botDelayMillis); + int msAboveWarning = botDelayMillis - warnMs; + int retryAfterSeconds = 0; + if (msAboveWarning > 0) + retryAfterSeconds = 1 + (msAboveWarning / 10); if (botDelayMillis > warnMs) { - if (botDelayMillis > exitMs) - hogExit(cgiName, enteredMainTime); + if (botDelayMillis > exitMs) /* returning immediately */ + hogExit(cgiName, enteredMainTime, exitType, retryAfterSeconds); else issueWarning = TRUE; } + sleep1000(botDelayMillis); /* sleeping while still < exitMs */ } -return issueWarning; +return issueWarning; /* caller can decide on their type of warning */ } /* boolean earlyBotCheck() */