c14f04a77954e2abf3747d2943c42e4a499d0f6b hiram Mon Sep 20 10:03:35 2021 -0700 restore obsolete botDelay code required by hgPhyloPlace and do not set variables that are in environment diff --git src/hg/lib/botDelay.c src/hg/lib/botDelay.c index 534c793..6086423 100644 --- src/hg/lib/botDelay.c +++ src/hg/lib/botDelay.c @@ -1,290 +1,357 @@ /* botDelay.c - contact bottleneck server and sleep * for a little bit if IP address looks like it is * being just too demanding. */ /* Copyright (C) 2014 The Regents of the University of California * See README in this or parent directory for licensing information. */ #include "common.h" #include "net.h" #include "portable.h" #include "hgConfig.h" #include "cheapcgi.h" #include "hui.h" #include "hCommon.h" #include "botDelay.h" #include "jsonWrite.h" #define defaultDelayFrac 1.0 /* standard penalty for most CGIs */ #define defaultWarnMs 10000 /* warning at 10 to 20 second delay */ #define defaultExitMs 20000 /* error 429 Too Many Requests after 20+ second delay */ int botDelayTime(char *host, int port, char *botCheckString) /* Figure out suggested delay time for ip address in * milliseconds. Connects to host:port, sends botCheckString, reads the * reply into a 256 byte buffer and returns atoi() of that reply. * NOTE(review): the result of netRecieveString() is not checked before * atoi(buf) is called -- confirm buf is always filled in. */ { int sd = netMustConnect(host, port); char buf[256]; netSendString(sd, botCheckString); netRecieveString(sd, buf); close(sd); return atoi(buf); } char *botDelayWarningMsg(char *ip, int millis) /* Return the string for the default botDelay message, mentioning ip, * the current local time and the delay (millis shown as seconds). * Not all users of botDelay want the message to go to stderr, so it is * returned for their own use case. The string is a needMem(2048) * buffer that is not freed in this function. */ { time_t now = time(NULL); char *delayMsg = needMem(2048); safef(delayMsg, 2048, "There is a very high volume of traffic coming from your " "site (IP address %s) as of %s (California time). So that other " "users get a fair share " "of our bandwidth, we are putting in a delay of %3.1f seconds " "before we service your request. This delay will slowly " "decrease over a half hour as activity returns to normal. 
This " "high volume of traffic is likely due to program-driven rather than " "interactive access, or the submission of queries on a large " "number of sequences. If you are making large batch queries, " "please write to our genome@soe.ucsc.edu public mailing list " "and inquire about more efficient ways to access our data. " "If you are sharing an IP address with someone who is submitting " "large batch queries, we apologize for the " "inconvenience. " "To use the genome browser functionality from a Unix command line, " "please read our FAQ on this topic. " "For further help on how to access our data from a command line, " "or if " "you think this delay is being imposed unfairly, please contact genome-www@soe.ucsc.edu.", ip, asctime(localtime(&now)), 0.001*millis); return delayMsg; } /* char *botDelayWarningMsg(char *ip, int millis) */ void botDelayMessage(char *ip, int millis) /* Print out message saying why you are stalled, by passing the * botDelayWarningMsg() text to warn(). * NOTE(review): the buffer returned by botDelayWarningMsg() is not freed * here -- confirm that is acceptable. */ { warn("%s", botDelayWarningMsg(ip, millis)); } void botTerminateMessage(char *ip, int millis) /* Print out message saying why you are terminated, via hUserAbort(). * NOTE(review): one of the string pieces below contains C comment * delimiters inside the string literal, so they are part of the emitted * text, and there is no space between the (code %d) piece and the * sentence that follows it -- confirm both are intended. */ { time_t now = time(NULL); hUserAbort("There is an exceedingly high volume of traffic coming from your " "site (IP address %s) as of %s (California time). It looks like " "a web robot is launching queries quickly, and not even waiting for " "the results of one query to finish before launching another query. " "/* We cannot service requests from your IP address under */ these " "conditions. (code %d)" "To use the genome browser functionality from a Unix command line, " "please read our FAQ on this topic. " "For further help on how to access our data from a command line, " "or if " "you think this delay is being imposed unfairly, please contact genome-www@soe.ucsc.edu." 
, ip, asctime(localtime(&now)), millis); } static char *getCookieUser() /* Get user from hguid cookie: returns the findCookieData() result for the * cookie name given by hUserCookie(), or NULL when hUserCookie() returns * NULL. */ { char *user = NULL; char *centralCookie = hUserCookie(); if (centralCookie) user = findCookieData(centralCookie); return user; } char *getBotCheckString(char *ip, double fraction) /* Compose "user.ip fraction" string for bot check, or "ip fraction" when * getCookieUser() returns NULL. The result is a needMem(256) buffer; * the callers visible in this file release it with freeMem(). */ { char *user = getCookieUser(); char *botCheckString = needMem(256); if (user) safef(botCheckString, 256, "%s.%s %f", user, ip, fraction); else safef(botCheckString, 256, "%s %f", ip, fraction); return botCheckString; } +void botDelayCgi(char *host, int port, boolean noWarn, double fraction) +/* Connect with bottleneck server and sleep the + * amount it suggests for IP address calling CGI script, + * after imposing the specified fraction of the access penalty. + * Does nothing when REMOTE_ADDR is not set in the environment. + * A delay above 10000 ms calls botDelayMessage() unless noWarn is set, + * and a delay above 20000 ms calls botTerminateMessage() instead. + * NOTE(review): 10000 and 20000 have the same values as defaultWarnMs + * and defaultExitMs defined in this file -- confirm they should track. */ +{ +int millis; +char *ip = getenv("REMOTE_ADDR"); +if (ip != NULL) + { + char *botCheckString = getBotCheckString(ip, fraction); + millis = botDelayTime(host, port, botCheckString); + freeMem(botCheckString); + if (millis > 0) + { + if (millis > 10000) + { + if (millis > 20000) + botTerminateMessage(ip, millis); + else + { + if (!noWarn) + botDelayMessage(ip, millis); + } + } + sleep1000(millis); + } + } +} + boolean botException() /* Check if the remote ip address is on the exceptions list: returns TRUE * when REMOTE_ADDR equals one of the space separated entries of the * bottleneck.except config option, FALSE otherwise. Each separator is * overwritten with 0 for the comparison and then restored. */ { char *exceptIps = cfgOption("bottleneck.except"); if (exceptIps) { char *remoteAddr = getenv("REMOTE_ADDR"); if (remoteAddr) { char *s = exceptIps; boolean found = FALSE; while (s && !found) { char *e = strchr(s, ' '); if (e) *e = 0; if (sameString(remoteAddr, s)) found = TRUE; if (e) *e++ = ' '; s = e; } if (found) return TRUE; } } return FALSE; } +static void hgBotDelayExt(boolean noWarn, double fraction) +/* High level bot delay call - looks up bottleneck server + * (bottleneck.host and bottleneck.port) with cfgOption() and calls + * botDelayCgi(). Returns without doing anything when botException() + * is TRUE or when either option is NULL.
*/ +{ +if (botException()) + return; + +char *host = cfgOption("bottleneck.host"); +char *port = cfgOption("bottleneck.port"); + +if (host != NULL && port != NULL) + botDelayCgi(host, atoi(port), noWarn, fraction); +} + +void hgBotDelay() +/* High level bot delay call - for use with regular webpage output */ +{ +hgBotDelayExt(FALSE, defaultDelayFrac); +} + +void hgBotDelayFrac(double fraction) +/* Like hgBotDelay, but imposes a fraction of the standard access penalty */ +{ +hgBotDelayExt(FALSE, fraction); +} + +void hgBotDelayNoWarn() +/* High level bot delay call without warning - for use with non-webpage outputs */ +{ +hgBotDelayExt(TRUE, defaultDelayFrac); +} + +void hgBotDelayNoWarnFrac(double fraction) +/* Like hgBotDelayNoWarn, but imposes a fraction of the standard access penalty */ +{ +hgBotDelayExt(TRUE, fraction); +} + int hgBotDelayTime() /* Get suggested delay time using the standard penalty (defaultDelayFrac); * simply forwards to hgBotDelayTimeFrac(). */ { return hgBotDelayTimeFrac(defaultDelayFrac); } int hgBotDelayTimeFrac(double fraction) /* Get suggested delay time from the bottleneck server for REMOTE_ADDR, * using the given fraction of the access penalty. Returns 0 when * bottleneck.host, bottleneck.port or REMOTE_ADDR is not available. 
*/ { char *ip = getenv("REMOTE_ADDR"); char *host = cfgOption("bottleneck.host"); char *port = cfgOption("bottleneck.port"); int delay = 0; if (host != NULL && port != NULL && ip != NULL) { char *botCheckString = getBotCheckString(ip, fraction); delay = botDelayTime(host, atoi(port), botCheckString); freeMem(botCheckString); } return delay; } #define err429 429 #define err429Msg "Too Many Requests" int botDelayMillis = 0; static void jsonHogExit(char *cgiExitName, long enteredMainTime, char *hogHost, int retryAfterSeconds) /* err429 Too Many Requests to be returned as JSON data. Prints the * Content-Type and Status header lines, a Retry-After header when * retryAfterSeconds > 0, and then a JSON object with error, statusCode, * statusMessage and (when > 0) retryAfterSeconds. * NOTE(review): cgiExitName and enteredMainTime are not used in this * function body. */ { puts("Content-Type:application/json"); printf("Status: %d %s\n", err429, err429Msg); if (retryAfterSeconds > 0) printf("Retry-After: %d", retryAfterSeconds); puts("\n"); /* blank line between header and body */ struct jsonWrite *jw = jsonWriteNew(); jsonWriteObjectStart(jw, NULL); jsonWriteString(jw, "error", err429Msg); jsonWriteNumber(jw, "statusCode", err429); char msg[1024]; safef(msg, sizeof(msg), "Your host, %s, has been sending too many requests " "lately and is unfairly loading our site, impacting performance for " "other users. Please contact genome@soe.ucsc.edu to ask that your site " "be reenabled. Also, please consider downloading sequence and/or " "annotations in bulk -- see http://genome.ucsc.edu/downloads.html.", hogHost); jsonWriteString(jw, "statusMessage", msg); if (retryAfterSeconds > 0) jsonWriteNumber(jw, "retryAfterSeconds", retryAfterSeconds); jsonWriteObjectEnd(jw); puts(jw->dy->string); } static void hogExit(char *cgiName, long enteredMainTime, char *exitType, int retryAfterSeconds) /* earlyBotCheck requests exit before CGI has done any output or * setups of any kind. HTML output has not yet started. Writes the 429 * response through jsonHogExit() when exitType is "json", otherwise as * text/html, then calls cgiExitTime() and exit(0). 
*/ { char *hogHost = getenv("REMOTE_ADDR"); char cgiExitName[1024]; safef(cgiExitName, ArraySize(cgiExitName), "%s hogExit", cgiName); if (sameOk("json", exitType)) jsonHogExit(cgiExitName, enteredMainTime, hogHost, retryAfterSeconds); else { puts("Content-Type:text/html"); printf("Status: %d %s\n", err429, err429Msg); if (retryAfterSeconds > 0) printf("Retry-After: %d", retryAfterSeconds); puts("\n"); /* blank line between header and body */ puts("\n"); puts(""); puts(""); puts(""); printf("Status %d: %s\n", err429, err429Msg); printf("

Status %d: %s

\n", err429, err429Msg); time_t now = time(NULL); printf("There is an exceedingly high volume of traffic coming from your " "site (IP address %s) as of %s (California time). It looks like " "a web robot is launching queries quickly, and not even waiting for " "the results of one query to finish before launching another query. " "We cannot service requests from your IP address under these " "conditions. (code %d) " "To use the genome browser functionality from a Unix command line, " "please read our FAQ on this topic. " "For further help on how to access our data from a command line, " "or if " "you think this delay is being imposed unfairly, please contact genome-www@soe.ucsc.edu." ,hogHost, asctime(localtime(&now)), botDelayMillis); puts(""); } cgiExitTime(cgiExitName, enteredMainTime); exit(0); } /* static void hogExit() */ boolean earlyBotCheck(long enteredMainTime, char *cgiName, double delayFrac, int warnMs, int exitMs, char *exitType) /* similar to botDelayCgi but for use before the CGI has started any * output or set up the cart or done any MySQL operations. The boolean * return is used later in the CGI after it has done all its setups and * started output so it can issue the warning. 
Pass in delayFrac 0.0 * to use the default 1.0, pass in 0 for warnMs and exitMs to use defaults, * and exitType is either 'html' or 'json' to do that type of exit output in * the case of hogExit(). Stores the delay in the global botDelayMillis * and returns TRUE only when it is above warnMs but not above exitMs. */ { boolean issueWarning = FALSE; if (botException()) /* don't do this if caller is on the exception list */ return issueWarning; if (delayFrac < 0.000001) /* passed in zero, use default */ delayFrac = defaultDelayFrac; if (warnMs < 1) /* passed in zero, use default */ warnMs = defaultWarnMs; if (exitMs < 1) /* passed in zero, use default */ exitMs = defaultExitMs; botDelayMillis = hgBotDelayTimeFrac(delayFrac); if (botDelayMillis > 0) { int msAboveWarning = botDelayMillis - warnMs; int retryAfterSeconds = 0; if (msAboveWarning > 0) retryAfterSeconds = 1 + (msAboveWarning / 10); if (botDelayMillis > warnMs) { if (botDelayMillis > exitMs) /* returning immediately */ hogExit(cgiName, enteredMainTime, exitType, retryAfterSeconds); else issueWarning = TRUE; sleep1000(botDelayMillis); /* sleep when > warnMs and < exitMs */ } } return issueWarning; /* caller can decide on their type of warning */ } /* boolean earlyBotCheck() */