b39119efb42cec014dfe6cba948bc7717b0176c4
lrnassar
  Fri Oct 20 14:10:18 2023 -0700
Changing the abuse message to point to private ML instead of public, refs #22423

diff --git src/hg/lib/botDelay.c src/hg/lib/botDelay.c
index 8d1dc6c..919156d 100644
--- src/hg/lib/botDelay.c
+++ src/hg/lib/botDelay.c
@@ -1,304 +1,304 @@
 /* botDelay.c - contact bottleneck server and sleep
  * for a little bit if IP address looks like it is
  * being just too demanding. */
 
 /* Copyright (C) 2014 The Regents of the University of California 
  * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 
 #include "common.h"
 #include "net.h"
 #include "portable.h"
 #include "hgConfig.h"
 #include "cheapcgi.h"
 #include "hui.h"
 #include "hCommon.h"
 #include "botDelay.h"
 #include "jsonWrite.h"
 
 /* Defaults used by hgBotDelayTime() and by earlyBotCheck() when its caller passes zero. */
 #define defaultDelayFrac 1.0   /* standard penalty for most CGIs */
 #define defaultWarnMs 10000    /* warning at 10 to 20 second delay */
 #define defaultExitMs 20000    /* error 429 Too Many Requests after 20+ second delay */
 
 int botDelayWarnMs  = 0;       /* warning threshold chosen by the latest earlyBotCheck();
                                 * global so the previously used value can be retrieved */
 
 int botDelayTime(char *host, int port, char *botCheckString)
 /* Send botCheckString to the bottleneck server at host:port and
  * return the delay it suggests, in milliseconds.  The reply is read
  * as text and converted with atoi(). */
 {
 char reply[256];
 int conn = netMustConnect(host, port);
 netSendString(conn, botCheckString);
 netRecieveString(conn, reply);
 close(conn);
 int delayMs = atoi(reply);
 return delayMs;
 }
 
 char *botDelayWarningMsg(char *ip, int millis)
 /* Build and return the default botDelay warning text.
  * Not all users of botDelay want the message to go to stderr, so it
  * is returned for the caller's own use case.  ip is the client address
  * quoted in the text; millis is the delay, shown to the user in seconds.
  * The result is allocated here with needMem().
  */
 {
 const int msgSize = 2048;
 time_t now = time(NULL);
 char *delayMsg = needMem(msgSize);
 /* asctime() output ends with a newline, which lands inside the message */
 char *timeStamp = asctime(localtime(&now));
 double delaySeconds = 0.001*millis;
 
 safef(delayMsg, msgSize,
     "There is a very high volume of traffic coming from your "
     "site (IP address %s) as of %s (California time).  So that other "
     "users get a fair share "
     "of our bandwidth, we are putting in a delay of %3.1f seconds "
     "before we service your request.  This delay will slowly "
     "decrease over a half hour as activity returns to normal.  This "
     "high volume of traffic is likely due to program-driven rather than "
     "interactive access, or the submission of queries on a large "
     "number of sequences.  If you are making large batch queries, "
     "please write to our genome@soe.ucsc.edu public mailing list "
     "and inquire about more efficient ways to access our data.  "
     "If you are sharing an IP address with someone who is submitting "
     "large batch queries, we apologize for the "
     "inconvenience. "
     "To use the genome browser functionality from a Unix command line, "
     "please read <a href='http://genome.ucsc.edu/FAQ/FAQdownloads.html#download36'>our FAQ</a> on this topic. "
     "For further help on how to access our data from a command line, "
     "or if "
     "you think this delay is being imposed unfairly, please contact genome-www@soe.ucsc.edu.",
     ip, timeStamp, delaySeconds);
 
 return delayMsg;
 }	/*	char *botDelayWarningMsg(char *ip, int millis)	*/
 
 void botDelayMessage(char *ip, int millis)
 /* Tell the user why the request is being stalled: the text built by
  * botDelayWarningMsg() is passed to warn().  The buffer returned by
  * botDelayWarningMsg() is not released here. */
 {
 char *stallMsg = botDelayWarningMsg(ip, millis);
 warn("%s", stallMsg);
 }
 
 void botTerminateMessage(char *ip, int millis)
 /* Print out message saying why you are terminated.
  * ip is the client address quoted in the text and millis is reported
  * as the "code".  The text is handed to hUserAbort(). */
 {
 time_t now = time(NULL);
 /* The "(code %d) " piece carries a trailing space so the following
  * sentence does not run straight into the closing parenthesis.
  * NOTE(review): the sentence starting "We cannot service requests" is
  * wrapped in literal C comment delimiters inside the string, so they are
  * part of the text passed on; hogExit() wraps the same sentence
  * in <b> tags instead -- confirm which form is intended. */
 hUserAbort("There is an exceedingly high volume of traffic coming from your "
        "site (IP address %s) as of %s (California time).  It looks like "
        "a web robot is launching queries quickly, and not even waiting for "
        "the results of one query to finish before launching another query. "
        "/* We cannot service requests from your IP address under */ these "
        "conditions.  (code %d) "
        "To use the genome browser functionality from a Unix command line, "
        "please read <a href='http://genome.ucsc.edu/FAQ/FAQdownloads.html#download36'>our FAQ</a> on this topic. "
        "For further help on how to access our data from a command line, "
        "or if "
        "you think this delay is being imposed unfairly, please contact genome-www@soe.ucsc.edu."
        , ip, asctime(localtime(&now)), millis);
 }
 
 static char *getCookieUser()
 /* Look up the user via the cookie that hUserCookie() identifies.
  * Returns whatever findCookieData() yields for it, or NULL when
  * hUserCookie() returns NULL. */
 {
 char *cookieName = hUserCookie();
 
 if (cookieName == NULL)
     return NULL;
 
 return findCookieData(cookieName);
 }
 
 char *getBotCheckString(char *ip, double fraction)
 /* Compose the string for the bot check: "user.ip fraction" when
  * getCookieUser() finds a user, otherwise "ip fraction".  The result is
  * allocated here with needMem(). */
 {
 const int checkSize = 256;
 char *cookieUser = getCookieUser();
 char *checkString = needMem(checkSize);
 if (cookieUser == NULL)
     safef(checkString, checkSize, "%s %f", ip, fraction);
 else
     safef(checkString, checkSize, "%s.%s %f", cookieUser, ip, fraction);
 return checkString;
 }
 
 boolean botException()
 /* Return TRUE when REMOTE_ADDR matches one of the space-separated entries
  * of the bottleneck.except config option, FALSE otherwise (including when
  * either the option or REMOTE_ADDR is not set). */
 {
 char *exceptIps = cfgOption("bottleneck.except");
 if (exceptIps == NULL)
     return FALSE;
 
 char *remoteAddr = getenv("REMOTE_ADDR");
 if (remoteAddr == NULL)
     return FALSE;
 
 char *entry = exceptIps;
 while (entry != NULL)
     {
     char *space = strchr(entry, ' ');
     boolean matches;
     if (space != NULL)
         {
         /* terminate this entry for the comparison, then put the space back */
         *space = 0;
         matches = sameString(remoteAddr, entry);
         *space = ' ';
         entry = space + 1;
         }
     else
         {
         /* last entry in the list */
         matches = sameString(remoteAddr, entry);
         entry = NULL;
         }
     if (matches)
         return TRUE;
     }
 return FALSE;
 }
 
 int hgBotDelayTime()
 /* Get suggested delay time using the standard penalty (defaultDelayFrac). */
 {
 int delayMs = hgBotDelayTimeFrac(defaultDelayFrac);
 return delayMs;
 }
 
 int hgBotDelayTimeFrac(double fraction)
 /* Get suggested delay time for the REMOTE_ADDR client, passing the given
  * penalty fraction in the bot check string.  Returns 0 without contacting
  * the server when bottleneck.host, bottleneck.port or REMOTE_ADDR is
  * not set. */
 {
 char *ip = getenv("REMOTE_ADDR");
 char *host = cfgOption("bottleneck.host");
 char *port = cfgOption("bottleneck.port");
 
 if (host == NULL || port == NULL || ip == NULL)
     return 0;
 
 char *checkString = getBotCheckString(ip, fraction);
 int delayMs = botDelayTime(host, atoi(port), checkString);
 freeMem(checkString);
 return delayMs;
 }
 
 #define err429  429
 #define err429Msg       "Too Many Requests"
 int botDelayMillis = 0;
 
 static void jsonHogExit(char *cgiExitName, long enteredMainTime, char *hogHost,
     int retryAfterSeconds)
 /* err429 Too Many Requests to be returned as JSON data.
  * Writes the response header (Content-Type, Status and, when
  * retryAfterSeconds > 0, Retry-After) to stdout, followed by a JSON object
  * with the fields error, statusCode, statusMessage and, when
  * retryAfterSeconds > 0, retryAfterSeconds.  hogHost is the client address
  * quoted in statusMessage.  cgiExitName and enteredMainTime are not used
  * inside this function. */
 {
 puts("Content-Type:application/json");
 printf("Status: %d %s\n", err429, err429Msg);
 if (retryAfterSeconds > 0)
     printf("Retry-After: %d", retryAfterSeconds);	/* no newline here, the puts() below supplies it */
 puts("\n");	/* blank line between header and body */
 
 /* assemble the JSON body */
 struct jsonWrite *jw = jsonWriteNew();
 jsonWriteObjectStart(jw, NULL);
 jsonWriteString(jw, "error", err429Msg);
 jsonWriteNumber(jw, "statusCode", err429);
 
 char msg[1024];
 
 safef(msg, sizeof(msg), "Your host, %s, has been sending too many requests "
        "lately and is unfairly loading our site, impacting performance for "
-       "other users.  Please contact genome@soe.ucsc.edu to ask that your site "
+       "other users.  Please contact genome-www@soe.ucsc.edu to ask that your site "
        "be reenabled.  Also, please consider downloading sequence and/or "
        "annotations in bulk -- see http://genome.ucsc.edu/downloads.html.",
        hogHost);
 
 jsonWriteString(jw, "statusMessage", msg);
 if (retryAfterSeconds > 0)
     jsonWriteNumber(jw, "retryAfterSeconds", retryAfterSeconds);
 
 jsonWriteObjectEnd(jw);
 
 /* print the accumulated JSON text; jw is not freed in this function */
 puts(jw->dy->string);
 }
 
 static void hogExit(char *cgiName, long enteredMainTime, char *exitType,
     int retryAfterSeconds)
 /* Exit requested by earlyBotCheck before the CGI has done any output or
  * setups of any kind.  HTML output has not yet started.
  * Sends a 429 response, via jsonHogExit() when exitType is "json" and as
  * an HTML page otherwise, with a Retry-After header when retryAfterSeconds
  * is positive.  Then calls cgiExitTime() and ends the process with exit(0).
  */
 {
 char *remoteIp = getenv("REMOTE_ADDR");
 char exitLabel[1024];
 safef(exitLabel, ArraySize(exitLabel), "%s hogExit", cgiName);
 
 if (!sameOk("json", exitType))
     {
     /* response header */
     puts("Content-Type:text/html");
     printf("Status: %d %s\n", err429, err429Msg);
     if (retryAfterSeconds > 0)
         printf("Retry-After: %d", retryAfterSeconds);
     puts("\n");	/* blank line between header and body */
 
     /* HTML page */
     puts("<!DOCTYPE HTML 4.01 Transitional>\n");
     puts("<html lang='en'>");
     puts("<head>");
     puts("<meta charset=\"utf-8\">");
     printf("<title>Status %d: %s</title></head>\n", err429, err429Msg);
 
     printf("<body><h1>Status %d: %s</h1><p>\n", err429, err429Msg);
     time_t now = time(NULL);
     char *timeStamp = asctime(localtime(&now));
     printf("There is an exceedingly high volume of traffic coming from your "
            "site (IP address %s) as of %s (California time).  It looks like "
            "a web robot is launching queries quickly, and not even waiting for "
            "the results of one query to finish before launching another query. "
            "<b>We cannot service requests from your IP address under</b> these "
            "conditions.  (code %d) "
            "To use the genome browser functionality from a Unix command line, "
            "please read <a href='http://genome.ucsc.edu/FAQ/FAQdownloads.html#download36'>our FAQ</a> on this topic. "
            "For further help on how to access our data from a command line, "
            "or if "
            "you think this delay is being imposed unfairly, please contact genome-www@soe.ucsc.edu."
            , remoteIp, timeStamp, botDelayMillis);
     puts("</body></html>");
     }
 else
     jsonHogExit(exitLabel, enteredMainTime, remoteIp, retryAfterSeconds);
 
 cgiExitTime(exitLabel, enteredMainTime);
 exit(0);
 }       /*      static void hogExit()   */
 
 boolean earlyBotCheck(long enteredMainTime, char *cgiName, double delayFrac, int warnMs, int exitMs, char *exitType)
 /* Replaces the former botDelayCgi; now in use before the CGI has started
  * any output, set up the cart or done any MySQL operations.  The boolean
  * return is used later in the CGI, after it has done all its setups and
  * started output, so it can issue the warning.  Pass in delayFrac 0.0
  * to use the default 1.0, pass in 0 for warnMs and exitMs to use defaults,
  * and exitType is either 'html' or 'json' to do that type of exit output in
  * the case of hogExit().
  * Stores the thresholds and delay used in botDelayWarnMs and botDelayMillis.
  */
 {
 if (botException())	/* don't do this if caller is on the exception list */
     return FALSE;
 
 if (delayFrac < 0.000001) /* passed in zero, use default */
     delayFrac = defaultDelayFrac;
 
 /* passed in zero, use default */
 botDelayWarnMs = (warnMs < 1) ? defaultWarnMs : warnMs;
 
 if (exitMs < 1)	/* passed in zero, use default */
     exitMs = defaultExitMs;
 
 botDelayMillis = hgBotDelayTimeFrac(delayFrac);
 
 /* no delay, or a delay not above the warning threshold: nothing to do */
 if (botDelayMillis <= 0 || botDelayMillis <= botDelayWarnMs)
     return FALSE;
 
 boolean issueWarning = FALSE;
 if (botDelayMillis <= exitMs)
     issueWarning = TRUE;
 else	/* returning immediately */
     {
     int msAboveWarning = botDelayMillis - botDelayWarnMs;
     /* NOTE(review): one second per 10 ms above the warning threshold, plus
      * one; the reason for this ratio is not visible here -- confirm */
     int retryAfterSeconds = (msAboveWarning > 0) ? 1 + (msAboveWarning / 10) : 0;
     hogExit(cgiName, enteredMainTime, exitType, retryAfterSeconds);
     }
 
 sleep1000(botDelayMillis); /* sleep when > botDelayWarnMs and < exitMs */
 return issueWarning;	/* caller can decide on their type of warning */
 }	/*	boolean earlyBotCheck()	*/
 
 int hgBotDelayCurrWarnMs()
 /* Return the number of millis tolerated before a warning is shown, as
  * stored in botDelayWarnMs by the most recent call to earlyBotCheck. */
 {
 int currWarnMs = botDelayWarnMs;
 return currWarnMs;
 }