a47f57d008f2db28a7b140ece87cbcfb8e8a5c0c
hiram
  Thu Sep 12 14:18:19 2019 -0700
expand earlyBotCheck to manage two types of outputs, recognize defaults, and add hgGateway to the game refs #23217

diff --git src/hg/lib/botDelay.c src/hg/lib/botDelay.c
index 5304629..9b7d1a9 100644
--- src/hg/lib/botDelay.c
+++ src/hg/lib/botDelay.c
@@ -1,30 +1,31 @@
 /* botDelay.c - contact bottleneck server and sleep
  * for a little bit if IP address looks like it is
  * being just too demanding. */
 
 /* Copyright (C) 2014 The Regents of the University of California 
  * See README in this or parent directory for licensing information. */
 
 #include "common.h"
 #include "net.h"
 #include "portable.h"
 #include "hgConfig.h"
 #include "cheapcgi.h"
 #include "hui.h"
 #include "hCommon.h"
 #include "botDelay.h"
+#include "jsonWrite.h"
 
 #define defaultDelayFrac 1.0   /* standard penalty for most CGIs */
 #define defaultWarnMs 10000    /* warning at 10 to 20 second delay */
 #define defaultExitMs 20000    /* error 429 Too Many Requests after 20+ second delay */
 
 int botDelayTime(char *host, int port, char *botCheckString)
 /* Figure out suggested delay time for ip address in
  * milliseconds. */
 {
 int sd = netMustConnect(host, port);
 char buf[256];
 netSendString(sd, botCheckString);
 netRecieveString(sd, buf);
 close(sd);
 return atoi(buf);
@@ -208,89 +209,135 @@
 
 int delay = 0;
 if (host != NULL && port != NULL && ip != NULL)
     {
     char *botCheckString = getBotCheckString(ip, fraction);
     delay = botDelayTime(host, atoi(port), botCheckString);
     freeMem(botCheckString);
     }
 return delay;
 }
 
 #define err429  429
 #define err429Msg       "Too Many Requests"
 int botDelayMillis = 0;
 
-static void hogExit(char *cgiName, long enteredMainTime)
+static void jsonHogExit(char *cgiExitName, long enteredMainTime, char *hogHost,
+    int retryAfterSeconds)
+/* err429 Too Many Requests to be returned as JSON data */
+{
+puts("Content-Type:application/json");
+printf("Status: %d %s\n", err429, err429Msg);
+if (retryAfterSeconds > 0)
+    printf("Retry-After: %d", retryAfterSeconds);
+puts("\n");	/* blank line between header and body */
+
+struct jsonWrite *jw = jsonWriteNew();
+jsonWriteObjectStart(jw, NULL);
+jsonWriteString(jw, "error", err429Msg);
+jsonWriteNumber(jw, "statusCode", err429);
+
+char msg[1024];
+
+safef(msg, sizeof(msg), "Your host, %s, has been sending too many requests "
+       "lately and is unfairly loading our site, impacting performance for "
+       "other users.  Please contact genome@soe.ucsc.edu to ask that your site "
+       "be reenabled.  Also, please consider downloading sequence and/or "
+       "annotations in bulk -- see http://genome.ucsc.edu/downloads.html.",
+       hogHost);
+
+jsonWriteString(jw, "statusMessage", msg);
+if (retryAfterSeconds > 0)
+    jsonWriteNumber(jw, "retryAfterSeconds", retryAfterSeconds);
+
+jsonWriteObjectEnd(jw);
+
+puts(jw->dy->string);
+}
+
+static void hogExit(char *cgiName, long enteredMainTime, char *exitType,
+    int retryAfterSeconds)
 /* earlyBotCheck requests exit before CGI has done any output or
  * setups of any kind.  HTML output has not yet started.
  */
 {
 char *hogHost = getenv("REMOTE_ADDR");
 char cgiExitName[1024];
 safef(cgiExitName, ArraySize(cgiExitName), "%s hogExit", cgiName);
 
+if (sameOk("json", exitType))
+   jsonHogExit(cgiExitName, enteredMainTime, hogHost, retryAfterSeconds);
+else
+    {
+
     puts("Content-Type:text/html");
     printf("Status: %d %s\n", err429, err429Msg);
-puts("Retry-After: 30");
-puts("\n");
+    if (retryAfterSeconds > 0)
+        printf("Retry-After: %d", retryAfterSeconds);
+    puts("\n");	/* blank line between header and body */
 
     puts("<!DOCTYPE HTML 4.01 Transitional>\n");
     puts("<html lang='en'>");
     puts("<head>");
     puts("<meta charset=\"utf-8\">");
     printf("<title>Status %d: %s</title></head>\n", err429, err429Msg);
 
     printf("<body><h1>Status %d: %s</h1><p>\n", err429, err429Msg);
     time_t now = time(NULL);
     printf("There is an exceedingly high volume of traffic coming from your "
            "site (IP address %s) as of %s (California time).  It looks like "
            "a web robot is launching queries quickly, and not even waiting for "
            "the results of one query to finish before launching another query. "
            "<b>We cannot service requests from your IP address under</b> these "
            "conditions.  (code %d) "
            "To use the genome browser functionality from a Unix command line, "
            "please read <a href='http://genome.ucsc.edu/FAQ/FAQdownloads.html#download36'>our FAQ</a> on this topic. "
            "For further help on how to access our data from a command line, "
            "or if "
            "you think this delay is being imposed unfairly, please contact genome-www@soe.ucsc.edu."
            ,hogHost, asctime(localtime(&now)), botDelayMillis);
     puts("</body></html>");
+    }
 cgiExitTime(cgiExitName, enteredMainTime);
 exit(0);
 }       /*      static void hogExit()   */
 
-
-boolean earlyBotCheck(long enteredMainTime, char *cgiName, double delayFrac, int warnMs, int exitMs)
+boolean earlyBotCheck(long enteredMainTime, char *cgiName, double delayFrac, int warnMs, int exitMs, char *exitType)
 /* similar to botDelayCgi but for use before the CGI has started any
  * output or setup the cart of done any MySQL operations.  The boolean
  * return is used later in the CGI after it has done all its setups and
  * started output so it can issue the warning.  Pass in delayFrac 0.0
- * to use the default 1.0
+ * to use the default 1.0, pass in 0 for warnMs and exitMs to use defaults.
+ * exitType selects the hogExit() output format: 'json' for a JSON error
+ * body, any other value for the HTML error page.
  */
 {
 boolean issueWarning = FALSE;
 
 if (botException())	/* don't do this if caller is on the exception list */
     return issueWarning;
 
 if (delayFrac < 0.000001) /* passed in zero, use default */
     delayFrac = defaultDelayFrac;
 if (warnMs < 1)	/* passed in zero, use default */
     warnMs = defaultWarnMs;
 if (exitMs < 1)	/* passed in zero, use default */
     exitMs = defaultExitMs;
 
 botDelayMillis = hgBotDelayTimeFrac(delayFrac);
 if (botDelayMillis > 0)
     {
-    sleep1000(botDelayMillis);
+    int msAboveWarning = botDelayMillis - warnMs;
+    int retryAfterSeconds = 0;
+    if (msAboveWarning > 0)
+       retryAfterSeconds = 1 + (msAboveWarning / 10);
     if (botDelayMillis > warnMs)
 	{
-	if (botDelayMillis > exitMs)
-	    hogExit(cgiName, enteredMainTime);
+	if (botDelayMillis > exitMs) /* returning immediately */
+	    hogExit(cgiName, enteredMainTime, exitType, retryAfterSeconds);
 	else
 	    issueWarning = TRUE;
 	}
+    sleep1000(botDelayMillis); /* sleeping while still <= exitMs */
     }
-return issueWarning;
+return issueWarning;	/* caller can decide on their type of warning */
 }	/*	boolean earlyBotCheck()	*/