ab24e76f229a7159903ed55f6374127f29ffe446 max Tue Feb 26 03:11:16 2019 -0800 adding constants and one more fix to botdelay system in gene graph, refs #22999 diff --git src/hg/pyLib/hgLib.py src/hg/pyLib/hgLib.py index add7004..89564a5 100644 --- src/hg/pyLib/hgLib.py +++ src/hg/pyLib/hgLib.py @@ -35,31 +35,36 @@ if "hgwdev" in platform.node(): import cgitb cgitb.enable() # debug level: a number. the higher, the more debug info is printed # to see most debug messages, set to 1 # another way to change this variable is by setting the URL variable "debug" to 1 verboseLevel = 0 cgiArgs = None # like in the kent tree, we keep track of whether we have already output the content-type line contentLineDone = False # the effective total delay that was added before showing the page -botDelay = 0 +doWarnBot = False + +# two global variables: the first is the botDelay limit after which the page is slowed down and a warning is shown +# the second is the limit after which the page is not shown anymore +botDelayWarn = 1000 +botDelayBlock = 5000 jksqlTrace = False def warn(format, *args): print (format % args) def errAbort(msg): " show msg and abort. Like errAbort.c " printContentType() print msg exit(0) def debug(level, msg): " output debug message with a given verbosity level " if verboseLevel >= level: @@ -333,31 +338,31 @@ if ret!=0 and mustRun: errAbort("Could not run command %s" % cmd) return ret def printContentType(contType="text/html", fname=None): " print the HTTP Content-type header line with an optional file name. Also print bot delay note. " global contentLineDone if not contentLineDone: contentLineDone = True print("Content-type: %s; charset=utf-8" % contType) if fname is not None: print("Content-Disposition: attachment; filename=%s" % fname) print - if botDelay!=0: + if doWarnBot: print ("<div style='background-color:yellow; border:2px solid black'>") print ("We have a suspicion that you are an automated web bot software, not a real user. ") print ("To keep our site fast for other users, we have slowed down this page. ") print ("The slowdown will gradually disappear. ") print ("If you think this is a mistake, please contact us at genome-www@soe.ucsc.edu. ") print ("Also note that all data for hgGeneGraph can be obtained through our public MySQL server and") print ("all our software source code is available and can be installed locally onto your own computer. ") print ("If you are unsure how to use these resources, do not hesitate to contact us.") print ("</div>") def queryBottleneck(host, port, ip): " contact UCSC-style bottleneck server to get current delay time. From hg/lib/botDelay.c " # send ip address import socket @@ -378,47 +383,49 @@ if totalLen==expLen: break return int("".join(buf)) def hgBotDelay(): """ Implement bottleneck delay, get bottleneck server from hg.conf. This behaves similar to the function src/hg/lib/botDelay.c:hgBotDelay It does not use the hgsid, currently it always uses the IP address. Using the hgsid makes little sense. It is more lenient than the C version. """ import time if "DOCUMENT_ROOT" not in os.environ: # skip if not called from Apache return global hgConf - global botDelay + global doWarnBot hgConf = parseHgConf() if "bottleneck.host" not in hgConf: return ip = os.environ["REMOTE_ADDR"] delay = queryBottleneck(hgConf["bottleneck.host"], hgConf["bottleneck.port"], ip) debug(1, "Bottleneck delay: %d msecs" % delay) - if delay>1000: - time.sleep(delay/1000.0) - botDelay = delay # show warning message in printContentType() - if delay>5000: + if delay>botDelayBlock: errAbort("Too many HTTP requests. Your IP has been blocked to keep this website responsive for other users. " "Please contact genome-www@soe.ucsc.edu to unblock your IP address. We can also help you obtain the data you need without " "web crawling. ") sys.exit(0) + if delay>botDelayWarn: + time.sleep(delay/1000.0) + doWarnBot = True # = show warning message later in printContentType() + + def parseRa(text): " Parse ra-style string and return as dict name -> value " import string lines = text.split("\n") data = dict() for l in lines: if len(l)==0: continue key, val = string.split(l, " ", maxsplit=1) data[key] = val return data def lineFileNextRow(inFile): """ parses tab-sep file with headers in first line. Yields collection.namedtuples.