# Commit 2b9a3a726493d66c526e3d8256178ff3e12c7e3c (max, Fri Apr 28 16:48:03 2017 -0700):
#   "renaming pylib to pyLib after Angie's suggestion (thanks) and adding a
#   better error message when graphviz was not found. refs #13634"
# That commit removed src/hg/pylib/hgLib.py; the content of that file follows.

# Library functions for genome browser CGI scripts written in Python 2.7

# Because this library is loaded for every CGI execution, only a
# fairly minimal set of functions is implemented here, e.g. hg.conf parsing,
# bottleneck, cart loading, mysql queries.

# The cart is currently read-only. More work is needed to allow writing a cart.

# General rules for CGI in Python:
# - never insert values into SQL queries. Write %s in the query and provide the
#   arguments to sqlQuery as a list.
# - never print incoming HTTP argument as raw text. Run it through cgi.escape to
#   destroy javascript code in them.

# NOTE(review): in the copy of this file under review, the HTML tags inside the
# string literals of the page-printing functions appear to have been stripped
# (only the text between tags is left). Those literals are reproduced below as
# they appear; restore the real markup from version control before relying on
# the HTML output.

import sys

# Non-standard imports. They need to be installed on the machine.
# We provide a pre-compiled library as part of our cgi-bin distribution as a
# fallback in the "pylib" directory. The idea of having pylib be the last
# directory in sys.path is that the system MySQLdb takes precedence.
try:
    import MySQLdb
except ImportError:
    print("Installation error - could not load MySQLdb for Python. Please tell your system administrator to run "
          "one of these commands as root: 'yum install MySQL-python', 'apt-get install python-mysqldb' or 'pip install MySQL-python'.")
    sys.exit(0)

# Imports from the Python 2.7 standard library
# Minimize global imports. Each library import can take up to 20msecs.
# (That is why the rarely used modules are imported inside the functions.)
import os
import cgi
from os.path import join, isfile, normpath, abspath, dirname
from collections import namedtuple

# activate debugging output only on dev
import platform
if "hgwdev" in platform.node():
    import cgitb
    cgitb.enable()

# debug level: a number. the higher, the more debug info is printed.
# None means that debugging output is switched off.
verboseLevel = None

# the cgi.FieldStorage with the CGI arguments, set by cgiSetup()
cgiArgs = None

# like in the kent tree, we keep track of whether we have already output the content-type line
contentLineDone = False


def errAbort(msg):
    """ Show msg and abort. Like errAbort.c.

    Prints the content-type header first if it has not been sent yet, then
    terminates the process with exit code 0.
    """
    if not contentLineDone:
        printContentType()
    print(msg)
    sys.exit(0)


def debug(level, msg):
    """ Output debug message msg if the current verbosity is at least level.

    Nothing is printed while verboseLevel is None (debugging not activated).
    """
    # The higher verboseLevel, the more is printed: a message is shown when its
    # level does not exceed the configured verbosity.
    if verboseLevel is not None and level <= verboseLevel:
        print(msg + "\n")
        sys.stdout.flush()


def parseConf(fname):
    """ Parse a hg.conf style file, return as dict key -> value (all strings).

    Lines starting with '#' are skipped. 'include <file>' lines are resolved
    relative to the directory of fname and merged in if the file exists.
    Only the first '=' of a line separates key and value, so values may
    contain '=' themselves. Keys and values are stripped of whitespace.
    """
    conf = {}
    with open(fname) as confFile:
        for line in confFile:
            line = line.strip()
            if line.startswith("#"):
                continue
            if line.startswith("include "):
                inclFname = line.split()[1]
                absFname = normpath(join(dirname(fname), inclFname))
                if isfile(absFname):
                    conf.update(parseConf(absFname))
            elif "=" in line:
                key, value = line.split("=", 1)
                conf[key.strip()] = value.strip()
    return conf


# cache of hg.conf contents
hgConf = None


def parseHgConf():
    """ Return hg.conf as dict key:value. The file is parsed only once. """
    global hgConf
    if hgConf is None:
        # look for hg.conf in the parent directory of this library
        confDir = os.path.dirname(__file__)
        hgConf = parseConf(os.path.join(confDir, "..", "hg.conf"))
    return hgConf


def cfgOption(name, default=None):
    """ Return hg.conf option or default. Parses hg.conf if not done yet. """
    return parseHgConf().get(name, default)


def sqlConnect(db, host=None, user=None, passwd=None):
    """ Connect to sql server specified in hg.conf with given db. Like jksql.c.

    If host is None, host, user and password are all taken from the db.*
    settings in hg.conf.
    """
    if host is None:
        cfg = parseHgConf()
        host, user, passwd = cfg["db.host"], cfg["db.user"], cfg["db.password"]
    return MySQLdb.connect(host=host, user=user, passwd=passwd, db=db)


def sqlTableExists(conn, table):
    """ Return True if table exists. Like jksql.c """
    return sqlQueryExists(conn, "SHOW TABLES LIKE %s", (table,))


def sqlQueryExists(conn, query, args=None):
    """ Return True if query returns a result. Like jksql.c.
    No caching for now, unlike hdb.c. """
    cursor = conn.cursor()
    try:
        cursor.execute(query, args)
        return cursor.fetchone() is not None
    finally:
        cursor.close()


def sqlQuery(conn, query, args=None):
    """ Return all rows for query, placeholders can be used, args is a list to
    replace placeholders, to prevent Mysql injection. Never do replacement
    with %s yourself, unless the value is coming from inside the program. This
    is called a "parameterized query". There is only %s, %d does not work.

    Rows are returned as a list of namedtuples, one field per result column.
    For statements without a result set (e.g. INSERT) an empty list is returned.

    example:
    query = "SELECT contents FROM table WHERE id=%(id)s and name=%(name)s;"
    rows = sqlQuery(conn, query, {"id":1234, "name":"hiram"})
    """
    cursor = conn.cursor()
    try:
        cursor.execute(query, args)
        if cursor.description is None:
            # no result set, nothing to convert
            return []
        data = cursor.fetchall()
        colNames = [desc[0] for desc in cursor.description]
    finally:
        cursor.close()
    # rename=True: columns like "count(*)" are not valid field names, they
    # become _0, _1, ... instead of raising ValueError
    Rec = namedtuple("MysqlRow", colNames, rename=True)
    return [Rec(*row) for row in data]


def htmlPageEnd(oldJquery=False):
    """ Close html body/page. oldJquery is not used. """
    # NOTE(review): markup missing from these literals, see note at top of file
    print("")
    print("")


def printMenuBar(oldJquery=False):
    """ Print the menubar. Mostly copied from src/hg/hgMenuBar.c.
    oldJquery is not used. """
    # NOTE(review): markup missing from the literals below, see note at top of file
    print("\n")

    menuPath = "../htdocs/inc/globalNavBar.inc"
    with open(menuPath, "r") as menuFile:
        print(menuFile.read())

    # fixup old menubar, copied from hgGtexTrackSettings, for now
    print("")
    print("")
    print("")

    print("\n")


def printHgcHeader(assembly, shortLabel, longLabel, addGoButton=True, infoUrl="#INFO_SECTION"):
    """ Print the title banner of a hgc-style page.
    Copied from hgGtexTrackSettings, uses bootstrap styling. """
    # NOTE(review): markup missing from the literals below, see note at top of file
    #print("\n")

    print("")
    print("\n")
    print("\n")
    print("")
    print(shortLabel)
    print("%s" % assembly)
    print("")
    print("%s" % longLabel)
    # NOTE(review): the reviewed copy has an empty format string here, which
    # raises TypeError. The anchor below is a minimal stand-in so that infoUrl
    # is interpolated again; replace it with the original link markup.
    print("<a href='%s'></a>" % infoUrl)
    print("")
    print("")
    print("")
    print("")
    print("\n")
    if addGoButton:
        print("\n")
        print("\n")
        print("\nGO\n")
        print("")
        print("\n")
        print("\n")
    print("\n")

    print("")
    print("\n")
    print("\n")


def printHgcSection(name, rightSideContent, id=None):
    """ Print a section header for the hgc page.

    name is the section title, rightSideContent is printed unchanged after it,
    id is an optional element id for the section.
    """
    # NOTE(review): markup missing from the literals below, see note at top of file
    print("")
    if id is None:
        print("\n")
    else:
        # NOTE(review): the reviewed copy has a format string without
        # placeholder here, which raises TypeError. Minimal stand-in that
        # keeps the id addressable; replace with the original markup.
        print("<div id='%s'></div>\n" % id)
    print("\n%s\n" % name)
    print("\n")
    print(rightSideContent)
    print("\n")
    print("\n")


def getGbHeader():
    """ Return the page header template. It has two %s placeholders.
    Python cannot include files with the preprocessor, so manually copied
    kent/src/hg/lib/gbHeader.h for now. """
    # NOTE(review): only the placeholders and line breaks of the template are
    # present in the reviewed copy, see note at top of file
    return "\n\n\n%s\n\n\n\nUCSC %s\n" + "\n" * 15


def webStartGbNoBanner(prefix, title):
    """ Output the header part, largely copied from web.c / webStartGbNoBanner """
    print(getGbHeader() % (prefix, title))


def runCmd(cmd, mustRun=True):
    """ Wrapper around system() that prints error messages.
    cmd preferably a list, not just a string.

    Returns the exit code of cmd. If it is not 0 and mustRun is true, the
    script is stopped with errAbort.
    """
    import subprocess
    ret = subprocess.call(cmd)
    if ret != 0 and mustRun:
        errAbort("Could not run command %s" % cmd)
    return ret


def printContentType(contType="text/html", fname=None):
    """ Print the HTTP Content-type header line with an optional file name """
    global contentLineDone
    contentLineDone = True
    print("Content-type: %s; charset=utf-8" % contType)
    if fname is not None:
        print("Content-Disposition: attachment; filename=%s" % fname)
    # empty line = end of the HTTP headers
    print("")


def queryBottleneck(host, port, ip):
    """ Contact UCSC-style bottleneck server to get current delay time.
    From hg/lib/botDelay.c

    Sends ip as a length-prefixed string and returns the answer as an int.
    Raises ValueError if the server closes the connection without sending a
    number.
    """
    import socket
    sock = socket.socket()
    try:
        sock.connect((host, int(port)))
        # send ip address, prefixed with its length as a single byte
        sock.send(chr(len(ip)) + ip)

        # read delay time: one length byte, then that many characters
        expLen = ord(sock.recv(1))
        totalLen = 0
        buf = []
        while totalLen < expLen:
            resp = sock.recv(1024)
            if not resp:
                # connection closed by the server, do not wait forever
                break
            buf.append(resp)
            totalLen += len(resp)
    finally:
        sock.close()
    return int("".join(buf))


def hgBotDelay():
    """
    Implement bottleneck delay, get bottleneck server from hg.conf.
    This behaves similar to the function src/hg/lib/botDelay.c:hgBotDelay
    It does not use the hgsid, currently it always uses the IP address.
    Using the hgsid makes little sense. It is more lenient than that C version.
    """
    import time
    if "DOCUMENT_ROOT" not in os.environ:  # skip if not called from Apache
        return
    cfg = parseHgConf()
    if "bottleneck.host" not in cfg:
        return
    ip = os.environ.get("REMOTE_ADDR")
    if ip is None:
        # without a client address there is nothing to ask the server about
        return
    delay = queryBottleneck(cfg["bottleneck.host"], cfg["bottleneck.port"], ip)
    # delay is in milliseconds
    if delay > 10000:
        time.sleep(delay / 1000.0)
    if delay > 20000:
        errAbort("Too many queries. Your IP has been blocked. Please contact genome-www@soe.ucsc.edu to unblock your IP address.")
        sys.exit(0)


def parseRa(text):
    """ Parse ra-style string and return as dict name -> value.

    Every non-empty line is split at the first space. A line without a space
    gets an empty string as its value.
    """
    data = dict()
    for line in text.split("\n"):
        if len(line) == 0:
            continue
        key, _, val = line.partition(" ")
        data[key] = val
    return data


def lineFileNextRow(inFile):
    """
    Parses tab-sep file with headers in first line. Yields collection.namedtuples.
    Strips "#"-prefix from header line.
    inFile is a file name (can end with .gz) or an open file object.
    Cannot parse headers with non-alpha characters and fields that are not ASCII.
    Raises Exception if a line has fewer fields than the header line.
    """
    import gzip
    import logging

    if isinstance(inFile, str):
        if inFile.endswith(".gz"):
            fh = gzip.open(inFile, 'rb')
        else:
            fh = open(inFile)
    else:
        fh = inFile

    line1 = fh.readline()
    line1 = line1.rstrip("\n").lstrip("#")
    headers = line1.split("\t")

    Record = namedtuple('tsvRec', headers)
    for line in fh:
        if line.startswith("#"):
            continue
        line = line.rstrip("\n")
        # surplus tabs stay in the last field
        fields = line.split("\t", len(headers) - 1)
        try:
            rec = Record(*fields)
        except Exception as msg:
            logging.error("Exception occured while parsing line, %s" % msg)
            logging.error("Filename %s" % getattr(fh, "name", "(unknown)"))
            logging.error("Line was: %s" % line)
            logging.error("Does number of fields match headers?")
            logging.error("Headers are: %s" % headers)
            raise Exception("header count: %d != field count: %d wrong field count in line %s" % (len(headers), len(fields), line))
        yield rec


def parseDict(fname):
    """ Parse text file in format key<tab>value and return as dict key->val.
    fname can end with .gz.
    Does not abort on duplicate keys, for performance reasons. """
    import gzip
    d = {}

    if fname.endswith(".gz"):
        fh = gzip.open(fname)
    else:
        fh = open(fname)

    try:
        for line in fh:
            key, val = line.rstrip("\n").split("\t")
            d[key] = val
    finally:
        fh.close()
    return d


def cgiString(name, default=None):
    """ Get named cgi variable as a string, like lib/cheapcgi.c.
    Requires that cgiSetup() has been called. """
    return cgiArgs.getfirst(name, default=default)


def cgiGetAll():
    """ Return the cgi.FieldStorage with all CGI arguments (None before cgiSetup()). """
    return cgiArgs


def makeRandomKey(numBits=128+33):
    """ Return a random URL-friendly base64 string with at least numBits bits.
    Copied line-by-line from kent/src/lib/htmlshell.c:makeRandomKey """
    import base64
    numBytes = (numBits + 7) // 8  # round up to nearest whole byte.
    numBytes = ((numBytes + 2) // 3) * 3  # round up to the nearest multiple of 3 to avoid equals-char padding in base64 output
    with open("/dev/urandom", "rb") as f:  # random system device, read-only
        binaryString = f.read(numBytes)
    return base64.b64encode(binaryString, b"Aa")  # replace + and / with characters that are URL-friendly.


def cartDbLoadFromId(conn, table, cartId, oldCart):
    """ Like src/hg/lib/cart.c, opens cart table and parses cart contents given
    a cartId of the format 123123_csctac.

    The variables found are written into the dict oldCart, which is also
    returned. If cartId is None or not found in table, oldCart is not touched
    and an empty dict is returned.
    """
    import urlparse

    if cartId is None:
        return {}
    cartFields = cartId.split("_")
    if len(cartFields) != 2:
        errAbort("Could not parse identifier %s for cart table %s" % (cgi.escape(cartId), table))
    idStr, secureId = cartFields

    # the table name comes from inside the program, the values are placeholders
    query = "SELECT contents FROM " + table + " WHERE id=%(id)s and sessionKey=%(sessionKey)s"
    rows = sqlQuery(conn, query, {"id": idStr, "sessionKey": secureId})
    if len(rows) == 0:
        # silently ignore invalid cart IDs for now. Future code may want to generate a new cart.
        return {}

    cartList = urlparse.parse_qs(rows[0][0])

    # by default, python returns a dict with key -> list of vals. We need only the first one
    for key, vals in cartList.items():
        oldCart[key] = vals[0]
    return oldCart


# cached connection to the central database, set by hConnectCentral()
centralConn = None


def hConnectCentral():
    """ Similar to src/hg/lib/hdb.c:hConnectCentral. We use a much simpler
    cache, because we usually read all rows into memory.

    Aborts with an error message if one of the central.* settings is missing
    in hg.conf.
    """
    global centralConn
    if centralConn:
        return centralConn

    settings = {}
    for key in ("central.db", "central.user", "central.password", "central.host"):
        val = cfgOption(key)
        if val is None:
            errAbort("Could not find %s in hg.conf. Installation error." % key)
        settings[key] = val

    centralConn = sqlConnect(settings["central.db"], host=settings["central.host"],
                             user=settings["central.user"], passwd=settings["central.password"])
    return centralConn


def cartNew(conn, table):
    """ Create a new cart in table and return its ID, format <id>_<sessionKey> """
    sessionKey = makeRandomKey()
    # the table name comes from inside the program, the key is a placeholder
    query = "INSERT " + table + " VALUES(0,'',0,now(),now(),0,%s)"
    sqlQuery(conn, query, (sessionKey,))
    sqlLastId = conn.insert_id()
    return "%d_%s" % (sqlLastId, sessionKey)


def cartAndCookieSimple():
    """ Make the cart from the user cookie (user settings) and the hgsid CGI parameter (session settings, e.g. a browser tab).
    This is somewhat similar to cartAndCookie from hg/lib/cart.c
    Two important differences: this cart does not add all CGI arguments automatically.
    It also does not run cart.c:cartJustify, so track priorities are not applied.
    Also, if there is no hgsid parameter or no cookie, we do not create a new cart.
    """
    import Cookie

    # no cgiApoptosis yet - maybe needed in the future. see cart.c / cartNew

    hguid = None
    if "HTTP_COOKIE" in os.environ:
        cookies = Cookie.SimpleCookie(os.environ["HTTP_COOKIE"])
        cookieName = cfgOption("central.cookie", "hguid")
        morsel = cookies.get(cookieName)
        if morsel is not None:
            hguid = morsel.value  # cookies have values and other attributes. We only need the value

    hgsid = cgiString("hgsid")

    conn = hConnectCentral()

    # session settings are loaded last, so they override the user settings
    cart = {}
    cartDbLoadFromId(conn, "userDb", hguid, cart)
    cartDbLoadFromId(conn, "sessionDb", hgsid, cart)
    return cart


def cartString(cart, name, default=None):
    """ Get the string for variable name from the cart, or default.
    For better readability for programmers used to the C code. """
    return cart.get(name, default)


def cgiSetup():
    """ Do the usual browser CGI setup: parse the hg.conf file, parse the CGI
    variables, get the cart, do bottleneck delay. Returns the cart.

    This is not part of the C code (though it maybe should).
    """
    global cgiArgs
    global verboseLevel

    parseHgConf()
    cgiArgs = cgi.FieldStorage()  # Python has built-in cgiSpoof support: sys.argv[1] is the query string if run from the command line

    hgBotDelay()

    debugStr = cgiString("debug")
    if debugStr:
        try:
            verboseLevel = int(debugStr)
        except ValueError:
            # the value comes from the URL: ignore anything that is not a number
            pass

    return cartAndCookieSimple()

#if __file__=="__main__":
    #pass