cf32bd93dfcf0e307c889aaef4e5b1845bbd5a98 max Tue Jul 11 17:55:00 2017 -0700 Adding support for GBIB-style remote /gbdb files to hgGeneGraph. refs #19745 diff --git src/hg/pyLib/hgLib.py src/hg/pyLib/hgLib.py index 5e74625..1f4206b 100644 --- src/hg/pyLib/hgLib.py +++ src/hg/pyLib/hgLib.py @@ -15,31 +15,31 @@ # Non-standard imports. They need to be installed on the machine. # We provide a pre-compiled library as part of our cgi-bin distribution as a # fallback in the "pyLib" directory. The idea of having pyLib be the last # directory in sys.path is that the system MySQLdb takes precedence. try: import MySQLdb except: print "Installation error - could not load MySQLdb for Python. Please tell your system administrator to run " \ "one of these commands as root: 'yum install MySQL-python', 'apt-get install python-mysqldb' or 'pip install MySQL-python'." exit(0) # Imports from the Python 2.7 standard library # Please minimize global imports. Each library import can take up to 20msecs. import os, cgi, sys, logging -from os.path import join, isfile, normpath, abspath, dirname +from os.path import join, isfile, normpath, abspath, dirname, isdir, splitext from collections import namedtuple # activate debugging output only on dev import platform if "hgwdev" in platform.node(): import cgitb cgitb.enable() # debug level: a number. the higher, the more debug info is printed verboseLevel = None cgiArgs = None # like in the kent tree, we keep track of whether we have already output the content-type line contentLineDone = False @@ -443,30 +443,117 @@ """ Parse text file in format key<tab>value<newline> and return as dict key->val. Does not abort on duplicate keys, for performance reasons. 
""" import gzip d = {} if fname.endswith(".gz"): fh = gzip.open(fname) else: fh = open(fname) for line in fh: key, val = line.rstrip("\n").split("\t") d[key] = val return d +def gbdbReplace(fname, hgConfSetting): + " replace /gbdb/ in fname with hgConfSetting " + if not fname.startswith("/gbdb/"): + return None + + gbdbLoc = hgConf.get(hgConfSetting) + if gbdbLoc is None: + return None + + return fname.replace("/gbdb/", gbdbLoc) + +def getUdcCacheDir(): + " return the udc cache dir and create it if it doesn't exist " + udcDir = hgConf.get("udc.cacheDir", "/tmp/udcCache") + if not isdir(udcDir): + os.makedirs(udcDir) + return udcDir + +def hGbdbReplace(fname): + " hdb.c : get the local filename on disk or construct a URL to a /gbdb file and return it " + if not isfile(fname): + # try using gbdbLoc1 + fname2 = gbdbReplace(fname, "gbdbLoc1") + if fname2 and isfile(fname2): + return fname2 + + if not isfile(fname): + # try using gbdbLoc2, which can be a URL + fname2 = gbdbReplace(fname, "gbdbLoc2") + return fname2 + return fname + +def netUrlOpen(url): + " net.c: open a URL and return a file object " + import urllib2, time, errno + from socket import error as SocketError + + # let our webservers know that we are not a Firefox + opener = urllib2.build_opener() + opener.addheaders = [('User-Agent', 'Genome Browser pyLib/hgLib.py:netUrlOpen()')] + + resp = None + for x in range(5): # limit number of retries + try: + resp = opener.open(url) + break + except SocketError as e: + if e.errno != errno.ECONNRESET: + raise # re-raise any other error + time.sleep(1) + return resp + +def readSmallFile(fname): + """ read a small file, usually from /gbdb/, entirely into memory and return lines. + If the file doesn't exist, try gbdbLoc1, then gbdbLoc2 + and keep a local copy if only found on gbdbLoc2. + + This is similar but not the same as the UDC system in the kent C code, but + for small files and a complete read over it, the UDC system may be overkill + for Python. 
+ """ + fname = hGbdbReplace(fname) + + if fname.startswith("http"): + # download to local disk; local filename is the hash of the URL + import hashlib + udcCacheDir = getUdcCacheDir() + m = hashlib.md5() + m.update(fname) + fileExt = splitext(fname)[-1] + tmpFname = join(udcCacheDir, "hgLibCache_"+m.hexdigest()+"."+fileExt) + if not isfile(tmpFname): + data = netUrlOpen(fname).read() + fh = open(tmpFname, "w") + fh.write(data) + fh.close() + fname = tmpFname + + if fname.endswith(".gz"): + import gzip + fh = gzip.open(fname) + else: + fh = open(fname) + + lines = fh.read().splitlines() + return lines + def cgiString(name, default=None): " get named cgi variable as a string, like lib/cheapcgi.c " val = cgiArgs.getfirst(name, default=default) return val def cgiGetAll(): return cgiArgs def makeRandomKey(numBits=128+33): " copied line-by-line from kent/src/lib/htmlshell.c:makeRandomKey " import base64 numBytes = (numBits + 7) / 8 # round up to nearest whole byte. numBytes = ((numBytes+2)/3)*3 # round up to the nearest multiple of 3 to avoid equals-char padding in base64 output f = open("/dev/urandom", "r") # open random system device for read-only access. binaryString = f.read(numBytes)