b88f6844c2f27ff5d7df6e211d4497d8cc3a2bdf
max
  Thu Dec 18 06:38:40 2014 -0800
making hgBeacon source slightly better
diff --git src/hg/hgBeacon/hgBeacon src/hg/hgBeacon/hgBeacon
index 6e9b759..82e8be6 100755
--- src/hg/hgBeacon/hgBeacon
+++ src/hg/hgBeacon/hgBeacon
@@ -1,28 +1,84 @@
 #!/usr/bin/env python
 # A beacon for tracks we can't distribute: LOVD and HGMD
 # see ga4gh.org/#/beacon and redmine 14393
 # Please do read the GA4GH website on the background of this CGI, otherwise
 # the functionality provided does not make any sense, without the context
 
-import cgi, subprocess, sys, cgitb, os, datetime
-from os.path import join, isfile
+import cgi, subprocess, sys, cgitb, os, datetime, socket
+from os.path import join, isfile, dirname, isdir
 cgitb.enable() # deactivate this on the RR, but useful for debugging: prints a http header on errors
 
 # dir for bigBed files and insdcToUcsc.tab
 DATADIR = "/gbdb/hg19/beacon"
 
+# cache of hg.conf dict
+hgConf = None
+
+def queryBottleneck(host, port, ip):
+    " contact bottleneck server to get current delay time "
+    # send ip address
+    s =  socket.socket()
+    s.connect((host, int(port)))
+    msg = ip
+    d = chr(len(msg))+msg
+    s.send(d)
+
+    # read delay time
+    expLen = ord(s.recv(1))
+    totalLen = 0
+    buf = list()
+    while True:
+        resp = s.recv(1024)
+        buf.append(resp)
+        totalLen+= len(resp)
+        if totalLen==expLen:
+            break
+    return int("".join(buf))
+
+def parseConf(fname):
+    " parse a hg.conf style file, return as dict key -> value (all strings) "
+    conf = {}
+    for line in open(fname):
+        line = line.strip()
+        if line.startswith("#"):
+            continue
+        elif line.startswith("include "):
+            inclFname = line.split()[1]
+            inclPath = join(dirname(fname), inclFname)
+            if isfile(inclPath):
+                inclDict = parseConf(inclPath)
+                conf.update(inclDict)
+        elif "=" in line: # string search for "="
+            key, value = line.split("=")
+            conf[key] = value
+    return conf
+
+def parseHgConf():
+    """ return hg.conf as dict key:value """
+    global hgConf
+    if hgConf is not None:
+        return hgConf
+
+    hgConf = dict() # python dict = hash table
+
+    confDir = dirname(__file__)
+    fname = join(confDir, "..", "hg.conf")
+    hgConf = parseConf(fname)
+
+    return hgConf
+
 def errAbort(errMsg=None):
     print "<html><body><pre>"
     if errMsg is not None:
         print "error: "+errMsg
 
     host = os.environ.get("HTTP_HOST", "")
 
     print """
     A Genomics Alliance Beacon at UCSC. 
     
     In collaboration with the Leiden Open Variation Database and Biobase/HGMD.
 
     parameters:
     - genome: ignored, always hg19/GRCh37
     - chrom: either a number or X/Y or UCSC format like "chr1" or INSDC
@@ -40,75 +96,87 @@
 
     Examples:
     http://%(host)s/cgi-bin/hgBeacon/hgBeacon?dataset=lovd&chrom=chr1&pos=808921&allele=A
     - returns "Yes"
 
     http://%(host)s/cgi-bin/hgBeacon/hgBeacon?dataset=hgmd?chrom=1&pos=2160493&allele=T
     - returns "Maybe", because the allele is in a small deletion
 
     Number of variants:
     - LOVD: 180k, version from Nov 2014
     - HGMD: 97k, "public" version from end of 2014
     """ % locals()
     print "</body></pre></html>"
     sys.exit(0)
 
+def main():
+    # send ip address of client to bottleneck and get delay
+    print "Content-Type: text/html\n"
+
+    global hgConf
+    hgConf = parseHgConf()
+    ip = os.environ["REMOTE_ADDR"]
+    delay = queryBottleneck(hgConf["bottleneck.host"], hgConf["bottleneck.port"], ip)
+
+    if not isdir(DATADIR):
+        errAbort("hgBeacon is intended to be run at UCSC. <br> On this "
+            "machine the directory %s does not exist so the program will not run.<p>" \
+            % DATADIR)
+
     # parse INSDC -> ucsc chrom name table
     insdcToUcsc = {}
     for line in open(join(DATADIR, "insdcToUcsc.tab")):
         name, chrom = line.rstrip("\n").split("\t")
         insdcToUcsc[name] = chrom
 
-print "Content-Type: text/html\n"
-
+    # get CGI parameters
     form = cgi.FieldStorage()
 
     track = form.getfirst("dataset")
     if track is None:
         track = form.getfirst("track")
 
     chrom = form.getfirst("chrom")
     pos = form.getfirst("pos")
     allele = form.getfirst("allele")
 
     # check parameters
     if chrom==None:
         errAbort()
     if pos==None or not pos.isdigit():
         errAbort("pos parameter is not an int")
     if allele==None:
         errAbort("need allele parameter")
+    if allele==None or len(allele)!=1 or allele[0].lower() not in "actg":
+        errAbort("invalid allele, can only be a single letter and ACTG")
     if chrom.isdigit() or chrom in "XY":
         chrom = "chr"+chrom
     if track is None:
         errAbort("missing dataset parameter")
     if not track.isalpha():
         errAbort("dataset/track parameter must contain only alphanumberic characters")
 
+    trackFname = join(DATADIR, track+".bb")
+    if not isfile(trackFname):
+        errAbort("illegal dataset/track name %s" % track)
+
     # convert chrom from genbank acc to ucsc string like "chr1"
     if chrom not in insdcToUcsc.values():
         chrom = insdcToUcsc.get(chrom, None)
     if chrom==None:
         errAbort( "ERROR: illegal chrom identifier %s" % chrom)
         
-trackFname = join(DATADIR, track+".bb")
-if not isfile(trackFname):
-    errAbort("illegal dataset/track name %s" % track)
-
-if allele==None or len(allele)!=1 or allele[0].lower() not in "actg":
-    errAbort("invalid allele, can only be a single letter and ACTG")
-    
     # change parameters slightly
     allele = allele.upper()
     pos = int(pos)
 
     # run tool
     args = ["./bigBedToBed", trackFname, "stdout", "-chrom=%s"%chrom, "-start=%d"%pos, "-end=%d"%(pos+1)]
     p = subprocess.Popen(args, stdout=subprocess.PIPE)
 
     # parse output
     isMaybe = False
     for line in p.stdout:
         fs = line.rstrip("\n").split("\t")
         if fs[-1]=="maybe":
             isMaybe=True
         bedLen = int(fs[2])-int(fs[1])
@@ -119,15 +187,17 @@
         if fs[3][-1]==allele or track in ["hgmd", "uniprot"]:
             print "Yes"
             sys.exit(0)
 
     if isMaybe:
         print "Maybe"
         sys.exit(0)
 
     # cannot check exit code, no overlap makes bigBedToBed return an error, too
     # so the following does not work
     #if p.returncode != 0:
         #print("Unable to run command")
         #sys.exit(0)
 
     print "No"
+
+main()