c7da6212382f6efa86b8c2f4ca9f267d8273bfdc max Mon Mar 9 03:08:07 2015 -0700 fixing the help message and the tester in hgBeacon. diff --git src/hg/hgBeacon/hgBeacon src/hg/hgBeacon/hgBeacon index 6f2c39e..a3eeba1 100755 --- src/hg/hgBeacon/hgBeacon +++ src/hg/hgBeacon/hgBeacon @@ -1,23 +1,23 @@ #!/usr/bin/env python # A beacon allows very limited queries against a set of variants without allowing someone # to download the list of variants # see ga4gh.org/#/beacon (UCSC redmine 14393) import cgi, subprocess, sys, cgitb, os, socket, time, json, glob, urlparse, time import sqlite3, gzip, optparse, gc, string, re, socket -from os.path import join, isfile, dirname, isdir, basename +from os.path import join, isfile, dirname, isdir, basename, exists # general info about this beacon hostName = os.environ.get("HTTP_HOST", "localhost") homepage = "http://%s/cgi-bin/hgBeacon" % hostName beaconDesc = { "id":"ucsc-browser", \ "name":"Genome Browser", \ "organization" : "UCSC", \ "description" : "UCSC Genome Browser", "api" : "0.2", "homepage": homepage } # the list genome reference we support beaconReferences = ["GRCh37"] @@ -101,39 +101,40 @@ def errAbort(errMsg=None): " exit with error message " if errMsg == None: sys.exit(0) helpUrl = "http://%s/cgi-bin/hgBeacon?page=help" % hostName ret = {"errormsg":errMsg, "more_info":"for a complete description of the parameters, read the help message at %s" % helpUrl} printJson(ret) sys.exit(0) def printHelp(): " print help text to stdout and exit " + print "Content-Type: text/html\n" print "" host = hostName # convert from global to local var if host.endswith(".ucsc.edu"): helpDir = "/gbdb/hg19/beacon" else: helpDir = dirname(__file__) helpPath = join(helpDir, "help.txt") - if not isfile(helpPath): + if not exists(helpPath): errAbort("no file %s found. The beacon is not activated on this machine" % helpPath) helpText = open(helpPath).read() print helpText % locals() print "" sys.exit(0) def dataSetResources(): " Returns the list of DataSetResources " totalSize = 0 dsrList = [] conn = dbOpen(mustExist=True) for tableName in dbListTables(conn): rows = dbQuery(conn, "SELECT COUNT(*) from %s" % tableName, None) itemCount = rows[0][0] @@ -145,129 +146,130 @@ totalSize += itemCount return totalSize, dsrList def beaconInfo(): " return a beaconInfo dict " size, dsrList = dataSetResources() return \ { "beacon": beaconDesc, "references": beaconReferences, "datasets": dsrList, "size": size } def printJson(data): + print "Content-Type: application/json\n" print json.dumps(data, indent=4, sort_keys=True,separators=(',', ': ')) def hgBotDelay(): " implement bottleneck delay, get bottleneck server from hg.conf " global hgConf hgConf = parseHgConf("..") if "bottleneck.host" not in hgConf: return ip = os.environ["REMOTE_ADDR"] delay = queryBottleneck(hgConf["bottleneck.host"], hgConf["bottleneck.port"], ip) if delay>10000: time.sleep(delay/1000.0) if delay>20000: - print("Blocked") + errAbort("IP blocked") sys.exit(0) def checkParams(chrom, pos, allele, reference, track): " make sure the parameters follow the spec " if reference==None or reference=="": reference="GRCh37" if reference not in beaconReferences: errAbort("invalid 'reference' parameter, valid ones are %s" % ",".join(beaconReferences)) if chrom==None or chrom=="": errAbort("missing chromosome parameter") if allele==None or allele=="": - errAbort("missing allele parameter") + errAbort("missing alternateBases parameter") allele = allele.upper() if not (re.compile("[ACTG]+").match(allele)!=None or \ re.compile("I[ACTG]+").match(allele)!=None or \ re.compile("D[0-9]+").match(allele)!=None): - errAbort("invalid allele parameter, can only be a [ACTG]+ or I[ACTG]+ or D[0-9]+") - if track is not None: + errAbort("invalid alternateBases parameter, can only be a [ACTG]+ or I[ACTG]+ or D[0-9]+") + if track is not None and track!="": if not track.isalnum(): errAbort("'dataset' parameter must contain only alphanumeric characters") if len(track)>100: errAbort("'dataset' parameter must not be longer than 100 chars") if pos==None or not pos.isdigit(): errAbort("'position' parameter is not a number") pos = int(pos) # convert chrom to hg19 format if chrom==None: errAbort( "missing chromosome parameter") if not ((chrom.isdigit() and int(chrom)>=1 and int(chrom)<=22) or chrom in ["X","Y","M","test"]): errAbort( "invalid chromosome name %s" % chrom) return chrom, pos, allele, reference, track def lookupAllele(chrom, pos, allele, reference, dataset): " check if an allele is present in a sqlite DB " conn = dbOpen(mustExist=True) tableList = dbListTables(conn) - if dataset!=None: + if dataset!=None and dataset!="": if dataset not in tableList: errAbort("dataset %s is not present on this server" % dataset) tableList = [dataset] for tableName in tableList: cur = conn.cursor() if tableName in NoAltDataSets: # some datasets don't have alt alleles, e.g. HGMD sql = "SELECT * from %s WHERE chrom=? AND pos=?" % tableName cur.execute(sql, (chrom, pos)) else: sql = "SELECT * from %s WHERE chrom=? AND pos=? AND allele=?" % tableName cur.execute(sql, (chrom, pos, allele)) row = cur.fetchone() if row!=None: return "true" return "false" -def lookupAlleleJson(chrom, pos, allele, reference, dataset): +def lookupAlleleJson(chrom, pos, altBases, refBases, reference, dataset): " call lookupAllele and wrap the result into dictionaries " - chrom, pos, allele, reference, dataset = checkParams(chrom, pos, allele, reference, dataset) + chrom, pos, altBases, reference, dataset = checkParams(chrom, pos, altBases, reference, dataset) - exists = lookupAllele(chrom, pos, allele, reference, dataset) + exists = lookupAllele(chrom, pos, altBases, reference, dataset) if chrom=="test" and pos==0: exists = "true" query = { - "allele": allele, + "alternateBases": altBases, + "referenceBases" : refBases, "chromosome": chrom.replace("chr",""), "position": pos, "reference": reference, "dataset": dataset } ret = {"beacon" : beaconDesc, "query" : query, "response" : {"exists":exists} } return ret def main(): # detect if running under apache or was run from command line if 'REQUEST_METHOD' in os.environ: - fqdn = socket.getfqdn() - if not (fqdn.startswith("hgw") and fqdn.endswith("ucsc.edu")) \ - or fqdn.startswith("hgwdev."): + if not (hostName.startswith("hgw") and hostName.endswith("ucsc.edu")) \ + or hostName.startswith("hgwdev."): # enable special CGI error handler not on the RR, but on hgwdev cgitb.enable() mainCgi() else: mainCommandLine() def parseArgs(): " parse command line options into args and options " parser = optparse.OptionParser("""usage: %prog [options] filename [datasetName] - import VCF or BED files into the beacon database. - parameter 'datasetName' is optional and defaults to 'defaultDataset'. - any existing dataset of the same name will be overwritten - the data is written to beaconData.sqlite. You can use 'sqlite3' to inspect the data file. - the input file can be gzipped """) @@ -290,35 +292,38 @@ _tableDef = ( 'CREATE TABLE IF NOT EXISTS %s ' '(' ' chrom text,' # chromosome ' pos int,' # start position, 0-based ' allele text' # alternate allele, can also be IATG = insertion of ATG or D15 = deletion of 15 bp ')') conn.execute(_tableDef % tableName) conn.commit() #' PRIMARY KEY (chrom, pos, allele) ' def dbOpen(mustExist=False): " open the sqlite db and return a DB connection object " dbDir = dirname(__file__) # directory where script is located - if hostName.endswith("ucsc.edu"): # data is not in CGI directory at UCSC + fqdn = socket.getfqdn() + if hostName.endswith("ucsc.edu") or fqdn.endswith(".ucsc.edu") or fqdn.endswith(".sdsc.edu"): + # at UCSC, the data is not in the CGI-BIN directory dbDir = "/gbdb/hg19/beacon/" + dbName = join(dbDir, SQLITEDB) - if not isfile(dbName) and mustExist: + if not exists(dbName) and mustExist: errAbort("Database file %s does not exist. This beacon is not serving any data." % dbName) conn = sqlite3.Connection(dbName) return conn def dbListTables(conn): " return list of tables in sqlite db " cursor = conn.cursor() cursor.execute("SELECT name FROM sqlite_master WHERE type='table';") rows = cursor.fetchall() tables = [] for row in rows: tables.append(row[0]) return tables def dbQuery(conn, query, params): @@ -501,52 +506,53 @@ printTime(insertTime, indexTime, len(alleles)) def mainCommandLine(): " main function if called from command line " args, options = parseArgs() fileName = args[0] if len(args)==2: datasetName = args[1] else: datasetName = "defaultDataset" importFile(fileName, datasetName, options.format) def mainCgi(): - print "Content-Type: text/html\n" url = os.environ["REQUEST_URI"] parsedUrl = urlparse.urlparse(url) # get CGI parameters form = cgi.FieldStorage() # react based on symlink that was used to call this script page = parsedUrl[2].split("/")[-1] # last part of path is REST endpoint if page=="info": printJson(beaconInfo()) sys.exit(0) hgBotDelay() chrom = form.getfirst("chromosome") pos = form.getfirst("position") - allele = form.getfirst("allele") + refBases = form.getfirst("referenceBases") + altBases = form.getfirst("alternateBases") reference = form.getfirst("reference") dataset = form.getfirst("dataset") format = form.getfirst("format") - if chrom==None and pos==None and allele==None: + if chrom==None and pos==None and altBases==None: printHelp() sys.exit(0) - ret = lookupAlleleJson(chrom, pos, allele, reference, dataset) + ret = lookupAlleleJson(chrom, pos, altBases, refBases, reference, dataset) if format=="text": + print "Content-Type: text/html\n" print ret["response"]["exists"] else: printJson(ret) if __name__=="__main__": # deactivate this on the RR, but useful for debugging: prints a http header # on errors main()