a2be3b4aee520bfa47602e50b4825b43e9854690 max Thu Dec 11 17:06:43 2014 -0800 small formatting changes in last commit diff --git src/hg/hgBeacon/hgBeacon src/hg/hgBeacon/hgBeacon index ab3e4bc..6e9b759 100755 --- src/hg/hgBeacon/hgBeacon +++ src/hg/hgBeacon/hgBeacon @@ -1,133 +1,133 @@ #!/usr/bin/env python # A beacon for tracks we can't distribute: LOVD and HGMD # see ga4gh.org/#/beacon and redmine 14393 # Please do read the GA4GH website on the background of this CGI, otherwise # the functionality provided does not make any sense, without the context import cgi, subprocess, sys, cgitb, os, datetime from os.path import join, isfile cgitb.enable() # deactivate this on the RR, but useful for debugging: prints a http header on errors # dir for bigBed files and insdcToUcsc.tab DATADIR = "/gbdb/hg19/beacon" def errAbort(errMsg=None): print "
"
if errMsg is not None:
print "error: "+errMsg
host = os.environ.get("HTTP_HOST", "")
print """
- A Genomics Alliance Beacon at UCSC. In collaboration with the Leiden Open Variation Database.
+ A Genomics Alliance Beacon at UCSC.
- Data provided by Leiden Open Variation (Ivo Fokkema), Biobase/HGMD.
+ In collaboration with the Leiden Open Variation Database and Biobase/HGMD.
parameters:
- genome: ignored, always hg19/GRCh37
- chrom: either a number or X/Y or UCSC format like "chr1" or INSDC
accession like CM000663.1
- dataset: either lovd or hgmd. This parameter can also be called "track"
(backwards compatible)
- pos: 0-based position, like 10042537
- allele: A,C,T,G. Ignored for HGMD (HGMD gives out only positions, not the
- alleles, they are on the website)
+ alleles, they are only on their website)
If the allele was found, returns the string "Yes", otherwise "No".
For HGMD, an overlap with a variant is enough to trigger a "Yes", we don't have the alleles.
For LOVD, an overlap with a short deletion or duplication returns "Maybe".
Examples:
http://%(host)s/cgi-bin/hgBeacon/hgBeacon?dataset=lovd&chrom=chr1&pos=808921&allele=A
- returns "Yes"
http://%(host)s/cgi-bin/hgBeacon/hgBeacon?dataset=hgmd?chrom=1&pos=2160493&allele=T
- returns "Maybe", because the allele is in a small deletion
Number of variants:
- LOVD: 180k, version from Nov 2014
- HGMD: 97k, "public" version from end of 2014
""" % locals()
print ""
sys.exit(0)
# parse INSDC -> ucsc chrom name table
insdcToUcsc = {}
for line in open(join(DATADIR, "insdcToUcsc.tab")):
name, chrom = line.rstrip("\n").split("\t")
insdcToUcsc[name] = chrom
print "Content-Type: text/html\n"
form = cgi.FieldStorage()
track = form.getfirst("dataset")
if track is None:
track = form.getfirst("track")
chrom = form.getfirst("chrom")
pos = form.getfirst("pos")
allele = form.getfirst("allele")
# check parameters
if chrom==None:
errAbort()
if pos==None or not pos.isdigit():
errAbort("pos parameter is not an int")
if allele==None:
errAbort("need allele parameter")
if chrom.isdigit() or chrom in "XY":
chrom = "chr"+chrom
if track is None:
errAbort("missing dataset parameter")
if not track.isalpha():
errAbort("dataset/track parameter must contain only alphanumberic characters")
# convert chrom from genbank acc to ucsc string like "chr1"
if chrom not in insdcToUcsc.values():
chrom = insdcToUcsc.get(chrom, None)
if chrom==None:
errAbort( "ERROR: illegal chrom identifier %s" % chrom)
trackFname = join(DATADIR, track+".bb")
if not isfile(trackFname):
errAbort("illegal dataset/track name %s" % track)
if allele==None or len(allele)!=1 or allele[0].lower() not in "actg":
errAbort("invalid allele, can only be a single letter and ACTG")
# change parameters slightly
allele = allele.upper()
pos = int(pos)
# run tool
args = ["./bigBedToBed", trackFname, "stdout", "-chrom=%s"%chrom, "-start=%d"%pos, "-end=%d"%(pos+1)]
p = subprocess.Popen(args, stdout=subprocess.PIPE)
# parse output
isMaybe = False
for line in p.stdout:
fs = line.rstrip("\n").split("\t")
if fs[-1]=="maybe":
isMaybe=True
bedLen = int(fs[2])-int(fs[1])
# uniprot features are longer than one basepair
if bedLen != 1 and track != "uniprot":
continue
# hgmd and uniprot do not have alleles
if fs[3][-1]==allele or track in ["hgmd", "uniprot"]:
print "Yes"
sys.exit(0)
if isMaybe:
print "Maybe"
sys.exit(0)
# cannot check exit code, no overlap makes bigBedToBed return an error, too
# so the following does not work
#if p.returncode != 0:
#print("Unable to run command")
#sys.exit(0)
print "No"