880ad5f5e5b759ac892553985d6ab3721452e870
max
  Thu Dec 11 16:58:34 2014 -0800
largely revamped beacon, refs #14393
diff --git src/hg/hgBeacon/hgBeacon src/hg/hgBeacon/hgBeacon
new file mode 100755
index 0000000..ab3e4bc
--- /dev/null
+++ src/hg/hgBeacon/hgBeacon
@@ -0,0 +1,133 @@
+#!/usr/bin/env python
+# A beacon for tracks we can't distribute: LOVD and HGMD
+# see ga4gh.org/#/beacon and redmine 14393
+# Please do read the GA4GH website on the background of this CGI, otherwise
+# the functionality provided does not make any sense, without the context
+
+import cgi, subprocess, sys, cgitb, os, datetime
+from os.path import join, isfile
+cgitb.enable() # deactivate this on the RR, but useful for debugging: prints a http header on errors
+
+# dir for bigBed files and insdcToUcsc.tab
+DATADIR = "/gbdb/hg19/beacon"
+
+def errAbort(errMsg=None):
+    print "<html><body><pre>"
+    if errMsg is not None:
+        print "error: "+errMsg
+
+    host = os.environ.get("HTTP_HOST", "")
+
+    print """
+    A Genomics Alliance Beacon at UCSC. In collaboration with the Leiden Open Variation Database.
+
+    Data provided by Leiden Open Variation (Ivo Fokkema), Biobase/HGMD.
+
+    parameters:
+    - genome: ignored, always hg19/GRCh37
+    - chrom: either a number or X/Y or UCSC format like "chr1" or INSDC
+      accession like CM000663.1
+    - dataset: either lovd or hgmd. This parameter can also be called "track"
+      (backwards compatible)
+    - pos: 0-based position, like 10042537
+    - allele: A,C,T,G. Ignored for HGMD (HGMD gives out only positions, not the
+      alleles, they are on the website)
+
+    If the allele was found, returns the string "Yes", otherwise "No".
+
+    For HGMD, an overlap with a variant is enough to trigger a "Yes", we don't have the alleles.
+    For LOVD, an overlap with a short deletion or duplication returns "Maybe". 
+
+    Examples:
+    http://%(host)s/cgi-bin/hgBeacon/hgBeacon?dataset=lovd&chrom=chr1&pos=808921&allele=A
+    - returns "Yes"
+
+    http://%(host)s/cgi-bin/hgBeacon/hgBeacon?dataset=hgmd?chrom=1&pos=2160493&allele=T
+    - returns "Maybe", because the allele is in a small deletion
+
+    Number of variants:
+    - LOVD: 180k, version from Nov 2014
+    - HGMD: 97k, "public" version from end of 2014
+    """ % locals()
+    print "</body></pre></html>"
+    sys.exit(0)
+
+# parse INSDC -> ucsc chrom name table
+insdcToUcsc = {}
+for line in open(join(DATADIR, "insdcToUcsc.tab")):
+    name, chrom = line.rstrip("\n").split("\t")
+    insdcToUcsc[name] = chrom
+
+print "Content-Type: text/html\n"
+
+form = cgi.FieldStorage()
+
+track = form.getfirst("dataset")
+if track is None:
+    track = form.getfirst("track")
+
+chrom = form.getfirst("chrom")
+pos = form.getfirst("pos")
+allele = form.getfirst("allele")
+
+# check parameters
+if chrom==None:
+    errAbort()
+if pos==None or not pos.isdigit():
+    errAbort("pos parameter is not an int")
+if allele==None:
+    errAbort("need allele parameter")
+if chrom.isdigit() or chrom in "XY":
+    chrom = "chr"+chrom
+if track is None:
+    errAbort("missing dataset parameter")
+if not track.isalpha():
+    errAbort("dataset/track parameter must contain only alphanumberic characters")
+
+# convert chrom from genbank acc to ucsc string like "chr1"
+if chrom not in insdcToUcsc.values():
+    chrom = insdcToUcsc.get(chrom, None)
+if chrom==None:
+    errAbort( "ERROR: illegal chrom identifier %s" % chrom)
+    
+trackFname = join(DATADIR, track+".bb")
+if not isfile(trackFname):
+    errAbort("illegal dataset/track name %s" % track)
+
+if allele==None or len(allele)!=1 or allele[0].lower() not in "actg":
+    errAbort("invalid allele, can only be a single letter and ACTG")
+    
+# change parameters slightly
+allele = allele.upper()
+pos = int(pos)
+
+# run tool
+args = ["./bigBedToBed", trackFname, "stdout", "-chrom=%s"%chrom, "-start=%d"%pos, "-end=%d"%(pos+1)]
+p = subprocess.Popen(args, stdout=subprocess.PIPE)
+
+# parse output
+isMaybe = False
+for line in p.stdout:
+    fs = line.rstrip("\n").split("\t")
+    if fs[-1]=="maybe":
+        isMaybe=True
+    bedLen = int(fs[2])-int(fs[1])
+    # uniprot features are longer than one basepair
+    if bedLen != 1 and track != "uniprot":
+        continue
+    # hgmd and uniprot do not have alleles
+    if fs[3][-1]==allele or track in ["hgmd", "uniprot"]:
+        print "Yes"
+        sys.exit(0)
+
+if isMaybe:
+    print "Maybe"
+    sys.exit(0)
+
+# cannot check exit code, no overlap makes bigBedToBed return an error, too
+# so the following does not work
+#if p.returncode != 0:
+    #print("Unable to run command")
+    #sys.exit(0)
+
+print "No"