eb74447f35b60ac95ab98c0cde467da9b8c501e1
max
  Wed Nov 19 03:14:55 2014 -0800
the UCSC GA4GH beacon, refs #14393
diff --git src/beacon/query src/beacon/query
new file mode 100755
index 0000000..036d9f4
--- /dev/null
+++ src/beacon/query
@@ -0,0 +1,149 @@
+#!/usr/bin/env python
+# a beacon for LOVD, ClinVar, UniProt and HGMD, see ga4gh.org/#/beacon
+import cgi, subprocess, sys, cgitb, os, datetime, gdbm
+cgitb.enable()
+
+trackFnames = {
+"clinvar" : "clinvarMain.bb",
+"uniprot" : "spMut.bb",
+"hgmd" : "hgmd.bb",
+"lovd" : "lovd.bb"
+}
+
+MAXREQPERDAY=1000
+
+def checkReqCount():
+    " check if IP address has exceeded max number of requests today "
+    accCounts = gdbm.open("accCounts.gdbm", "c")
+
+    ipAddr = os.environ["REMOTE_ADDR"]
+    dateStr = datetime.date.today().isoformat()
+    accKey = ipAddr+"|"+dateStr
+    try:
+        oldCount = int(accCounts[accKey])
+    except KeyError:
+        oldCount = 0
+
+    newCount = oldCount+1
+    if newCount > MAXREQPERDAY:
+        print("No more requests left today")
+        sys.exit(0)
+
+    accCounts[accKey]=str(newCount)
+    accCounts.close()
+
+def getTrackFromScript():
+    " check which track name occurs in script name, to make the track captured by symlinks "
+    name = sys.argv[0].lower()
+    for trackName in trackFnames:
+        if trackName in name:
+            return trackName
+
+def errAbort(errMsg=None):
+    print "<html><body><pre>"
+    if errMsg is not None:
+        print "error: "+errMsg
+
+    print """
+    A Genomics Alliance Beacon at UCSC. In collaboration with the Leiden Open Variation Database.
+
+    Data provided by Leiden Open Variation (Ivo Fokkema), NCBI ClinVar, UniProt.
+
+    parameters:
+    - chrom: either a number or X/Y or UCSC format like "chr1" or INSDC accession like CM000663.1
+    - track: clinvar, uniprot or lovd
+    - pos: 0-based position, like 10042537
+    - allele: A,C,T,G. Ignored for UniProt (UniProt stores only amino acid changes)
+
+    If the allele was found in the track, returns the string "Yes", otherwise "No".
+
+    For UniProt, an overlap with a amino acid change is enough to trigger "Yes".
+    For LOVD, an overlap with a deletion or duplication returns "Maybe". 
+
+    Examples:
+    http://hgwdev-max.cse.ucsc.edu/cgi-bin/beacon/query?track=lovd&chrom=chr1&pos=808921&allele=T
+    http://hgwdev-max.cse.ucsc.edu/cgi-bin/beacon/query?track=clinvar&chrom=chr1&pos=10042537&allele=T
+    http://hgwdev-max.cse.ucsc.edu/cgi-bin/beacon/query?track=uniprot&chrom=CM000663.1&pos=977027&allele=T
+
+    Alternative URLs are queryLovd, queryClinvar and queryUniprot, e.g.
+    http://hgwdev-max.cse.ucsc.edu/cgi-bin/beacon/queryLovd?chrom=1&pos=2160493&allele=T
+    - returns "Maybe"
+
+    Number of variants:
+    - LOVD: 180k
+    - ClinVar: 30k
+    - HGMD: 90k
+    - UniProt: 47k
+    """
+    print "</body></pre></html>"
+    sys.exit(0)
+
+# parse INSDC -> ucsc chrom name table
+insdcToUcsc = {}
+for line in open("insdcToUcsc.tab"):
+    name, chrom = line.rstrip("\n").split("\t")
+    insdcToUcsc[name] = chrom
+
+print "Content-Type: text/html\n"
+form = cgi.FieldStorage()
+
+track = form.getfirst("track")
+if track is None:
+    track = getTrackFromScript()
+
+chrom = form.getfirst("chrom")
+pos = form.getfirst("pos")
+nucl = form.getfirst("allele")
+
+# check parameters
+if chrom==None:
+    errAbort()
+if not pos.isdigit():
+    errAbort("pos is not an int")
+if chrom.isdigit() or chrom in "XY":
+    chrom = "chr"+chrom
+
+# convert chrom from genbank acc to ucsc string like "chr1"
+if chrom not in insdcToUcsc.values():
+    chrom = insdcToUcsc.get(chrom, None)
+if chrom==None:
+    errAbort( "ERROR: illegal chrom identifier %s" % chrom)
+    
+if track not in trackFnames:
+    errAbort("illegal track name")
+
+if nucl==None or len(nucl)!=1 or nucl[0].lower() not in "actg":
+    errAbort("invalid allele, can only be ACTG")
+    
+checkReqCount()
+
+# change parameters slightly
+nucl = nucl.upper()
+fname = trackFnames[track]
+pos = int(pos)
+
+# run tool
+args = ["./bigBedToBed", fname, "stdout", "-chrom=%s"%chrom, "-start=%d"%pos, "-end=%d"%(pos+1)]
+p = subprocess.Popen(args, stdout=subprocess.PIPE)
+
+# parse output
+isMaybe = False
+for line in p.stdout:
+    fs = line.rstrip("\n").split("\t")
+    if fs[-1]=="maybe":
+        isMaybe=True
+    if int(fs[2])-int(fs[1])!=1 and track != "uniprot":
+        continue
+    if fs[3][-1]==nucl or track in ["hgmd", "uniprot"]:
+        print "Yes"
+        sys.exit(0)
+if isMaybe:
+    print "Maybe"
+    sys.exit(0)
+
+# cannot check exit code, no overlap triggers error
+#if p.returncode != 0:
+    #print("Unable to run command")
+    #sys.exit(0)
+
+print "No"