880ad5f5e5b759ac892553985d6ab3721452e870 max Thu Dec 11 16:58:34 2014 -0800 largely revamped beacon, refs #14393 diff --git src/beacon/query src/beacon/query deleted file mode 100755 index e951874..0000000 --- src/beacon/query +++ /dev/null @@ -1,156 +0,0 @@ -#!/usr/bin/env python -# a beacon for LOVD, ClinVar, UniProt and HGMD, see ga4gh.org/#/beacon -import cgi, subprocess, sys, cgitb, os, datetime, gdbm -from os.path import join -cgitb.enable() - -# dir for bigBed files -DATADIR = "/gbdb/hg19/beacon" - -trackFnames = { -"clinvar" : "clinvarMain.bb", -"uniprot" : "spMut.bb", -"hgmd" : "hgmd.bb", -"lovd" : "lovd.bb" -} - -MAXREQPERDAY=1000 - -def checkReqCount(): - " check if IP address has exceeded max number of requests today " - accCounts = gdbm.open("accCounts.gdbm", "c") - - ipAddr = os.environ["REMOTE_ADDR"] - dateStr = datetime.date.today().isoformat() - accKey = ipAddr+"|"+dateStr - try: - oldCount = int(accCounts[accKey]) - except KeyError: - oldCount = 0 - - newCount = oldCount+1 - if newCount > MAXREQPERDAY: - print("No more requests left today") - sys.exit(0) - - accCounts[accKey]=str(newCount) - accCounts.close() - -def getTrackFromScript(): - " check which track name occurs in script name, allows symlinks to cgi that include the track name " - name = sys.argv[0].lower() - for trackName in trackFnames: - if trackName in name: - return trackName - -def errAbort(errMsg=None): - print "
"
- if errMsg is not None:
- print "error: "+errMsg
-
- host = os.environ.get("HTTP_HOST", "")
-
- print """
- A Genomics Alliance Beacon at UCSC. In collaboration with the Leiden Open Variation Database.
-
- Data provided by Leiden Open Variation (Ivo Fokkema), NCBI ClinVar, UniProt, Biobase/HGMD.
-
- parameters:
- - chrom: either a number or X/Y or UCSC format like "chr1" or INSDC accession like CM000663.1
- - track: clinvar, uniprot or lovd
- - pos: 0-based position, like 10042537
- - allele: A,C,T,G. Ignored for UniProt (UniProt stores only amino acid changes)
-
- If the allele was found in the track, returns the string "Yes", otherwise "No".
-
- For UniProt and HGMD, an overlap with an amino acid change is enough to trigger "Yes".
- Therefore, the alleles are ignored for UniProt and HGMD.
- For LOVD, an overlap with a deletion or duplication returns "Maybe".
-
- Examples:
- http://%(host)s/cgi-bin/beacon/query?track=lovd&chrom=chr1&pos=808921&allele=T
- http://%(host)s/cgi-bin/beacon/query?track=clinvar&chrom=chr1&pos=10042537&allele=T
- http://%(host)s/cgi-bin/beacon/query?track=uniprot&chrom=CM000663.1&pos=977027&allele=T
-
- Alternative URLs are queryLovd, queryClinvar and queryUniprot, e.g.
- http://%(host)s/cgi-bin/beacon/queryLovd?chrom=1&pos=2160493&allele=T
- - returns "Maybe"
-
- Number of variants:
- - LOVD: 180k
- - ClinVar: 30k
- - HGMD: 90k
- - UniProt: 47k
- """ % locals()
- print ""
- sys.exit(0)
-
-# parse INSDC -> ucsc chrom name table
-insdcToUcsc = {}
-for line in open("insdcToUcsc.tab"):
- name, chrom = line.rstrip("\n").split("\t")
- insdcToUcsc[name] = chrom
-
-print "Content-Type: text/html\n"
-form = cgi.FieldStorage()
-
-track = form.getfirst("track")
-if track is None:
- track = getTrackFromScript()
-
-chrom = form.getfirst("chrom")
-pos = form.getfirst("pos")
-nucl = form.getfirst("allele")
-
-# check parameters
-if chrom==None:
- errAbort()
-if not pos.isdigit():
- errAbort("pos is not an int")
-if chrom.isdigit() or chrom in "XY":
- chrom = "chr"+chrom
-
-# convert chrom from genbank acc to ucsc string like "chr1"
-if chrom not in insdcToUcsc.values():
- chrom = insdcToUcsc.get(chrom, None)
-if chrom==None:
- errAbort( "ERROR: illegal chrom identifier %s" % chrom)
-
-if track not in trackFnames:
- errAbort("illegal track name")
-
-if nucl==None or len(nucl)!=1 or nucl[0].lower() not in "actg":
- errAbort("invalid allele, can only be ACTG")
-
-checkReqCount()
-
-# change parameters slightly
-nucl = nucl.upper()
-fname = join(DATADIR, trackFnames[track])
-pos = int(pos)
-
-# run tool
-args = ["./bigBedToBed", fname, "stdout", "-chrom=%s"%chrom, "-start=%d"%pos, "-end=%d"%(pos+1)]
-p = subprocess.Popen(args, stdout=subprocess.PIPE)
-
-# parse output
-isMaybe = False
-for line in p.stdout:
- fs = line.rstrip("\n").split("\t")
- if fs[-1]=="maybe":
- isMaybe=True
- if int(fs[2])-int(fs[1])!=1 and track != "uniprot":
- continue
- if fs[3][-1]==nucl or track in ["hgmd", "uniprot"]:
- print "Yes"
- sys.exit(0)
-if isMaybe:
- print "Maybe"
- sys.exit(0)
-
-# cannot check exit code, no overlap triggers error
-#if p.returncode != 0:
- #print("Unable to run command")
- #sys.exit(0)
-
-print "No"