880ad5f5e5b759ac892553985d6ab3721452e870 max Thu Dec 11 16:58:34 2014 -0800 largely revamped beacon, refs #14393 diff --git src/beacon/query src/beacon/query deleted file mode 100755 index e951874..0000000 --- src/beacon/query +++ /dev/null @@ -1,156 +0,0 @@ -#!/usr/bin/env python -# a beacon for LOVD, ClinVar, UniProt and HGMD, see ga4gh.org/#/beacon -import cgi, subprocess, sys, cgitb, os, datetime, gdbm -from os.path import join -cgitb.enable() - -# dir for bigBed files -DATADIR = "/gbdb/hg19/beacon" - -trackFnames = { -"clinvar" : "clinvarMain.bb", -"uniprot" : "spMut.bb", -"hgmd" : "hgmd.bb", -"lovd" : "lovd.bb" -} - -MAXREQPERDAY=1000 - -def checkReqCount(): - " check if IP address has exceeded max number of requests today " - accCounts = gdbm.open("accCounts.gdbm", "c") - - ipAddr = os.environ["REMOTE_ADDR"] - dateStr = datetime.date.today().isoformat() - accKey = ipAddr+"|"+dateStr - try: - oldCount = int(accCounts[accKey]) - except KeyError: - oldCount = 0 - - newCount = oldCount+1 - if newCount > MAXREQPERDAY: - print("No more requests left today") - sys.exit(0) - - accCounts[accKey]=str(newCount) - accCounts.close() - -def getTrackFromScript(): - " check which track name occurs in script name, allows symlinks to cgi that include the track name " - name = sys.argv[0].lower() - for trackName in trackFnames: - if trackName in name: - return trackName - -def errAbort(errMsg=None): - print "
" - if errMsg is not None: - print "error: "+errMsg - - host = os.environ.get("HTTP_HOST", "") - - print """ - A Genomics Alliance Beacon at UCSC. In collaboration with the Leiden Open Variation Database. - - Data provided by Leiden Open Variation (Ivo Fokkema), NCBI ClinVar, UniProt, Biobase/HGMD. - - parameters: - - chrom: either a number or X/Y or UCSC format like "chr1" or INSDC accession like CM000663.1 - - track: clinvar, uniprot or lovd - - pos: 0-based position, like 10042537 - - allele: A,C,T,G. Ignored for UniProt (UniProt stores only amino acid changes) - - If the allele was found in the track, returns the string "Yes", otherwise "No". - - For UniProt and HGMD, an overlap with an amino acid change is enough to trigger "Yes". - Therefore, the alleles are ignored for UniProt and HGMD. - For LOVD, an overlap with a deletion or duplication returns "Maybe". - - Examples: - http://%(host)s/cgi-bin/beacon/query?track=lovd&chrom=chr1&pos=808921&allele=T - http://%(host)s/cgi-bin/beacon/query?track=clinvar&chrom=chr1&pos=10042537&allele=T - http://%(host)s/cgi-bin/beacon/query?track=uniprot&chrom=CM000663.1&pos=977027&allele=T - - Alternative URLs are queryLovd, queryClinvar and queryUniprot, e.g. - http://%(host)s/cgi-bin/beacon/queryLovd?chrom=1&pos=2160493&allele=T - - returns "Maybe" - - Number of variants: - - LOVD: 180k - - ClinVar: 30k - - HGMD: 90k - - UniProt: 47k - """ % locals() - print "" - sys.exit(0) - -# parse INSDC -> ucsc chrom name table -insdcToUcsc = {} -for line in open("insdcToUcsc.tab"): - name, chrom = line.rstrip("\n").split("\t") - insdcToUcsc[name] = chrom - -print "Content-Type: text/html\n" -form = cgi.FieldStorage() - -track = form.getfirst("track") -if track is None: - track = getTrackFromScript() - -chrom = form.getfirst("chrom") -pos = form.getfirst("pos") -nucl = form.getfirst("allele") - -# check parameters -if chrom==None: - errAbort() -if not pos.isdigit(): - errAbort("pos is not an int") -if chrom.isdigit() or chrom in "XY": - chrom = "chr"+chrom - -# convert chrom from genbank acc to ucsc string like "chr1" -if chrom not in insdcToUcsc.values(): - chrom = insdcToUcsc.get(chrom, None) -if chrom==None: - errAbort( "ERROR: illegal chrom identifier %s" % chrom) - -if track not in trackFnames: - errAbort("illegal track name") - -if nucl==None or len(nucl)!=1 or nucl[0].lower() not in "actg": - errAbort("invalid allele, can only be ACTG") - -checkReqCount() - -# change parameters slightly -nucl = nucl.upper() -fname = join(DATADIR, trackFnames[track]) -pos = int(pos) - -# run tool -args = ["./bigBedToBed", fname, "stdout", "-chrom=%s"%chrom, "-start=%d"%pos, "-end=%d"%(pos+1)] -p = subprocess.Popen(args, stdout=subprocess.PIPE) - -# parse output -isMaybe = False -for line in p.stdout: - fs = line.rstrip("\n").split("\t") - if fs[-1]=="maybe": - isMaybe=True - if int(fs[2])-int(fs[1])!=1 and track != "uniprot": - continue - if fs[3][-1]==nucl or track in ["hgmd", "uniprot"]: - print "Yes" - sys.exit(0) -if isMaybe: - print "Maybe" - sys.exit(0) - -# cannot check exit code, no overlap triggers error -#if p.returncode != 0: - #print("Unable to run command") - #sys.exit(0) - -print "No"