eb74447f35b60ac95ab98c0cde467da9b8c501e1 max Wed Nov 19 03:14:55 2014 -0800 the UCSC GA4GH beacon, refs #14393 diff --git src/beacon/query src/beacon/query new file mode 100755 index 0000000..036d9f4 --- /dev/null +++ src/beacon/query @@ -0,0 +1,149 @@ +#!/usr/bin/env python +# a beacon for LOVD, ClinVar, UniProt and HGMD, see ga4gh.org/#/beacon +import cgi, subprocess, sys, cgitb, os, datetime, gdbm +cgitb.enable() + +trackFnames = { +"clinvar" : "clinvarMain.bb", +"uniprot" : "spMut.bb", +"hgmd" : "hgmd.bb", +"lovd" : "lovd.bb" +} + +MAXREQPERDAY=1000 + +def checkReqCount(): + " check if IP address has exceeded max number of requests today " + accCounts = gdbm.open("accCounts.gdbm", "c") + + ipAddr = os.environ["REMOTE_ADDR"] + dateStr = datetime.date.today().isoformat() + accKey = ipAddr+"|"+dateStr + try: + oldCount = int(accCounts[accKey]) + except KeyError: + oldCount = 0 + + newCount = oldCount+1 + if newCount > MAXREQPERDAY: + print("No more requests left today") + sys.exit(0) + + accCounts[accKey]=str(newCount) + accCounts.close() + +def getTrackFromScript(): + " check which track name occurs in script name, to make the track captured by symlinks " + name = sys.argv[0].lower() + for trackName in trackFnames: + if trackName in name: + return trackName + +def errAbort(errMsg=None): + print "
"
+    if errMsg is not None:
+        print "error: "+errMsg
+
+    print """
+    A Genomics Alliance Beacon at UCSC. In collaboration with the Leiden Open Variation Database.
+
+    Data provided by Leiden Open Variation (Ivo Fokkema), NCBI ClinVar, UniProt.
+
+    parameters:
+    - chrom: either a number or X/Y or UCSC format like "chr1" or INSDC accession like CM000663.1
+    - track: clinvar, uniprot or lovd
+    - pos: 0-based position, like 10042537
+    - allele: A,C,T,G. Ignored for UniProt (UniProt stores only amino acid changes)
+
+    If the allele was found in the track, returns the string "Yes", otherwise "No".
+
+    For UniProt, an overlap with a amino acid change is enough to trigger "Yes".
+    For LOVD, an overlap with a deletion or duplication returns "Maybe". 
+
+    Examples:
+    http://hgwdev-max.cse.ucsc.edu/cgi-bin/beacon/query?track=lovd&chrom=chr1&pos=808921&allele=T
+    http://hgwdev-max.cse.ucsc.edu/cgi-bin/beacon/query?track=clinvar&chrom=chr1&pos=10042537&allele=T
+    http://hgwdev-max.cse.ucsc.edu/cgi-bin/beacon/query?track=uniprot&chrom=CM000663.1&pos=977027&allele=T
+
+    Alternative URLs are queryLovd, queryClinvar and queryUniprot, e.g.
+    http://hgwdev-max.cse.ucsc.edu/cgi-bin/beacon/queryLovd?chrom=1&pos=2160493&allele=T
+    - returns "Maybe"
+
+    Number of variants:
+    - LOVD: 180k
+    - ClinVar: 30k
+    - HGMD: 90k
+    - UniProt: 47k
+    """
+    print "
" + sys.exit(0) + +# parse INSDC -> ucsc chrom name table +insdcToUcsc = {} +for line in open("insdcToUcsc.tab"): + name, chrom = line.rstrip("\n").split("\t") + insdcToUcsc[name] = chrom + +print "Content-Type: text/html\n" +form = cgi.FieldStorage() + +track = form.getfirst("track") +if track is None: + track = getTrackFromScript() + +chrom = form.getfirst("chrom") +pos = form.getfirst("pos") +nucl = form.getfirst("allele") + +# check parameters +if chrom==None: + errAbort() +if not pos.isdigit(): + errAbort("pos is not an int") +if chrom.isdigit() or chrom in "XY": + chrom = "chr"+chrom + +# convert chrom from genbank acc to ucsc string like "chr1" +if chrom not in insdcToUcsc.values(): + chrom = insdcToUcsc.get(chrom, None) +if chrom==None: + errAbort( "ERROR: illegal chrom identifier %s" % chrom) + +if track not in trackFnames: + errAbort("illegal track name") + +if nucl==None or len(nucl)!=1 or nucl[0].lower() not in "actg": + errAbort("invalid allele, can only be ACTG") + +checkReqCount() + +# change parameters slightly +nucl = nucl.upper() +fname = trackFnames[track] +pos = int(pos) + +# run tool +args = ["./bigBedToBed", fname, "stdout", "-chrom=%s"%chrom, "-start=%d"%pos, "-end=%d"%(pos+1)] +p = subprocess.Popen(args, stdout=subprocess.PIPE) + +# parse output +isMaybe = False +for line in p.stdout: + fs = line.rstrip("\n").split("\t") + if fs[-1]=="maybe": + isMaybe=True + if int(fs[2])-int(fs[1])!=1 and track != "uniprot": + continue + if fs[3][-1]==nucl or track in ["hgmd", "uniprot"]: + print "Yes" + sys.exit(0) +if isMaybe: + print "Maybe" + sys.exit(0) + +# cannot check exit code, no overlap triggers error +#if p.returncode != 0: + #print("Unable to run command") + #sys.exit(0) + +print "No"