eb74447f35b60ac95ab98c0cde467da9b8c501e1 max Wed Nov 19 03:14:55 2014 -0800 the UCSC GA4GH beacon, refs #14393 diff --git src/beacon/query src/beacon/query new file mode 100755 index 0000000..036d9f4 --- /dev/null +++ src/beacon/query @@ -0,0 +1,149 @@ +#!/usr/bin/env python +# a beacon for LOVD, ClinVar, UniProt and HGMD, see ga4gh.org/#/beacon +import cgi, subprocess, sys, cgitb, os, datetime, gdbm +cgitb.enable() + +trackFnames = { +"clinvar" : "clinvarMain.bb", +"uniprot" : "spMut.bb", +"hgmd" : "hgmd.bb", +"lovd" : "lovd.bb" +} + +MAXREQPERDAY=1000 + +def checkReqCount(): + " check if IP address has exceeded max number of requests today " + accCounts = gdbm.open("accCounts.gdbm", "c") + + ipAddr = os.environ["REMOTE_ADDR"] + dateStr = datetime.date.today().isoformat() + accKey = ipAddr+"|"+dateStr + try: + oldCount = int(accCounts[accKey]) + except KeyError: + oldCount = 0 + + newCount = oldCount+1 + if newCount > MAXREQPERDAY: + print("No more requests left today") + sys.exit(0) + + accCounts[accKey]=str(newCount) + accCounts.close() + +def getTrackFromScript(): + " check which track name occurs in script name, to make the track captured by symlinks " + name = sys.argv[0].lower() + for trackName in trackFnames: + if trackName in name: + return trackName + +def errAbort(errMsg=None): + print "
" + if errMsg is not None: + print "error: "+errMsg + + print """ + A Genomics Alliance Beacon at UCSC. In collaboration with the Leiden Open Variation Database. + + Data provided by Leiden Open Variation (Ivo Fokkema), NCBI ClinVar, UniProt. + + parameters: + - chrom: either a number or X/Y or UCSC format like "chr1" or INSDC accession like CM000663.1 + - track: clinvar, uniprot or lovd + - pos: 0-based position, like 10042537 + - allele: A,C,T,G. Ignored for UniProt (UniProt stores only amino acid changes) + + If the allele was found in the track, returns the string "Yes", otherwise "No". + + For UniProt, an overlap with a amino acid change is enough to trigger "Yes". + For LOVD, an overlap with a deletion or duplication returns "Maybe". + + Examples: + http://hgwdev-max.cse.ucsc.edu/cgi-bin/beacon/query?track=lovd&chrom=chr1&pos=808921&allele=T + http://hgwdev-max.cse.ucsc.edu/cgi-bin/beacon/query?track=clinvar&chrom=chr1&pos=10042537&allele=T + http://hgwdev-max.cse.ucsc.edu/cgi-bin/beacon/query?track=uniprot&chrom=CM000663.1&pos=977027&allele=T + + Alternative URLs are queryLovd, queryClinvar and queryUniprot, e.g. + http://hgwdev-max.cse.ucsc.edu/cgi-bin/beacon/queryLovd?chrom=1&pos=2160493&allele=T + - returns "Maybe" + + Number of variants: + - LOVD: 180k + - ClinVar: 30k + - HGMD: 90k + - UniProt: 47k + """ + print "" + sys.exit(0) + +# parse INSDC -> ucsc chrom name table +insdcToUcsc = {} +for line in open("insdcToUcsc.tab"): + name, chrom = line.rstrip("\n").split("\t") + insdcToUcsc[name] = chrom + +print "Content-Type: text/html\n" +form = cgi.FieldStorage() + +track = form.getfirst("track") +if track is None: + track = getTrackFromScript() + +chrom = form.getfirst("chrom") +pos = form.getfirst("pos") +nucl = form.getfirst("allele") + +# check parameters +if chrom==None: + errAbort() +if not pos.isdigit(): + errAbort("pos is not an int") +if chrom.isdigit() or chrom in "XY": + chrom = "chr"+chrom + +# convert chrom from genbank acc to ucsc string like "chr1" +if chrom not in insdcToUcsc.values(): + chrom = insdcToUcsc.get(chrom, None) +if chrom==None: + errAbort( "ERROR: illegal chrom identifier %s" % chrom) + +if track not in trackFnames: + errAbort("illegal track name") + +if nucl==None or len(nucl)!=1 or nucl[0].lower() not in "actg": + errAbort("invalid allele, can only be ACTG") + +checkReqCount() + +# change parameters slightly +nucl = nucl.upper() +fname = trackFnames[track] +pos = int(pos) + +# run tool +args = ["./bigBedToBed", fname, "stdout", "-chrom=%s"%chrom, "-start=%d"%pos, "-end=%d"%(pos+1)] +p = subprocess.Popen(args, stdout=subprocess.PIPE) + +# parse output +isMaybe = False +for line in p.stdout: + fs = line.rstrip("\n").split("\t") + if fs[-1]=="maybe": + isMaybe=True + if int(fs[2])-int(fs[1])!=1 and track != "uniprot": + continue + if fs[3][-1]==nucl or track in ["hgmd", "uniprot"]: + print "Yes" + sys.exit(0) +if isMaybe: + print "Maybe" + sys.exit(0) + +# cannot check exit code, no overlap triggers error +#if p.returncode != 0: + #print("Unable to run command") + #sys.exit(0) + +print "No"