413eea60a2ca4d9c534a6750fcbf332ffa7e1e36 max Thu Nov 27 03:06:00 2014 -0800 moving bigbed files to a location that is less lame, refs #14427 diff --git src/beacon/query src/beacon/query index 71a6970..e951874 100755 --- src/beacon/query +++ src/beacon/query @@ -1,152 +1,156 @@ #!/usr/bin/env python # a beacon for LOVD, ClinVar, UniProt and HGMD, see ga4gh.org/#/beacon import cgi, subprocess, sys, cgitb, os, datetime, gdbm +from os.path import join cgitb.enable() +# dir for bigBed files +DATADIR = "/gbdb/hg19/beacon" + trackFnames = { "clinvar" : "clinvarMain.bb", "uniprot" : "spMut.bb", "hgmd" : "hgmd.bb", "lovd" : "lovd.bb" } MAXREQPERDAY=1000 def checkReqCount(): " check if IP address has exceeded max number of requests today " accCounts = gdbm.open("accCounts.gdbm", "c") ipAddr = os.environ["REMOTE_ADDR"] dateStr = datetime.date.today().isoformat() accKey = ipAddr+"|"+dateStr try: oldCount = int(accCounts[accKey]) except KeyError: oldCount = 0 newCount = oldCount+1 if newCount > MAXREQPERDAY: print("No more requests left today") sys.exit(0) accCounts[accKey]=str(newCount) accCounts.close() def getTrackFromScript(): - " check which track name occurs in script name, to make the track captured by symlinks " + " check which track name occurs in script name, allows symlinks to cgi that include the track name " name = sys.argv[0].lower() for trackName in trackFnames: if trackName in name: return trackName def errAbort(errMsg=None): print "
"
     if errMsg is not None:
         print "error: "+errMsg
 
     host = os.environ.get("HTTP_HOST", "")
 
     print """
     A Genomics Alliance Beacon at UCSC. In collaboration with the Leiden Open Variation Database.
 
     Data provided by Leiden Open Variation (Ivo Fokkema), NCBI ClinVar, UniProt, Biobase/HGMD.
 
     parameters:
     - chrom: either a number or X/Y or UCSC format like "chr1" or INSDC accession like CM000663.1
     - track: clinvar, uniprot or lovd
     - pos: 0-based position, like 10042537
     - allele: A,C,T,G. Ignored for UniProt (UniProt stores only amino acid changes)
 
     If the allele was found in the track, returns the string "Yes", otherwise "No".
 
     For UniProt and HGMD, an overlap with an amino acid change is enough to trigger "Yes".
     Therefore, the alleles are ignored for UniProt and HGMD.
     For LOVD, an overlap with a deletion or duplication returns "Maybe". 
 
     Examples:
     http://%(host)s/cgi-bin/beacon/query?track=lovd&chrom=chr1&pos=808921&allele=T
     http://%(host)s/cgi-bin/beacon/query?track=clinvar&chrom=chr1&pos=10042537&allele=T
     http://%(host)s/cgi-bin/beacon/query?track=uniprot&chrom=CM000663.1&pos=977027&allele=T
 
     Alternative URLs are queryLovd, queryClinvar and queryUniprot, e.g.
     http://%(host)s/cgi-bin/beacon/queryLovd?chrom=1&pos=2160493&allele=T
     - returns "Maybe"
 
     Number of variants:
     - LOVD: 180k
     - ClinVar: 30k
     - HGMD: 90k
     - UniProt: 47k
     """ % locals()
     print "
" sys.exit(0) # parse INSDC -> ucsc chrom name table insdcToUcsc = {} for line in open("insdcToUcsc.tab"): name, chrom = line.rstrip("\n").split("\t") insdcToUcsc[name] = chrom print "Content-Type: text/html\n" form = cgi.FieldStorage() track = form.getfirst("track") if track is None: track = getTrackFromScript() chrom = form.getfirst("chrom") pos = form.getfirst("pos") nucl = form.getfirst("allele") # check parameters if chrom==None: errAbort() if not pos.isdigit(): errAbort("pos is not an int") if chrom.isdigit() or chrom in "XY": chrom = "chr"+chrom # convert chrom from genbank acc to ucsc string like "chr1" if chrom not in insdcToUcsc.values(): chrom = insdcToUcsc.get(chrom, None) if chrom==None: errAbort( "ERROR: illegal chrom identifier %s" % chrom) if track not in trackFnames: errAbort("illegal track name") if nucl==None or len(nucl)!=1 or nucl[0].lower() not in "actg": errAbort("invalid allele, can only be ACTG") checkReqCount() # change parameters slightly nucl = nucl.upper() -fname = trackFnames[track] +fname = join(DATADIR, trackFnames[track]) pos = int(pos) # run tool args = ["./bigBedToBed", fname, "stdout", "-chrom=%s"%chrom, "-start=%d"%pos, "-end=%d"%(pos+1)] p = subprocess.Popen(args, stdout=subprocess.PIPE) # parse output isMaybe = False for line in p.stdout: fs = line.rstrip("\n").split("\t") if fs[-1]=="maybe": isMaybe=True if int(fs[2])-int(fs[1])!=1 and track != "uniprot": continue if fs[3][-1]==nucl or track in ["hgmd", "uniprot"]: print "Yes" sys.exit(0) if isMaybe: print "Maybe" sys.exit(0) # cannot check exit code, no overlap triggers error #if p.returncode != 0: #print("Unable to run command") #sys.exit(0) print "No"