d38f471366cf2b2c0bf22b8eb0bd8feda07f7229 max Fri Oct 26 17:07:30 2018 -0700 adding splicing variants to hgBeacon, refs #22325 diff --git src/hg/hgBeacon/hgBeacon src/hg/hgBeacon/hgBeacon index a3eeba1..45caef1 100755 --- src/hg/hgBeacon/hgBeacon +++ src/hg/hgBeacon/hgBeacon @@ -259,30 +259,33 @@ if not (hostName.startswith("hgw") and hostName.endswith("ucsc.edu")) \ or hostName.startswith("hgwdev."): # enable special CGI error handler not on the RR, but on hgwdev cgitb.enable() mainCgi() else: mainCommandLine() def parseArgs(): " parse command line options into args and options " parser = optparse.OptionParser("""usage: %prog [options] filename [datasetName] - import VCF or BED files into the beacon database. - parameter 'datasetName' is optional and defaults to 'defaultDataset'. - any existing dataset of the same name will be overwritten - the data is written to beaconData.sqlite. You can use 'sqlite3' to inspect the data file. - the input file can be gzipped + + e.g. to load HGMD: + /usr/local/apache/cgi-bin/hgBeacon -f hgmd /tmp/temp.bed hgmd """) parser.add_option("-d", "--debug", dest="debug", \ action="store_true", help="show debug messages") parser.add_option("-f", "--format", dest="format", \ action="store", help="format of input file, one of vcf, lovd, hgmd. default %default", \ default = "vcf") (options, args) = parser.parse_args() if len(args)==0: parser.print_help() sys.exit(0) return args, options def dbMakeTable(conn, tableName): @@ -298,30 +301,31 @@ ' allele text' # alternate allele, can also be IATG = insertion of ATG or D15 = deletion of 15 bp ')') conn.execute(_tableDef % tableName) conn.commit() #' PRIMARY KEY (chrom, pos, allele) ' def dbOpen(mustExist=False): " open the sqlite db and return a DB connection object " dbDir = dirname(__file__) # directory where script is located fqdn = socket.getfqdn() if hostName.endswith("ucsc.edu") or fqdn.endswith(".ucsc.edu") or fqdn.endswith(".sdsc.edu"): # at UCSC, the data is not in the CGI-BIN directory dbDir = "/gbdb/hg19/beacon/" dbName = join(dbDir, SQLITEDB) + print("Opening %s" % dbName) if not exists(dbName) and mustExist: errAbort("Database file %s does not exist. This beacon is not serving any data." % dbName) conn = sqlite3.Connection(dbName) return conn def dbListTables(conn): " return list of tables in sqlite db " cursor = conn.cursor() cursor.execute("SELECT name FROM sqlite_master WHERE type='table';") rows = cursor.fetchall() tables = [] for row in rows: tables.append(row[0]) return tables @@ -416,35 +420,35 @@ print "read %d alleles, skipped %d non-SNV or del alleles" % (len(alleles), skipCount) return list(set(alleles)) def readAllelesHgmd(ifh): """ read the HGMD bed file and return in format (chrom, pos, altAllele). This function is only used internally at UCSC. """ # chr1 2338004 2338005 PEX10:CM090797 0 2338004 2338005 PEX10 CM090797 substitution alleles = [] count = 0 skipCount = 0 for line in ifh: fields = line.rstrip("\n").split("\t") chrom, start, end = fields[:3] - desc = fields[10] + desc = fields[11] start = int(start) end = int(end) - if desc=="substitution": + if desc=="substitution" or desc=="splicing variant": assert(end-start==1) alt = "*" else: skipCount += 1 continue chrom = chrom.replace("chr", "") alleles.append( (chrom, start, alt) ) print "read %d alleles, skipped %d non-SNV alleles" % (len(alleles), skipCount) return list(set(alleles)) def iterChunks(seq, size): " yields chunks of at most size elements from seq " for pos in range(0, len(seq), size):