d38f471366cf2b2c0bf22b8eb0bd8feda07f7229
max
  Fri Oct 26 17:07:30 2018 -0700
adding splicing variants to hgBeacon, refs #22325

diff --git src/hg/hgBeacon/hgBeacon src/hg/hgBeacon/hgBeacon
index a3eeba1..45caef1 100755
--- src/hg/hgBeacon/hgBeacon
+++ src/hg/hgBeacon/hgBeacon
@@ -259,30 +259,33 @@
         if not (hostName.startswith("hgw") and hostName.endswith("ucsc.edu")) \
             or hostName.startswith("hgwdev."):
             # enable special CGI error handler not on the RR, but on hgwdev
             cgitb.enable()
         mainCgi()
     else:
         mainCommandLine()
 
 def parseArgs():
     " parse command line options into args and options "
     parser = optparse.OptionParser("""usage: %prog [options] filename [datasetName] - import VCF or BED files into the beacon database. 
     - parameter 'datasetName' is optional and defaults to 'defaultDataset'.
     - any existing dataset of the same name will be overwritten
     - the data is written to beaconData.sqlite. You can use 'sqlite3' to inspect the data file.
     - the input file can be gzipped
+
+    e.g. to load HGMD:
+    /usr/local/apache/cgi-bin/hgBeacon -f hgmd /tmp/temp.bed hgmd
     """)
 
     parser.add_option("-d", "--debug", dest="debug", \
         action="store_true", help="show debug messages") 
     parser.add_option("-f", "--format", dest="format", \
         action="store", help="format of input file, one of vcf, lovd, hgmd. default %default", \
         default = "vcf") 
     (options, args) = parser.parse_args()
 
     if len(args)==0:
         parser.print_help()
         sys.exit(0)
     return args, options
 
 def dbMakeTable(conn, tableName):
@@ -298,30 +301,31 @@
     '  allele text' # alternate allele, can also be IATG = insertion of ATG or D15 = deletion of 15 bp
     ')')
     conn.execute(_tableDef % tableName)
     conn.commit()
     #'  PRIMARY KEY (chrom, pos, allele) '
 
 def dbOpen(mustExist=False):
     " open the sqlite db and return a DB connection object "
     dbDir = dirname(__file__) # directory where script is located
     fqdn = socket.getfqdn()
     if hostName.endswith("ucsc.edu") or fqdn.endswith(".ucsc.edu") or fqdn.endswith(".sdsc.edu"):
         # at UCSC, the data is not in the CGI-BIN directory
         dbDir = "/gbdb/hg19/beacon/"
 
     dbName = join(dbDir, SQLITEDB)
+    print("Opening %s" % dbName)
 
     if not exists(dbName) and mustExist:
         errAbort("Database file %s does not exist. This beacon is not serving any data." % dbName)
     conn = sqlite3.Connection(dbName)
     return conn
 
 def dbListTables(conn):
     " return list of tables in sqlite db "
     cursor = conn.cursor()
     cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
     rows = cursor.fetchall()
     tables = []
     for row in rows:
         tables.append(row[0])
     return tables
@@ -416,35 +420,35 @@
 
     print "read %d alleles, skipped %d non-SNV or del alleles" % (len(alleles), skipCount)
     return list(set(alleles))
 
 def readAllelesHgmd(ifh):
     """ read the HGMD bed file and return in format (chrom, pos, altAllele).
     This function is only used internally at UCSC.
     """
     # chr1 2338004 2338005 PEX10:CM090797 0 2338004 2338005 PEX10 CM090797 substitution
     alleles = []
     count = 0
     skipCount = 0
     for line in ifh:
         fields = line.rstrip("\n").split("\t")
         chrom, start, end = fields[:3]
-        desc = fields[10]
+        desc = fields[11]
         start = int(start)
         end = int(end)
 
-        if desc=="substitution":
+        if desc=="substitution" or desc=="splicing variant":
             assert(end-start==1)
             alt = "*"
         else:
             skipCount += 1
             continue
 
         chrom = chrom.replace("chr", "")
         alleles.append( (chrom, start, alt) )
 
     print "read %d alleles, skipped %d non-SNV alleles" % (len(alleles), skipCount)
     return list(set(alleles))
 
 def iterChunks(seq, size):
     " yields chunks of at most size elements from seq "
     for pos in range(0, len(seq), size):