a823a26a0ed014c44482f50eb1c4072b3860f9de
max
  Mon Oct 31 14:27:59 2016 -0700
small fix to doLocusName to handle genes with with spaces in names

diff --git src/utils/doLocusName src/utils/doLocusName
index f2c0fa3..8de63da 100755
--- src/utils/doLocusName
+++ src/utils/doLocusName
@@ -1,90 +1,90 @@
 #!/usr/bin/env python
 
 import logging, sys, optparse, subprocess, tempfile, os
 from collections import defaultdict
 from os.path import join, basename, dirname, isfile
 
 # ==== functions =====
     
 def parseArgs():
     " setup logging, parse command line arguments and options. -h shows auto-generated help page "
     parser = optparse.OptionParser("usage: %prog [options] db geneTableName - create and fill locusNames, a BED4 table that covers all chromosomes and assigns a name to all ranges. ensGene usually works well as the gene table.")
 
     parser.add_option("-d", "--debug", dest="debug", action="store_true", help="show debug messages")
     (options, args) = parser.parse_args()
 
     if args==[]:
         parser.print_help()
         exit(1)
 
     if options.debug:
         logging.basicConfig(level=logging.DEBUG)
     else:
         logging.basicConfig(level=logging.INFO)
     return args, options
 
 def runSql(db, query):
     " yield rows from sql query "
     proc = subprocess.Popen(['hgsql', db, "-NB", "-e", query], stdout=subprocess.PIPE)
     for line in  proc.stdout:
         yield line.rstrip("\n").split("\t")
 
 def runCmd(cmd, mustRun=True):
     " wrapper around os.system that prints error messages "
     print ("Running: "+ cmd)
     ret = os.system(cmd)
     if ret!=0 and mustRun:
         print "Could not run command %s" % cmd
         sys.exit(1)
     return ret
 
 def doLocusName(db, geneTableName):
     " create and fill the locusName table for db "
     if geneTableName=="ensGene":
         query = "SELECT * from ensemblToGeneName"
     elif geneTableName=="knownGene":
         query = "SELECT kgID, geneSymbol from kgXref where geneSymbol<>'' and geneSymbol not like '% %'"
     else:
         assert(False) # no support for this gene table yet
 
     tempFh = tempfile.NamedTemporaryFile()
     #tempFh = open("temp.tmp", "w")
     for row in runSql(db, query):
         tempFh.write("\t".join(row)+"\n")
     tempFh.flush()
     tempFname = tempFh.name
 
     chromSizesFname = "/cluster/data/%s/chrom.sizes" % db
     outTmp = "locusName.tmp.bed"
 
     cmdParts = []
     # get genePred
     cmdParts.append("hgsql %(db)s -NB -e 'SELECT * from %(geneTableName)s'| ")
     # only known Gene has no bin field
     if geneTableName!="knownGene":
         cmdParts.append("cut -f2- |")
     # break genes into exons
     cmdParts.append("genePredToBed stdin stdout | bedToExons stdin stdout | ")
     # replace transcript with symbol, skip dupes
     cmdParts.append("tabRepl %(tempFname)s 3 /dev/stdin | sort -u | ")
     # merge adjacent exons
     cmdParts.append("bedSort stdin stdout | bedOverlapMerge /dev/stdin /dev/stdout |")
     # annotate regions
     cmdParts.append("bedBetween stdin /dev/stdout -a -s %(chromSizesFname)s -m 100 |")
     # and sort
     cmdParts.append("bedSort stdin %(outTmp)s")
     runCmd(("".join(cmdParts)) % locals())
 
-    cmd = "hgLoadBed %s locusName %s" % (db, outTmp)
+    cmd = "hgLoadBed -tab %s locusName %s" % (db, outTmp)
     runCmd(cmd)
 
     os.remove(outTmp)
 
 # ----------- main --------------
 def main():
     args, options = parseArgs()
 
     db, geneTable = args
     doLocusName(db, geneTable)
 
 main()