src/utils/bedBetween 557415034ca7f233f8f4b5be7201261136ed80ae

557415034ca7f233f8f4b5be7201261136ed80ae
max
  Wed Sep 21 16:13:31 2016 -0700
small fix to two crispr-related utils

diff --git src/utils/bedBetween src/utils/bedBetween
index 1e065c3..ec454ed 100755
--- src/utils/bedBetween
+++ src/utils/bedBetween
@@ -6,53 +6,60 @@
 import sys
 from optparse import OptionParser
 import logging
 
 # === COMMAND LINE INTERFACE, OPTIONS AND HELP ===
 parser = OptionParser("usage: %prog [options] inSortedBedFile outfile (stdout ok): given sorted feats., create features between them and annotated these with the neighboring bednames. Regions around chromosomes are limited to 50kbp.")
 
 parser.add_option("-d", "", dest="debug", action="store_true", \
         help="show debug messages")
 parser.add_option("-s", "--chromSizes", dest="chromSizes", action="store", \
         help="use this file with chrom <tab> size - lines to get chrom sizes") 
 parser.add_option("-u", "--upstream", dest="upstream", action="store_true", \
         help="only report intergenic regions that are upstream (if between -/+ neighbors, two features are created)", default=False)
 parser.add_option("-l", "--limit", dest="limit", action="store", \
         help="if -u is set: limit the length of upstream regions to this size", default=0, type="int")
+parser.add_option("-m", "--maxLen", dest="maxLen", action="store", \
+        help="limit names of features to this many characters in output bed file", default=None, type="int")
 parser.add_option("", "--uniqueNames", dest="uniqueNames", action="store_true", \
         help="keep only the first feature, if identical names")
 parser.add_option("-a", "--all", dest="all", action="store_true", \
         help="output all features (input and spacers between them) and annotate with 'ex:', 'in:', 'ig:' (exon, intron, intergenic)")
 
 (options, args) = parser.parse_args()
 
 doAll = False
 
+maxNameLen = None
+
 # ==== FUNCTIONs =====
 
 def truncSize(start, end, limit, onLeft):
     """ if limit is true, cut down start end to max limit length and return as pair """
     if limit!=0 and end-start > limit:
         if onLeft:
                 return (start, start+limit)
         else:
                 return (end-limit, end)
     return start, end
 
 def writeBeds(outf, chrom, posList, names, type):
     for i in range(0, len(names)):
         name = names[i]
+        if maxNameLen is not None and len(name)>maxNameLen:
+            name = name[:maxNameLen-3]+"..."
+
         start, end = posList[i]
         # skip 0-length features
         if start==end:
             continue
         outf.write("%s\t%d\t%d\t%s\n" % (chrom, start, end, name))
 
 def output(fh, left, right, upstream, limit, type, chromSizes=None):
     """ given two flanking beds, create lists of names for the regions between and write to fh"""
     """ this can return 0, 1 or 2 different names.
     type can be "ig", "ex" or "in" = intergenic, exon, intron
     """
 
     global outCount
     global dublCount
     names = []
@@ -272,30 +279,32 @@
 
 if options.debug:
     logging.basicConfig(level=logging.DEBUG)
 if options.all:
     doAll = True
 
 # parse input arguments
 infile = args[0]
 outfile = args[1]
 upstream = options.upstream
 limit = options.limit
 chromSize=None
 if options.chromSizes!=None:
     chromSize = slurpdict(options.chromSizes)
 
+maxNameLen = options.maxLen
+
 stderr.write("Reading beds ...\n")
 beds = parseBedFilename(infile)
 stderr.write("%d features read\n" % len(beds))
 
 if options.uniqueNames:
     newBeds = []
     seenNames = set()
     for b in beds:
         if b.name not in seenNames:
             newBeds.append(b)
         seenNames.add(b.name)
     logging.info("Removing features with duplicate names, feature count %d -> %d" % \
         (len(beds), len(newBeds)))
     beds = newBeds