c7723adc566681d4f69232076df356c36aa5a1cf
max
  Fri Sep 9 16:03:22 2016 -0700
adding first version of pipeline for crispr tracks and trackDb statements supporting external extra fields, refs #17235

diff --git src/utils/bedOverlapMerge src/utils/bedOverlapMerge
new file mode 100755
index 0000000..edf34cb
--- /dev/null
+++ src/utils/bedOverlapMerge
@@ -0,0 +1,67 @@
+#!/usr/bin/python
+
+import sys
+from optparse import OptionParser
+
+# === COMMAND LINE INTERFACE, OPTIONS AND HELP ===
+# the input bed file is the only positional argument; no options are defined yet
+parser = OptionParser(
+    "usage: %prog [options] file - merge overlapping bed features, join their names")
+options, args = parser.parse_args()
+# nothing to merge without an input file: show the help text and exit
+if not args:
+    parser.print_help()
+    sys.exit(0)
+
+# ==== FUNCTIONs =====
+def coordOverlap(start1, end1, start2, end2):
+    """ True if range 2 covers the start or the end of range 1, or if one range lies within the other """
+    # range 2 covers the start of range 1, or range 2 covers the end of range 1
+    if (start2 <= start1 < end2) or (start2 < end1 <= end2):
+        return True
+    # range 1 lies completely within range 2, or range 2 lies completely within range 1
+    inside = (start2 <= start1 and end1 <= end2) or (start1 <= start2 and end2 <= end1)
+    return inside
+
+def printLine(chrom, start, end, names):
+    """ print one bed feature to stdout, tab-separated: chrom, start, end and the names joined with '/' """
+    print "\t".join([chrom, str(start), str(end), "/".join(names)])
+
+# ==== MAIN ====
+infname = args[0]
+
+# lastX/names describe the merged feature being built; lastChrom is None while nothing is pending
+lastChrom, lastStart, lastEnd, names = None, None, None, []
+
+with open(infname) as ifh: # 'with' closes the file even if a line fails to parse
+    for line in ifh:
+        chrom, start, end, name = line.rstrip("\n").split("\t")[:4]
+        start, end = int(start), int(end)
+
+        # if first feature on chromosome and we have some lastX data: output lastX data
+        if lastChrom is not None and lastChrom != chrom:
+            printLine(lastChrom, lastStart, lastEnd, names)
+            lastChrom = None
+
+        # if first feature on chromosome: save lastX data and continue
+        if lastChrom is None:
+            lastChrom, lastStart, lastEnd = chrom, start, end
+            names = [name]
+            continue
+
+        if start < lastStart: # explicit check: an assert would be skipped under python -O
+            raise ValueError("features must be sorted by start position: " + line.rstrip("\n"))
+
+        # if overlap: extend last feature (input is sorted by start, so only the end can grow)
+        if coordOverlap(lastStart, lastEnd, start, end):
+            lastEnd = max(end, lastEnd)
+            if name not in names:
+                names.append(name)
+        # no overlap: print last feature and update lastX data
+        else:
+            printLine(lastChrom, lastStart, lastEnd, names)
+            lastChrom, lastStart, lastEnd = chrom, start, end
+            names = [name]
+
+if lastChrom is not None: # an empty input file leaves nothing to print
+    printLine(lastChrom, lastStart, lastEnd, names)