#!/usr/bin/python
# Provenance: src/utils/bedOverlapMerge as of commit
# 634275bbaa417bb81e44cca7aaa1757ee7b57b82 (max, Fri May 12 11:19:51 2017 -0700),
# "fixing up crispr track pipeline for CHO/Hiram".
"""Merge overlapping bed features and join their names.

Reads a tab-separated file whose first four fields are chrom, start, end and
name.  Consecutive features on the same chromosome that overlap are collapsed
into one feature spanning all of them; the names of the collapsed features are
joined with "/" (or only the first name is kept with -o/--onlyFirst).

The input must be grouped by chromosome and sorted by start position within
each chromosome.  Results are written to stdout, one merged feature per line.
"""

import sys
from optparse import OptionParser


# ==== FUNCTIONs =====
def coordOverlap(start1, end1, start2, end2):
    """Return True if the features start1-end1 and start2-end2 overlap.

    Two features of non-zero length that merely touch (end of one equals the
    start of the other) are NOT considered overlapping.
    """
    return (
        (start2 <= start1 and end2 > start1)        # feature 2 covers start of 1
        or (start2 < end1 and end2 >= end1)         # feature 2 covers end of 1
        or (start1 >= start2 and end1 <= end2)      # feature 1 inside feature 2
        or (start2 >= start1 and end2 <= end1)      # feature 2 inside feature 1
    )


def printLine(chrom, start, end, names, onlyFirst):
    """Print one merged feature to stdout as a tab-separated line.

    chrom, start, end -- coordinates of the merged feature
    names             -- list of names of all features that were merged
    onlyFirst         -- if true, output only the first name instead of
                         joining all names with "/"
    """
    if onlyFirst:
        # slicing (rather than names[0]) keeps an empty list from raising
        names = names[:1]
    # use the chrom argument, not whatever global state the caller may hold
    row = [chrom, str(start), str(end), "/".join(names)]
    # single-argument print(...) behaves the same under Python 2 and 3
    print("\t".join(row))


def mergeFeatures(lines):
    """Yield (chrom, start, end, names) for each group of overlapping features.

    lines -- iterable of tab-separated text lines with at least four fields:
             chrom, start, end, name.  Lines must be grouped by chromosome and
             sorted by start position within a chromosome.

    names is the list of distinct feature names in the group, in order of
    first appearance.  Nothing is yielded for empty input.

    Raises ValueError if a feature starts before the current merged feature
    on the same chromosome (input not sorted), or if a line has fewer than
    four fields or non-integer coordinates.
    """
    curChrom, curStart, curEnd = None, None, None
    names = []

    for lineNo, line in enumerate(lines, 1):
        chrom, start, end, name = line.rstrip("\r\n").split("\t")[:4]
        start = int(start)
        end = int(end)

        if curChrom is not None and chrom == curChrom:
            # explicit check instead of assert: asserts vanish under python -O
            if start < curStart:
                raise ValueError(
                    "line %d: features must be sorted by start position "
                    "(%s:%d comes after %s:%d)"
                    % (lineNo, chrom, start, curChrom, curStart))

            # overlap: extend the current merged feature and keep going
            if coordOverlap(curStart, curEnd, start, end):
                curStart = min(start, curStart)
                curEnd = max(end, curEnd)
                if name not in names:
                    names.append(name)
                continue

        # new chromosome or no overlap: emit what we have, start a new feature
        if curChrom is not None:
            yield (curChrom, curStart, curEnd, names)
        curChrom, curStart, curEnd = chrom, start, end
        names = [name]

    # emit the trailing feature, but only if the input was not empty
    if curChrom is not None:
        yield (curChrom, curStart, curEnd, names)


# === COMMAND LINE INTERFACE, OPTIONS AND HELP ===
def main(argv=None):
    """Parse the command line and print the merged features of the input file.

    argv -- list of arguments without the program name; defaults to
            sys.argv[1:] when None.
    """
    parser = OptionParser("usage: %prog [options] file - merge overlapping bed features, join their names")
    parser.add_option("-o", "--onlyFirst", dest="onlyFirst", action="store_true", help="only use the first name, do not concat all", default=False)
    (options, args) = parser.parse_args(argv)

    if not args:
        parser.print_help()
        sys.exit(0)

    # 'with' guarantees the input file is closed, even on a parse error
    with open(args[0]) as inFile:
        for chrom, start, end, names in mergeFeatures(inFile):
            printLine(chrom, start, end, names, options.onlyFirst)


# ==== MAIN ====
if __name__ == "__main__":
    main()