d0ab96bfa4307ef666f194cdd2bda05608972cd1 max Mon Nov 23 04:44:23 2020 -0800 Trying to address a problem in bedFixBlockOverlaps that appears in the UniProt otto pipeline. This is a hacky fix, but the problem only appears in very unusual assemblies, not in any mammalian assembly. refs #25784 diff --git src/utils/bedFixBlockOverlaps src/utils/bedFixBlockOverlaps index eb009b4..46786fd 100755 --- src/utils/bedFixBlockOverlaps +++ src/utils/bedFixBlockOverlaps @@ -24,31 +24,32 @@ exit(1) filename = args[0] if filename=="stdin": ifh = sys.stdin else: ifh = open(filename) for line in ifh: row = line.rstrip("\n").split("\t") bSizes = [int(x) for x in row[10].strip(",").split(",")] bStarts = [int(x) for x in row[11].strip(",").split(",")] newLens = [] newStarts = [] - # go over blockLens/blockStarts and fix overlaps - pos = 0 + lastEnd = 0 for bs, bl in zip(bStarts, bSizes): - if pos>bs: - diff = pos-bs - bs = pos + #print("lastEnd, start, length", lastEnd, bs, bl) + if lastEnd!=0 and lastEnd>bs: + diff = lastEnd-bs + bs = lastEnd bl = bl-diff - pos += bl + #print("fixed to: lastEnd, start, length", lastEnd, bs, bl) + lastEnd= bs+bl newLens.append(str(bl)) newStarts.append(str(bs)) row[10] = ",".join(newLens) row[11] = ",".join(newStarts) print("\t".join(row))