# Post-patch form of tabOffsets as of commit fb682f5d (max, 2017-04-24):
# "bedJoinTabOffset: making sure that input file is tab-separated, refs #18736".
def tabOffsets(fnames, keyCol):
    """ iterate over all lines in tab filenames and return dict with value -> (offset, lineLen)

    fnames is a sequence of file names; each file is read line by line via
    iterLineOffsets(). keyCol is the 0-based index of the tab-separated column
    whose value becomes the dict key.

    Returns a dict key -> (start, len(line)), where start is the line's start
    position as reported by iterLineOffsets() and len(line) is the length of
    the line exactly as yielded by iterLineOffsets().

    If a key occurs more than once (within one file or across files), only the
    first occurrence is kept and a warning is printed for every later one.

    Raises Exception if keyCol is 0 and a line contains no tab at all, and
    IndexError if keyCol is beyond the last column of a line.
    """
    idxFnames = {}  # key -> name of the file where the key was first seen
    ret = {}
    for i, fname in enumerate(fnames):
        print("Reading %s, %0.f %%" % (fname, float(100*i)/len(fnames)))
        # 'with' makes sure every input file is closed again, even when a
        # malformed line raises below
        with open(fname, "rb") as ifh:
            for line, start, _end in iterLineOffsets(ifh):
                if keyCol == 0:
                    # most common case: saves a bit of time by not building a huge list
                    fields = line.split("\t", 1)
                    if len(fields) == 1:
                        raise Exception("file %s does not seem to be a tab-separated file" % fname)
                    key = fields[0]
                else:
                    key = line.rstrip("\n").split("\t")[keyCol]

                if key in idxFnames:
                    print("Warning: duplicate key %s, found in files %s and %s" % \
                        (key, fname, idxFnames[key]))
                    continue

                idxFnames[key] = fname
                ret[key] = (start, len(line))
    return ret