#!/usr/bin/env python
# Origin: commit ae427c75bc1aa63033fb656a0f84ddc0c45486bf (max, Fri Apr 28 18:03:13 2017 -0700)
#   "Adding two more tools required for building the gene interactions track, refs #13634"
#   Added as new file src/utils/bedNamePickFirst (mode 100755).
"""Pick one BED line per name across several BED files.

Given a list of BED files, read them in command-line order, index every line
by its name field (4th tab-separated column) and, for each name, write to
stdout the first line encountered in the first file that contains the name.
"""

import logging, sys, optparse
from collections import defaultdict, OrderedDict
from os.path import join, basename, dirname, isfile

# Zero-based index of the BED "name" column.
NAME_FIELD_IDX = 3


# ==== FUNCTIONs =====
def buildParser():
    """Return the optparse parser describing the command line interface."""
    parser = optparse.OptionParser("usage: %prog [options] bedFname1 bedFname2 ... - given a list of BED files, load all, index by the name field and for each name, output the first line encountered in the first file that contains the name")

    parser.add_option("-d", "--debug", dest="debug", action="store_true", help="show debug messages")
    #parser.add_option("-f", "--file", dest="file", action="store", help="run on file")
    #parser.add_option("", "--test", dest="test", action="store_true", help="do something")
    return parser


def pickFirstLines(filenames):
    """Scan the BED files in order and keep the first line seen for each name.

    filenames: iterable of paths to tab-separated BED files with >= 4 columns.

    Returns a tuple (nameToLine, lCount):
      nameToLine - OrderedDict mapping name -> the raw first line carrying that
                   name (line terminator kept as read), in first-seen order.
      lCount     - total number of lines read over all files.

    Raises ValueError, naming the file and line number, if a line has fewer
    than four tab-separated fields and therefore no name column.
    """
    nameToLine = OrderedDict()
    lCount = 0
    for fname in filenames:
        # "with" guarantees the handle is closed even if a line is malformed
        with open(fname) as ifh:
            for lineNo, line in enumerate(ifh, 1):
                fields = line.rstrip("\n").split("\t")
                if len(fields) <= NAME_FIELD_IDX:
                    raise ValueError(
                        "%s:%d: expected at least %d tab-separated fields, found %d"
                        % (fname, lineNo, NAME_FIELD_IDX + 1, len(fields)))
                lCount += 1
                name = fields[NAME_FIELD_IDX]
                # only the first occurrence is ever printed, so later
                # duplicates do not need to be kept in memory
                if name not in nameToLine:
                    nameToLine[name] = line
    return nameToLine, lCount


# ----------- MAIN --------------
def main(argv=None):
    """Command line entry point; argv defaults to sys.argv[1:]."""
    parser = buildParser()
    (options, args) = parser.parse_args(argv)

    if options.debug:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    if not args:
        parser.print_help()
        sys.exit(1)

    nameToLine, lCount = pickFirstLines(args)
    logging.info("Loaded %d names, %d lines" % (len(nameToLine), lCount))

    for line in nameToLine.values():
        # the last line of a file may lack a newline; always terminate the
        # record so it is not glued to the next one
        if not line.endswith("\n"):
            line += "\n"
        sys.stdout.write(line)


if __name__ == "__main__":
    main()