c7723adc566681d4f69232076df356c36aa5a1cf
max
  Fri Sep 9 16:03:22 2016 -0700
adding first version of pipeline for crispr tracks and trackDb statements supporting external extra fields, refs #17235

diff --git src/utils/tabRepl src/utils/tabRepl
new file mode 100755
index 0000000..ed0fb12
--- /dev/null
+++ src/utils/tabRepl
@@ -0,0 +1,56 @@
+#!/usr/bin/env python2.7
+
+from sys import *
+from optparse import OptionParser
+import logging
+
+# === COMMAND LINE INTERFACE, OPTIONS AND HELP ===
+parser = OptionParser("usage: %prog [options] replaceFile columnIndex tableFile - replace values in a column with values from a 2nd <oldVal>tab<newVal>-file")
+parser.add_option("-d", "--debug", dest="debug", action="store_true", help="show debug messages")
+parser.add_option("-s", "--skip", dest="skip", action="store_true", help="skip lines without replacement values")
+options, args = parser.parse_args()
+
+# log at INFO level by default, --debug switches to DEBUG
+logLevel = logging.INFO
+if options.debug:
+    logLevel = logging.DEBUG
+logging.basicConfig(level=logLevel)
+# ==== FUNCTIONs =====
+    
+def parseDict(fname):
+    """ parse a two-column tab-sep file (key<tab>val) and return it as a dict key -> val.
+    If a key occurs several times, the value from its last line wins.
+    Raises ValueError if a line does not have exactly two tab-separated fields. """
+    with open(fname) as inFile: # closed on exit, also when a malformed line raises
+        pairs = (line.rstrip("\n").split('\t') for line in inFile)
+        return dict(pairs)
+
+# ----------- MAIN --------------
+# exactly three positional arguments are needed; with any other number show the usage and stop
+if len(args)!=3:
+    parser.print_help()
+    exit(1)
+
+replaceFilename, fieldIdx, tableFilename = args
+
+# fieldIdx is used directly as an index into the tab-split row, so the first column is 0
+fieldIdx = int(fieldIdx)
+
+logging.info("Reading replace values...\n")
+replaceValues = parseDict(replaceFilename)
+
+tableFile = open(tableFilename, "rb")
+
+# lineNo is the 1-based number of the current input line. It is advanced for every line,
+# also for lines dropped by --skip, so the warning always names the right line of tableFile.
+for lineNo, line in enumerate(tableFile, 1):
+    row = line.rstrip("\n").split("\t")
+    key = row[fieldIdx]
+    if key not in replaceValues:
+        logging.warn("line %d: %s not found in replaceFile" % (lineNo, key))
+        if options.skip:
+            continue
+    # a key without a replacement is written out unchanged
+    row[fieldIdx] = replaceValues.get(key, key)
+    print "\t".join(row)
+tableFile.close()