273486dcbd4a0fd80d7f09be639f32cd58598881
max
  Wed Jul 20 15:03:46 2016 -0700
adding uniprot track for hg38

diff --git src/utils/pslSameGene src/utils/pslSameGene
new file mode 100755
index 0000000..7924817
--- /dev/null
+++ src/utils/pslSameGene
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+
+import logging, sys, optparse
+from collections import defaultdict
+from os.path import join, basename, dirname, isfile
+
+# ==== functions =====
+    
+def parseArgs():
+    " setup logging, parse command line arguments and options. -h shows auto-generated help page "
+    parser = optparse.OptionParser("""usage: %prog [options] queryGenes targetGenes inPsl - pick out match of query against target where both query and target are from the same gene
+    
+    queryGenes and targetGenes are two files with columns queryId,geneId
+    """)
+
+    parser.add_option("-d", "--debug", dest="debug", action="store_true", help="show debug messages")
+    #parser.add_option("-f", "--file", dest="file", action="store", help="run on file") 
+    #parser.add_option("", "--test", dest="test", action="store_true", help="do something") 
+    (options, args) = parser.parse_args()
+
+    if args==[]:
+        parser.print_help()
+        exit(1)
+
+    if options.debug:
+        logging.basicConfig(level=logging.DEBUG)
+    else:
+        logging.basicConfig(level=logging.INFO)
+    return args, options
+
+# ----------- main --------------
+def readDict(fname):
+    " return map query (transcript) -> target (geneId) "
+    ret = {}
+    for line in open(fname):
+        query, target = line.strip().split()
+        ret[query] = target
+    return ret
+
+def main():
+    args, options = parseArgs()
+
+    queryGeneFname, targetGeneFname, inPslFname = args
+    queryGenes = readDict(queryGeneFname)
+    targetGenes = readDict(targetGeneFname)
+
+    for line in open(inPslFname):
+        row = line.strip("\n").split("\t")
+        qName, tName = row[9], row[13]
+        if queryGenes.get(qName, 1) != targetGenes.get(tName, 2): # no gene? -> 1!=2 -> skip
+            continue
+        print line,
+
+main()