python/ucscgenomics/rafile/raInspect.py 4e4379966f9fb72da067031f730aa181372d75a9

4e4379966f9fb72da067031f730aa181372d75a9
mmaddren
  Thu Feb 24 14:28:13 2011 -0800
Updated the raFile suite and added raInspect, which can verify cv.ra as well as perform filtering tasks on any ra file.
diff --git python/ucscgenomics/rafile/raInspect.py python/ucscgenomics/rafile/raInspect.py
index 189e6e5..9600d6b 100644
--- python/ucscgenomics/rafile/raInspect.py
+++ python/ucscgenomics/rafile/raInspect.py
@@ -1,72 +1,113 @@
 import sys
 import re
 import raFile
 import filterFile
 
 path = sys.argv[1]
 fname = sys.argv[2]
 filter = list()
 
 filter = filterFile.FilterFile()
-filter.read(fname, '_name')
+filter.read(fname)
 
 ra = raFile.RaFile()
-ra.read(path, 'term')
+ra.read(path)
 newra = raFile.RaFile()
 
 # create a dictionary to keep track of which RaEntries have what terms
 keyDict = dict()
 for entry in ra.iterValues():
+    if entry is None:  # identity test: '==' would go through the entry's own comparison
+        continue
     for elem in entry.iterKeys():
         if elem not in keyDict:
             keyDict[elem] = list()
         keyDict[elem].append(entry.getValueAt(0))
 
 cellDict = dict()
 for entry in ra.iterValues():
     elem = entry.getValue('cell')
-    if elem not in keyDict:
+    if elem not in cellDict:  # was keyDict: the list must be created once per cell value
         cellDict[elem] = list()
     cellDict[elem].append(entry.getValueAt(0))
 
 #print cellDict
 
 # create a dictionary to keep track of what entries match what filters
 matchDict = dict()
-for entry in ra.iterValues():
+for entry in filter.iterValues():
     matchDict[entry.getValueAt(0)] = list()
 
 # a dictionary to keep track of user made groups based on filter matches
 groupDict = dict()
 
 for i in range(ra.count()):
     
+    if ra.getKeyAt(i).startswith('#'):
+        continue
+
     r = ra.getValueAt(i)
     for j in range(filter.count()):
 
         match = True
         f = filter.getValueAt(j) 
-        print f
-        for k in f:
-            if k[0].startswith('_'):
+        
+        if f.getKeyAt(0).startswith('_filter'):
+            for k in range(f.count()):
+                if f.getKeyAt(k).startswith('_'):
                 continue
-            print k
-            print k[1] + ', ' + str(r.getValue(k[0]))
-            if r.getValue(k[0]) == None or not re.match(k[1], r.getValue(k[0])):
+                if r.getValue(f.getKeyAt(k)) == None or not re.match(f.getValueAt(k), r.getValue(f.getKeyAt(k))):
                 match = False
                 break
 
         if match == True:
-            newra.add(ra.getKeyAt(i), r)
-            matchDict[f[0][1]].append(ra.getKeyAt(i))
-
-            for m in f.Match:
+                matchDict[f.getValueAt(0)].append(ra.getKeyAt(i))
+                for m in f.iterMatches():
                 if m[1] == 'add':
+                        if m[0] not in groupDict:
+                            groupDict[m[0]] = list()
+                        #print 'adding ' + ra.getKeyAt(i) + ' to ' +  m[0]
                     groupDict[m[0]].append(ra.getKeyAt(i))
                 elif m[1] == 'remove':
+                        if m[0] in groupDict:
+                            #print 'removing ' + ra.getKeyAt(i) + ' from ' +  m[0]
                     groupDict[m[0]].remove(ra.getKeyAt(i))
 
-            break
+#print groupDict
+
+for j in range(filter.count()):
+    f = filter.getValueAt(j)
+    if f.getKeyAt(0).startswith('_check'):
+        # bucket the first value of every entry by its value for the '_key' field
+        checkDict = dict()
+        for entry in ra.iterValues():
+            # iterValues() may yield None (the keyDict loop skips it too)
+            if entry is None:
+                continue
+            elem = entry.getValue(f.getValue('_key'))
+            if elem not in checkDict:
+                checkDict[elem] = list()
+            checkDict[elem].append(entry.getValueAt(0))
+        # a bucket with more than one member means the field value is shared
+        for key in checkDict.iterkeys():
+            entry = checkDict[key]
+            if len(entry) > 1:
+                for m in f.iterMatches():
+                    # build the label before branching: 'remove' needs it as well
+                    hashstr = m[0] + ' (' + str(ra.getValue(entry[0]).getValue(f.getValue('_key'))) + ')'
+                    if m[1] == 'add':
+                        if hashstr not in groupDict:
+                            groupDict[hashstr] = list()
+                        groupDict[hashstr].extend(entry)
+                    elif m[1] == 'remove':
+                        if hashstr in groupDict:
+                            for rem in entry:
+                                groupDict[hashstr].remove(rem)
+
+# print each group as "name (size):", then its members one per line, then a blank line
+for name in groupDict.iterkeys():
+    print name + ' (' + str(len(groupDict[name])) + '):'
+    for member in groupDict[name]:
+        print member
+    print ''
 
-print matchDict
-#newra.write()