4e4379966f9fb72da067031f730aa181372d75a9 mmaddren Thu Feb 24 14:28:13 2011 -0800 Updated raFile suite, added raInspect which can verify cv.ra as well as perform filtering tasks on any ra file diff --git python/ucscgenomics/rafile/raInspect.py python/ucscgenomics/rafile/raInspect.py index 189e6e5..9600d6b 100644 --- python/ucscgenomics/rafile/raInspect.py +++ python/ucscgenomics/rafile/raInspect.py @@ -1,72 +1,113 @@ import sys import re import raFile import filterFile path = sys.argv[1] fname = sys.argv[2] filter = list() filter = filterFile.FilterFile() -filter.read(fname, '_name') +filter.read(fname) ra = raFile.RaFile() -ra.read(path, 'term') +ra.read(path) newra = raFile.RaFile() # create a dictionary to keep track of which RaEntries have what terms keyDict = dict() for entry in ra.iterValues(): + if entry == None: + continue for elem in entry.iterKeys(): if elem not in keyDict: keyDict[elem] = list() keyDict[elem].append(entry.getValueAt(0)) cellDict = dict() for entry in ra.iterValues(): elem = entry.getValue('cell') if elem not in keyDict: cellDict[elem] = list() cellDict[elem].append(entry.getValueAt(0)) #print cellDict # create a dictionary to keep track of what entries match what filters matchDict = dict() -for entry in ra.iterValues(): +for entry in filter.iterValues(): matchDict[entry.getValueAt(0)] = list() # a dictionary to keep track of user made groups based on filter matches groupDict = dict() for i in range(ra.count()): + if ra.getKeyAt(i).startswith('#'): + continue + r = ra.getValueAt(i) for j in range(filter.count()): match = True f = filter.getValueAt(j) - print f - for k in f: - if k[0].startswith('_'): + + if f.getKeyAt(0).startswith('_filter'): + for k in range(f.count()): + if f.getKeyAt(k).startswith('_'): continue - print k - print k[1] + ', ' + str(r.getValue(k[0])) - if r.getValue(k[0]) == None or not re.match(k[1], r.getValue(k[0])): + if r.getValue(f.getKeyAt(k)) == None or not re.match(f.getValueAt(k), r.getValue(f.getKeyAt(k))): match = False break if match == True: - newra.add(ra.getKeyAt(i), r) - matchDict[f[0][1]].append(ra.getKeyAt(i)) - - for m in f.Match: + matchDict[f.getValueAt(0)].append(ra.getKeyAt(i)) + for m in f.iterMatches(): if m[1] == 'add': + if m[0] not in groupDict: + groupDict[m[0]] = list() + #print 'adding ' + ra.getKeyAt(i) + ' to ' + m[0] groupDict[m[0]].append(ra.getKeyAt(i)) elif m[1] == 'remove': + if m[0] in groupDict: + #print 'removing ' + ra.getKeyAt(i) + ' from ' + m[0] groupDict[m[0]].remove(ra.getKeyAt(i)) - break +#print groupDict + +for j in range(filter.count()): + f = filter.getValueAt(j) + if f.getKeyAt(0).startswith('_check'): + checkDict = dict() + #print ra + for entry in ra.iterValues(): + elem = entry.getValue(f.getValue('_key')) + #print entry.getValueAt(0) + if elem not in checkDict: + checkDict[elem] = list() + checkDict[elem].append(entry.getValueAt(0)) + #print checkDict + for key in checkDict.iterkeys(): + if checkDict[key] == None: + continue + entry = checkDict[key] + if len(entry) > 1: + for m in f.iterMatches(): + if m[1] == 'add': + #print entry + hashstr = m[0] + ' (' + str(ra.getValue(entry[0]).getValue(f.getValue('_key'))) + ')' + if hashstr not in groupDict: + groupDict[hashstr] = list() + groupDict[hashstr].extend(entry) + elif m[1] == 'remove': + if hashstr in groupDict: + for rem in entry: + groupDict[hashstr].remove(rem) + +for i in groupDict.iterkeys(): + s = '' + print s + i + ' (' + str(len(groupDict[i])) + '):' + for j in groupDict[i]: + print j + print '' -print matchDict -#newra.write()