64f127cb71252486ce096a832ac7fad3deb324a5 mmaddren Wed Sep 14 17:04:53 2011 -0700 large-scale renaming change to allow python to be built into cluster/bin, also mkGeoPkg now renames files diff --git python/lib/ucscgenomics/ra.py python/lib/ucscgenomics/ra.py new file mode 100644 index 0000000..886c187 --- /dev/null +++ python/lib/ucscgenomics/ra.py @@ -0,0 +1,214 @@ +import sys +import re +from ucscgenomics.ordereddict.OrderedDict import * + +class RaFile(OrderedDict): + """ + Stores an Ra file in a set of entries, one for each stanza in the file. + """ + + def __init__(self, filePath=None): + OrderedDict.__init__(self) + if filePath != None: + self.read(filePath) + + def read(self, filePath): + """ + Reads an rafile stanza by stanza, and internalizes it. + """ + + file = open(filePath, 'r') + + #entry = None + stanza = list() + keyValue = '' + + for line in file: + + line = line.strip() + + if len(stanza) == 0 and (line.startswith('#') or line == ''): + OrderedDict.append(self, line) + continue + + if line != '': + stanza.append(line) + elif len(stanza) > 0: + if keyValue == '': + keyValue, name, entry = self.readStanza(stanza) + else: + testKey, name, entry = self.readStanza(stanza) + if entry != None and keyValue != testKey: + raise KeyError('Inconsistent Key ' + testKey) + + if entry != None: + if name in self: + raise KeyError('Duplicate Key ' + name) + self[name] = entry + + stanza = list() + + if len(stanza) > 0: + if keyValue == '': + keyValue, name, entry = self.readStanza(stanza) + else: + testKey, name, entry = self.readStanza(stanza) + if entry != None and keyValue != testKey: + raise KeyError('Inconsistent Key ' + testKey) + + if entry != None: + if name in self: + raise KeyError('Duplicate Key ' + name) + self[name] = entry + + file.close() + + + def readStanza(self, stanza): + entry = RaStanza() + val1, val2 = entry.readStanza(stanza) + return val1, val2, entry + + + def iter(self): + pass + + + def iterkeys(self): + for item in self._OrderedDict__ordering: + if not(item.startswith('#') or item == ''): + yield item + + + def itervalues(self): + for item in self._OrderedDict__ordering: + if not (item.startswith('#') or item == ''): + yield self[item] + + + def iteritems(self): + for item in self._OrderedDict__ordering: + if not (item.startswith('#') or item == ''): + yield item, self[item] + else: + yield [item] + + + def filter(self, where, select): + """ + select useful data from matching criteria + + where: the conditional function that must be met. Where takes one argument, the stanza and should return true or false + select: the data to return. Takes in stanza, should return whatever to be added to the list for that stanza. + + For each stanza, if where(stanza) holds, it will add select(stanza) to the list of returned entities. + Also forces silent failure of key errors, so you don't have to check that a value is or is not in the stanza. + """ + + ret = list() + for stanza in self.itervalues(): + try: + if where(stanza): + ret.append(select(stanza)) + except KeyError: + continue + return ret + + + def __str__(self): + str = '' + for item in self.iteritems(): + if len(item) == 1: + str += item[0].__str__() + '\n' + else: + str += item[1].__str__() + '\n' + return str + + +class RaStanza(OrderedDict): + """ + Holds an individual entry in the RaFile. + """ + + def __init__(self): + self._name = '' + OrderedDict.__init__(self) + + @property + def name(self): + return self._name + + + def readStanza(self, stanza): + """ + Populates this entry from a single stanza + """ + + for line in stanza: + self.readLine(line) + + return self.readName(stanza[0]) + + + def readName(self, line): + """ + Extracts the Stanza's name from the value of the first line of the + stanza. + """ + + if len(line.split(' ', 1)) != 2: + raise ValueError() + + names = map(str.strip, line.split(' ', 1)) + self._name = names[1] + return names + + def readLine(self, line): + """ + Reads a single line from the stanza, extracting the key-value pair + """ + + if line.startswith('#') or line == '': + OrderedDict.append(self, line) + else: + raKey = line.split(' ', 1)[0] + raVal = '' + if (len(line.split(' ', 1)) == 2): + raVal = line.split(' ', 1)[1] + #if raKey in self: + #raise KeyError(raKey + ' already exists') + self[raKey] = raVal + + + def iterkeys(self): + for item in self._OrderedDict__ordering: + if not (item.startswith('#') or item == ''): + yield item + + + def itervalues(self): + for item in self._OrderedDict__ordering: + if not (item.startswith('#') or item == ''): + yield self[item] + + + def iteritems(self): + for item in self._OrderedDict__ordering: + if not (item.startswith('#') or item == ''): + yield item, self[item] + + + def iter(self): + iterkeys(self) + + + def __str__(self): + str = '' + for key in self: + if key.startswith('#'): + str += key + '\n' + else: + str += key + ' ' + self[key] + '\n' + + return str +