dc32f222c1df1e9a154407b8311ccfebf398cc1f mmaddren Wed Dec 7 15:58:41 2011 -0800 added optional key parameter as per redmine ticket #6227, and updated unit tests diff --git python/lib/ucscgenomics/ra.py python/lib/ucscgenomics/ra.py index 3d3208a..906ba97 100644 --- python/lib/ucscgenomics/ra.py +++ python/lib/ucscgenomics/ra.py @@ -66,85 +66,78 @@ Note that once again, we don't have to ensure 'fileName' exists. Also note that lambda s: 1 means always return true. Lambda expressions are always preferable to functions unless the expression would need to be reused multiple times. It is also best to reduce the set of stanzas as much as possible before operating over them. Filtering allows you to eliminate a lot of code. """ def __init__(self, filePath=None): OrderedDict.__init__(self) if filePath != None: self.read(filePath) - def read(self, filePath): + def read(self, filePath, key=None): """ Reads an rafile stanza by stanza, and internalizes it. """ file = open(filePath, 'r') #entry = None stanza = list() keyValue = '' - for line in file: + reading = 1 + + while reading: + line = file.readline() + if line == '': + reading = 0 line = line.strip() - if len(stanza) == 0 and (line.startswith('#') or line == ''): + if len(stanza) == 0 and (line.startswith('#') or (line == '' and reading)): OrderedDict.append(self, line) continue if line != '': stanza.append(line) elif len(stanza) > 0: if keyValue == '': - keyValue, name, entry = self.readStanza(stanza) + keyValue, name, entry = self.readStanza(stanza, key) else: - testKey, name, entry = self.readStanza(stanza) + testKey, name, entry = self.readStanza(stanza, key) if entry != None and keyValue != testKey: raise KeyError('Inconsistent Key ' + testKey) if entry != None: + if name != None or key == None: if name in self: raise KeyError('Duplicate Key ' + name) self[name] = entry stanza = list() - if len(stanza) > 0: - if keyValue == '': - keyValue, name, entry = self.readStanza(stanza) - else: - testKey, name, entry = self.readStanza(stanza) - if entry != None and keyValue != testKey: - raise KeyError('Inconsistent Key ' + testKey) - - if entry != None: - if name in self: - raise KeyError('Duplicate Key ' + name) - self[name] = entry - file.close() - def readStanza(self, stanza): + def readStanza(self, stanza, key=None): entry = RaStanza() - val1, val2 = entry.readStanza(stanza) + val1, val2 = entry.readStanza(stanza, key) return val1, val2, entry def iter(self): pass def iterkeys(self): for item in self._OrderedDict__ordering: if not(item.startswith('#') or item == ''): yield item def itervalues(self): for item in self._OrderedDict__ordering: @@ -359,64 +352,75 @@ else: tempStanza[t] = self[stanza][t] else: tempStanza[t] = self[stanza][t] ret[stanza] = tempStanza return ret def __str__(self): str = '' for item in self.iteritems(): if len(item) == 1: str += item[0].__str__() + '\n' else: str += item[1].__str__() + '\n' - return str + return str #.rsplit('\n', 1)[0] class RaStanza(OrderedDict): """ Holds an individual entry in the RaFile. """ def __init__(self): self._name = '' OrderedDict.__init__(self) @property def name(self): return self._name - def readStanza(self, stanza): + def readStanza(self, stanza, key=None): """ Populates this entry from a single stanza """ for line in stanza: self.readLine(line) - return self.readName(stanza[0]) + return self.readName(stanza, key) - def readName(self, line): + def readName(self, stanza, key=None): """ Extracts the Stanza's name from the value of the first line of the stanza. """ + if key == None: + line = stanza[0] + else: + line = None + for s in stanza: + if s.split(' ', 1)[0] == key: + line = s + break + if line == None: + return None + if len(line.split(' ', 1)) != 2: raise ValueError() names = map(str.strip, line.split(' ', 1)) self._name = names[1] return names def readLine(self, line): """ Reads a single line from the stanza, extracting the key-value pair """ if line.startswith('#') or line == '': OrderedDict.append(self, line) else: