00bad97789ce05043f1c9c90ef7dfa6494c280db wong Wed Feb 1 10:08:55 2012 -0800 reversing morgan's merge, and adding back in his comments diff --git python/lib/ucscgenomics/ra.py python/lib/ucscgenomics/ra.py index fd32b73..75386fe 100644 --- python/lib/ucscgenomics/ra.py +++ python/lib/ucscgenomics/ra.py @@ -128,30 +128,31 @@ ''' Override this to create custom stanza behavior in derived types. IN stanza: list of strings with keyval data key: optional key for selective key filtering. Don't worry about it OUT namekey: the key of the stanza's name nameval: the value of the stanza's name entry: the stanza itself ''' entry = RaStanza() if entry.readStanza(stanza, key) == None: return None, None, None + entry = RaStanza() val1, val2 = entry.readStanza(stanza, key) return val1, val2, entry def iter(self): pass def iterkeys(self): for item in self._OrderedDict__ordering: if not(item.startswith('#') or item == ''): yield item def itervalues(self): @@ -229,52 +230,59 @@ one with identical values being preserved, differences are marked with a >>> and <<< ''' mergedKeys = ucscUtils.mergeList(list(self), list(other)) selfKeys = set(self) otherKeys = set(other) newCommon = RaFile() p = re.compile('^\s*#') p2 = re.compile('^\s*$') for i in mergedKeys: if p.match(i) or p2.match(i): newCommon.append(i) continue if i not in selfKeys: - newCommon.append(other[i]) + newCommon[i] = other[i] + continue if i not in otherKeys: - newCommon.append(self[i]) + newCommon[i] = self[i] + continue if i in otherKeys and i in selfKeys: newStanza = RaStanza() selfStanzaKeys = set(self[i].iterkeys()) otherStanzaKeys = set(other[i].iterkeys()) - stanzaKeys = ucscUtils.mergeList(list(self[i].iterkeys()), list(other[i].iterkeys())) + stanzaKeys = ucscUtils.mergeList(list(self[i]), list(other[i])) for j in stanzaKeys: + if p.match(j): + newStanza.append(j) + continue if j not in selfStanzaKeys: newStanza[j] = other[i][j] + continue if j not in otherStanzaKeys: newStanza[j] = self[i][j] + continue if j in selfStanzaKeys and j in otherStanzaKeys: if self[i][j] == other[i][j]: newStanza[j] = self[i][j] else: in_j = '>>>>>%s' % j out_j = '<<<<<%s' % j newStanza[out_j] = self[i][j] newStanza[in_j] = other[i][j] - newCommon.append(newStanza) + newCommon[i] = newStanza return newCommon def summaryDiff(self, other): ''' Input: RaFile object being compared. Output: RaFile with differences. Returns ***partial*** stanzas of ***anything*** different from the self dictionary compared to the other dictionary. For versatility, it only returns stanzas from the self Ra file. In other words, it returns the self dictionary lines that are either not present in or different from the other dictionary. @@ -289,52 +297,52 @@ if stanza.name not in other.keys(): RetThis[stanza.name] = stanza else: if stanza.difference(other[stanza.name]): RetThis[stanza.name] = stanza.difference(other[stanza.name]) return RetThis def changeSummary(self, otherRa): ''' Input: Two RaFile objects Output: Dictionary showing differences between stanzas, list of added and dropeed stanzas ''' retDict = collections.defaultdict(list) - dropList = set(self.iterkeys()) - set(otherRa.iterkeys()) - addList = set(otherRa.iterkeys()) - set(self.iterkeys()) + addList = set(self.iterkeys()) - set(otherRa.iterkeys()) + dropList = set(otherRa.iterkeys()) - set(self.iterkeys()) common = set(self.iterkeys()) & set(otherRa.iterkeys()) p = re.compile('^\s*#') for stanza in common: if p.match(stanza): continue for key in self[stanza]: if p.match(key): continue if key in otherRa[stanza]: if self[stanza][key] != otherRa[stanza][key]: retDict[stanza].append("Changed %s from %s -> %s" %(key, otherRa[stanza][key], self[stanza][key])) else: retDict[stanza].append("Added %s -> %s" %(key, self[stanza][key])) for key in otherRa[stanza]: if p.match(key): continue if key not in self[stanza]: retDict[stanza].append("Dropped %s -> %s" %(key, otherRa[stanza][key])) - return retDict, dropList, addList + return retDict, addList, dropList def diffFilter(self, select, other): ''' Input: Lambda function of desired comparison term RaFile object being compared. Output: RaFile with differences. Filter returns ***full*** stanzas of a ***select function*** from the self dictionary compared to the other dictionary. For versatility, it only returns stanzas from the self Ra file. In other words, it only returns self dictionary stanzas with the function term that are either not found in or different from the other dictionary. @@ -368,68 +376,62 @@ RetThis[stanza.name] = stanza elif thisSelectDict[stanza.name] != thatSelectDict[stanza.name]: RetThis[stanza.name] = stanza return RetThis def updateDiffFilter(self, term, other): ''' Replicates updateMetadata. Input: Term Other raFile Output: Merged RaFile Stanzas found in 'self' and 'other' that have the 'Term' in 'other' - are overwritten (or inserted if not found) into 'self'. Final merged - dictionary is returned. + are overwritten (or inserted if not found) into 'self'. + Final merged dictionary is returned. ''' ret = self common = set(self.iterkeys()) & set(other.iterkeys()) for stanza in common: if term not in self[stanza] and term not in other[stanza]: continue if term in self[stanza] and term not in other[stanza]: del ret[stanza][term] continue - if term in other[stanza]: #Remake stanza to keep order of terms tempStanza = RaStanza() tempStanza._name = stanza - try: - tempStanza['metaObject'] = self[stanza]['metaObject'] - tempStanza['objType'] = self[stanza]['objType'] - termList = self[stanza].keys() - termList.remove('metaObject') - termList.remove('objType') - except KeyError: - termList = self[stanza].keys() - if term not in termList: - termList.append(term) - for t in sorted(termList, key=str.lower): - if t == term: - if t not in self[stanza]: - tempStanza[t] = other[stanza][t] - elif self[stanza][t] != other[stanza][t]: - tempStanza[t] = other[stanza][t] + selfKeys = list(self[stanza].iterkeys()) + otherKeys = list(other[stanza].iterkeys()) + newOther = list() + #filter out keys in other that aren't in self, or the term we're interested in + for i in otherKeys: + if not i in selfKeys and i != term: + continue else: - tempStanza[t] = self[stanza][t] + newOther.append(i) + #merge self keylist and filtered other list + masterList = ucscUtils.mergeList(newOther, selfKeys) + for i in masterList: + if i == term: + tempStanza[i] = other[stanza][i] else: - tempStanza[t] = self[stanza][t] + tempStanza[i] = self[stanza][i] ret[stanza] = tempStanza - return ret def printTrackDbFormat(self): ''' Converts a .ra file into TrackDb format. Returns a printable string. ''' retstring = "" parentTrack = "" tier = 0 commentList = [] p = re.compile('^.*parent') p2 = re.compile('^.*subTrack') for stanza in self: if stanza == "":