a0f9d361a3f365777aab3843309aa534e5277e03
wong
  Thu Jan 5 10:56:29 2012 -0800
first draft of raMerging functions
diff --git python/lib/ucscgenomics/ra.py python/lib/ucscgenomics/ra.py
index d900f2a..9b675f3 100644
--- python/lib/ucscgenomics/ra.py
+++ python/lib/ucscgenomics/ra.py
@@ -1,18 +1,19 @@
-import sys
+import sys, string
 import re
 from ucscgenomics.ordereddict import OrderedDict
+from ucscgenomics import ucscUtils
 import collections
 
 class RaFile(OrderedDict):
     '''
     Stores a Ra file in a set of entries, one for each stanza in the file.
 
     To make a RaFile, it is usually easiest to just pass it's path:
         rafile = ra.RaFile('kent/src/hg/.../wgEncodeSomeRaFile.ra')
 
     The data is read in and organized as a collection of stanzas. Ra files
     store the stanza by it's name, so to access a specific stanza, say:
         somestanza = rafile['wgEncodeSomeStanzaName']
 
     Once you have a stanza, you may want specific data about that stanza.
     Stanzas are, as ra files, organized as a collection of terms. Therefore
@@ -192,30 +193,78 @@
         to be added to the list for that stanza.
 
         For each stanza, if where(stanza) holds, it will add select(stanza)
         to the list of returned entities. Also forces silent failure of key
         errors, so you don't have to check that a value is or is not in the stanza.
         '''
         ret = RaFile()
         for stanza in self.itervalues():
             try:
                 if where(stanza):
                         ret[stanza.name] = stanza
             except KeyError:
                 continue
         return ret
 
+    def mergeRa(self, other):
+        '''
+        Merge this RaFile with another RaFile into a new RaFile.
+
+        Input:
+            other: the RaFile object to merge with self
+        Output:
+            A new merged RaFile, built in the key order given by ucscUtils.mergeList
+
+        A stanza found in only one file is placed in the result as is. For a
+        stanza found in both, terms with identical values are collapsed into
+        one; a term whose values differ is kept twice, as '<<<<<term' with the
+        value from self and '>>>>>term' with the value from other.
+        '''
+        mergedKeys = ucscUtils.mergeList(list(self), list(other))
+        selfKeys = set(self)
+        otherKeys = set(other)
+        newCommon = RaFile()
+        comment = re.compile(r'^\s*#')
+        blank = re.compile(r'^\s*$')
+        for name in mergedKeys:
+            if comment.match(name) or blank.match(name):
+                # NOTE(review): append() is not visible here; confirm it adds a bare key
+                newCommon.append(name)
+                continue
+            if name not in selfKeys:
+                newCommon[name] = other[name]
+                continue
+            if name not in otherKeys:
+                newCommon[name] = self[name]
+                continue
+            # The stanza exists in both files: merge it term by term.
+            mine, theirs = self[name], other[name]
+            mineTerms = set(mine.iterkeys())
+            theirTerms = set(theirs.iterkeys())
+            newStanza = RaStanza()
+            for term in ucscUtils.mergeList(list(mine.iterkeys()), list(theirs.iterkeys())):
+                if term not in mineTerms:
+                    newStanza[term] = theirs[term]
+                elif term not in theirTerms or mine[term] == theirs[term]:
+                    newStanza[term] = mine[term]
+                else:
+                    newStanza['<<<<<%s' % term] = mine[term]
+                    newStanza['>>>>>%s' % term] = theirs[term]
+            newCommon[name] = newStanza
+        return newCommon
+
+
     def summaryDiff(self,other):
         '''
         Input:
             RaFile object being compared.
         Output: RaFile with differences.
 
         Returns ***partial*** stanzas of ***anything*** different
         from the self dictionary compared to the other dictionary.
         For versatility, it only returns stanzas from the self Ra file. In other
         words, it returns the self dictionary lines that are either not present
         in or different from the other dictionary.
 
         To obtain full set of differences, run summaryDiff twice
         ra1 = this.summaryDiff(that)
         and