python/lib/ucscgenomics/ra.py 0662bc8687751472e6eebd23d88891cc7a4a3e98

0662bc8687751472e6eebd23d88891cc7a4a3e98
mmaddren
  Mon Dec 12 11:26:28 2011 -0800
small bugfix to ra.py and changed tabs to spaces in ordereddict
diff --git python/lib/ucscgenomics/ra.py python/lib/ucscgenomics/ra.py
index 906ba97..f8e44ba 100644
--- python/lib/ucscgenomics/ra.py
+++ python/lib/ucscgenomics/ra.py
@@ -1,22 +1,22 @@
 import sys
 import re
 from ucscgenomics.ordereddict import OrderedDict
 import collections
 
 class RaFile(OrderedDict):
-    """
+    '''
     Stores a Ra file in a set of entries, one for each stanza in the file.
 
     To make a RaFile, it is usually easiest to just pass its path:
         rafile = ra.RaFile('kent/src/hg/.../wgEncodeSomeRaFile.ra')
 
     The data is read in and organized as a collection of stanzas. Ra files
     store the stanza by its name, so to access a specific stanza, say:
         somestanza = rafile['wgEncodeSomeStanzaName']
 
     Once you have a stanza, you may want specific data about that stanza.
     Stanzas are, as ra files, organized as a collection of terms. Therefore
     to get the description of the stanza, we can say:
         somedescription = somestanza['description']
 
     You can also access a stanza's name from the stanza itself, since by the
@@ -59,41 +59,41 @@
             lambda s: s)
 
     We don't always have to just return the stanza in the second parameter
     however. If we wanted to, for each stanza, return the file associated
     with that stanza, we could easily do that as well. This would return a
     simple list of the string filenames in a ra file:
         files = rafile.filter(lambda s: 1, lambda s: s['fileName'])
 
     Note that once again, we don't have to ensure 'fileName' exists. Also note
     that lambda s: 1 means always return true. Lambda expressions are always
     preferable to functions unless the expression would need to be reused
     multiple times. It is also best to reduce the set of stanzas as much as
     possible before operating over them.
 
     Filtering allows you to eliminate a lot of code.
-    """
+    '''
 
     def __init__(self, filePath=None):
         '''
         Creates an empty RaFile and, when filePath is given, fills it by
         calling read(filePath).
         '''
         OrderedDict.__init__(self)
         # Identity test: None is a singleton, and 'is not' cannot be fooled
         # by an argument whose type overrides the comparison operators.
         if filePath is not None:
             self.read(filePath)
 
     def read(self, filePath, key=None):
-        """
+        '''
         Reads an rafile stanza by stanza, and internalizes it.
-        """
+        '''
 
         file = open(filePath, 'r')
 
         #entry = None
         stanza = list()
         keyValue = ''
 
         reading = 1
         
         while reading:
             line = file.readline()
             if line == '':
                 reading = 0
         
             line = line.strip()
@@ -146,192 +146,192 @@
 
 
     def iteritems(self):
         '''
         Walks the stored ordering and yields one item per entry: a
         (name, value) tuple for regular entries, or a one-element list
         holding the raw text for entries that are empty or start with '#'.
         '''
         for entry in self._OrderedDict__ordering:
             if entry.startswith('#') or entry == '':
                 # comment / blank entries carry no value of their own
                 yield [entry]
                 continue
             yield entry, self[entry]
 
 
     def append(self, item):
         '''
         Appends item by delegating directly to OrderedDict.append.
         '''
         OrderedDict.append(self, item)
 
 
     def filter(self, where, select):
-        """
+        '''
         Collects select(stanza) for every stanza that satisfies where(stanza).
 
         where: the conditional function that must be met. Where takes one
         argument, the stanza, and should return true or false.
         select: the data to return. Takes in a stanza and returns whatever
         should be added to the list for that stanza.
 
         Returns a list with one entry per matching stanza, in the order
         produced by self.itervalues(). A KeyError raised by where or select
         is swallowed and that stanza is skipped, so you don't have to check
         that a value is or is not in the stanza.
-        """
+        '''
 
         ret = list()
         for stanza in self.itervalues():
             try:
                 if where(stanza):
                     ret.append(select(stanza))
             except KeyError:
                 # a missing term is treated as "this stanza does not match"
                 continue
         return ret
 
     def filter2(self, where):
-        """
+        '''
         Selects the stanzas matching a criterion.
         Filter2 returns a Ra dictionary. Easier to use but more memory intensive.
 
         where: the conditional function that must be met. Where takes one
         argument, the stanza, and should return true or false.
 
         For each stanza, if where(stanza) holds, the stanza itself is stored
         in the returned RaFile under stanza.name. A KeyError raised by where
         is swallowed and that stanza is skipped, so you don't have to check
         that a value is or is not in the stanza.
-        """
+        '''
         ret = RaFile()
         for stanza in self.itervalues():
             try:
                 if where(stanza):
                         ret[stanza.name] = stanza
             except KeyError:
                 # a missing term is treated as "this stanza does not match"
                 continue
         return ret
 
     def summaryDiff(self,other):
-        """
+        '''
         Input:
             RaFile object being compared.
         Output: RaFile with differences.
 
         Returns ***partial*** stanzas of ***anything*** different
         from the self dictionary compared to the other dictionary.
         For versatility, it only returns stanzas from the self Ra file. In other
         words, it returns the self dictionary lines that are either not present
         in or different from the other dictionary.
 
         To obtain full set of differences, run summaryDiff twice
         ra1 = this.summaryDiff(that)
         and
         ra2 = that.summaryDiff(this)
-        """
+        '''
         this = RaFile()
         RetThis = RaFile()
         for stanza in self.itervalues():
             if stanza.name not in other.keys():
                 RetThis[stanza.name] = stanza
             else:
                 if stanza.difference(other[stanza.name]):
                     RetThis[stanza.name] = stanza.difference(other[stanza.name])
         return RetThis
 
     def changeSummary(self, otherRa):
-        """
+        '''
         Input:
             otherRa: the RaFile object to compare against self
         Output:
             A tuple (retDict, dropList, addList):
                 retDict: defaultdict(list) mapping the name of each stanza
                     present in both files to messages describing the terms
                     that differ
                 dropList: set of stanza names in self but not in otherRa
                 addList: set of stanza names in otherRa but not in self
 
         Stanza names and terms that start with optional whitespace followed
         by '#' are skipped.
 
         NOTE(review): a term found only in self is reported as "Added" and a
         term found only in otherRa as "Dropped", which is the opposite
         direction from dropList/addList - confirm this is intended.
-        """
+        '''
         retDict = collections.defaultdict(list)
         dropList = set(self.iterkeys()) - set(otherRa.iterkeys())
         addList = set(otherRa.iterkeys()) - set(self.iterkeys())
         common = set(self.iterkeys()) & set(otherRa.iterkeys())
 
         # matches comment entries, which are never compared
         p = re.compile('^\s*#')
         for stanza in common:
             if p.match(stanza):
                 continue
             # first pass: terms of self, compared against otherRa
             for key in self[stanza]:
                 if p.match(key):
                     continue
                 if key in otherRa[stanza]:
                     if self[stanza][key] != otherRa[stanza][key]:
                         retDict[stanza].append("Changed %s from  %s -> %s" %(key, self[stanza][key], otherRa[stanza][key]))
                 else:
                     retDict[stanza].append("Added %s -> %s" %(key, self[stanza][key]))
             # second pass: terms that exist only in otherRa
             for key in otherRa[stanza]:
                 if p.match(key):
                     continue
                 if key not in self[stanza]:
                     retDict[stanza].append("Dropped %s -> %s" %(key, otherRa[stanza][key]))
         return retDict, dropList, addList
 
     def diffFilter(self, select, other):
-        """
+        '''
         Input:
             select: lambda function returning the desired comparison term
                 of a stanza
             other: RaFile object being compared.
         Output: RaFile with differences.
 
         Filter returns ***full*** stanzas of a ***select function*** from
         the self dictionary compared to the other dictionary. For
         versatility, it only returns stanzas from the self Ra file. In other
         words, it only returns self dictionary stanzas with the function term
         that are either not found in or different from the other
         dictionary. Stanzas for which select returns a false value or raises
         KeyError are ignored on both sides.
 
         To obtain full set of differences, run diffFilter twice
         ra1 = this.diffFilter(select function, that)
         and
         ra2 = that.diffFilter(select function, this)
-        """
+        '''
         this = RaFile()
         RetThis = RaFile()
         thisSelectDict = dict()
         thatSelectDict = dict()
         #Build 2 dict of stanzas to later compare line-by-line
         for stanza in self.itervalues():
             try:
                 if select(stanza):
                     this[stanza.name] = stanza #'this' only records stanzas of the self dict
                     thisSelectDict[stanza.name] = select(stanza)
             except KeyError:
                 continue
         for stanza in other.itervalues():
             #Same pattern as filter2 but kept inline for clarity.
             try:
                 if select(stanza):
                     thatSelectDict[stanza.name] = select(stanza)
             except KeyError:
                 continue
         #Compare this and that dict: keep stanzas missing from, or selecting
         #a different value than, the other file
         for stanza in this.itervalues():
             if stanza.name not in thatSelectDict:
                 RetThis[stanza.name] = stanza
             elif thisSelectDict[stanza.name] != thatSelectDict[stanza.name]:
                 RetThis[stanza.name] = stanza
         return RetThis
 
     def updateDiffFilter(self, term, other):
-        """
+        '''
         Replicates updateMetadata.
         Input:
             Term
             Other raFile
 
         Output:
             Merged RaFile
                 Stanzas found in 'self' and 'other' that have the 'Term' in 'other'
                 are overwritten (or inserted if not found) into 'self'. Final merged
                 dictionary is returned.
-        """
+        '''
         ret = self
         common = set(self.iterkeys()) & set(self.iterkeys())
         for stanza in common:
             if term not in self[stanza] and term not in other[stanza]:
                 continue
             if term in self[stanza] and term not in other[stanza]:
                     del ret[stanza][term]
                     continue
 
             if term in other[stanza]:
                 #Remake stanza to keep order of terms
                 tempStanza = RaStanza()
                 tempStanza._name = stanza
                 try:
                     tempStanza['metaObject'] = self[stanza]['metaObject']
@@ -356,82 +356,84 @@
                 ret[stanza] = tempStanza
 
         return ret
 
     def __str__(self):
         str = ''
         for item in self.iteritems():
             if len(item) == 1:
                 str += item[0].__str__() + '\n'
             else:
                 str += item[1].__str__() + '\n'
         return str #.rsplit('\n', 1)[0]
 
 
 class RaStanza(OrderedDict):
-    """
+    '''
     Holds an individual entry in the RaFile.
-    """
+    '''
 
     def __init__(self):
         '''
         Creates an empty stanza whose name and name type are both ''.
         '''
         # _name is what the name property returns
         self._name = ''
+        self._nametype = ''
         OrderedDict.__init__(self)
 
     @property
     def name(self):
         '''
         The stanza's name, read from self._name.
         '''
         return self._name
 
 
     def readStanza(self, stanza, key=None):
-        """
+        '''
         Populates this entry from a single stanza.
 
         stanza: the lines of the stanza; each one is passed to readLine.
         key: optional term whose line supplies the stanza name; it is
         forwarded to readName unchanged.
 
         Returns whatever readName(stanza, key) returns.
-        """
+        '''
 
         for line in stanza:
             self.readLine(line)
 
         return self.readName(stanza, key)
 
 
     def readName(self, stanza, key=None):
-        """
+        '''
         Extracts the Stanza's name from the value of the first line of the
         stanza, or, when key is given, from the first line whose leading
         word equals key.
 
         Returns the stripped pair [term, value] of the chosen line after
         storing value as the stanza name, or None when key is given but no
         line starts with it.
 
         Raises ValueError when the chosen line has no space-separated value.
-        """
+        '''
         
         if key == None:
             # default: the first line names the stanza
             line = stanza[0]
         else:
             line = None
             for s in stanza:
                 if s.split(' ', 1)[0] == key:
                     line = s
                     break
             if line == None:
                 return None
         
         if len(line.split(' ', 1)) != 2:
             raise ValueError()
 
         names = map(str.strip, line.split(' ', 1))
+        self._nametype = names[0]
         self._name = names[1]
         return names
 
     def readLine(self, line):
-        """
+        '''
         Reads a single line from the stanza, extracting the key-value pair.
 
         A line that starts with '#' or is empty is stored as-is through
         OrderedDict.append. Any other line is split on its first space: the
         left part becomes the key and the remainder the value ('' when the
         line contains no space). No duplicate-key check is performed; that
         check is commented out below.
-        """
+        '''
 
         if line.startswith('#') or line == '':
             OrderedDict.append(self, line)
         else:
             raKey = line.split(' ', 1)[0]
             raVal = ''
             if (len(line.split(' ', 1)) == 2):
                 raVal = line.split(' ', 1)[1]
             #if raKey in self:
                 #raise KeyError(raKey + ' already exists')
             self[raKey] = raVal
 
     def difference(self, other):
         '''
         Complement function to summaryDiff.
@@ -460,25 +462,26 @@
     def itervalues(self):
         '''
         Yields the stored value of every entry in the ordering, skipping
         entries that are empty or start with '#'.
         '''
         for key in self._OrderedDict__ordering:
             if key.startswith('#') or key == '':
                 # comment / blank entries have no value to report
                 continue
             yield self[key]
 
 
     def iteritems(self):
         '''
         Yields a (key, value) tuple for every entry in the ordering,
         skipping entries that are empty or start with '#'.
         '''
         for key in self._OrderedDict__ordering:
             if key.startswith('#') or key == '':
                 # comment / blank entries are not reported as items
                 continue
             yield key, self[key]
 
 
     def iter(self):
         '''
         Returns the key iterator produced by self.iterkeys().
         '''
         # The previous body called a bare iterkeys(self) - a name that is not
         # defined at module scope in the visible code - and discarded the
         # result, so the method could never hand back an iterator.
         # NOTE(review): assumes iterkeys() is available on this class, as it
         # is on the other OrderedDict-based objects in this file - confirm.
         return self.iterkeys()
 
+        
     def __str__(self):
         str = ''
         for key in self:
             if key.startswith('#'):
                 str += key + '\n'
             else:
                 str += key + ' ' + self[key] + '\n'
 
         return str