python/lib/ucscgenomics/ra.py e14a4cf5576db1bae722948cd5697289ef073cd6

e14a4cf5576db1bae722948cd5697289ef073cd6
mmaddren
  Thu Jan 26 12:08:17 2012 -0800
added cvValidate back into the tree
diff --git python/lib/ucscgenomics/ra.py python/lib/ucscgenomics/ra.py
index 5cb074f..04abb68 100644
--- python/lib/ucscgenomics/ra.py
+++ python/lib/ucscgenomics/ra.py
@@ -69,31 +69,32 @@
     that lambda s: 1 means always return true. Lambda expressions are always
     preferable to functions unless the expression would need to be reused
     multiple times. It is also best to reduce the set of stanzas as much as
     possible before operating over them.
 
     Filtering allows you to eliminate a lot of code.
     '''
 
     def __init__(self, filePath=None, key=None):
         OrderedDict.__init__(self)
         if filePath != None:
             self.read(filePath, key)
 
     def read(self, filePath, key=None):
         '''
-        Reads an rafile stanza by stanza, and internalizes it.
+        Reads an rafile stanza by stanza, and internalizes it. Don't override
+        this for derived types, instead override readStanza.
         '''
 
         file = open(filePath, 'r')
 
         #entry = None
         stanza = list()
         keyValue = ''
 
         reading = 1
         
         while reading:
             line = file.readline()
             if line == '':
                 reading = 0
         
@@ -112,34 +113,45 @@
                     if entry != None and keyValue != testKey:
                         raise KeyError('Inconsistent Key ' + testKey)
 
                 if entry != None:
                     if name != None or key == None:
                         if name in self:
                             raise KeyError('Duplicate Key ' + name)
                         self[name] = entry
 
                 stanza = list()
 
         file.close()
 
 
     def readStanza(self, stanza, key=None):
+        '''
+        Override this to create custom stanza behavior in derived types.
+        
+        IN
+        stanza: list of strings with keyval data
+        key: optional key for selective key filtering. Don't worry about it
+
+        OUT
+        namekey: the key of the stanza's name
+        nameval: the value of the stanza's name
+        entry: the stanza itself
+        '''
         entry = RaStanza()
         if entry.readStanza(stanza, key) == None:
             return None, None, None
-        entry = RaStanza()
         val1, val2 = entry.readStanza(stanza, key)
         return val1, val2, entry
 
 
     def iter(self):
         pass
 
 
     def iterkeys(self):
         for item in self._OrderedDict__ordering:
             if not(item.startswith('#') or item == ''):
                 yield item
 
 
     def itervalues(self):
@@ -217,59 +229,52 @@
         one with identical values being preserved,
         differences are marked with a >>> and <<<
         '''
 
         mergedKeys = ucscUtils.mergeList(list(self), list(other))
         selfKeys = set(self)
         otherKeys = set(other)
         newCommon = RaFile()
         p = re.compile('^\s*#')
         p2 = re.compile('^\s*$')
         for i in mergedKeys:
             if p.match(i) or p2.match(i):
                 newCommon.append(i)
                 continue
             if i not in selfKeys:
-                newCommon[i] = other[i]
-                continue
+                newCommon.append(other[i])
             if i not in otherKeys:
-                newCommon[i] = self[i]
-                continue
+                newCommon.append(self[i])
             if i in otherKeys and i in selfKeys:
                 newStanza = RaStanza()
                 selfStanzaKeys = set(self[i].iterkeys())
                 otherStanzaKeys = set(other[i].iterkeys())
-                stanzaKeys = ucscUtils.mergeList(list(self[i]), list(other[i]))
+                stanzaKeys = ucscUtils.mergeList(list(self[i].iterkeys()), list(other[i].iterkeys()))
                 for j in stanzaKeys:
-                    if p.match(j):
-                        newStanza.append(j)
-                        continue
                     if j not in selfStanzaKeys:
                         newStanza[j] = other[i][j]
-                        continue
                     if j not in otherStanzaKeys:
                         newStanza[j] = self[i][j]
-                        continue
                     if j in selfStanzaKeys and j in otherStanzaKeys:
                         if self[i][j] == other[i][j]:
                             newStanza[j] = self[i][j]
                         else:
                             in_j = '>>>>>%s' % j
                             out_j = '<<<<<%s' % j
                             newStanza[out_j] = self[i][j]
                             newStanza[in_j] = other[i][j]
-                newCommon[i] = newStanza
+            newCommon.append(newStanza)
         return newCommon
 
 
     def summaryDiff(self, other):
         '''
         Input:
             RaFile object being compared.
         Output: RaFile with differences.
 
         Returns ***partial*** stanzas of ***anything*** different
         from the self dictionary compared to the other dictionary.
         For versatility, it only returns stanzas from the self Ra file. In other
         words, it returns the self dictionary lines that are either not present
         in or different from the other dictionary.
 
@@ -284,52 +289,52 @@
             if stanza.name not in other.keys():
                 RetThis[stanza.name] = stanza
             else:
                 if stanza.difference(other[stanza.name]):
                     RetThis[stanza.name] = stanza.difference(other[stanza.name])
         return RetThis
 
     def changeSummary(self, otherRa):
         '''
         Input:
             Two RaFile objects
         Output:
             Dictionary showing differences between stanzas, list of added and dropeed stanzas
         '''
         retDict = collections.defaultdict(list)
-        addList = set(self.iterkeys()) - set(otherRa.iterkeys())
-        dropList = set(otherRa.iterkeys()) - set(self.iterkeys())
+        dropList = set(self.iterkeys()) - set(otherRa.iterkeys())
+        addList = set(otherRa.iterkeys()) - set(self.iterkeys())
         common = set(self.iterkeys()) & set(otherRa.iterkeys())
 
         p = re.compile('^\s*#')
         for stanza in common:
             if p.match(stanza):
                 continue
             for key in self[stanza]:
                 if p.match(key):
                     continue
                 if key in otherRa[stanza]:
                     if self[stanza][key] != otherRa[stanza][key]:
                         retDict[stanza].append("Changed %s from %s -> %s" %(key, otherRa[stanza][key], self[stanza][key]))
                 else:
                     retDict[stanza].append("Added %s -> %s" %(key, self[stanza][key]))
             for key in otherRa[stanza]:
                 if p.match(key):
                     continue
                 if key not in self[stanza]:
                     retDict[stanza].append("Dropped %s -> %s" %(key, otherRa[stanza][key]))
-        return retDict, addList, dropList
+        return retDict, dropList, addList
 
     def diffFilter(self, select, other):
         '''
         Input:
             Lambda function of desired comparison term
             RaFile object being compared.
         Output: RaFile with differences.
 
         Filter returns ***full*** stanzas of a ***select function*** from
         the self dictionary compared to the other dictionary. For
         versatility, it only returns stanzas from the self Ra file. In other
         words, it only returns self dictionary stanzas with the function term
         that are either not found in or different from the other
         dictionary.
 
@@ -363,140 +368,98 @@
                 RetThis[stanza.name] = stanza
             elif thisSelectDict[stanza.name] != thatSelectDict[stanza.name]:
                 RetThis[stanza.name] = stanza
         return RetThis
 
     def updateDiffFilter(self, term, other):
         '''
         Replicates updateMetadata.
         Input:
             Term
             Other raFile
 
         Output:
             Merged RaFile
                 Stanzas found in 'self' and 'other' that have the 'Term' in 'other'
-                are overwritten (or inserted if not found) into 'self'. 
-                Final merged dictionary is returned.
+                are overwritten (or inserted if not found) into 'self'. Final merged
+                dictionary is returned.
         '''
         ret = self
         common = set(self.iterkeys()) & set(other.iterkeys())
         for stanza in common:
             if term not in self[stanza] and term not in other[stanza]:
                 continue
             if term in self[stanza] and term not in other[stanza]:
                     del ret[stanza][term]
                     continue
+
             if term in other[stanza]:
                 #Remake stanza to keep order of terms
                 tempStanza = RaStanza()
                 tempStanza._name = stanza
-                selfKeys = list(self[stanza].iterkeys())
-                otherKeys = list(other[stanza].iterkeys())
-                newOther = list()
-                #filter out keys in other that aren't in self, or the term we're interested in
-                for i in otherKeys:
-                    if not i in selfKeys and i != term:
-                        continue
+                try:
+                    tempStanza['metaObject'] = self[stanza]['metaObject']
+                    tempStanza['objType'] = self[stanza]['objType']
+                    termList = self[stanza].keys()
+                    termList.remove('metaObject')
+                    termList.remove('objType')
+                except KeyError:
+                    termList = self[stanza].keys()
+                if term not in termList:
+                    termList.append(term)
+                for t in sorted(termList, key=str.lower):
+                    if t == term:
+                        if t not in self[stanza]:
+                            tempStanza[t] = other[stanza][t]
+                        elif self[stanza][t] != other[stanza][t]:
+                            tempStanza[t] = other[stanza][t]
                     else:
-                        newOther.append(i)
-                #merge self keylist and filtered other list
-                masterList = ucscUtils.mergeList(newOther, selfKeys)
-                for i in masterList:
-                    if i == term:
-                        tempStanza[i] = other[stanza][i]
+                            tempStanza[t] = self[stanza][t]
                     else:
-                        tempStanza[i] = self[stanza][i]
+                        tempStanza[t] = self[stanza][t]
             ret[stanza] = tempStanza
 
         return ret
 
-    def printTrackDbFormat(self):
-        retstring = ""
-        space = False
-        tab = False
-        commentList = []
-        for stanza in self:
-            if stanza == "":
-                if commentList:
-                    for line in commentList:
-                        if space == True:
-                            retstring += "    "
-                        if tab == True:
-                            retstring += "    "
-                        retstring += line + "\n"
-                    commentList = []
-                    retstring += "\n"
-                continue
-            if stanza.startswith("#"):
-                commentList.append(stanza)
-                continue
-            if "visibility" in self[stanza].keys():
-                tab = False
-                space = True
-            if "subGroups" in self[stanza].keys():
-                tab = True
-                space = True
-            if commentList:
-                for line in commentList:
-                    if space == True:
-                        retstring += "    "
-                    if tab == True:
-                        retstring += "    "
-                    retstring += line + "\n"
-                commentList = []
-            for line in self[stanza]:
-                if space == True:
-                    retstring += "    "
-                if tab == True:
-                    retstring += "    "
-                if line.startswith("#"):
-                    retstring += line + "\n"
-                else:
-                    retstring += line + " " + self[stanza][line] + "\n"
-            retstring += "\n"
-        return retstring
-
-
     def __str__(self):
         str = ''
         for item in self.iteritems():
             if len(item) == 1:
                 str += item[0].__str__() + '\n'
             else:
                 str += item[1].__str__() + '\n'
         return str #.rsplit('\n', 1)[0]
 
 
 class RaStanza(OrderedDict):
     '''
     Holds an individual entry in the RaFile.
     '''
 
-    def __init__(self):
-        self._name = ''
-        self._nametype = ''
-        OrderedDict.__init__(self)
-
     @property
     def name(self):
         return self._name
 
+    def __init__(self):
+        self._name = ''
+        self._nametype = ''
+        OrderedDict.__init__(self)
 
     def readStanza(self, stanza, key=None):
         '''
-        Populates this entry from a single stanza
+        Populates this entry from a single stanza. Override this to create
+        custom behavior in derived classes
         '''
 
         for line in stanza:
             self.readLine(line)
 
         return self.readName(stanza, key)
 
 
     def readName(self, stanza, key=None):
         '''
         Extracts the Stanza's name from the value of the first line of the
         stanza.
         '''
         
         if key == None: