python/lib/ucscgenomics/cv.py 3aee104cf4c1e245dd020f743fbc58c17fd75976

3aee104cf4c1e245dd020f743fbc58c17fd75976
mmaddren
  Mon Apr 9 12:12:44 2012 -0700
added encode.py to store global constants and other encode stuff, and made all other libraries interface correctly with it
diff --git python/lib/ucscgenomics/cv.py python/lib/ucscgenomics/cv.py
index dafe1c4..7709a32 100644
--- python/lib/ucscgenomics/cv.py
+++ python/lib/ucscgenomics/cv.py
@@ -1,18 +1,18 @@
 import re
 import os
-from ucscgenomics import ra
+from ucscgenomics import ra, encode
 
 def extractValue(val, prefix='', removeComments=1):
     '''Return val cleaned up as a single value.

     Every occurrence of prefix is deleted from val; when removeComments
     is true, everything from the first '#' onward is dropped; the result
     is returned with surrounding whitespace stripped.
     '''
     cleaned = val.replace(prefix, '')
     if removeComments:
         # partition() hands back the whole string when there is no '#'
         cleaned = cleaned.partition('#')[0]
     return cleaned.strip()
 
 def extractList(val, prefix='', removeComments=1):
     '''Split a comma-separated value into a list of stripped items.

     Every occurrence of prefix is deleted from val; when removeComments
     is true, everything from the first '#' onward is dropped; the
     remainder is split on ',' and each piece is whitespace-stripped.

     Always returns a list (an empty input yields ['']).
     '''
     val2 = val.replace(prefix, '')
     if removeComments and '#' in val2:
         val2 = val2.split('#', 1)[0]
     # Build a real list instead of returning map(): code in this module
     # calls list methods such as extend() on the result, and map() is a
     # lazy iterator on Python 3. Calling item.strip() also works for
     # unicode values, which map(str.strip, ...) rejects on Python 2.
     return [item.strip() for item in val2.split(',')]
 
 class CvFile(ra.RaFile):
     '''
@@ -31,77 +31,46 @@
     simply call validate() on the cv object.
     
     For more information about other things not specific to the cv, but for
     all ra files, look at the RaFile documentation.
     '''
 
     def __init__(self, filePath=None, handler=None, protocolPath=None):
         '''sets up exception handling method, and optionally reads from a file'''
         ra.RaFile.__init__(self)
         
         self.handler = handler
         if handler == None:
             self.handler = self.raiseException
             
         if filePath == None:
-            filePath = os.path.expanduser('~/kent/src/hg/makeDb/trackDb/') + 'cv/alpha/cv.ra'
+            filePath = encode.defaultCvPath()
         
         self.protocolPath = protocolPath
         if protocolPath == None:
             self.protocolPath == os.path.expanduser('~/htdocsExtras/ENCODE/')
         
         self.missingTypes = set()
         
         self.read(filePath)
 
     def raiseException(self, exception):
         '''Default error handler: raise the exception it is given.

         Installed as self.handler when the constructor receives no handler.
         '''
         raise exception
 
     def readStanza(self, stanza, key=None):
         '''Overridden method from RaFile: parse one stanza into a CvStanza.

         Returns a (key, val, entry) tuple in which key and val are the
         results of CvStanza.readStanza(stanza) and entry is the CvStanza
         that was filled in. The incoming key argument is not read; the
         name is rebound to the parsed key.
         '''
-        # e = ra.RaStanza()
-        # ek, ev = e.readStanza(stanza)
-        # type = e['type']
-
-        # if type == 'Antibody':
-            # entry = AntibodyStanza()
-        # elif type == 'Cell Line':
-            # if e['organism'] == 'human':
-                # entry = CellLineStanza()
-            # elif e['organism'] == 'mouse':
-                # entry = MouseStanza()
-            # else:
-                # self.handler(NonmatchKeyError(e.name, e['organism'], 'organism'))
-                # return ek, ev, None
-        # elif type == 'age':
-            # entry = AgeStanza()
-        # elif type == 'dataType':
-            # entry = DataTypeStanza()
-        # elif type == 'lab':
-            # entry = LabStanza()
-        # elif type == 'seqPlatform':
-            # entry = SeqPlatformStanza()
-        # elif type == 'typeOfTerm':
-            # entry = TypeOfTermStanza()
-        # elif type == 'view':
-            # entry = ViewStanza()
-        # elif type == 'localization':
-            # entry = LocalizationStanza()
-        # elif type == 'grant':
-            # entry = GrantStanza()
-        # else:
         entry = CvStanza()
 
         key, val = entry.readStanza(stanza)
         return key, val, entry
 
 
     def validate(self):
         '''Validate every stanza in the file, then print self.missingTypes.

         Each stanza's validate() is called with this CvFile, so problems
         are reported through self.handler.
         '''
         for stanza in self.itervalues():
             stanza.validate(self)
         # Python 2 print statement: dumps the missingTypes set to stdout
         print self.missingTypes
 
     def getTypeOfTermStanza(self, type):
         types = self.filter(lambda s: s['term'] == type and s['type'] == 'typeOfTerm', lambda s: s)
         if len(types) != 1:
@@ -160,31 +129,34 @@
             else:
                 self[raKey] = raVal
         
     def validate(self, cvfile):
         type = self['type']
         if self['type'] == 'Cell Line': # :(
             if 'organism' in self and self['organism'] == 'human':
                 type = 'cellType'
             elif 'organism' in self and self['organism'] == 'mouse':
                 type = 'mouseCellType'
             else:
                 cvfile.handler(OrganismError(self))
         
         typeStanza = cvfile.getTypeOfTermStanza(type)
         if typeStanza == None:
-            cvfile.handler(InvalidTypeError(self, self['type']))
+            #print cvfile.filter2(lambda s: s['type'] == 'typeOfTerm').keys()
+            #print '>%s<' % cvfile['mouseCellType ']['term']
+            #print '>%s<' % cvfile['mouseCellType ']['type']
+            cvfile.handler(InvalidTypeError(self, self['type'] + '(%s)' % type))
             return
         required = list()
         if 'requiredVars' in typeStanza:
             required = extractList(typeStanza['requiredVars'])
         optional = list()
         if 'optionalVars' in typeStanza:
             optional = extractList(typeStanza['optionalVars'])
         
         self.checkMandatory(cvfile, required)
         required.extend(optional)
         self.checkExtraneous(cvfile, required)
         self.checkDuplicates(cvfile)
         
         for key in self.iterkeys():
             
@@ -234,105 +206,96 @@
         
         #     validate [cv/date/exists/float/integer/list:/none/regex:] outlines the expected values.  ENFORCED by mdbPrint -validate
 #           cv: must be defined term in cv (e.g. cell=GM12878).  "cv or None" indicates that "None" is also acceptable.
 #               "cv or control" indicates that cv-defined terms of type "control" are also acceptable.
 #         date: must be date in YYYY-MM-DD format
 #       exists: not enforced.  (e.g. fileName could be validated to exist in download directory)
 #        float: must be floating point number
 #      integer: must be integer
 #      "list:": must be one of several terms in comma delimited list (e.g. "list: yes,no,maybe" )  # ("list:" includes colon)
 #         none: not validated in any way
 #     "regex:": must match regular expression (e.g. "regex: ^GS[M,E][0-9]$" )  # ("regex:" includes colon)
 #    # NOTE: validate rules may end with a comment delimited by a '#'
 
         
         
-    def validate2(self, cvfile, necessary=None, optional=None):
-        '''default validation for a generic cv stanza. Should be called with all arguments if overidden'''
+    # def validate2(self, cvfile, necessary=None, optional=None):
+        # '''default validation for a generic cv stanza. Should be called with all arguments if overidden'''
         
-        if necessary == None:
-            necessary = set()
+        # if necessary == None:
+            # necessary = set()
             
-        if optional == None:
-            optional = set()
+        # if optional == None:
+            # optional = set()
         
-        baseNecessary = {'term', 'tag', 'type'}
+        # baseNecessary = {'term', 'tag', 'type'}
         
-        if self['type'] != 'Antibody':
-            baseNecessary.add('description')
+        # if self['type'] != 'Antibody':
+            # baseNecessary.add('description')
         
-        baseOptional = {'deprecated', 'label'}
-        self.checkMandatory(cvfile, necessary | baseNecessary)
-        self.checkExtraneous(cvfile, necessary | baseNecessary | optional | baseOptional)
+        # baseOptional = {'deprecated', 'label'}
+        # self.checkMandatory(cvfile, necessary | baseNecessary)
+        # self.checkExtraneous(cvfile, necessary | baseNecessary | optional | baseOptional)
         
-        temptype = self['type']
-        if self['type'] == 'Cell Line': # :(
-            temptype = 'cellType'
-        if len(cvfile.filter(lambda s: s['term'] == temptype and s['type'] == 'typeOfTerm', lambda s: s)) == 0:
-            cvfile.handler(InvalidTypeError(self, self['type']))
+        # temptype = self['type']
+        # if self['type'] == 'Cell Line': # :(
+            # temptype = 'cellType'
+        # if len(cvfile.filter(lambda s: s['term'] == temptype and s['type'] == 'typeOfTerm', lambda s: s)) == 0:
+            # cvfile.handler(InvalidTypeError(self, self['type']))
 
-        self.checkDuplicates(cvfile)
+        # self.checkDuplicates(cvfile)
         
         
     def checkDuplicates(self, cvfile):
         '''Report every key of this stanza that contains the '__$$' marker.

         Each such key is passed to cvfile.handler as a DuplicateKeyError
         naming the part of the key before the first marker.
         NOTE(review): the marker is presumably appended by the ra parser
         to repeated keys -- confirm against the stanza reader.
         '''
         marker = '__$$'
         for name in self.iterkeys():
             if marker not in name:
                 continue
             original = name.split(marker, 1)[0]
             cvfile.handler(DuplicateKeyError(self, original))
         
     def checkMandatory(self, cvfile, keys):
         '''Report each key in keys that is absent from, or blank in, this stanza.

         A key that is not present is passed to cvfile.handler as a
         MissingKeyError; a key whose value is the empty string is passed
         as a BlankKeyError.
         '''
         for required in keys:
             if required in self.keys():
                 if self[required] == '':
                     cvfile.handler(BlankKeyError(self, required))
             else:
                 cvfile.handler(MissingKeyError(self, required))
                 
-    # def checkOptional(self, cvfile, keys):
-        # '''ensure that all keys are present and not blank in the stanza'''
-        # for key in keys:
-            # if key in self and self[key] == '':
-                # cvfile.handler(BlankKeyError(self, key))
-        
     def checkExtraneous(self, cvfile, keys):
         '''Report stanza keys that are not listed in keys.

         Keys containing the '__$$' marker are skipped here. Every other
         key that is not in keys is passed to cvfile.handler as an
         ExtraKeyError.
         '''
         for present in self.iterkeys():
             if present in keys or '__$$' in present:
                 continue
             cvfile.handler(ExtraKeyError(self, present))
     
     def checkFullRelational(self, cvfile, key, other, type):
         '''Check that self[key] equals the value at other in some stanza
         of cvfile whose 'type' equals the given type.

         Does nothing when key is not in this stanza. When no stanza
         matches, a NonmatchKeyError(self, key, other) is passed to
         cvfile.handler.
         '''
         if key not in self:
             return

         for candidate in cvfile.itervalues():
             # only stanzas carrying both 'type' and the other key can match
             if 'type' not in candidate or other not in candidate:
                 continue
             if candidate['type'] == type and self[key] == candidate[other]:
                 return  # first match is enough

         cvfile.handler(NonmatchKeyError(self, key, other))
     
     def checkRelational(self, cvfile, key, other):
         '''Check that self[key] equals the value at other in some stanza
         of cvfile whose 'type' equals key. Does nothing when key is not in
         this stanza; otherwise a failed search is reported to
         cvfile.handler as NonmatchKeyError(self, key, other).'''
-        
-        
-        
         p = 0  # set to 1 once a matching stanza is found
         
         if key not in self:
             return
         
         for entry in cvfile.itervalues():
             # only stanzas carrying both 'type' and the other key can match
             if 'type' in entry and other in entry:
                 if entry['type'] == key and self[key] == entry[other]:
                     p = 1
                     break
         if p == 0:
             cvfile.handler(NonmatchKeyError(self, key, other))
             
     def checkListRelational(self, cvfile, key, other):
         '''check that the value at key matches the value at other'''
@@ -371,117 +334,86 @@
         self.stanza = stanza
         self.msg = ''
         self.strict = 0
         
     def __str__(self):
         '''Format as "<stanza name>[<stanza type>] <error class name>: <msg>".'''
         fields = (self.stanza.name, self.stanza['type'], self.__class__.__name__, self.msg)
         return str('%s[%s] %s: %s' % fields)
         
 class MissingKeyError(CvError):
     '''Raised if a mandatory key is missing from a stanza.

     msg holds the name of the missing key.
     '''
     
     def __init__(self, stanza, key):
         CvError.__init__(self, stanza)
         self.msg =  key
         # NOTE(review): meaning of strict is not visible here -- confirm
         self.strict = 1
     
-    # def __str__(self):
-        # return str('%s(%s[%s])' % self.__class__.__name__ self.stanza + ': missing key (' + self.key + ')')
-    
-    
 class DuplicateKeyError(CvError):
     '''Raised if a key is duplicated within a stanza.

     msg holds the name of the duplicated key.
     '''
     
     def __init__(self, stanza, key):
         CvError.__init__(self, stanza)
         self.msg = key
         self.strict = 1
     
-    # def __str__(self):
-        # return str(self.stanza + ': duplicate key (' + self.key + ')')
-    
-    
 class BlankKeyError(CvError):
     '''Raised if a mandatory key is present but its value is blank.

     msg holds the name of the blank key.
     '''
     
     def __init__(self, stanza, key):
         CvError.__init__(self, stanza)
         self.msg = key
         self.strict = 0
     
-    # def __str__(self):
-        # return str(self.stanza + ': key (' + self.key + ') is blank')
-    
-    
 class ExtraKeyError(CvError):
     '''Raised if a stanza has a key that is not in the list of allowed keys.

     msg holds the name of the extra key.
     '''
 
     def __init__(self, stanza, key):
         CvError.__init__(self, stanza)
         self.msg = key
         self.strict = 0
     
-    # def __str__(self):
-        # return str(self.stanza + ': extra key (' + self.key + ')')    
-
-        
 class NonmatchKeyError(CvError):
     '''Raised if a relational key does not match any other value.

     msg reads "<key> does not match <val>", built from the two
     constructor arguments.
     '''
     
     def __init__(self, stanza, key, val):
         CvError.__init__(self, stanza)
         self.msg = '%s does not match %s' % (key, val)
         self.strict = 1
     
-    # def __str__(self):
-        # return str(self.stanza + ': key (' + self.key + ') does not match any (' + self.val + ')')
-        
-        
 class DuplicateVendorIdError(CvError):
     '''Raised when more than one connected component of stanzas (linked
     through derivedFrom) shares the same vendorId.

     msg holds the stanza's vendorId value.
     '''
     
     def __init__(self, stanza):
         CvError.__init__(self, stanza)
         self.msg = '%s' % self.stanza['vendorId']
         self.strict = 0
         
-    # def __str__(self):
-        # return str('warning: ' + self.stanza.name + ': vendorId (' + self.stanza['vendorId'] + ') has multiple parent cell lines')
-        
-        
 class InvalidProtocolError(CvError):
     '''Raised if a protocol doesn't match anything in the directory.

     msg holds the key passed to the constructor.
     '''
     
     def __init__(self, stanza, key):
         CvError.__init__(self, stanza)
         self.msg = key
         self.strict = 0
     
-    # def __str__(self):
-        # return str(self.stanza.name + ': missing protocol document (' + self.key + ')')    
-    
-        
 class InvalidTypeError(CvError):
     '''Raised if no typeOfTerm stanza can be found for a stanza's type.

     msg holds the text passed as key (the caller supplies the type text).
     '''
     
     def __init__(self, stanza, key):
         CvError.__init__(self, stanza)
         self.msg = key
         self.strict = 1
     
-    # def __str__(self):
-        # return str(self.stanza + ': ' + self.key + ' does not match any types')
-        
 class TypeValidationError(CvError):
     '''Raised if the term's type of term has an invalid validation value.

     msg is 'validation ' followed by the stanza's 'validation' value.
     '''
     
     def __init__(self, stanza):
         CvError.__init__(self, stanza)
         self.msg = 'validation ' + stanza['validation']
         self.strict = 1
        
 class InvalidDateError(CvError):
     '''Raised if the value is an invalid date.

     msg is the offending value followed by
     ' does not match a YYYY-MM-DD date'.
     '''
     
     def __init__(self, stanza, val):
         CvError.__init__(self, stanza)
         self.msg = val + ' does not match a YYYY-MM-DD date'
         self.strict = 1