python/lib/ucscgenomics/cv.py 1beffe2552c338c9bfc3cdc9cedaf2326ad7804e

1beffe2552c338c9bfc3cdc9cedaf2326ad7804e
mmaddren
  Tue Apr 24 14:00:24 2012 -0700
added new changes to cv.py to allow mouseCellType to work from within code
diff --git python/lib/ucscgenomics/cv.py python/lib/ucscgenomics/cv.py
index 0fc7b9f..8db721f 100644
--- python/lib/ucscgenomics/cv.py
+++ python/lib/ucscgenomics/cv.py
@@ -60,30 +60,45 @@
     def readStanza(self, stanza, key=None):
         '''overriden method from RaFile which makes specialized stanzas based on type'''
         entry = CvStanza()
 
         key, val = entry.readStanza(stanza)
         return key, val, entry
 
 
     def validate(self):
         '''base validation method which calls all stanzas' validate'''
         for stanza in self.itervalues():
             stanza.validate(self)
         #print self.missingTypes
 
     def getTypeOfTermStanza(self, type):
+        '''return the single typeOfTerm stanza whose term is type, or None; mouseCellType is synthesized in code'''
+        if type == 'mouseCellType':
+            mousestanza = CvStanza()
+            mousestanza['term'] = 'mouseCellType'
+            mousestanza['tag'] = 'MOUSECELLTYPE'
+            mousestanza['type'] = 'typeOfTerm'
+            mousestanza['label'] = 'Cell, tissue or DNA sample specific to mouse'
+            mousestanza['description'] = 'NOT FOR USE! ONLY FOR VALIDATION. Cell line or tissue used as the source of experimental material specific to mouse.'
+            mousestanza['searchable'] = 'multiSelect'
+            mousestanza['cvDefined'] = 'yes'
+            mousestanza['validate'] = 'cv or None'
+            mousestanza['requiredVars'] = 'term,tag,type,description,organism,vendorName,orderUrl,age,strain,sex #Provisional'
+            mousestanza['optionalVars'] = 'label,tissue,termId,termUrl,color,protocol,category,vendorId,lots,deprecated #Provisional'
+            return mousestanza
+        # any other type is looked up among the stanzas of this file; anything but exactly one match yields None
         types = self.filter(lambda s: s['term'] == type and s['type'] == 'typeOfTerm', lambda s: s)
         if len(types) != 1:
             return None
         return types[0]
                 
 class CvStanza(ra.RaStanza):
     '''base class for a single stanza in the cv, which adds validation'''
     
     def __init__(self):
         ra.RaStanza.__init__(self)
 
     def readStanza(self, stanza):
         '''
         Populates this entry from a single stanza
         '''
@@ -117,30 +132,42 @@
             raKey = line.split(' ', 1)[0]
             raVal = ''
             if (len(line.split(' ', 1)) == 2):
                 raVal = line.split(' ', 1)[1]
                 
             if raKey in self:
                 count = 0
                 while raKey + '__$$' + str(count) in self:
                     count = count + 1
                     
                 self[raKey + '__$$' + str(count)] = raVal
                 
             else:
                 self[raKey] = raVal
         
+#     validate [cv/date/exists/float/integer/list:/none/regex:] outlines the expected values.  ENFORCED by mdbPrint -validate
+#           cv: must be defined term in cv (e.g. cell=GM12878).  "cv or None" indicates that "None" is also acceptable.
+#               "cv or control" indicates that cv-defined terms of type "control" are also acceptable.
+#         date: must be date in YYYY-MM-DD format
+#       exists: not enforced.  (e.g. fileName could be validated to exist in download directory)
+#        float: must be floating point number
+#      integer: must be integer
+#      "list:": must be one of several terms in comma delimited list (e.g. "list: yes,no,maybe" )  # ("list:" includes colon)
+#         none: not validated in any way
+#     "regex:": must match regular expression (e.g. "regex: ^GS[M,E][0-9]$" )  # ("regex:" includes colon)
+#    # NOTE: validate rules may end with a comment delimited by a '#'
+        
     def validate(self, cvfile):
         type = self['type']
         if self['type'] == 'Cell Line': # :(
             if 'organism' in self and self['organism'] == 'human':
                 type = 'cellType'
             elif 'organism' in self and self['organism'] == 'mouse':
                 type = 'mouseCellType'
             else:
                 cvfile.handler(OrganismError(self))
         
         typeStanza = cvfile.getTypeOfTermStanza(type)
         if typeStanza == None:
             cvfile.handler(InvalidTypeError(self, self['type'] + '(%s)' % type))
             return
         required = list()
@@ -189,71 +216,30 @@
                 except:
                     cvfile.handler(InvalidIntError(self, val))
             elif validation.startswith('list:'):
                 validVals = extractList(validation, 'list:')
                 if val not in validVals:
                     cvfile.handler(InvalidListError(self, val, validVals))
             elif validation == 'none':
                 pass
             elif validation.startswith('regex:'):
                 regex = extractValue(validation, 'regex:')
                 if not re.match(val, regex):
                     cvfile.handler(UnmatchedRegexError(self, val, regex))
             # else:
                 # cvfile.handler(TypeValidationError(itemType))
         
-        #     validate [cv/date/exists/float/integer/list:/none/regex:] outlines the expected values.  ENFORCED by mdbPrint -validate
-#           cv: must be defined term in cv (e.g. cell=GM12878).  "cv or None" indicates that "None is also acceptable.
-#               "cv or control" indicates that cv-defined terms of type "control" are also acceptable.
-#         date: must be date in YYYY-MM-DD format
-#       exists: not enforced.  (e.g. fileName could be validated to exist in download directory)
-#        float: must be floating point number
-#      integer: must be integer
-#      "list:": must be one of several terms in comma delimeited list (e.g. "list: yes,no,maybe" )  # ("list:" includes colon)
-#         none: not validated in any way
-#     "regex:": must match regular expression (e.g. "regex: ^GS[M,E][0-9]$" )  # ("regex:" includes colon)
-#    # NOTE: that validate rules may end comment delimited by a '#'
-
-        
-        
-    # def validate2(self, cvfile, necessary=None, optional=None):
-        # '''default validation for a generic cv stanza. Should be called with all arguments if overidden'''
-        
-        # if necessary == None:
-            # necessary = set()
-            
-        # if optional == None:
-            # optional = set()
-        
-        # baseNecessary = {'term', 'tag', 'type'}
-        
-        # if self['type'] != 'Antibody':
-            # baseNecessary.add('description')
-        
-        # baseOptional = {'deprecated', 'label'}
-        # self.checkMandatory(cvfile, necessary | baseNecessary)
-        # self.checkExtraneous(cvfile, necessary | baseNecessary | optional | baseOptional)
-        
-        # temptype = self['type']
-        # if self['type'] == 'Cell Line': # :(
-            # temptype = 'cellType'
-        # if len(cvfile.filter(lambda s: s['term'] == temptype and s['type'] == 'typeOfTerm', lambda s: s)) == 0:
-            # cvfile.handler(InvalidTypeError(self, self['type']))
-
-        # self.checkDuplicates(cvfile)
-        
-        
     def checkDuplicates(self, cvfile):
         '''ensure that all keys are present and not blank in the stanza'''
         for key in self.iterkeys():
             if '__$$' in key:
                 newkey = key.split('__$$', 1)[0]
                 cvfile.handler(DuplicateKeyError(self, newkey))
         
     def checkMandatory(self, cvfile, keys):
         '''ensure that all keys are present and not blank in the stanza'''
         for key in keys:
             if not key in self.keys():
                 cvfile.handler(MissingKeyError(self, key))
             elif self[key] == '':
                 cvfile.handler(BlankKeyError(self, key))