1beffe2552c338c9bfc3cdc9cedaf2326ad7804e mmaddren Tue Apr 24 14:00:24 2012 -0700 added new changes to cv.py to allow mouseCellType to work from within code diff --git python/lib/ucscgenomics/cv.py python/lib/ucscgenomics/cv.py index 0fc7b9f..8db721f 100644 --- python/lib/ucscgenomics/cv.py +++ python/lib/ucscgenomics/cv.py @@ -60,30 +60,45 @@ def readStanza(self, stanza, key=None): '''overriden method from RaFile which makes specialized stanzas based on type''' entry = CvStanza() key, val = entry.readStanza(stanza) return key, val, entry def validate(self): '''base validation method which calls all stanzas' validate''' for stanza in self.itervalues(): stanza.validate(self) #print self.missingTypes def getTypeOfTermStanza(self, type): + + if type == 'mouseCellType': + mousestanza = cv.CvStanza() + mousestanza['term'] = 'mouseCellType' + mousestanza['tag'] = 'MOUSECELLTYPE' + mousestanza['type'] = 'typeOfTerm' + mousestanza['label'] = 'Cell, tissue or DNA sample specific to mouse' + mousestanza['description'] = 'NOT FOR USE! ONLY FOR VALIDATION. Cell line or tissue used as the source of experimental material specific to mouse.' + mousestanza['searchable'] = 'multiSelect' + mousestanza['cvDefined'] = 'yes' + mousestanza['validate'] = 'cv or None' + mousestanza['requiredVars'] = 'term,tag,type,description,organism,vendorName,orderUrl,age,strain,sex #Provisional' + mousestanza['optionalVars'] = 'label,tissue,termId,termUrl,color,protocol,category,vendorId,lots,deprecated #Provisional' + return mousestanza + types = self.filter(lambda s: s['term'] == type and s['type'] == 'typeOfTerm', lambda s: s) if len(types) != 1: return None return types[0] class CvStanza(ra.RaStanza): '''base class for a single stanza in the cv, which adds validation''' def __init__(self): ra.RaStanza.__init__(self) def readStanza(self, stanza): ''' Populates this entry from a single stanza ''' @@ -117,30 +132,42 @@ raKey = line.split(' ', 1)[0] raVal = '' if (len(line.split(' ', 1)) == 2): raVal = line.split(' ', 1)[1] if raKey in self: count = 0 while raKey + '__$$' + str(count) in self: count = count + 1 self[raKey + '__$$' + str(count)] = raVal else: self[raKey] = raVal +# validate [cv/date/exists/float/integer/list:/none/regex:] outlines the expected values. ENFORCED by mdbPrint -validate +# cv: must be defined term in cv (e.g. cell=GM12878). "cv or None" indicates that "None is also acceptable. +# "cv or control" indicates that cv-defined terms of type "control" are also acceptable. +# date: must be date in YYYY-MM-DD format +# exists: not enforced. (e.g. fileName could be validated to exist in download directory) +# float: must be floating point number +# integer: must be integer +# "list:": must be one of several terms in comma delimeited list (e.g. "list: yes,no,maybe" ) # ("list:" includes colon) +# none: not validated in any way +# "regex:": must match regular expression (e.g. "regex: ^GS[M,E][0-9]$" ) # ("regex:" includes colon) +# # NOTE: that validate rules may end comment delimited by a '#' + def validate(self, cvfile): type = self['type'] if self['type'] == 'Cell Line': # :( if 'organism' in self and self['organism'] == 'human': type = 'cellType' elif 'organism' in self and self['organism'] == 'mouse': type = 'mouseCellType' else: cvfile.handler(OrganismError(self)) typeStanza = cvfile.getTypeOfTermStanza(type) if typeStanza == None: cvfile.handler(InvalidTypeError(self, self['type'] + '(%s)' % type)) return required = list() @@ -189,71 +216,30 @@ except: cvfile.handler(InvalidIntError(self, val)) elif validation.startswith('list:'): validVals = extractList(validation, 'list:') if val not in validVals: cvfile.handler(InvalidListError(self, val, validVals)) elif validation == 'none': pass elif validation.startswith('regex:'): regex = extractValue(validation, 'regex:') if not re.match(val, regex): cvfile.handler(UnmatchedRegexError(self, val, regex)) # else: # cvfile.handler(TypeValidationError(itemType)) - # validate [cv/date/exists/float/integer/list:/none/regex:] outlines the expected values. ENFORCED by mdbPrint -validate -# cv: must be defined term in cv (e.g. cell=GM12878). "cv or None" indicates that "None is also acceptable. -# "cv or control" indicates that cv-defined terms of type "control" are also acceptable. -# date: must be date in YYYY-MM-DD format -# exists: not enforced. (e.g. fileName could be validated to exist in download directory) -# float: must be floating point number -# integer: must be integer -# "list:": must be one of several terms in comma delimeited list (e.g. "list: yes,no,maybe" ) # ("list:" includes colon) -# none: not validated in any way -# "regex:": must match regular expression (e.g. "regex: ^GS[M,E][0-9]$" ) # ("regex:" includes colon) -# # NOTE: that validate rules may end comment delimited by a '#' - - - - # def validate2(self, cvfile, necessary=None, optional=None): - # '''default validation for a generic cv stanza. Should be called with all arguments if overidden''' - - # if necessary == None: - # necessary = set() - - # if optional == None: - # optional = set() - - # baseNecessary = {'term', 'tag', 'type'} - - # if self['type'] != 'Antibody': - # baseNecessary.add('description') - - # baseOptional = {'deprecated', 'label'} - # self.checkMandatory(cvfile, necessary | baseNecessary) - # self.checkExtraneous(cvfile, necessary | baseNecessary | optional | baseOptional) - - # temptype = self['type'] - # if self['type'] == 'Cell Line': # :( - # temptype = 'cellType' - # if len(cvfile.filter(lambda s: s['term'] == temptype and s['type'] == 'typeOfTerm', lambda s: s)) == 0: - # cvfile.handler(InvalidTypeError(self, self['type'])) - - # self.checkDuplicates(cvfile) - - def checkDuplicates(self, cvfile): '''ensure that all keys are present and not blank in the stanza''' for key in self.iterkeys(): if '__$$' in key: newkey = key.split('__$$', 1)[0] cvfile.handler(DuplicateKeyError(self, newkey)) def checkMandatory(self, cvfile, keys): '''ensure that all keys are present and not blank in the stanza''' for key in keys: if not key in self.keys(): cvfile.handler(MissingKeyError(self, key)) elif self[key] == '': cvfile.handler(BlankKeyError(self, key))