13db05916455fc43b3c007d9265887772e20e127 mmaddren Mon Jan 30 12:39:22 2012 -0800 added optional label for all stanzas in CV, and dataGroup mandatory for dataType diff --git python/lib/ucscgenomics/cv.py python/lib/ucscgenomics/cv.py index f31fa4b..9b4f247 100644 --- python/lib/ucscgenomics/cv.py +++ python/lib/ucscgenomics/cv.py @@ -61,34 +61,30 @@ return ek, ev, None elif type == 'age': entry = AgeStanza() elif type == 'dataType': entry = DataTypeStanza() elif type == 'lab': entry = LabStanza() elif type == 'seqPlatform': entry = SeqPlatformStanza() elif type == 'typeOfTerm': entry = TypeOfTermStanza() elif type == 'view': entry = ViewStanza() elif type == 'localization': entry = LocalizationStanza() - elif type == 'rnaExtract': - entry = RnaExtractStanza() - elif type == 'treatment': - entry = TreatmentStanza() elif type == 'grant': entry = GrantStanza() else: entry = CvStanza() key, val = entry.readStanza(stanza) return key, val, entry def validate(self): '''base validation method which calls all stanzas' validate''' for stanza in self.itervalues(): stanza.validate(self) @@ -146,31 +142,31 @@ def validate(self, ra, necessary=None, optional=None): '''default validation for a generic cv stanza. Should be called with all arguments if overidden''' if necessary == None: necessary = set() if optional == None: optional = set() baseNecessary = {'term', 'tag', 'type'} if self['type'] != 'Antibody': baseNecessary.add('description') - baseOptional = {'deprecated'} + baseOptional = {'deprecated', 'label'} self.checkMandatory(ra, necessary | baseNecessary) self.checkExtraneous(ra, necessary | baseNecessary | optional | baseOptional) temptype = self['type'] if self['type'] == 'Cell Line': # cv, you disgust me with your inconsistencies temptype = 'cellType' if len(ra.filter(lambda s: s['term'] == temptype and s['type'] == 'typeOfTerm', lambda s: s)) == 0: ra.handler(InvalidTypeError(self, self['type'])) self.checkDuplicates(ra) def checkDuplicates(self, ra): '''ensure that all keys are present and not blank in the stanza''' for key in self.iterkeys(): @@ -250,154 +246,164 @@ def checkProtocols(self, ra, path): if 'protocol' in self: protocols = self['protocol'].split() for protocol in protocols: if ':' not in protocol: ra.handler(InvalidProtocolError(self, protocol)) else: p = protocol.split(':', 1)[1] if ra.protocolPath != None and not os.path.isfile(ra.protocolPath + path + p): ra.handler(InvalidProtocolError(self, protocol)) class CvError(Exception): '''base error class for the cv.''' def __init__(self, stanza): + Exception.__init__(self) self.stanza = stanza self.msg = '' + self.strict = 0 def __str__(self): return str('%s[%s] %s: %s' % (self.stanza.name, self.stanza['type'], self.__class__.__name__, self.msg)) class MissingKeyError(CvError): '''raised if a mandatory key is missing''' def __init__(self, stanza, key): CvError.__init__(self, stanza) self.msg = key + self.strict = 1 # def __str__(self): # return str('%s(%s[%s])' % self.__class__.__name__ self.stanza + ': missing key (' + self.key + ')') class DuplicateKeyError(CvError): '''raised if a key is duplicated''' def __init__(self, stanza, key): CvError.__init__(self, stanza) self.msg = key + self.strict = 1 # def __str__(self): # return str(self.stanza + ': duplicate key (' + self.key + ')') class BlankKeyError(CvError): '''raised if a mandatory key is blank''' def __init__(self, stanza, key): CvError.__init__(self, stanza) self.msg = key + self.strict = 0 # def __str__(self): # return str(self.stanza + ': key (' + self.key + ') is blank') class ExtraKeyError(CvError): '''raised if an extra key not in the list of keys is found''' def __init__(self, stanza, key): CvError.__init__(self, stanza) self.msg = key + self.strict = 0 # def __str__(self): # return str(self.stanza + ': extra key (' + self.key + ')') class NonmatchKeyError(CvError): '''raised if a relational key does not match any other value''' def __init__(self, stanza, key, val): CvError.__init__(self, stanza) self.msg = '%s does not match %s' % (key, val) + self.strict = 1 # def __str__(self): # return str(self.stanza + ': key (' + self.key + ') does not match any (' + self.val + ')') class DuplicateVendorIdError(CvError): '''When there exists more than one connected component of stanzas (through derivedFrom) with the same vendorId''' def __init__(self, stanza): CvError.__init__(self, stanza) self.msg = '%s' % self.stanza['vendorId'] + self.strict = 0 # def __str__(self): # return str('warning: ' + self.stanza.name + ': vendorId (' + self.stanza['vendorId'] + ') has multiple parent cell lines') class InvalidProtocolError(CvError): '''raised if a protocol doesnt match anything in the directory''' def __init__(self, stanza, key): CvError.__init__(self, stanza) self.msg = key + self.strict = 0 # def __str__(self): # return str(self.stanza.name + ': missing protocol document (' + self.key + ')') class InvalidTypeError(CvError): '''raised if a relational key does not match any other value''' def __init__(self, stanza, key): CvError.__init__(self, stanza) self.msg = key + self.strict = 1 # def __str__(self): # return str(self.stanza + ': ' + self.key + ' does not match any types') class LabStanza(CvStanza): def __init__(self): CvStanza.__init__(self) def validate(self, ra): necessary = {'organism', 'labPi'} - optional = {'label', 'labInst', 'labPiFull', 'grantPi'} + optional = {'labInst', 'labPiFull', 'grantPi'} CvStanza.validate(self, ra, necessary, optional) self.checkRelational(ra, 'organism', 'term') class AgeStanza(CvStanza): def __init__(self): CvStanza.__init__(self) def validate(self, ra): necessary = {'stage'} CvStanza.validate(self, ra, necessary) class DataTypeStanza(CvStanza): def __init__(self): CvStanza.__init__(self) def validate(self, ra): - necessary = {'label'} + necessary = {'label', 'dataGroup'} CvStanza.validate(self, ra, necessary) class CellLineStanza(CvStanza): def __init__(self): CvStanza.__init__(self) def validate(self, ra): necessary = {'organism', 'vendorName', 'orderUrl', 'sex', 'tier'} optional = {'tissue', 'vendorId', 'karyotype', 'lineage', 'termId', 'termUrl', 'color', 'protocol', 'category', 'lots', 'derivedFrom', 'lab'} CvStanza.validate(self, ra, necessary, optional) self.checkRelational(ra, 'organism', 'term') self.checkRelational(ra, 'sex', 'term') @@ -446,31 +452,31 @@ def __init__(self): CvStanza.__init__(self) def validate(self, ra): necessary = {'label'} CvStanza.validate(self, ra, necessary) class TypeOfTermStanza(CvStanza): def __init__(self): CvStanza.__init__(self) def validate(self, ra): necessary = {'searchable', 'cvDefined', 'validate', 'priority'} - optional = {'label', 'hidden'} + optional = {'hidden'} CvStanza.validate(self, ra, necessary, optional) if len(ra.filter(lambda s: s['term'] == self['type'] and s['type'] == 'typeOfTerm', lambda s: s)) == 0: ra.handler(InvalidTypeError(self, self['type'])) class MouseStanza(CvStanza): def __init__(self): CvStanza.__init__(self) def validate(self, ra): necessary = {'organism', 'vendorName', 'orderUrl', 'age', 'strain', 'sex'} optional = {'tissue', 'termId', 'termUrl', 'color', 'protocol', 'category', 'vendorId', 'lots'} CvStanza.validate(self, ra, necessary, optional) @@ -478,49 +484,27 @@ self.checkRelational(ra, 'organism', 'term') self.checkRelational(ra, 'sex', 'term') self.checkRelational(ra, 'category', 'term') self.checkRelational(ra, 'age', 'term') self.checkRelational(ra, 'strain', 'term') self.checkProtocols(ra, 'protocols/cell/mouse/') class LocalizationStanza(CvStanza): def __init__(self): CvStanza.__init__(self) def validate(self, ra): necessary = {'termId', 'termUrl'} - optional = {'label'} - CvStanza.validate(self, ra, necessary, optional) - - -class RnaExtractStanza(CvStanza): - - def __init__(self): - CvStanza.__init__(self) - - def validate(self, ra): - optional = {'label'} - CvStanza.validate(self, ra, None, optional) - - -class TreatmentStanza(CvStanza): - - def __init__(self): - CvStanza.__init__(self) - - def validate(self, ra): - optional = {'label'} - CvStanza.validate(self, ra, None, optional) + CvStanza.validate(self, ra, necessary) class GrantStanza(CvStanza): def __init__(self): CvStanza.__init__(self) def validate(self, ra): necessary = {'grantInst', 'projectName'} - optional = {'label'} - CvStanza.validate(self, ra, necessary, optional) + CvStanza.validate(self, ra, necessary, None)