e7f828d1d1d15fe187a2cb9a241dee10874af38f
mmaddren
  Mon Feb 6 13:35:15 2012 -0800
cvValidate updated for new cv spec
diff --git python/lib/ucscgenomics/mdb.py python/lib/ucscgenomics/mdb.py
index b39400a..9ab868c 100644
--- python/lib/ucscgenomics/mdb.py
+++ python/lib/ucscgenomics/mdb.py
@@ -7,64 +7,73 @@
         self.molecule = molecule
         self.strategy = strategy
         self.source = source
         self.selection = selection
         self.type = type
         
     @property    
     def valid(self):
         return self.molecule != 'REPLACE' and self.strategy != 'REPLACE' and self.source != 'REPLACE' and self.selection != 'REPLACE' and self.type != None
     
     @property
     def shouldSubmit(self):
         return self.type != 'NotGeo'
     
 dataTypes = {
-    'Cage': DataType('Cage', 'OVERRIDE RNA', 'OTHER', 'transcriptomic', 'CAGE', 'HighThroughput'),
+    'Cage': DataType(           'Cage',             'RNA',          'OTHER',                                            'transcriptomic',   'CAGE',                                         'HighThroughput'),
     'ChipSeq': DataType('ChipSeq', 'genomic DNA', 'ChIP-Seq', 'genomic', 'ChIP', 'HighThroughput'),
     'DnaPet': DataType('DnaPet', 'genomic DNA', 'OTHER', 'genomic', 'size fractionation', 'HighThroughput'),
     'DnaseDgf': DataType('DnaseDgf', 'genomic DNA', 'DNase-Hypersensitivity', 'genomic', 'DNase', 'HighThroughput'),
     'DnaseSeq': DataType('DnaseSeq', 'genomic DNA', 'DNase-Hypersensitivity', 'genomic', 'DNase', 'HighThroughput'),
     'FaireSeq': DataType('FaireSeq', 'genomic DNA', 'OTHER', 'genomic', 'other', 'HighThroughput'),
     'MethylSeq': DataType('MethylSeq', 'genomic DNA', 'MRE-Seq', 'genomic', 'Restriction Digest', 'HighThroughput'),
     'MethylRrbs': DataType('MethylRrbs', 'genomic DNA', 'Bisulfite-Seq', 'genomic', 'Reduced Representation', 'HighThroughput'),
     'Orchid': DataType('Orchid', 'genomic DNA', 'OTHER', 'genomic', 'other', 'HighThroughput'),
     'Proteogenomics': DataType('Proteogenomics', 'protein', 'mass spectrometry-based proteogenomic mapping', 'protein', 'chromatographically fractionated peptides', 'HighThroughput'),
-    'RnaPet': DataType('RnaPet', 'OVERRIDE RNA', 'OTHER', 'transcriptomic', 'other', 'HighThroughput'),
-    'RnaSeq': DataType('RnaSeq', 'OVERRIDE RNA', 'RNA-Seq', 'transcriptomic', 'cDNA', 'HighThroughput'),
+    'RnaPet': DataType(         'RnaPet',           'RNA',          'OTHER',                                            'transcriptomic',   'other',                                        'HighThroughput'),
+    'RnaSeq': DataType(         'RnaSeq',           'RNA',          'RNA-Seq',                                          'transcriptomic',   'cDNA',                                         'HighThroughput'),
     
-    #these need to be curated
-    '5C': DataType('5C', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
-    'AffyExonArray': DataType('AffyExonArray', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'MicroArray'),
+    #double-check the values of the entries below

+    'ChiaPet': DataType(        'ChiaPet',          'genomic DNA',  'ChIP-Seq followed by ligation',                    'genomic',          'other',                                      'HighThroughput'),

+    'Nucleosome': DataType(     'Nucleosome',       'genomic DNA',  'ChIP-Seq',                                         'genomic',          'ChIP',                                         'HighThroughput'),

+    'RipSeq': DataType(         'RipSeq',           'RNA',          'OTHER',                                          'transcriptomic',   'RNA binding protein antibody',                 'HighThroughput'),

+    #for ripseq, ask geo about new 'ripseq'

+    

+    #data types that are not submitted to GEO (type 'NotGeo')

+    '5C': DataType('5C', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'),

     'Bip': DataType('Bip', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'),
-    'Cluster': DataType('Cluster', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
-    'Cnv': DataType('Cnv', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
-    'Combined': DataType('Combined', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
-    'Genotype': DataType('Genotype', 'genomic DNA', 'REPLACE', 'REPLACE', 'REPLACE', None),
     'Gencode': DataType('Gencode', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'),
-    'ChiaPet': DataType('ChiaPet', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
     'Mapability': DataType('Mapability', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'),
-    'MethylArray': DataType('MethylArray', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
     'NRE': DataType('NRE', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'),
-    'Nucleosome': DataType('Nucleosome', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
-    'RnaChip': DataType('RnaChip', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
-    'RipGeneSt': DataType('RipGeneSt', 'OVERRIDE RNA', 'REPLACE', 'transcriptomic', 'RNA binding protein antibody', 'MicroArray'), #this isn't correct
-    'RipTiling': DataType('RipTiling', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
-    'RipChip': DataType('RipChip', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
-    'RipSeq': DataType('RipSeq', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
     'Switchgear': DataType('Switchgear', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'),
-    'TfbsValid': DataType('TfbsValid', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo')
+    'TfbsValid': DataType('TfbsValid', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'),

+    'Cluster': DataType('Cluster', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'),

+    

+    #microarray data types (type 'MicroArray')

+    'AffyExonArray': DataType(  'AffyExonArray',    'mRNA',         'RNA-Microarray',                                   'transcriptomic',   'polyA',                                        'MicroArray'),

+    'MethylArray': DataType(    'MethylArray',      'genomic DNA',  'REPLACE',                                          'genomic',          'REPLACE',                                      'MicroArray'),

+    'RipGeneSt': DataType(      'RipGeneSt',        'RNA',          'REPLACE',                                          'transcriptomic',   'RNA binding protein antibody',                 'MicroArray'), #this isn't correct

+    'RipTiling': DataType(      'RipTiling',        'RNA',          'REPLACE',                                          'transcriptomic',   'RNA binding protein antibody',                 'MicroArray'),

+    
+    #these need to be curated
+    'Cnv': DataType(            'Cnv',              'REPLACE',      'REPLACE',                                          'REPLACE',          'REPLACE',                                      None),
+    'Combined': DataType(       'Combined',         'REPLACE',      'REPLACE',                                          'REPLACE',          'REPLACE',                                      None),
+    'Genotype': DataType(       'Genotype',         'genomic DNA',  'REPLACE',                                          'genomic',          'REPLACE',                                      None),
+    'RnaChip': DataType(        'RnaChip',          'RNA',          'REPLACE',                                          'transcriptomic',   'RNA binding protein antibody',                 None),
+    'RipChip': DataType(        'RipChip',          'RNA',          'REPLACE',                                          'transcriptomic',   'RNA binding protein antibody',                 None)
+    
+    
 }
 
 #compare this to the source in datatype, give GP ids depending on the type
 gpIds = {
     'human genomic': '63443',
     'human transcriptomic': '30709',
     'human protein': '63447',
     
     'mouse genomic': '63471',
     'mouse transcriptomic': '66167',
     'mouse protein': '63475'
 }
 
 class MdbFile(ra.RaFile):
     '''
@@ -78,34 +87,37 @@
         try:
             return self._expVars
         except AttributeError:
             self._expVars = self.compositeStanza['expVars'].split(',')
             return self._expVars
     
     @property
     def dataType(self):
         '''The data type of the experiment. 'None' if inconsistent.'''
         try:
             return self._dataType
         except AttributeError:
             self._dataType = None
             for e in self.experiments.itervalues():
                 if self._dataType == None and e.dataType != None:
+                    print e.dataType
                     self._dataType = e.dataType
                 elif self._dataType != e.dataType or e.dataType == None:
+                    print 'multiple data types!'
                     self._dataType = None
                     break
+            print 'still none'
             return self._dataType
     
     @property    
     def compositeStanza(self):
         '''the stanza (typically first in file) describing the composite'''
         try:
             return self._compositeStanza
         except AttributeError:
             self._compositeStanza = self.filter(lambda s: s['objType'] == 'composite', lambda s: s)
             if len(self._compositeStanza) != 1:
                 raise KeyError
             else:
                 self._compositeStanza = self._compositeStanza[0]
             return self._compositeStanza
             
@@ -169,26 +181,30 @@
     
     @property
     def name(self):
         return self._id
         
     @property
     def dataType(self):
         '''The data type of the experiment. 'None' if inconsistent.'''
         try:
             return self._dataType
         except AttributeError:
             self._dataType = None
             for s in self:
                 if 'dataType' in s:
                     if self._dataType == None:
+                        print dataTypes[s['dataType']]
                         self._dataType = dataTypes[s['dataType']]
                     elif self._dataType.name != s['dataType']:
+                        print 'exp multiple data types!'
                         self._dataType = None
                         break
+                        

+            print 'still none (exp)'
             return self._dataType
     
     def __init__(self, id, parent, stanzas):
         list.__init__(self)
         self.extend(stanzas)
         self._id = id
         self._parent = parent