3aee104cf4c1e245dd020f743fbc58c17fd75976
mmaddren
  Mon Apr 9 12:12:44 2012 -0700
Added encode.py to store global constants and other encode-related code, and updated all other libraries to interface correctly with it
diff --git python/lib/ucscgenomics/mdb.py python/lib/ucscgenomics/mdb.py
index c93e3b0..5c53cc5 100644
--- python/lib/ucscgenomics/mdb.py
+++ python/lib/ucscgenomics/mdb.py
@@ -1,91 +1,16 @@
-from ucscgenomics import ra, ordereddict

-
-class DataType(object):
-
-    def __init__(self, name, molecule, strategy, source, selection, type):
-        self.name = name
-        self.molecule = molecule
-        self.strategy = strategy
-        self.source = source
-        self.selection = selection
-        self.type = type
-        
-    @property    
-    def valid(self):
-        return self.molecule != 'REPLACE' and self.strategy != 'REPLACE' and self.source != 'REPLACE' and self.selection != 'REPLACE' and self.type != None
-    
-    @property
-    def shouldSubmit(self):
-        return self.type != 'NotGeo'
-    
-dataTypes = {
-    'Cage': DataType(           'Cage',             'RNA',          'OTHER',                                            'transcriptomic',   'CAGE',                                         'HighThroughput'),
-    'ChipSeq': DataType(        'ChipSeq',          'genomic DNA',  'ChIP-Seq',                                         'genomic',          'ChIP',                                         'HighThroughput'),
-    'DnaPet': DataType(         'DnaPet',           'genomic DNA',  'OTHER',                                            'genomic',          'size fractionation',                           'HighThroughput'),
-    'DnaseDgf': DataType(       'DnaseDgf',         'genomic DNA',  'DNase-Hypersensitivity',                           'genomic',          'DNase',                                        'HighThroughput'),
-    'DnaseSeq': DataType(       'DnaseSeq',         'genomic DNA',  'DNase-Hypersensitivity',                           'genomic',          'DNase',                                        'HighThroughput'),
-    'FaireSeq': DataType(       'FaireSeq',         'genomic DNA',  'OTHER',                                            'genomic',          'other',                                        'HighThroughput'),
-    'MethylSeq': DataType(      'MethylSeq',        'genomic DNA',  'MRE-Seq',                                          'genomic',          'Restriction Digest',                           'HighThroughput'),
-    'MethylRrbs': DataType(     'MethylRrbs',       'genomic DNA',  'Bisulfite-Seq',                                    'genomic',          'Reduced Representation',                       'HighThroughput'),
-    'Orchid': DataType(         'Orchid',           'genomic DNA',  'OTHER',                                            'genomic',          'other',                                        'HighThroughput'),
-    'Proteogenomics': DataType( 'Proteogenomics',   'protein',      'mass spectrometry-based proteogenomic mapping',    'protein',          'chromatographically fractionated peptides',    'HighThroughput'),
-    'RnaPet': DataType(         'RnaPet',           'RNA',          'OTHER',                                            'transcriptomic',   'other',                                        'HighThroughput'),
-    'RnaSeq': DataType(         'RnaSeq',           'RNA',          'RNA-Seq',                                          'transcriptomic',   'cDNA',                                         'HighThroughput'),
-    

-    #doublecheck

-    'ChiaPet': DataType(        'ChiaPet',          'genomic DNA',  'ChIP-Seq followed by ligation',                    'genomic',          'other',                                      'HighThroughput'),

-    'Nucleosome': DataType(     'Nucleosome',       'genomic DNA',  'ChIP-Seq',                                         'genomic',          'ChIP',                                         'HighThroughput'),

-    'RipSeq': DataType(         'RipSeq',           'RNA',          'OTHER',                                          'transcriptomic',   'RNA binding protein antibody',                 'HighThroughput'),

-    #for ripseq, ask geo about new 'ripseq'

-    

-    #not geo stuff

-    '5C': DataType('5C', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'),

-    'Bip': DataType('Bip', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'),

-    'Gencode': DataType('Gencode', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'),

-    'Mapability': DataType('Mapability', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'),

-    'NRE': DataType('NRE', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'),

-    'Switchgear': DataType('Switchgear', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'),

-    'TfbsValid': DataType('TfbsValid', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'),

-    'Cluster': DataType('Cluster', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'),

-    

-    #array

-    'AffyExonArray': DataType(  'AffyExonArray',    'mRNA',         'RNA-Microarray',                                   'transcriptomic',   'polyA',                                        'MicroArray'),

-    'MethylArray': DataType(    'MethylArray',      'genomic DNA',  'REPLACE',                                          'genomic',          'REPLACE',                                      'MicroArray'),

-    'RipGeneSt': DataType(      'RipGeneSt',        'RNA',          'REPLACE',                                          'transcriptomic',   'RNA binding protein antibody',                 'MicroArray'), #this isn't correct

-    'RipTiling': DataType(      'RipTiling',        'RNA',          'REPLACE',                                          'transcriptomic',   'RNA binding protein antibody',                 'MicroArray'),

-    
-    #these need to be curated
-    'Cnv': DataType(            'Cnv',              'REPLACE',      'REPLACE',                                          'REPLACE',          'REPLACE',                                      None),
-    'Combined': DataType(       'Combined',         'REPLACE',      'REPLACE',                                          'REPLACE',          'REPLACE',                                      None),
-    'Genotype': DataType(       'Genotype',         'genomic DNA',  'REPLACE',                                          'genomic',          'REPLACE',                                      None),
-    'RnaChip': DataType(        'RnaChip',          'RNA',          'REPLACE',                                          'transcriptomic',   'RNA binding protein antibody',                 None),
-    'RipChip': DataType(        'RipChip',          'RNA',          'REPLACE',                                          'transcriptomic',   'RNA binding protein antibody',                 None)
-    
-    
-}
-
-#compare this to the source in datatype, give GP ids depending on the type
-gpIds = {
-    'human genomic': '63443',
-    'human transcriptomic': '30709',
-    'human protein': '63447',
-    
-    'mouse genomic': '63471',
-    'mouse transcriptomic': '66167',
-    'mouse protein': '63475'
-}
+from ucscgenomics import ra, ordereddict, encode

 
 class MdbFile(ra.RaFile):
     '''
     This should be used for all files in the metaDb, since they extend RaFile
     with useful functionality specific to metaDb ra files.
     '''
     
     @property
     def name(self):
         return self.compositeStanza['metaObject']
     
     @property
     def expVars(self):
         '''the experimental variables used in this track'''
         try:
@@ -219,31 +144,31 @@
                 elif self._title != s.title:
                     self._title = None
                     break
             return self._title
         
     @property
     def dataType(self):
         '''The data type of the experiment. 'None' if inconsistent.'''
         try:
             return self._dataType
         except AttributeError:
             self._dataType = None
             for s in self.normalStanzas:
                 if 'dataType' in s:
                     if self._dataType == None:
-                        self._dataType = dataTypes[s['dataType']]
+                        self._dataType = encode.dataTypes[s['dataType']]
                     elif self._dataType.name != s['dataType']:
                         self._dataType = None
                         break
             return self._dataType
     
     @property
     def normalStanzas(self):
         '''Returns the list of stanzas without revoked items'''
         try:
             return self._normal
         except AttributeError:
             self._normal = list()
             for s in self:
                 if 'objStatus' not in s:
                     self._normal.append(s)