3aee104cf4c1e245dd020f743fbc58c17fd75976 mmaddren Mon Apr 9 12:12:44 2012 -0700 added encode.py to store global constants and other encode stuff, and made all other libraries interface correctly with it diff --git python/lib/ucscgenomics/mdb.py python/lib/ucscgenomics/mdb.py index c93e3b0..5c53cc5 100644 --- python/lib/ucscgenomics/mdb.py +++ python/lib/ucscgenomics/mdb.py @@ -1,91 +1,16 @@ -from ucscgenomics import ra, ordereddict - -class DataType(object): - - def __init__(self, name, molecule, strategy, source, selection, type): - self.name = name - self.molecule = molecule - self.strategy = strategy - self.source = source - self.selection = selection - self.type = type - - @property - def valid(self): - return self.molecule != 'REPLACE' and self.strategy != 'REPLACE' and self.source != 'REPLACE' and self.selection != 'REPLACE' and self.type != None - - @property - def shouldSubmit(self): - return self.type != 'NotGeo' - -dataTypes = { - 'Cage': DataType( 'Cage', 'RNA', 'OTHER', 'transcriptomic', 'CAGE', 'HighThroughput'), - 'ChipSeq': DataType( 'ChipSeq', 'genomic DNA', 'ChIP-Seq', 'genomic', 'ChIP', 'HighThroughput'), - 'DnaPet': DataType( 'DnaPet', 'genomic DNA', 'OTHER', 'genomic', 'size fractionation', 'HighThroughput'), - 'DnaseDgf': DataType( 'DnaseDgf', 'genomic DNA', 'DNase-Hypersensitivity', 'genomic', 'DNase', 'HighThroughput'), - 'DnaseSeq': DataType( 'DnaseSeq', 'genomic DNA', 'DNase-Hypersensitivity', 'genomic', 'DNase', 'HighThroughput'), - 'FaireSeq': DataType( 'FaireSeq', 'genomic DNA', 'OTHER', 'genomic', 'other', 'HighThroughput'), - 'MethylSeq': DataType( 'MethylSeq', 'genomic DNA', 'MRE-Seq', 'genomic', 'Restriction Digest', 'HighThroughput'), - 'MethylRrbs': DataType( 'MethylRrbs', 'genomic DNA', 'Bisulfite-Seq', 'genomic', 'Reduced Representation', 'HighThroughput'), - 'Orchid': DataType( 'Orchid', 'genomic DNA', 'OTHER', 'genomic', 'other', 'HighThroughput'), - 'Proteogenomics': DataType( 'Proteogenomics', 'protein', 'mass spectrometry-based proteogenomic mapping', 'protein', 'chromatographically fractionated peptides', 'HighThroughput'), - 'RnaPet': DataType( 'RnaPet', 'RNA', 'OTHER', 'transcriptomic', 'other', 'HighThroughput'), - 'RnaSeq': DataType( 'RnaSeq', 'RNA', 'RNA-Seq', 'transcriptomic', 'cDNA', 'HighThroughput'), - - #doublecheck - 'ChiaPet': DataType( 'ChiaPet', 'genomic DNA', 'ChIP-Seq followed by ligation', 'genomic', 'other', 'HighThroughput'), - 'Nucleosome': DataType( 'Nucleosome', 'genomic DNA', 'ChIP-Seq', 'genomic', 'ChIP', 'HighThroughput'), - 'RipSeq': DataType( 'RipSeq', 'RNA', 'OTHER', 'transcriptomic', 'RNA binding protein antibody', 'HighThroughput'), - #for ripseq, ask geo about new 'ripseq' - - #not geo stuff - '5C': DataType('5C', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'), - 'Bip': DataType('Bip', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'), - 'Gencode': DataType('Gencode', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'), - 'Mapability': DataType('Mapability', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'), - 'NRE': DataType('NRE', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'), - 'Switchgear': DataType('Switchgear', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'), - 'TfbsValid': DataType('TfbsValid', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'), - 'Cluster': DataType('Cluster', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'), - - #array - 'AffyExonArray': DataType( 'AffyExonArray', 'mRNA', 'RNA-Microarray', 'transcriptomic', 'polyA', 'MicroArray'), - 'MethylArray': DataType( 'MethylArray', 'genomic DNA', 'REPLACE', 'genomic', 'REPLACE', 'MicroArray'), - 'RipGeneSt': DataType( 'RipGeneSt', 'RNA', 'REPLACE', 'transcriptomic', 'RNA binding protein antibody', 'MicroArray'), #this isn't correct - 'RipTiling': DataType( 'RipTiling', 'RNA', 'REPLACE', 'transcriptomic', 'RNA binding protein antibody', 'MicroArray'), - - #these need to be curated - 'Cnv': DataType( 'Cnv', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None), - 'Combined': DataType( 'Combined', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None), - 'Genotype': DataType( 'Genotype', 'genomic DNA', 'REPLACE', 'genomic', 'REPLACE', None), - 'RnaChip': DataType( 'RnaChip', 'RNA', 'REPLACE', 'transcriptomic', 'RNA binding protein antibody', None), - 'RipChip': DataType( 'RipChip', 'RNA', 'REPLACE', 'transcriptomic', 'RNA binding protein antibody', None) - - -} - -#compare this to the source in datatype, give GP ids depending on the type -gpIds = { - 'human genomic': '63443', - 'human transcriptomic': '30709', - 'human protein': '63447', - - 'mouse genomic': '63471', - 'mouse transcriptomic': '66167', - 'mouse protein': '63475' -} +from ucscgenomics import ra, ordereddict, encode class MdbFile(ra.RaFile): ''' This should be used for all files in the metaDb, since they extend RaFile with useful functionality specific to metaDb ra files. ''' @property def name(self): return self.compositeStanza['metaObject'] @property def expVars(self): '''the experimental variables used in this track''' try: @@ -219,31 +144,31 @@ elif self._title != s.title: self._title = None break return self._title @property def dataType(self): '''The data type of the experiment. 'None' if inconsistent.''' try: return self._dataType except AttributeError: self._dataType = None for s in self.normalStanzas: if 'dataType' in s: if self._dataType == None: - self._dataType = dataTypes[s['dataType']] + self._dataType = encode.dataTypes[s['dataType']] elif self._dataType.name != s['dataType']: self._dataType = None break return self._dataType @property def normalStanzas(self): '''Returns the list of stanzas without revoked items''' try: return self._normal except AttributeError: self._normal = list() for s in self: if 'objStatus' not in s: self._normal.append(s)