python/lib/ucscgenomics/encode.py a44e3254174152b2d4b3f241d1935654cd139181

a44e3254174152b2d4b3f241d1935654cd139181
vsmalladi
  Tue May 8 10:11:21 2012 -0700
Renamed library from ucscgenomics to ucscGb. Redmine #7029.
diff --git python/lib/ucscgenomics/encode.py python/lib/ucscgenomics/encode.py
deleted file mode 100644
index 2d1f944..0000000
--- python/lib/ucscgenomics/encode.py
+++ /dev/null
@@ -1,115 +0,0 @@
-import os, hashlib
-
-class DataType(object):
-    '''One data type and the descriptive values recorded for it.
-
-    Every constructor argument is stored unchanged on the attribute of the
-    same name: name, molecule, strategy, source, selection and type.
-
-    Two values are treated specially by the properties below:
-      - the string 'REPLACE' in molecule, strategy, source or selection
-        makes the entry not valid;
-      - a type of None makes the entry not valid, and a type of 'NotGeo'
-        makes shouldSubmit False.
-    '''
-
-    def __init__(self, name, molecule, strategy, source, selection, type):
-        '''Stores each argument on the attribute of the same name.'''
-        self.name = name
-        self.molecule = molecule
-        self.strategy = strategy
-        self.source = source
-        self.selection = selection
-        self.type = type
-
-    @property
-    def valid(self):
-        '''True when molecule, strategy, source and selection are all
-        something other than 'REPLACE' and type is not None.'''
-        described = (self.molecule, self.strategy, self.source, self.selection)
-        # None is a singleton, so test it by identity rather than equality.
-        return 'REPLACE' not in described and self.type is not None
-
-    @property
-    def shouldSubmit(self):
-        '''True unless type is 'NotGeo' (a type of None still counts as True).'''
-        return self.type != 'NotGeo'
-
-
-# Known data types, keyed by name. Each row below holds the DataType
-# constructor arguments in order:
-#   (name, molecule, strategy, source, selection, type)
-# and the row's name is also used as its key in the dictionary.
-dataTypes = dict((row[0], DataType(*row)) for row in [
-    ('Cage', 'RNA', 'OTHER', 'transcriptomic', 'CAGE', 'HighThroughput'),
-    ('ChipSeq', 'genomic DNA', 'ChIP-Seq', 'genomic', 'ChIP', 'HighThroughput'),
-    ('DnaPet', 'genomic DNA', 'OTHER', 'genomic', 'size fractionation', 'HighThroughput'),
-    ('DnaseDgf', 'genomic DNA', 'DNase-Hypersensitivity', 'genomic', 'DNase', 'HighThroughput'),
-    ('DnaseSeq', 'genomic DNA', 'DNase-Hypersensitivity', 'genomic', 'DNase', 'HighThroughput'),
-    ('FaireSeq', 'genomic DNA', 'OTHER', 'genomic', 'other', 'HighThroughput'),
-    ('MethylSeq', 'genomic DNA', 'MRE-Seq', 'genomic', 'Restriction Digest', 'HighThroughput'),
-    ('MethylRrbs', 'genomic DNA', 'Bisulfite-Seq', 'genomic', 'Reduced Representation', 'HighThroughput'),
-    ('Orchid', 'genomic DNA', 'OTHER', 'genomic', 'other', 'HighThroughput'),
-    ('Proteogenomics', 'protein', 'mass spectrometry-based proteogenomic mapping', 'protein', 'chromatographically fractionated peptides', 'HighThroughput'),
-    ('RnaPet', 'RNA', 'OTHER', 'transcriptomic', 'other', 'HighThroughput'),
-    ('RnaSeq', 'RNA', 'RNA-Seq', 'transcriptomic', 'cDNA', 'HighThroughput'),
-
-    # The next three entries still need to be double-checked.
-    ('ChiaPet', 'genomic DNA', 'ChIP-Seq followed by ligation', 'genomic', 'other', 'HighThroughput'),
-    ('Nucleosome', 'genomic DNA', 'ChIP-Seq', 'genomic', 'ChIP', 'HighThroughput'),
-    # Open question for RipSeq: ask GEO about a new 'ripseq' value.
-    ('RipSeq', 'RNA', 'OTHER', 'transcriptomic', 'RNA binding protein antibody', 'HighThroughput'),
-
-    # Types marked 'NotGeo'; their descriptive fields are left as 'REPLACE'.
-    ('5C', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'),
-    ('Bip', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'),
-    ('Gencode', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'),
-    ('Mapability', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'),
-    ('NRE', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'),
-    ('Switchgear', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'),
-    ('TfbsValid', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'),
-    ('Cluster', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', 'NotGeo'),
-
-    # Array types ('MicroArray').
-    ('AffyExonArray', 'mRNA', 'RNA-Microarray', 'transcriptomic', 'polyA', 'MicroArray'),
-    ('MethylArray', 'genomic DNA', 'REPLACE', 'genomic', 'REPLACE', 'MicroArray'),
-    # The RipGeneSt row is flagged by its author as not correct.
-    ('RipGeneSt', 'RNA', 'REPLACE', 'transcriptomic', 'RNA binding protein antibody', 'MicroArray'),
-    ('RipTiling', 'RNA', 'REPLACE', 'transcriptomic', 'RNA binding protein antibody', 'MicroArray'),
-
-    # Not curated yet: type is None, so DataType.valid is False for these.
-    ('Cnv', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
-    ('Combined', 'REPLACE', 'REPLACE', 'REPLACE', 'REPLACE', None),
-    ('Genotype', 'genomic DNA', 'REPLACE', 'genomic', 'REPLACE', None),
-    ('RnaChip', 'RNA', 'REPLACE', 'transcriptomic', 'RNA binding protein antibody', None),
-    ('RipChip', 'RNA', 'REPLACE', 'transcriptomic', 'RNA binding protein antibody', None),
-])
-
-# GP ids, keyed by '<organism> <source>'. The second word of each key is
-# meant to be compared against DataType.source to pick the id for a type.
-gpIds = dict([
-    ('human genomic', '63443'),
-    ('human transcriptomic', '30709'),
-    ('human protein', '63447'),
-
-    ('mouse genomic', '63471'),
-    ('mouse transcriptomic', '66167'),
-    ('mouse protein', '63475'),
-])
-
-# Maps a database name to the organism word used in the gpIds keys.
-organisms = dict(
-    hg19='human',
-    hg18='human',
-    mm9='mouse',
-    encodeTest='human',
-)
-
-def defaultTrackPath():
-    '''Returns '~/kent/src/hg/makeDb/trackDb/' with the leading '~' expanded
-    by os.path.expanduser.'''
-    trackDbDir = '~/kent/src/hg/makeDb/trackDb/'
-    return os.path.expanduser(trackDbDir)
-
-def defaultCvPath():
-    '''Returns the path of cv/alpha/cv.ra underneath defaultTrackPath().'''
-    # defaultTrackPath() already ends in '/', so the pieces are joined as-is.
-    return ''.join([defaultTrackPath(), 'cv/alpha/cv.ra'])
-    
-def downloadsPath(database, composite):
-    '''Returns the pipeline downloads directory for a database and composite:
-    '/hive/groups/encode/dcc/analysis/ftp/pipeline/<database>/<composite>/'.'''
-    pipelineRoot = '/hive/groups/encode/dcc/analysis/ftp/pipeline'
-    # The empty last element supplies the trailing '/'.
-    return '/'.join([pipelineRoot, database, composite, ''])
-    
-def readMd5sums(filename):
-    '''Reads an md5sum.txt file and returns its contents as a dictionary.
-
-    Each non-blank line is split at its first space: the text before the
-    space becomes the key and the rest of the line becomes the value, both
-    with surrounding whitespace stripped. Blank lines are skipped.
-    NOTE(review): this was originally documented as 'filename: md5', which
-    implies the filename comes first on each line - confirm against a real
-    md5sum.txt.
-
-    Returns None when filename is not an existing file.
-    Raises ValueError for a non-blank line that contains no space.
-    '''
-    if not os.path.isfile(filename):
-        return None
-    md5sums = dict()
-    # 'with' closes the file even if a malformed line raises part-way through.
-    with open(filename, 'r') as md5file:
-        for line in md5file:
-            if not line.strip():
-                # e.g. an empty line at the end of the file
-                continue
-            key, val = map(str.strip, line.split(' ', 1))
-            md5sums[key] = val
-    return md5sums
-
-def hashFile(filename, hasher=None, blocksize=65536):
-    '''Hashes the contents of a file and returns the hex digest string.
-
-    filename:  path of the file to hash; it is read in binary mode.
-    hasher:    object with update() and hexdigest() methods, such as the
-               ones hashlib creates. When omitted, a new hashlib.md5() is
-               made for this call.
-    blocksize: number of bytes to read from the file at a time.
-    '''
-    # A hasher created in the signature is built once and then shared by
-    # every call, so each later call would also include the bytes of the
-    # files hashed before it. Create the default per call instead.
-    if hasher is None:
-        hasher = hashlib.md5()
-    # 'with' closes the file even when reading fails.
-    with open(filename, 'rb') as afile:
-        buf = afile.read(blocksize)
-        while len(buf) > 0:
-            hasher.update(buf)
-            buf = afile.read(blocksize)
-    return hasher.hexdigest()
-    
\ No newline at end of file