3aee104cf4c1e245dd020f743fbc58c17fd75976
mmaddren
  Mon Apr 9 12:12:44 2012 -0700
added encode.py to store global constants and other encode stuff, and made all other libraries interface correctly with it
diff --git python/lib/ucscgenomics/track.py python/lib/ucscgenomics/track.py
index 8a79340..bc45749 100644
--- python/lib/ucscgenomics/track.py
+++ python/lib/ucscgenomics/track.py
@@ -1,74 +1,47 @@
-import os, re, hashlib
-from ucscgenomics import ra, mdb
-
-organisms = {
-    'hg19': 'human',
-    'hg18': 'human',
-    'mm9': 'mouse',
-    'encodeTest': 'human'
-}
-
-def readMd5sums(filename):
-    if os.path.isfile(filename):
-        md5sums = dict()
-        md5file = open(filename, 'r')
-        for line in md5file:
-            key, val = map(str.strip, line.split(' ', 1))
-            md5sums[key] = val
-        return md5sums
-    else:
-        return None
-
-        
-def hashfile(filename, hasher=hashlib.md5(), blocksize=65536):
-    afile = open(filename, 'rb')
-    buf = afile.read(blocksize)
-    while len(buf) > 0:
-        hasher.update(buf)
-        buf = afile.read(blocksize)
-    return hasher.hexdigest()
+import os, re
+from ucscgenomics import ra, mdb, encode
         
 class TrackFile(object):
     '''
     A file in the trackDb, which has useful information about iself.
     
     CompositeTrack (below) has multiple dictionaries of TrackFiles, one for
     the root downloads directory, and one for each release. The root directory
     will link itself to the CompositeTrack's alpha metadata.
     '''
 
     @property 
     def name(self):
         '''The file's name'''
         return self._name
         
     @property 
     def fullname(self):
         '''The file's full name including path'''
         return self._path + self._name
         
     @property 
     def path(self):
         '''The file's path'''
         return self._path
         
     @property 
     def md5sum(self):
         '''The md5sum for this file, stored in the md5sum.txt file in the downloads directory'''
         if self._md5sum == None:
-            self._md5sum = hashfile(self.fullname)
+            self._md5sum = encode.hashFile(self.fullname)
         return self._md5sum
         
     @property 
     def extension(self):
         '''The filetype'''
         return self._extension
         
     @property 
     def size(self):
         '''The size in bytes'''
         return self._size
         
     @property 
     def metaObject(self):
         '''The size in bytes'''
@@ -80,30 +53,54 @@
             raise KeyError('invalid file: %s' % fullname)
         self._path, self._name = fullname.rsplit('/', 1)
         self._path = self._path + '/'
         self._fullname = fullname
         self._size = os.stat(fullname).st_size
         self._md5sum = md5
         self._metaObj = metaObj
         
         self._extension = self._name
         self._extension.replace('.gz', '').replace('.tgz', '')
         if '.' in self._extension:
             self._extension = self._extension.rsplit('.')[1]
         else:
             self._extension = None
     
+class Release(object):
+    '''
+    Keeps track of a single release, stored within the track.
+    '''
+    
+    @property
+    def index(self):
+        '''Which release, represented as an int starting with 1'''
+        return self._index
+        
+    # @property
+    # def status(self):
+        # '''A string representing the status of this release: alpha, beta, or public'''
+        # return self._status
+    
+    @property
+    def files(self):
+        '''A dictionary of TrackFiles where the filename is the key'''
+        return self._files
+    
+    def __init__(self, index, status, files):
+        self._files = files
+        self._index = index
+        self._status = status.split()
     
 class CompositeTrack(object):
     '''
     Stores an entire track, consisting mainly of its metadata and files.
     
     To make a CompositeTrack, you must specify database and name of the track:
         sometrack = CompositeTrack('hg19', 'wgEncodeCshlLongRnaSeq')
         
     You can also specify a trackDb path in the event that yours is different
     from the default, '~/kent/src/hg/makeDb/trackDb/':
         sometrack = CompositeTrack('hg19', 'wgEncode...', '/weird/path')
         
     It's important to know that the CompositeTrack does NOT load all of its
     information up front. Therefore, there's no performance hit for using a
     CompositeTrack instead of just specifying a RaFile. In fact, it's
@@ -147,31 +144,31 @@
         return self._downloadsDirectory
    
     @property 
     def httpDownloadsPath(self):
         '''The location of the downloadable files path in apache form'''
         if not os.path.isdir(self._httpDownloadsPath):
             raise KeyError(self._httpDownloadsPath + ' does not exist')
         return self._httpDownloadsPath
     
     @property 
     def files(self):
         '''A list of all files in the downloads directory of this composite'''
         try:
             return self._files
         except AttributeError:
-            md5sums = readMd5sums(self._md5path)
+            md5sums = encode.readMd5sums(self._md5path)
             
             radict = dict()
             for stanza in self.alphaMetaDb.itervalues():
                 if 'fileName' in stanza:
                     for file in stanza['fileName'].split(','):
                         radict[file] = stanza
             
             self._files = dict()
             for file in os.listdir(self.downloadsDirectory):
                 if os.path.isfile(self.downloadsDirectory + file):
                 
                     stanza = None
                     if file in radict:
                         stanza = radict[file]
                         
@@ -190,41 +187,95 @@
         else:
             os.makedirs(qaDir)
         self._qaDir = qaDir
         return qaDir
     @property 
     def qaInitDirTest(self):
         qaDir = '/hive/groups/encode/encodeQa/test/' + self._database + '/' + self._name + '/'
         if os.path.exists(qaDir) and os.path.isdir(qaDir):
             pass
         else:
             os.makedirs(qaDir)
         self._qaDir = qaDir
         return qaDir
 
     @property 
+    def releaseObjects(self):
+        '''A list of Release objects, one for each release'''
+        
+        try:
+            return self._releaseObjects
+        except AttributeError:
+            self._releaseObjects = list()
+            count = 1
+            
+            omit = ['README.txt', 'md5sum.txt', 'md5sum.history', 'files.txt']
+            
+            maxcomposite = 0
+            statuses = dict()
+            for line in open(self._trackDbDir):
+                parts = line.split()
+                composite = parts[1]
+                places = ''
+                if len(parts) > 2:
+                    places = parts[2]
+                if composite.startswith(self.name):
+                    compositeparts = composite.split('.')
+                    if len(compositeparts) >= 2 and compositeparts[1].startswith('release'):
+                        index = int(compositeparts[1].replace('release', ''))
+                        statuses[index] = places
+                        maxcomposite = max(maxcomposite, index)
+                    else:                       # THINK MORE ABOUT THIS REGION RE: PATCHES
+                        statuses[1] = places
+                        maxcomposite = max(maxcomposite, 1)
+            
+            lastplace = statuses[maxcomposite]
+            for i in range(maxcomposite, 0, -1):
+                if i not in statuses:
+                    statuses[i] = lastplace
+                else:
+                    lastplace = statuses[i]
+                    
+            while(1):
+                releasepath = self.downloadsDirectory + ('release%d' % count) + '/'
+                
+                if not os.path.exists(releasepath):
+                    break
+                    
+                md5s = encode.readMd5sums(releasepath + 'md5sum.txt')
+                releasefiles = dict()
+                
+                for file in os.listdir(releasepath):
+                    if os.path.isfile(releasepath + file) and file not in omit:
+                        if md5s != None and file in md5s:
+                            releasefiles[file] = TrackFile(releasepath + file, md5s[file])
+                        else:
+                            releasefiles[file] = TrackFile(releasepath + file, None)
+                
+                self._releaseObjects.append(Release(count, statuses[count], releasefiles))
+    @property 
     def releases(self):
         '''A list of all files in the release directory of this composite'''
         try:
             return self._releaseFiles
         except AttributeError:
             self._releaseFiles = list()
             count = 1
             
             while os.path.exists(self.downloadsDirectory + 'release' + str(count)):
                 releasepath = self.downloadsDirectory + 'release' + str(count) + '/'
-                md5s = readMd5sums(releasepath + 'md5sum.txt')
+                md5s = encode.readMd5sums(releasepath + 'md5sum.txt')
                 releasefiles = dict()
                 
                 for file in os.listdir(releasepath):
                     if file != 'md5sum.txt' and md5s != None and file in md5s and not os.path.isdir(releasepath + file):
                         releasefiles[file] = TrackFile(releasepath + file, md5s[file])
                     elif not os.path.isdir(releasepath + file):
                         releasefiles[file] = TrackFile(releasepath + file, None)
                     elif os.path.isdir(releasepath + file):
                         if not re.match('.*supplemental.*', releasepath + file):
                             continue
                         for innerfile in os.listdir(releasepath + file):
                             pathfile = file + "/" + innerfile 
                             releasefiles[pathfile] = TrackFile(releasepath + pathfile, None)
         #releasefiles.sort()
                 self._releaseFiles.append(releasefiles)
@@ -305,32 +356,32 @@
         return None
 
 
     def __init__(self, database, compositeName, trackPath=None, mdbCompositeName=None):
         
         if mdbCompositeName == None:
             mdbCompositeName = compositeName
         
         if trackPath == None:
             self._trackPath = os.path.expanduser('~/kent/src/hg/makeDb/trackDb/')
         else:
             self._trackPath = trackPath
             if not self._trackPath.endswith('/'):
                 self._trackPath = self._trackPath + '/'
             
-        if database in organisms:
-            self._organism = organisms[database]
+        if database in encode.organisms:
+            self._organism = encode.organisms[database]
         else:
             raise KeyError(database + ' is not a valid database')
         
         #self._trackDbPath = self._trackPath + self._organism + '/' + database + '/' + compositeName + '.ra'
         self._trackDbDir = self._trackPath + self._organism + '/' + database + '/'
   
         self._alphaMdbPath = self._trackPath + self._organism + '/' + database + '/metaDb/alpha/' + mdbCompositeName + '.ra'
         self._betaMdbPath = self._trackPath + self._organism + '/' + database + '/metaDb/beta/' + mdbCompositeName + '.ra'    
         self._publicMdbPath = self._trackPath + self._organism + '/' + database + '/metaDb/public/' + mdbCompositeName + '.ra'
         self._alphaMdbDir = self._trackPath + self._organism + '/' + database + '/metaDb/alpha/'
         self._betaMdbDir = self._trackPath + self._organism + '/' + database + '/metaDb/beta/'
         self._publicMdbDir = self._trackPath + self._organism + '/' + database + '/metaDb/public/'
         self._downloadsDirectory = '/hive/groups/encode/dcc/analysis/ftp/pipeline/' + database + '/' + compositeName + '/'
         self._httpDownloadsPath = '/usr/local/apache/htdocs-hgdownload/goldenPath/' + database + '/encodeDCC/' + compositeName + '/'
         self._rrHttpDir = '/usr/local/apache/htdocs/goldenPath/' + database + '/encodeDCC/' + compositeName + '/'
@@ -353,32 +404,32 @@
     '''
     
     @property 
     def database(self):
         return self._database
     
     @property 
     def organism(self):
         return self._organism  
         
     def __init__(self, database, trackPath=None):
         dict.__init__(self)
     
         self._database = database
         
-        if database in organisms:
-            self._organism = organisms[database]
+        if database in encode.organisms:
+            self._organism = encode.organisms[database]
         else:
             raise KeyError(database + ' is not a valid database')
     
         if trackPath == None:
             self._trackPath = os.path.expanduser('~/kent/src/hg/makeDb/trackDb/')
         else:
             self._trackPath = trackPath
             if not self._trackPath.endswith('/'):
                 self._trackPath = self._trackPath + '/'
     
         metaDb = self._trackPath + self._organism + '/' + self._database + '/metaDb/alpha/'
         
         for file in os.listdir(metaDb):
             if os.path.isfile(metaDb + file) and file.endswith('.ra'):
                 trackname = file.replace('.ra', '')