35c0685ff195fd0332ba86828c6230a34eaaaacb
mmaddren
  Thu Apr 19 16:44:47 2012 -0700
added track viewing tools for GEO so that venkat can use it
diff --git python/lib/ucscgenomics/track.py python/lib/ucscgenomics/track.py
index bc45749..d4ca776 100644
--- python/lib/ucscgenomics/track.py
+++ python/lib/ucscgenomics/track.py
@@ -1,439 +1,458 @@
 import os, re
 from ucscgenomics import ra, mdb, encode
 
 class TrackFile(object):
     '''
     A file in the trackDb, which has useful information about iself.
     
     CompositeTrack (below) has multiple dictionaries of TrackFiles, one for
     the root downloads directory, and one for each release. The root directory
     will link itself to the CompositeTrack's alpha metadata.
     '''
 
     @property 
     def name(self):
         '''The file's name'''
         return self._name
         
     @property 
     def fullname(self):
         '''The file's full name including path'''
         return self._path + self._name
         
     @property 
     def path(self):
         '''The file's path'''
         return self._path
         
     @property 
     def md5sum(self):
         '''The md5sum for this file, stored in the md5sum.txt file in the downloads directory'''
         if self._md5sum == None:
             self._md5sum = encode.hashFile(self.fullname)
         return self._md5sum
         
     @property 
     def extension(self):
         '''The filetype'''
         return self._extension
         
     @property 
     def size(self):
         '''The size in bytes'''
         return self._size
         
     @property 
     def metaObject(self):
         '''The size in bytes'''
         return self._metaObj
     
     def __init__(self, fullname, md5=None, metaObj=None):
         fullname = os.path.abspath(fullname)
         if not os.path.isfile(fullname):
             raise KeyError('invalid file: %s' % fullname)
         self._path, self._name = fullname.rsplit('/', 1)
         self._path = self._path + '/'
         self._fullname = fullname
         self._size = os.stat(fullname).st_size
         self._md5sum = md5
         self._metaObj = metaObj
         
         self._extension = self._name
         self._extension.replace('.gz', '').replace('.tgz', '')
         if '.' in self._extension:
             self._extension = self._extension.rsplit('.')[1]
         else:
             self._extension = None
     
 class Release(object):
     '''
     Keeps track of a single release, stored within the track.
     '''
     
     @property
     def index(self):
         '''Which release, represented as an int starting with 1'''
         return self._index
         
     # @property
     # def status(self):
         # '''A string representing the status of this release: alpha, beta, or public'''
         # return self._status
+    @property
+    def onAlpha(self):
+        return self._alpha
+    
+    @property
+    def onBeta(self):
+        return self._beta
+        
+    @property
+    def onPublic(self):
+        return self._public
     
     @property
     def files(self):
-        '''A dictionary of TrackFiles where the filename is the key'''
+        '''A dictionary of TrackFiles belonging to this release where the filename is the key'''
         return self._files
     
     def __init__(self, index, status, files):
         self._files = files
         self._index = index
-        self._status = status.split()
+        # No status listed: treat the release as on alpha, beta and public alike.
+        places = [place.strip() for place in status.split(',')]
+        everywhere = status.strip() == ''
+        self._alpha = everywhere or 'alpha' in places
+        self._beta = everywhere or 'beta' in places
+        self._public = everywhere or 'public' in places
     
 class CompositeTrack(object):
     '''
     Stores an entire track, consisting mainly of its metadata and files.
     
     To make a CompositeTrack, you must specify database and name of the track:
         sometrack = CompositeTrack('hg19', 'wgEncodeCshlLongRnaSeq')
         
     You can also specify a trackDb path in the event that yours is different
     from the default, '~/kent/src/hg/makeDb/trackDb/':
         sometrack = CompositeTrack('hg19', 'wgEncode...', '/weird/path')
         
     It's important to know that the CompositeTrack does NOT load all of its
     information up front. Therefore, there's no performance hit for using a
     CompositeTrack instead of just specifying a RaFile. In fact, it's
     beneficial, since it adds another layer of abstraction to your code. You
     can access a composite's ra files:
         somemetadata = sometrack.alphaMetaDb
         
     For more information on what you can do with ra files, check the ra.py
     documentation.
     
     You can also access a track's files. This is one of the more useful parts
     of the composite track:
         for file in sometrack.files:
             print '%s %s' % (file.name, file.size)
             
     Each file is an instance of a TrackFile object, which is detailed in its
     own documentation above. There are also lists of these files for each
     release associated with the track:
         for file in sometrack.releases[0]:
             print file.name in sometrack.releases[1]
             
     Note that the files are indexed by their filename. This means that you can
     easily compare multiple releases as in the above example.
     '''
 
     @property 
     def database(self):
         '''The database for this composite, typically hg19 for humans'''
         return self._database
         
     @property 
     def name(self):
         '''The composite name'''
         return self._name
         
     @property 
     def downloadsDirectory(self):
         '''The location of files in downloads'''
         if not os.path.isdir(self._downloadsDirectory):
             raise KeyError(self._downloadsDirectory + ' does not exist')
         return self._downloadsDirectory
    
     @property 
     def httpDownloadsPath(self):
         '''The location of the downloadable files path in apache form'''
         if not os.path.isdir(self._httpDownloadsPath):
             raise KeyError(self._httpDownloadsPath + ' does not exist')
         return self._httpDownloadsPath
     
     @property 
     def files(self):
         '''A list of all files in the downloads directory of this composite'''
         try:
             return self._files
         except AttributeError:
             md5sums = encode.readMd5sums(self._md5path)
             
             radict = dict()
             for stanza in self.alphaMetaDb.itervalues():
                 if 'fileName' in stanza:
                     for file in stanza['fileName'].split(','):
                         radict[file] = stanza
             
             self._files = dict()
             for file in os.listdir(self.downloadsDirectory):
                 if os.path.isfile(self.downloadsDirectory + file):
                 
                     stanza = None
                     if file in radict:
                         stanza = radict[file]
                         
                     if file in md5sums:
                         self._files[file] = TrackFile(self.downloadsDirectory + file, md5sums[file], stanza)
                     else:
                         self._files[file] = TrackFile(self.downloadsDirectory + file, None, stanza)
         
             return self._files
             
     @property 
     def qaInitDir(self):
         qaDir = '/hive/groups/encode/encodeQa/' + self._database + '/' + self._name + '/'
         if os.path.exists(qaDir) and os.path.isdir(qaDir):
             pass
         else:
             os.makedirs(qaDir)
         self._qaDir = qaDir
         return qaDir
     @property 
     def qaInitDirTest(self):
         qaDir = '/hive/groups/encode/encodeQa/test/' + self._database + '/' + self._name + '/'
         if os.path.exists(qaDir) and os.path.isdir(qaDir):
             pass
         else:
             os.makedirs(qaDir)
         self._qaDir = qaDir
         return qaDir
 
     @property
     def releaseObjects(self):
         '''A set of release objects describing each release'''
         
         try:
             return self._releaseObjects
         except AttributeError:
             self._releaseObjects = list()
-            count = 1
             
             omit = ['README.txt', 'md5sum.txt', 'md5sum.history', 'files.txt']
             
             maxcomposite = 0
             statuses = dict()
-            for line in open(self._trackDbDir):
+            for line in open(self._trackDbDir + 'trackDb.wgEncode.ra'):
+                if line.lstrip().startswith('#') or len(line.split()) < 2:
+                    continue  # comment (even if indented), blank or lone-token line
                 parts = line.split()
                 composite = parts[1]
                 places = ''
                 if len(parts) > 2:
                     places = parts[2]
                 if composite.startswith(self.name):
                     compositeparts = composite.split('.')
                     if len(compositeparts) >= 2 and compositeparts[1].startswith('release'):
                         index = int(compositeparts[1].replace('release', ''))
                         statuses[index] = places
                         maxcomposite = max(maxcomposite, index)
                     else:                       # THINK MORE ABOUT THIS REGION RE: PATCHES
                         statuses[1] = places
                         maxcomposite = max(maxcomposite, 1)
             
             lastplace = statuses[maxcomposite]
             for i in range(maxcomposite, 0, -1):
                 if i not in statuses:
                     statuses[i] = lastplace
                 else:
                     lastplace = statuses[i]
                     
-            while(1):
-                releasepath = self.downloadsDirectory + ('release%d' % count) + '/'
+            # while(1):
+                # releasepath = self.downloadsDirectory + ('release%d' % count) + '/'
                 
-                if not os.path.exists(releasepath):
-                    break
+                # if not os.path.exists(releasepath):
+                    # break
                     
-                md5s = encode.readMd5sums(releasepath + 'md5sum.txt')
-                releasefiles = dict()
+                # md5s = encode.readMd5sums(releasepath + 'md5sum.txt')
+                # releasefiles = dict()
                 
-                for file in os.listdir(releasepath):
-                    if os.path.isfile(releasepath + file) and file not in omit:
-                        if md5s != None and file in md5s:
-                            releasefiles[file] = TrackFile(releasepath + file, md5s[file])
-                        else:
-                            releasefiles[file] = TrackFile(releasepath + file, None)
+                # for file in os.listdir(releasepath):
+                    # if os.path.isfile(releasepath + file) and file not in omit:
+                        # if md5s != None and file in md5s:
+                            # releasefiles[file] = TrackFile(releasepath + file, md5s[file])
+                        # else:
+                            # releasefiles[file] = TrackFile(releasepath + file, None)
+            for i in range(1, maxcomposite + 1):
+                self._releaseObjects.append(Release(i, statuses[i], None))
                 
-                self._releaseObjects.append(Release(count, statuses[count], releasefiles))
+            return self._releaseObjects
     @property 
     def releases(self):
         '''A list of all files in the release directory of this composite'''
         try:
             return self._releaseFiles
         except AttributeError:
             self._releaseFiles = list()
             count = 1
             
             while os.path.exists(self.downloadsDirectory + 'release' + str(count)):
                 releasepath = self.downloadsDirectory + 'release' + str(count) + '/'
                 md5s = encode.readMd5sums(releasepath + 'md5sum.txt')
                 releasefiles = dict()
                 
                 for file in os.listdir(releasepath):
                     if file != 'md5sum.txt' and md5s != None and file in md5s and not os.path.isdir(releasepath + file):
                         releasefiles[file] = TrackFile(releasepath + file, md5s[file])
                     elif not os.path.isdir(releasepath + file):
                         releasefiles[file] = TrackFile(releasepath + file, None)
                     elif os.path.isdir(releasepath + file):
                         if not re.match('.*supplemental.*', releasepath + file):
                             continue
                         for innerfile in os.listdir(releasepath + file):
                             pathfile = file + "/" + innerfile 
                             releasefiles[pathfile] = TrackFile(releasepath + pathfile, None)
         #releasefiles.sort()
                 self._releaseFiles.append(releasefiles)
                 count = count + 1
                 
             return self._releaseFiles
         
     @property 
     def alphaMetaDb(self):
         '''The Ra file in the metaDb for this composite'''
         try:
             return self._alphaMetaDb
         except AttributeError:
             if not os.path.isfile(self._alphaMdbPath):
                 raise KeyError(self._alphaMdbPath + ' does not exist')
             self._alphaMetaDb = mdb.MdbFile(self._alphaMdbPath)
             return self._alphaMetaDb
         
     @property 
     def betaMetaDb(self):
         '''The Ra file in the metaDb for this composite'''
         try:
             return self._betaMetaDb
         except AttributeError:
             if not os.path.isfile(self._betaMdbPath):
                 raise KeyError(self._betaMdbPath + ' does not exist')
             self._betaMetaDb = mdb.MdbFile(self._betaMdbPath)
             return self._betaMetaDb
         
     @property 
     def publicMetaDb(self):
         '''The Ra file in the metaDb for this composite'''
         try:
             return self._publicMetaDb
         except AttributeError:
             if not os.path.isfile(self._publicMdbPath):
                 raise KeyError(self._publicMdbPath + ' does not exist')
             self._publicMetaDb = mdb.MdbFile(self._publicMdbPath)
             return self._publicMetaDb
         
     @property 
     def trackDb(self):
         '''The Ra file in the trackDb for this composite'''
         try:
             return self._trackDb
         except AttributeError:
             self._trackDb = ra.RaFile(self._trackDbPath)
             return self._trackDb
         
     @property 
     def trackPath(self):
         '''The track path for this composite'''
         return self._trackPath
         
     @property 
     def url(self):
         '''The url on our site for this composite'''
         return self._url
         
     @property 
     def organism(self):
         '''The url on our site for this composite'''
         return self._organism
 
     @property 
     def currentTrackDb(self):
         trackDb = self._trackDbDir + "trackDb.wgEncode.ra"
         f = open(trackDb, "r")
         lines = f.readlines()
         p = re.compile(".*(%s\S+) ?(\S+)" % self._name)
         for i in lines:
             if re.match("^\s*#.*", i):
                 continue
             m = p.match(i)
             if m and re.search('alpha', m.group(2)):
                 tdbpath = "%s%s" % (self._trackDbDir, m.group(1))
                 return tdbpath
         return None
 
 
     def __init__(self, database, compositeName, trackPath=None, mdbCompositeName=None):
         
         if mdbCompositeName == None:
             mdbCompositeName = compositeName
         
         if trackPath == None:
             self._trackPath = os.path.expanduser('~/kent/src/hg/makeDb/trackDb/')
         else:
             self._trackPath = trackPath
             if not self._trackPath.endswith('/'):
                 self._trackPath = self._trackPath + '/'
             
         if database in encode.organisms:
             self._organism = encode.organisms[database]
         else:
             raise KeyError(database + ' is not a valid database')
         
         #self._trackDbPath = self._trackPath + self._organism + '/' + database + '/' + compositeName + '.ra'
         self._trackDbDir = self._trackPath + self._organism + '/' + database + '/'
   
         self._alphaMdbPath = self._trackPath + self._organism + '/' + database + '/metaDb/alpha/' + mdbCompositeName + '.ra'
         self._betaMdbPath = self._trackPath + self._organism + '/' + database + '/metaDb/beta/' + mdbCompositeName + '.ra'    
         self._publicMdbPath = self._trackPath + self._organism + '/' + database + '/metaDb/public/' + mdbCompositeName + '.ra'
         self._alphaMdbDir = self._trackPath + self._organism + '/' + database + '/metaDb/alpha/'
         self._betaMdbDir = self._trackPath + self._organism + '/' + database + '/metaDb/beta/'
         self._publicMdbDir = self._trackPath + self._organism + '/' + database + '/metaDb/public/'
         self._downloadsDirectory = '/hive/groups/encode/dcc/analysis/ftp/pipeline/' + database + '/' + compositeName + '/'
         self._httpDownloadsPath = '/usr/local/apache/htdocs-hgdownload/goldenPath/' + database + '/encodeDCC/' + compositeName + '/'
         self._rrHttpDir = '/usr/local/apache/htdocs/goldenPath/' + database + '/encodeDCC/' + compositeName + '/'
         self._notesDirectory = os.path.expanduser("~/kent/src/hg/makeDb/doc/encodeDcc%s" % database.capitalize()) + '/'
         self._url = 'http://genome.ucsc.edu/cgi-bin/hgTrackUi?db=' + database + '&g=' + compositeName
         self._database = database
         self._name = compositeName        
         self._md5path = '/hive/groups/encode/dcc/analysis/ftp/pipeline/' + database + '/' + compositeName + '/md5sum.txt'
         self._trackDbPath = self.currentTrackDb
         if self._trackDbPath == None:
             self._trackDbPath = self._trackPath + self._organism + '/' + database + '/' + compositeName + '.ra' 
         if not os.path.isfile(self._trackDbPath):
             raise KeyError(self._trackDbPath + ' does not exist')
         
 
 class TrackCollection(dict):
     '''
     A collection that stores all the tracks for a given database, indexed by
     its metaDb name.
     '''
     
     @property 
     def database(self):
         return self._database
     
     @property 
     def organism(self):
         return self._organism  
         
     def __init__(self, database, trackPath=None):
         dict.__init__(self)
     
         self._database = database
         
         if database in encode.organisms:
             self._organism = encode.organisms[database]
         else:
             raise KeyError(database + ' is not a valid database')
     
         if trackPath == None:
             self._trackPath = os.path.expanduser('~/kent/src/hg/makeDb/trackDb/')
         else:
             self._trackPath = trackPath
             if not self._trackPath.endswith('/'):
                 self._trackPath = self._trackPath + '/'
     
         metaDb = self._trackPath + self._organism + '/' + self._database + '/metaDb/alpha/'
         
         for file in os.listdir(metaDb):
             if os.path.isfile(metaDb + file) and file.endswith('.ra'):
                 trackname = file.replace('.ra', '') 
                 if os.path.isfile(self._trackPath + self._organism + '/' + self._database + '/' + file):
                     self[trackname] = CompositeTrack(self._database, trackname, self._trackPath)
                 
                 
\ No newline at end of file