58f787640ea77f16c8f6d4481693a83ec9ef647b
vsmalladi
  Tue May 8 10:29:52 2012 -0700
First step in python lib reogranization. Redmine #7029.
diff --git python/lib/ucscGb/track.py python/lib/ucscGb/track.py
deleted file mode 100644
index d4ca776..0000000
--- python/lib/ucscGb/track.py
+++ /dev/null
@@ -1,458 +0,0 @@
-import os, re
-from ucscgenomics import ra, mdb, encode
-
-class TrackFile(object):
-    '''
-    A file in the trackDb, which has useful information about iself.
-    
-    CompositeTrack (below) has multiple dictionaries of TrackFiles, one for
-    the root downloads directory, and one for each release. The root directory
-    will link itself to the CompositeTrack's alpha metadata.
-    '''
-
-    @property 
-    def name(self):
-        '''The file's name'''
-        return self._name
-        
-    @property 
-    def fullname(self):
-        '''The file's full name including path'''
-        return self._path + self._name
-        
-    @property 
-    def path(self):
-        '''The file's path'''
-        return self._path
-        
-    @property 
-    def md5sum(self):
-        '''The md5sum for this file, stored in the md5sum.txt file in the downloads directory'''
-        if self._md5sum == None:
-            self._md5sum = encode.hashFile(self.fullname)
-        return self._md5sum
-        
-    @property 
-    def extension(self):
-        '''The filetype'''
-        return self._extension
-        
-    @property 
-    def size(self):
-        '''The size in bytes'''
-        return self._size
-        
-    @property 
-    def metaObject(self):
-        '''The size in bytes'''
-        return self._metaObj
-    
-    def __init__(self, fullname, md5=None, metaObj=None):
-        fullname = os.path.abspath(fullname)
-        if not os.path.isfile(fullname):
-            raise KeyError('invalid file: %s' % fullname)
-        self._path, self._name = fullname.rsplit('/', 1)
-        self._path = self._path + '/'
-        self._fullname = fullname
-        self._size = os.stat(fullname).st_size
-        self._md5sum = md5
-        self._metaObj = metaObj
-        
-        self._extension = self._name
-        self._extension.replace('.gz', '').replace('.tgz', '')
-        if '.' in self._extension:
-            self._extension = self._extension.rsplit('.')[1]
-        else:
-            self._extension = None
-    
-class Release(object):
-    '''
-    Keeps track of a single release, stored within the track.
-    '''
-    
-    @property
-    def index(self):
-        '''Which release, represented as an int starting with 1'''
-        return self._index
-        
-    # @property
-    # def status(self):
-        # '''A string representing the status of this release: alpha, beta, or public'''
-        # return self._status
-    @property
-    def onAlpha(self):
-        return self._alpha
-    
-    @property
-    def onBeta(self):
-        return self._beta
-        
-    @property
-    def onPublic(self):
-        return self._public
-    
-    @property
-    def files(self):
-        '''A dictionary of TrackFiles belonging to this release where the filename is the key'''
-        return self._files
-    
-    def __init__(self, index, status, files):
-        self._files = files
-        self._index = index
-        if (status.strip() == ''):
-            self._alpha = self._beta = self._public = 1
-        else:
-            self._alpha = 'alpha' in status.split(',')
-            self._beta = 'beta' in status.split(',')
-            self._public = 'public' in status.split(',')
-    
-class CompositeTrack(object):
-    '''
-    Stores an entire track, consisting mainly of its metadata and files.
-    
-    To make a CompositeTrack, you must specify database and name of the track:
-        sometrack = CompositeTrack('hg19', 'wgEncodeCshlLongRnaSeq')
-        
-    You can also specify a trackDb path in the event that yours is different
-    from the default, '~/kent/src/hg/makeDb/trackDb/':
-        sometrack = CompositeTrack('hg19', 'wgEncode...', '/weird/path')
-        
-    It's important to know that the CompositeTrack does NOT load all of its
-    information up front. Therefore, there's no performance hit for using a
-    CompositeTrack instead of just specifying a RaFile. In fact, it's
-    beneficial, since it adds another layer of abstraction to your code. You
-    can access a composite's ra files:
-        somemetadata = sometrack.alphaMetaDb
-        
-    For more information on what you can do with ra files, check the ra.py
-    documentation.
-    
-    You can also access a track's files. This is one of the more useful parts
-    of the composite track:
-        for file in sometrack.files:
-            print '%s %s' % (file.name, file.size)
-            
-    Each file is an instance of a TrackFile object, which is detailed in its
-    own documentation above. There are also lists of these files for each
-    release associated with the track:
-        for file in sometrack.releases[0]:
-            print file.name in sometrack.releases[1]
-            
-    Note that the files are indexed by their filename. This means that you can
-    easily compare multiple releases as in the above example.
-    '''
-
-    @property 
-    def database(self):
-        '''The database for this composite, typically hg19 for humans'''
-        return self._database
-        
-    @property 
-    def name(self):
-        '''The composite name'''
-        return self._name
-        
-    @property 
-    def downloadsDirectory(self):
-        '''The location of files in downloads'''
-        if not os.path.isdir(self._downloadsDirectory):
-            raise KeyError(self._downloadsDirectory + ' does not exist')
-        return self._downloadsDirectory
-   
-    @property 
-    def httpDownloadsPath(self):
-        '''The location of the downloadable files path in apache form'''
-        if not os.path.isdir(self._httpDownloadsPath):
-            raise KeyError(self._httpDownloadsPath + ' does not exist')
-        return self._httpDownloadsPath
-    
-    @property 
-    def files(self):
-        '''A list of all files in the downloads directory of this composite'''
-        try:
-            return self._files
-        except AttributeError:
-            md5sums = encode.readMd5sums(self._md5path)
-            
-            radict = dict()
-            for stanza in self.alphaMetaDb.itervalues():
-                if 'fileName' in stanza:
-                    for file in stanza['fileName'].split(','):
-                        radict[file] = stanza
-            
-            self._files = dict()
-            for file in os.listdir(self.downloadsDirectory):
-                if os.path.isfile(self.downloadsDirectory + file):
-                
-                    stanza = None
-                    if file in radict:
-                        stanza = radict[file]
-                        
-                    if file in md5sums:
-                        self._files[file] = TrackFile(self.downloadsDirectory + file, md5sums[file], stanza)
-                    else:
-                        self._files[file] = TrackFile(self.downloadsDirectory + file, None, stanza)
-        
-            return self._files
-            
-    @property 
-    def qaInitDir(self):
-        qaDir = '/hive/groups/encode/encodeQa/' + self._database + '/' + self._name + '/'
-        if os.path.exists(qaDir) and os.path.isdir(qaDir):
-            pass
-        else:
-            os.makedirs(qaDir)
-        self._qaDir = qaDir
-        return qaDir
-    @property 
-    def qaInitDirTest(self):
-        qaDir = '/hive/groups/encode/encodeQa/test/' + self._database + '/' + self._name + '/'
-        if os.path.exists(qaDir) and os.path.isdir(qaDir):
-            pass
-        else:
-            os.makedirs(qaDir)
-        self._qaDir = qaDir
-        return qaDir
-
-    @property
-    def releaseObjects(self):
-        '''A set of release objects describing each release'''
-        
-        try:
-            return self._releaseObjects
-        except AttributeError:
-            self._releaseObjects = list()
-            
-            omit = ['README.txt', 'md5sum.txt', 'md5sum.history', 'files.txt']
-            
-            maxcomposite = 0
-            statuses = dict()
-            for line in open(self._trackDbDir + 'trackDb.wgEncode.ra'):
-                if line.startswith('#') or line.strip() == '':
-                    continue
-                parts = line.split()
-                composite = parts[1]
-                places = ''
-                if len(parts) > 2:
-                    places = parts[2]
-                if composite.startswith(self.name):
-                    compositeparts = composite.split('.')
-                    if len(compositeparts) >= 2 and compositeparts[1].startswith('release'):
-                        index = int(compositeparts[1].replace('release', ''))
-                        statuses[index] = places
-                        maxcomposite = max(maxcomposite, index)
-                    else:                       # THINK MORE ABOUT THIS REGION RE: PATCHES
-                        statuses[1] = places
-                        maxcomposite = max(maxcomposite, 1)
-            
-            lastplace = statuses[maxcomposite]
-            for i in range(maxcomposite, 0, -1):
-                if i not in statuses:
-                    statuses[i] = lastplace
-                else:
-                    lastplace = statuses[i]
-                    
-            # while(1):
-                # releasepath = self.downloadsDirectory + ('release%d' % count) + '/'
-                
-                # if not os.path.exists(releasepath):
-                    # break
-                    
-                # md5s = encode.readMd5sums(releasepath + 'md5sum.txt')
-                # releasefiles = dict()
-                
-                # for file in os.listdir(releasepath):
-                    # if os.path.isfile(releasepath + file) and file not in omit:
-                        # if md5s != None and file in md5s:
-                            # releasefiles[file] = TrackFile(releasepath + file, md5s[file])
-                        # else:
-                            # releasefiles[file] = TrackFile(releasepath + file, None)
-            for i in range(1, maxcomposite + 1):    
-                self._releaseObjects.append(Release(i, statuses[i], None))
-                
-            return self._releaseObjects
-    @property 
-    def releases(self):
-        '''A list of all files in the release directory of this composite'''
-        try:
-            return self._releaseFiles
-        except AttributeError:
-            self._releaseFiles = list()
-            count = 1
-            
-            while os.path.exists(self.downloadsDirectory + 'release' + str(count)):
-                releasepath = self.downloadsDirectory + 'release' + str(count) + '/'
-                md5s = encode.readMd5sums(releasepath + 'md5sum.txt')
-                releasefiles = dict()
-                
-                for file in os.listdir(releasepath):
-                    if file != 'md5sum.txt' and md5s != None and file in md5s and not os.path.isdir(releasepath + file):
-                        releasefiles[file] = TrackFile(releasepath + file, md5s[file])
-                    elif not os.path.isdir(releasepath + file):
-                        releasefiles[file] = TrackFile(releasepath + file, None)
-                    elif os.path.isdir(releasepath + file):
-                        if not re.match('.*supplemental.*', releasepath + file):
-                            continue
-                        for innerfile in os.listdir(releasepath + file):
-                            pathfile = file + "/" + innerfile 
-                            releasefiles[pathfile] = TrackFile(releasepath + pathfile, None)
-        #releasefiles.sort()
-                self._releaseFiles.append(releasefiles)
-                count = count + 1
-                
-            return self._releaseFiles
-        
-    @property 
-    def alphaMetaDb(self):
-        '''The Ra file in the metaDb for this composite'''
-        try:
-            return self._alphaMetaDb
-        except AttributeError:
-            if not os.path.isfile(self._alphaMdbPath):
-                raise KeyError(self._alphaMdbPath + ' does not exist')
-            self._alphaMetaDb = mdb.MdbFile(self._alphaMdbPath)
-            return self._alphaMetaDb
-        
-    @property 
-    def betaMetaDb(self):
-        '''The Ra file in the metaDb for this composite'''
-        try:
-            return self._betaMetaDb
-        except AttributeError:
-            if not os.path.isfile(self._betaMdbPath):
-                raise KeyError(self._betaMdbPath + ' does not exist')
-            self._betaMetaDb = mdb.MdbFile(self._betaMdbPath)
-            return self._betaMetaDb
-        
-    @property 
-    def publicMetaDb(self):
-        '''The Ra file in the metaDb for this composite'''
-        try:
-            return self._publicMetaDb
-        except AttributeError:
-            if not os.path.isfile(self._publicMdbPath):
-                raise KeyError(self._publicMdbPath + ' does not exist')
-            self._publicMetaDb = mdb.MdbFile(self._publicMdbPath)
-            return self._publicMetaDb
-        
-    @property 
-    def trackDb(self):
-        '''The Ra file in the trackDb for this composite'''
-        try:
-            return self._trackDb
-        except AttributeError:
-            self._trackDb = ra.RaFile(self._trackDbPath)
-            return self._trackDb
-        
-    @property 
-    def trackPath(self):
-        '''The track path for this composite'''
-        return self._trackPath
-        
-    @property 
-    def url(self):
-        '''The url on our site for this composite'''
-        return self._url
-        
-    @property 
-    def organism(self):
-        '''The url on our site for this composite'''
-        return self._organism
-
-    @property 
-    def currentTrackDb(self):
-        trackDb = self._trackDbDir + "trackDb.wgEncode.ra"
-        f = open(trackDb, "r")
-        lines = f.readlines()
-        p = re.compile(".*(%s\S+) ?(\S+)" % self._name)
-        for i in lines:
-            if re.match("^\s*#.*", i):
-                continue
-            m = p.match(i)
-            if m and re.search('alpha', m.group(2)):
-                tdbpath = "%s%s" % (self._trackDbDir, m.group(1))
-                return tdbpath
-        return None
-
-
-    def __init__(self, database, compositeName, trackPath=None, mdbCompositeName=None):
-        
-        if mdbCompositeName == None:
-            mdbCompositeName = compositeName
-        
-        if trackPath == None:
-            self._trackPath = os.path.expanduser('~/kent/src/hg/makeDb/trackDb/')
-        else:
-            self._trackPath = trackPath
-            if not self._trackPath.endswith('/'):
-                self._trackPath = self._trackPath + '/'
-            
-        if database in encode.organisms:
-            self._organism = encode.organisms[database]
-        else:
-            raise KeyError(database + ' is not a valid database')
-        
-        #self._trackDbPath = self._trackPath + self._organism + '/' + database + '/' + compositeName + '.ra'
-        self._trackDbDir = self._trackPath + self._organism + '/' + database + '/'
-  
-        self._alphaMdbPath = self._trackPath + self._organism + '/' + database + '/metaDb/alpha/' + mdbCompositeName + '.ra'
-        self._betaMdbPath = self._trackPath + self._organism + '/' + database + '/metaDb/beta/' + mdbCompositeName + '.ra'    
-        self._publicMdbPath = self._trackPath + self._organism + '/' + database + '/metaDb/public/' + mdbCompositeName + '.ra'
-        self._alphaMdbDir = self._trackPath + self._organism + '/' + database + '/metaDb/alpha/'
-        self._betaMdbDir = self._trackPath + self._organism + '/' + database + '/metaDb/beta/'
-        self._publicMdbDir = self._trackPath + self._organism + '/' + database + '/metaDb/public/'
-        self._downloadsDirectory = '/hive/groups/encode/dcc/analysis/ftp/pipeline/' + database + '/' + compositeName + '/'
-        self._httpDownloadsPath = '/usr/local/apache/htdocs-hgdownload/goldenPath/' + database + '/encodeDCC/' + compositeName + '/'
-        self._rrHttpDir = '/usr/local/apache/htdocs/goldenPath/' + database + '/encodeDCC/' + compositeName + '/'
-        self._notesDirectory = os.path.expanduser("~/kent/src/hg/makeDb/doc/encodeDcc%s" % database.capitalize()) + '/'
-        self._url = 'http://genome.ucsc.edu/cgi-bin/hgTrackUi?db=' + database + '&g=' + compositeName
-        self._database = database
-        self._name = compositeName        
-        self._md5path = '/hive/groups/encode/dcc/analysis/ftp/pipeline/' + database + '/' + compositeName + '/md5sum.txt'
-        self._trackDbPath = self.currentTrackDb
-        if self._trackDbPath == None:
-            self._trackDbPath = self._trackPath + self._organism + '/' + database + '/' + compositeName + '.ra' 
-        if not os.path.isfile(self._trackDbPath):
-            raise KeyError(self._trackDbPath + ' does not exist')
-        
-
-class TrackCollection(dict):
-    '''
-    A collection that stores all the tracks for a given database, indexed by
-    its metaDb name.
-    '''
-    
-    @property 
-    def database(self):
-        return self._database
-    
-    @property 
-    def organism(self):
-        return self._organism  
-        
-    def __init__(self, database, trackPath=None):
-        dict.__init__(self)
-    
-        self._database = database
-        
-        if database in encode.organisms:
-            self._organism = encode.organisms[database]
-        else:
-            raise KeyError(database + ' is not a valid database')
-    
-        if trackPath == None:
-            self._trackPath = os.path.expanduser('~/kent/src/hg/makeDb/trackDb/')
-        else:
-            self._trackPath = trackPath
-            if not self._trackPath.endswith('/'):
-                self._trackPath = self._trackPath + '/'
-    
-        metaDb = self._trackPath + self._organism + '/' + self._database + '/metaDb/alpha/'
-        
-        for file in os.listdir(metaDb):
-            if os.path.isfile(metaDb + file) and file.endswith('.ra'):
-                trackname = file.replace('.ra', '') 
-                if os.path.isfile(self._trackPath + self._organism + '/' + self._database + '/' + file):
-                    self[trackname] = CompositeTrack(self._database, trackname, self._trackPath)
-                
-                
\ No newline at end of file