31dcb07904fb264e64f184905d0f5ad7ccc94f44
mmaddren
  Wed Sep 21 16:06:55 2011 -0700
more documentation for ucscgenomics
diff --git python/lib/ucscgenomics/track.py python/lib/ucscgenomics/track.py
index 70064c6..5309b32 100644
--- python/lib/ucscgenomics/track.py
+++ python/lib/ucscgenomics/track.py
@@ -1,234 +1,275 @@
 import os
 from ucscgenomics import ra
 
 def readMd5sums(filename):
 	if os.path.isfile(filename):
 		md5sums = dict()
 		md5file = open(filename, 'r')
 		for line in md5file:
 			key, val = map(str.strip, line.split(' ', 1))
 			md5sums[key] = val
 		return md5sums
 	else:
 		return None
 
 		
 class TrackFile(object):
+	"""
+	A file in the trackDb, which has useful information about itself.
+	
+	CompositeTrack (below) has multiple dictionaries of TrackFiles, one for
+	the root downloads directory, and one for each release. The root directory
+	will link itself to the CompositeTrack's alpha metadata.
+	"""
 
 	@property 
 	def name(self):
 		"""The file's name"""
 		return self._name
 		
 	@property 
 	def fullname(self):
 		"""The file's full name including path"""
 		return self._path + self._name
 		
 	@property 
 	def path(self):
 		"""The file's path"""
 		return self._path
 		
 	@property 
 	def md5sum(self):
 		"""The md5sum for this file, stored in the md5sum.txt file in the downloads directory"""
 		return self._md5sum
 		
 	@property 
 	def extension(self):
 		"""The filetype"""
 		return self._extension
 		
 	@property 
 	def size(self):
 		"""The size in bytes"""
 		return self._size
 		
 	@property 
 	def metaObject(self):
-		"""The size in bytes"""
+		"""The metadata stanza linked to this file, or None if there is none"""
 		return self._metaObj
 	
 	def __init__(self, fullname, md5, metaObj=None):
 		if not os.path.isfile(fullname):
 			raise FileError('invalid file: %s' % fullname)
 		self._path, self._name = fullname.rsplit('/', 1)
 		self._path = self._path + '/'
 		self._fullname = fullname
 		self._size = os.stat(fullname).st_size
 		self._md5sum = md5
 		self._metaObj = metaObj
 		
 		self._extension = self._name
 		self._extension.replace('.gz', '').replace('.tgz', '')
 		if '.' in self._extension:
 			self._extension = self._extension.rsplit('.')[1]
 		else:
 			self._extension = None
 	
 	
 class CompositeTrack(object):
+	"""
+	Stores an entire track, consisting mainly of its metadata and files.
+	
+	To make a CompositeTrack, you must specify database and name of the track:
+		sometrack = CompositeTrack('hg19', 'wgEncodeCshlLongRnaSeq')
+		
+	You can also specify a trackDb path in the event that yours is different
+	from the default, '~/kent/src/hg/makeDb/trackDb/':
+		sometrack = CompositeTrack('hg19', 'wgEncode...', '/weird/path')
+		
+	It's important to know that the CompositeTrack does NOT load all of its
+	information up front. Therefore, there's no performance hit for using a
+	CompositeTrack instead of just specifying a RaFile. In fact, it's
+	beneficial, since it adds another layer of abstraction to your code. You
+	can access a composite's ra files:
+		somemetadata = sometrack.alphaMetaDb
+		
+	For more information on what you can do with ra files, check the ra.py
+	documentation.
+	
+	You can also access a track's files. This is one of the more useful parts
+	of the composite track:
+		for file in sometrack.files.values():
+			print '%s %s' % (file.name, file.size)
+			
+	Each file is an instance of a TrackFile object, which is detailed in its
+	own documentation above. There is also a dictionary of these files for
+	each release associated with the track:
+		for file in sometrack.releases[0]:
+			print file in sometrack.releases[1]
+			
+	Note that the files are indexed by their filename. This means that you can
+	easily compare multiple releases as in the above example.
+	"""
 
 	@property 
 	def database(self):
 		"""The database for this composite, typically hg19 for humans"""
 		return self._database
 		
 	@property 
 	def name(self):
 		"""The composite name"""
 		return self._name
 		
 	@property 
 	def downloadsDirectory(self):
 		"""The location of files in downloads"""
 		if not os.path.isdir(self._downloadsDirectory):
 			raise KeyError(self._downloadsDirectory + ' does not exist')
 		return self._downloadsDirectory
 	
 	@property 
 	def files(self):
 		"""A list of all files in the downloads directory of this composite"""
 		try:
 			return self._files
 		except AttributeError:
 			md5sums = readMd5sums(self._md5path)
 			
 			radict = dict()
 			for stanza in self.alphaMetaDb:
 				if 'fileName' in stanza:
 					radict[stanza['fileName']] = stanza
 			
 			self._files = dict()
 			for file in os.listdir(self.downloadsDirectory):
 				if os.path.isfile(self.downloadsDirectory + file):
 					
 					stanza = None
 					if file in radict:
 						stanza = radict[file]
 						
 					if file in md5sums:
 						self._files[file] = TrackFile(self.downloadsDirectory + file, md5sums[file], stanza)
 					else:
 						self._files[file] = TrackFile(self.downloadsDirectory + file, None, stanza)
 		
 			return self._files
 		
 	@property 
 	def releases(self):
 		"""A list of all files in the release directory of this composite"""
 		try:
 			return self._releaseFiles
 		except AttributeError:
 			self._releaseFiles = list()
 			count = 1
 			
 			while os.path.exists(self.downloadsDirectory + 'release' + str(count)):
 				releasepath = self.downloadsDirectory + 'release' + str(count) + '/'
 				md5s = readMd5sums(releasepath + 'md5sum.txt')
 				releasefiles = dict()
 				
 				for file in os.listdir(releasepath):
 					if file != 'md5sum.txt' and md5s != None and file in md5s:
 						releasefiles[file] = TrackFile(releasepath + file, md5s[file])
 					else:
 						releasefiles[file] = TrackFile(releasepath + file, None)
 					
 				#releasefiles.sort()
 				self._releaseFiles.append(releasefiles)
 				count = count + 1
 				
 			return self._releaseFiles
 		
 	@property 
 	def alphaMetaDb(self):
 		"""The Ra file in the metaDb for this composite"""
 		try:
 			return self._alphaMetaDb
 		except AttributeError:
 			if not os.path.isfile(self._alphaMdbPath):
 				raise KeyError(self._alphaMdbPath + ' does not exist')
 			self._alphaMetaDb = ra.RaFile(self._alphaMdbPath)
 			return self._alphaMetaDb
 		
 	@property 
 	def betaMetaDb(self):
 		"""The Ra file in the metaDb for this composite"""
 		try:
 			return self._betaMetaDb
 		except AttributeError:
 			if not os.path.isfile(self._betaMdbPath):
 				raise KeyError(self._betaMdbPath + ' does not exist')
 			self._betaMetaDb = ra.RaFile(self._betaMdbPath)
 			return self._betaMetaDb
 		
 	@property 
 	def publicMetaDb(self):
 		"""The Ra file in the metaDb for this composite"""
 		try:
 			return self._publicMetaDb
 		except AttributeError:
 			if not os.path.isfile(self._publicMdbPath):
 				raise KeyError(self._publicMdbPath + ' does not exist')
 			self._publicMetaDb = ra.RaFile(self._publicMdbPath)
 			return self._publicMetaDb
 		
 	@property 
 	def trackDb(self):
 		"""The Ra file in the trackDb for this composite"""
 		try:
 			return self._trackDb
 		except AttributeError:
 			self._trackDb = ra.RaFile(self._trackDbPath)
 			return self._trackDb
 		
 	@property 
 	def trackPath(self):
 		"""The track path for this composite"""
 		return self._trackPath
 		
 	@property 
 	def url(self):
 		"""The url on our site for this composite"""
 		return self._url
 		
 	@property 
 	def organism(self):
-		"""The url on our site for this composite"""
+		"""The organism for this composite's database, e.g. human or mouse"""
 		return self._organism
 		
 	def __init__(self, database, compositeName, trackPath=None):
 		
 		if trackPath == None:
 			self._trackPath = os.path.expanduser('~/kent/src/hg/makeDb/trackDb/')
 		else:
 			self._trackPath = trackPath
 			
 		organisms = {
 			'hg19': 'human',
 			'hg18': 'human',
 			'mm9': 'mouse'
 		}
 		
 		if database in organisms:
 			self._organism = organisms[database]
 		else:
 			raise KeyError(database + ' is not a valid database')
 		
 		if not self._trackPath.endswith('/'):
 			self._trackPath = self._trackPath + '/'
 		
 		self._trackDbPath = self._trackPath + self._organism + '/' + database + '/' + compositeName + '.ra'
 		if not os.path.isfile(self._trackDbPath):
 			raise KeyError(self._trackDbPath + ' does not exist')	
 		
 		self._alphaMdbPath = self._trackPath + self._organism + '/' + database + '/metaDb/alpha/' + compositeName + '.ra'
 		self._betaMdbPath = self._trackPath + self._organism + '/' + database + '/metaDb/beta/' + compositeName + '.ra'	
 		self._publicMdbPath = self._trackPath + self._organism + '/' + database + '/metaDb/public/' + compositeName + '.ra'
 		self._downloadsDirectory = '/hive/groups/encode/dcc/analysis/ftp/pipeline/' + database + '/' + compositeName + '/'
 		self._url = 'http://genome.ucsc.edu/cgi-bin/hgTrackUi?db=' + database + '&g=' + compositeName
 		self._database = database
 		self._name = compositeName		
 		self._md5path = '/hive/groups/encode/dcc/analysis/ftp/pipeline/' + database + '/' + compositeName + '/md5sum.txt'