64f127cb71252486ce096a832ac7fad3deb324a5
mmaddren
  Wed Sep 14 17:04:53 2011 -0700
large-scale renaming change to allow python to be built into cluster/bin, also mkGeoPkg now renames files
diff --git python/lib/ucscgenomics/track.py python/lib/ucscgenomics/track.py
new file mode 100644
index 0000000..70064c6
--- /dev/null
+++ python/lib/ucscgenomics/track.py
@@ -0,0 +1,234 @@
+import os
+from ucscgenomics import ra
+
+def readMd5sums(filename):
+	"""Parse an md5sum listing into a dictionary.
+
+	Each non-blank line is split at its first space: the stripped text
+	before the space becomes the key and the stripped remainder becomes
+	the value.
+
+	filename -- path of the md5sum listing to read
+
+	Returns a dict of key -> value, or None when filename is not an
+	existing regular file. Raises ValueError for a non-blank line that
+	contains no space.
+	"""
+	if not os.path.isfile(filename):
+		return None
+
+	md5sums = dict()
+	# 'with' closes the handle even when a malformed line raises part-way
+	with open(filename, 'r') as md5file:
+		for line in md5file:
+			# blank lines (e.g. a trailing newline) carry no entry
+			if not line.strip():
+				continue
+			key, val = map(str.strip, line.split(' ', 1))
+			md5sums[key] = val
+	return md5sums
+
+		
+class TrackFile(object):
+	"""A single file on disk together with its checksum and metadata.
+
+	An instance records the file's directory, base name and size in bytes,
+	the md5sum and metadata object supplied by the caller, and a file type
+	derived from the file name.
+	"""
+
+	# compression suffixes that are ignored when working out the file type
+	_compressionSuffixes = ('.gz', '.tgz')
+
+	@property
+	def name(self):
+		"""The file's name, without its directory"""
+		return self._name
+
+	@property
+	def fullname(self):
+		"""The file's full name including path"""
+		return self._path + self._name
+
+	@property
+	def path(self):
+		"""The file's directory, ending in '/' (empty for a bare file name)"""
+		return self._path
+
+	@property
+	def md5sum(self):
+		"""The md5sum passed to the constructor, or None if none was given"""
+		return self._md5sum
+
+	@property
+	def extension(self):
+		"""The filetype: the text after the last dot once a trailing .gz or
+		.tgz has been removed, or None if no dot remains"""
+		return self._extension
+
+	@property
+	def size(self):
+		"""The size in bytes"""
+		return self._size
+
+	@property
+	def metaObject(self):
+		"""The metadata object passed to the constructor, or None"""
+		return self._metaObj
+
+	def __init__(self, fullname, md5, metaObj=None):
+		"""Describe the existing file at fullname.
+
+		fullname -- path to the file, using '/' as the separator
+		md5 -- md5sum to record for the file, or None if unknown
+		metaObj -- optional metadata object to attach to the file
+
+		Raises IOError if fullname is not an existing regular file.
+		"""
+		if not os.path.isfile(fullname):
+			# builtin IOError: this module defines no exception class of its own
+			raise IOError('invalid file: %s' % fullname)
+
+		if '/' in fullname:
+			directory, self._name = fullname.rsplit('/', 1)
+			self._path = directory + '/'
+		else:
+			# bare file name: there is no directory part to split off
+			self._path = ''
+			self._name = fullname
+		self._fullname = fullname
+		self._size = os.stat(fullname).st_size
+		self._md5sum = md5
+		self._metaObj = metaObj
+
+		# strings are immutable, so the stripped name has to be kept in a
+		# variable; only a trailing compression suffix is removed
+		stem = self._name
+		for suffix in self._compressionSuffixes:
+			if stem.endswith(suffix):
+				stem = stem[:-len(suffix)]
+				break
+
+		# the type is whatever follows the last remaining dot
+		if '.' in stem:
+			self._extension = stem.rsplit('.', 1)[1]
+		else:
+			self._extension = None
+	
+	
+class CompositeTrack(object):
+	"""Paths and lazily loaded data for one composite track in one database.
+
+	The constructor only computes paths and checks that the composite's
+	trackDb .ra file exists. The .ra files and directory listings are read
+	the first time the matching property is accessed and cached afterwards.
+	"""
+
+	# databases this class accepts, mapped to their organism directory name
+	_organisms = {
+		'hg19': 'human',
+		'hg18': 'human',
+		'mm9': 'mouse'
+	}
+
+	# root under which each composite's download files are looked up
+	_downloadsRoot = '/hive/groups/encode/dcc/analysis/ftp/pipeline/'
+
+	@property
+	def database(self):
+		"""The database for this composite, typically hg19 for humans"""
+		return self._database
+
+	@property
+	def name(self):
+		"""The composite name"""
+		return self._name
+
+	@property
+	def downloadsDirectory(self):
+		"""The location of files in downloads, ending in '/'
+
+		Raises KeyError if the directory does not exist.
+		"""
+		if not os.path.isdir(self._downloadsDirectory):
+			raise KeyError(self._downloadsDirectory + ' does not exist')
+		return self._downloadsDirectory
+
+	@property
+	def files(self):
+		"""A dict mapping each file name in the downloads directory to its TrackFile
+
+		Only regular files directly inside the directory are included. Each
+		TrackFile gets the value listed for its name in md5sum.txt and the
+		alpha metaDb stanza whose fileName matches, or None where there is
+		no such entry. Raises KeyError if the downloads directory or the
+		alpha metaDb file does not exist.
+		"""
+		try:
+			return self._files
+		except AttributeError:
+			pass
+
+		# a missing md5sum.txt yields None; treat that as "no checksums known"
+		md5sums = readMd5sums(self._md5path) or dict()
+
+		# index the alpha metaDb stanzas by the file they name
+		stanzasByFile = dict()
+		for stanza in self.alphaMetaDb:
+			if 'fileName' in stanza:
+				stanzasByFile[stanza['fileName']] = stanza
+
+		directory = self.downloadsDirectory
+		found = dict()
+		for filename in os.listdir(directory):
+			if os.path.isfile(directory + filename):
+				found[filename] = TrackFile(directory + filename, md5sums.get(filename), stanzasByFile.get(filename))
+
+		# cache only once complete, so a failure part-way leaves nothing half-built
+		self._files = found
+		return self._files
+
+	@property
+	def releases(self):
+		"""A list with one dict per release directory, in release order
+
+		Entry i describes the directory release<i+1> inside the downloads
+		directory and maps each regular file name in it to its TrackFile.
+		Counting starts at release1 and stops at the first missing number.
+		Checksums come from that release's own md5sum.txt, which is itself
+		listed with no checksum. Raises KeyError if the downloads directory
+		does not exist.
+		"""
+		try:
+			return self._releaseFiles
+		except AttributeError:
+			pass
+
+		directory = self.downloadsDirectory
+		found = list()
+		count = 1
+		while os.path.exists(directory + 'release' + str(count)):
+			releasepath = directory + 'release' + str(count) + '/'
+			md5s = readMd5sums(releasepath + 'md5sum.txt') or dict()
+			releasefiles = dict()
+
+			for filename in os.listdir(releasepath):
+				# TrackFile only accepts regular files, so skip subdirectories
+				if not os.path.isfile(releasepath + filename):
+					continue
+				# the checksum listing itself is recorded without a checksum
+				md5 = None
+				if filename != 'md5sum.txt':
+					md5 = md5s.get(filename)
+				releasefiles[filename] = TrackFile(releasepath + filename, md5)
+
+			found.append(releasefiles)
+			count = count + 1
+
+		# cache only once complete, so a failure part-way leaves nothing half-built
+		self._releaseFiles = found
+		return self._releaseFiles
+
+	def _cachedMetaDb(self, attribute, path):
+		"""Return the RaFile for path, parsing it on first use and caching it under attribute
+
+		Raises KeyError if path is not an existing file.
+		"""
+		try:
+			return getattr(self, attribute)
+		except AttributeError:
+			if not os.path.isfile(path):
+				raise KeyError(path + ' does not exist')
+			metaDb = ra.RaFile(path)
+			setattr(self, attribute, metaDb)
+			return metaDb
+
+	@property
+	def alphaMetaDb(self):
+		"""The Ra file in the alpha metaDb for this composite"""
+		return self._cachedMetaDb('_alphaMetaDb', self._alphaMdbPath)
+
+	@property
+	def betaMetaDb(self):
+		"""The Ra file in the beta metaDb for this composite"""
+		return self._cachedMetaDb('_betaMetaDb', self._betaMdbPath)
+
+	@property
+	def publicMetaDb(self):
+		"""The Ra file in the public metaDb for this composite"""
+		return self._cachedMetaDb('_publicMetaDb', self._publicMdbPath)
+
+	@property
+	def trackDb(self):
+		"""The Ra file in the trackDb for this composite"""
+		try:
+			return self._trackDb
+		except AttributeError:
+			# existence of this file was already checked by the constructor
+			self._trackDb = ra.RaFile(self._trackDbPath)
+			return self._trackDb
+
+	@property
+	def trackPath(self):
+		"""The track path for this composite, ending in '/'"""
+		return self._trackPath
+
+	@property
+	def url(self):
+		"""The url on our site for this composite"""
+		return self._url
+
+	@property
+	def organism(self):
+		"""The organism directory name for this composite's database"""
+		return self._organism
+
+	def __init__(self, database, compositeName, trackPath=None):
+		"""Work out where the composite's trackDb, metaDb and download files live.
+
+		database -- database name; must be one of hg19, hg18 or mm9
+		compositeName -- name of the composite track
+		trackPath -- root of the trackDb tree; defaults to
+			~/kent/src/hg/makeDb/trackDb/ in the user's home directory
+
+		Raises KeyError if database is not recognised or if the composite's
+		trackDb .ra file does not exist.
+		"""
+		if database not in self._organisms:
+			raise KeyError(database + ' is not a valid database')
+		self._organism = self._organisms[database]
+
+		if trackPath is None:
+			trackPath = os.path.expanduser('~/kent/src/hg/makeDb/trackDb/')
+		# later concatenations assume a trailing separator
+		if not trackPath.endswith('/'):
+			trackPath = trackPath + '/'
+		self._trackPath = trackPath
+
+		databasePath = self._trackPath + self._organism + '/' + database + '/'
+		self._trackDbPath = databasePath + compositeName + '.ra'
+		if not os.path.isfile(self._trackDbPath):
+			raise KeyError(self._trackDbPath + ' does not exist')
+
+		self._alphaMdbPath = databasePath + 'metaDb/alpha/' + compositeName + '.ra'
+		self._betaMdbPath = databasePath + 'metaDb/beta/' + compositeName + '.ra'
+		self._publicMdbPath = databasePath + 'metaDb/public/' + compositeName + '.ra'
+		self._downloadsDirectory = self._downloadsRoot + database + '/' + compositeName + '/'
+		self._md5path = self._downloadsDirectory + 'md5sum.txt'
+		self._url = 'http://genome.ucsc.edu/cgi-bin/hgTrackUi?db=' + database + '&g=' + compositeName
+		self._database = database
+		self._name = compositeName
+