31dcb07904fb264e64f184905d0f5ad7ccc94f44 mmaddren Wed Sep 21 16:06:55 2011 -0700 more documentation for ucscgenomics diff --git python/lib/ucscgenomics/track.py python/lib/ucscgenomics/track.py index 70064c6..5309b32 100644 --- python/lib/ucscgenomics/track.py +++ python/lib/ucscgenomics/track.py @@ -2,30 +2,37 @@ from ucscgenomics import ra def readMd5sums(filename): if os.path.isfile(filename): md5sums = dict() md5file = open(filename, 'r') for line in md5file: key, val = map(str.strip, line.split(' ', 1)) md5sums[key] = val return md5sums else: return None class TrackFile(object): + """ + A file in the trackDb, which has useful information about itself. + + CompositeTrack (below) has multiple dictionaries of TrackFiles, one for + the root downloads directory, and one for each release. The root directory + will link itself to the CompositeTrack's alpha metadata. + """ @property def name(self): """The file's name""" return self._name @property def fullname(self): """The file's full name including path""" return self._path + self._name @property def path(self): """The file's path""" return self._path @@ -57,30 +64,64 @@ self._path = self._path + '/' self._fullname = fullname self._size = os.stat(fullname).st_size self._md5sum = md5 self._metaObj = metaObj self._extension = self._name self._extension.replace('.gz', '').replace('.tgz', '') if '.' in self._extension: self._extension = self._extension.rsplit('.')[1] else: self._extension = None class CompositeTrack(object): + """ + Stores an entire track, consisting mainly of its metadata and files. 
+ + To make a CompositeTrack, you must specify database and name of the track: + sometrack = CompositeTrack('hg19', 'wgEncodeCshlLongRnaSeq') + + You can also specify a trackDb path in the event that yours is different + from the default, '~/kent/src/hg/makeDb/trackDb/': + sometrack = CompositeTrack('hg19', 'wgEncode...', '/weird/path') + + It's important to know that the CompositeTrack does NOT load all of its + information up front. Therefore, there's no performance hit for using a + CompositeTrack instead of just specifying a RaFile. In fact, it's + beneficial, since it adds another layer of abstraction to your code. You + can access a composite's ra files: + somemetadata = sometrack.alphaMetaDb + + For more information on what you can do with ra files, check the ra.py + documentation. + + You can also access a track's files. This is one of the more useful parts + of the composite track: + for file in sometrack.files: + print '%s %s' % (file.name, file.size) + + Each file is an instance of a TrackFile object, which is detailed in its + own documentation above. There are also lists of these files for each + release associated with the track: + for file in sometrack.releases[0]: + print file.name in sometrack.releases[1] + + Note that the files are indexed by their filename. This means that you can + easily compare multiple releases as in the above example. + """ @property def database(self): """The database for this composite, typically hg19 for humans""" return self._database @property def name(self): """The composite name""" return self._name @property def downloadsDirectory(self): """The location of files in downloads""" if not os.path.isdir(self._downloadsDirectory):