# Provenance: commit 71a7f24285c283b54289eb7eeee00a3df82ff5c6 (mmaddren,
# Wed Nov 30 16:05:09 2011 -0800), python/lib/ucscgenomics/track.py
# (index 831c10f..4460fb4, hunk @@ -1,75 +1,86 @@).
#
# Commit message: track files will now generate their md5sum on the fly when
# it is retrieved and it was originally None. This should be a rare
# occurrence, since most files already have an md5sum which is attached
# through the md5sums.txt file.
#
# Changes made by that commit: added the hashlib import and hashfile();
# TrackFile.md5sum computes a missing checksum on first access;
# TrackFile.__init__ made md5 optional and normalizes fullname with
# os.path.abspath.

import hashlib
import os
import re

from ucscgenomics import ra


def readMd5sums(filename):
    """
    Read an md5sums file into a dictionary.

    Each non-blank line is split at its first space: the stripped text before
    the space becomes the key and the stripped remainder becomes the value.
    Blank lines are skipped.

    Returns the dictionary, or None when filename is not an existing file.
    Raises ValueError for a non-blank line that contains no space.
    """
    if not os.path.isfile(filename):
        return None

    md5sums = dict()
    # 'with' guarantees the handle is closed even if a line fails to parse.
    with open(filename, 'r') as md5file:
        for line in md5file:
            if not line.strip():
                continue
            key, val = map(str.strip, line.split(' ', 1))
            md5sums[key] = val
    return md5sums


def hashfile(filename, hasher=None, blocksize=65536):
    """
    Return the hex digest of a file's contents.

    filename  -- path of the file to read (opened in binary mode)
    hasher    -- a hashlib-style object providing update() and hexdigest();
                 when None (the default) a fresh hashlib.md5() is created
    blocksize -- number of bytes read per chunk, so large files are never
                 loaded into memory at once

    A hasher passed in by the caller is updated in place; any data it has
    already consumed is included in the returned digest.
    """
    # The default must be created per call: a default of hashlib.md5() in the
    # signature is evaluated once, so every call would keep feeding the same
    # object and all digests after the first would be wrong.
    if hasher is None:
        hasher = hashlib.md5()

    with open(filename, 'rb') as afile:
        buf = afile.read(blocksize)
        while len(buf) > 0:
            hasher.update(buf)
            buf = afile.read(blocksize)
    return hasher.hexdigest()


class TrackFile(object):
    """
    A file in the trackDb, which has useful information about itself.

    CompositeTrack (below) has multiple dictionaries of TrackFiles, one for
    the root downloads directory, and one for each release. The root directory
    will link itself to the CompositeTrack's alpha metadata.
    """

    @property
    def name(self):
        """The file's name"""
        return self._name

    @property
    def fullname(self):
        """The file's full name including path"""
        return self._path + self._name

    @property
    def path(self):
        """The file's path"""
        return self._path

    @property
    def md5sum(self):
        """
        The md5sum for this file.

        This is the value given to the constructor. When that was None, the
        checksum is computed from the file on first access with hashfile()
        and cached for later accesses.
        """
        if self._md5sum is None:
            self._md5sum = hashfile(self.fullname)
        return self._md5sum

    @property
    def extension(self):
        """The filetype"""
        return self._extension

    @property
    def size(self):
        """The size in bytes"""
        return self._size

    @property
    def metaObject(self):
        """The metadata object passed to the constructor (may be None)"""
        return self._metaObj

    def __init__(self, fullname, md5=None, metaObj=None):
        """
        Create a TrackFile for an existing file.

        fullname -- path to the file; converted to an absolute path
        md5      -- known md5sum, or None to have it computed on first access
                    of the md5sum property
        metaObj  -- optional metadata object, exposed as metaObject

        Raises FileError when fullname is not an existing file.
        """
        fullname = os.path.abspath(fullname)
        if not os.path.isfile(fullname):
            # NOTE(review): FileError is not defined in the visible part of
            # this module; presumably it is declared elsewhere -- confirm.
            raise FileError('invalid file: %s' % fullname)

        self._path, self._name = fullname.rsplit('/', 1)
        self._path = self._path + '/'
        self._fullname = fullname
        self._size = os.stat(fullname).st_size
        self._md5sum = md5
        self._metaObj = metaObj

        # The extension is the second dot-separated component of the name
        # ('a.narrowPeak.gz' -> 'narrowPeak'), or None when there is no dot.
        # NOTE(review): the previous code also ran
        #     self._extension.replace('.gz', '').replace('.tgz', '')
        # and discarded the result (str.replace returns a new string), so it
        # had no effect. The existing behaviour is kept here; confirm whether
        # compression suffixes were meant to be stripped, e.g. so that 'a.gz'
        # yields None instead of 'gz'.
        parts = self._name.split('.')
        self._extension = parts[1] if len(parts) > 1 else None