src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes 1.14
1.14 2010/05/25 21:20:21 krish
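Added a --disable-md5-checks flag that skips the MD5 checksum comparison of unchanged files for a faster run time (files are then compared by inode and size only).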
Index: src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes,v
retrieving revision 1.13
retrieving revision 1.14
diff -b -B -U 4 -r1.13 -r1.14
--- src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes 25 May 2010 17:17:29 -0000 1.13
+++ src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes 25 May 2010 21:20:21 -0000 1.14
@@ -74,16 +74,17 @@
return name
else:
return None
-def same_file(x, y):
+def same_file(x, y, do_md5=True):
"""checks if two files are the same with various definitions of same"""
if os.path.samefile(x, y):
return True # same inode
else:
if os.path.getsize(x) != os.path.getsize(y):
return False # different sizes
else: # now check md5s of the two files
+ if do_md5:
md5_x = md5.new()
file_x = open(x)
block = file_x.read(2**24)
while block != "":
@@ -97,8 +98,10 @@
md5_y.update(block)
block = file_y.read(2**24)
return md5_x.digest() == md5_y.digest()
+ else:
+ return True
#### Main ######################################################################
def main(argv=None):
@@ -116,8 +119,9 @@
parser.add_option("-n", "--track-name", dest="track_name",
help="the English name of track, by default this is \"ENCODE [composite-name]\"", metavar="N", default=None)
parser.add_option("--files", dest="files_path", help="dump list of new files to F", metavar="F")
parser.add_option("--tables", dest="tables_path", help="dump list of new tablesto F", metavar="F")
+ parser.add_option("--disable-md5-checks", dest="do_md5_checks", action="store_false", help="disable MD5 checks on files", default=True)
global options
(options, args) = parser.parse_args()
@@ -184,8 +188,11 @@
new_tables_list = []
new_files_list = []
new_gbdbs_list = []
+ if not options.do_md5_checks:
+ warnings.append("Use of MD5 checksums to verify unchanged files has been disabled.")
+
# process the list of unchanged files
for f in unchanged_files:
name, type, extension = f.split(".")
assert extension == "gz"
@@ -194,9 +201,9 @@
stem, version = parse_version(name)
if next_version(stem, version) in new_files:
raise ValueError, "Unimplemented: newer version of %s found, can't deal with this yet" % name
# check to make sure the files are really the same
- if not same_file(os.path.join(current_release_dir, f), os.path.join(prev_release_dir, f)):
+ if not same_file(os.path.join(current_release_dir, f), os.path.join(prev_release_dir, f), options.do_md5_checks):
warn = "file %s in %s and %s don't appear to be the same (type=%s)" % \
(name, current_release_dir, prev_release_dir, type)
warnings.append(warn)
print >>sys.stderr, warn