src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes 1.14

1.14 2010/05/25 21:20:21 krish
Added a --disable-md5-checks flag to skip the MD5 checksum comparison of unchanged files for a faster run time.
Index: src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes,v
retrieving revision 1.13
retrieving revision 1.14
diff -b -B -U 4 -r1.13 -r1.14
--- src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes	25 May 2010 17:17:29 -0000	1.13
+++ src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes	25 May 2010 21:20:21 -0000	1.14
@@ -74,16 +74,17 @@
             return name
         else:
             return None
 
-def same_file(x, y):
+def same_file(x, y, do_md5=True):
     """checks if two files are the same with various definitions of same"""
     if os.path.samefile(x, y):
         return True     # same inode
     else:
         if os.path.getsize(x) != os.path.getsize(y):
             return False    # different sizes
         else:   # now check md5s of the two files
+            if do_md5:
             md5_x = md5.new()
             file_x = open(x)
             block = file_x.read(2**24)
             while block != "":
@@ -97,8 +98,10 @@
                 md5_y.update(block)
                 block = file_y.read(2**24)
 
             return md5_x.digest() == md5_y.digest()
+            else:
+                return True
 
 #### Main ######################################################################
 
 def main(argv=None):
@@ -116,8 +119,9 @@
     parser.add_option("-n", "--track-name", dest="track_name",
         help="the English name of track, by default this is \"ENCODE [composite-name]\"", metavar="N", default=None)
     parser.add_option("--files", dest="files_path", help="dump list of new files to F", metavar="F")
     parser.add_option("--tables", dest="tables_path", help="dump list of new tablesto F", metavar="F")
+    parser.add_option("--disable-md5-checks", dest="do_md5_checks", action="store_false", help="disable MD5 checks on files", default=True)
 
     global options
     (options, args) = parser.parse_args()
 
@@ -184,8 +188,11 @@
     new_tables_list = []
     new_files_list  = []
     new_gbdbs_list  = []
 
+    if not options.do_md5_checks:
+        warnings.append("Use of MD5 checksums to verify unchanged files has been disabled.")
+
     # process the list of unchanged files
     for f in unchanged_files:
         name, type, extension = f.split(".")
         assert extension == "gz"
@@ -194,9 +201,9 @@
         stem, version = parse_version(name)
         if next_version(stem, version) in new_files:
             raise ValueError, "Unimplemented: newer version of %s found, can't deal with this yet" % name
         # check to make sure the files are really the same
-        if not same_file(os.path.join(current_release_dir, f), os.path.join(prev_release_dir, f)):
+        if not same_file(os.path.join(current_release_dir, f), os.path.join(prev_release_dir, f), options.do_md5_checks):
             warn = "file %s in %s and %s don't appear to be the same (type=%s)" % \
                 (name, current_release_dir, prev_release_dir, type)
             warnings.append(warn)
             print >>sys.stderr, warn