src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes 1.3

1.3 2010/03/17 00:49:12 krish
new types support, warnings now added to notes file
Index: src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 4 -r1.2 -r1.3
--- src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes	15 Mar 2010 23:34:11 -0000	1.2
+++ src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes	17 Mar 2010 00:49:12 -0000	1.3
@@ -115,20 +115,28 @@
     database = args[0]
     current_release_dir = args[1]
     prev_release_dir = args[2]
 
+    # regular expressions used below to classify each file by its type field
+    normal_processed = re.compile("^(narrowPeaks|narrowPeak|broadPeak|gtf|bedGraph|bed\d+)$")
+    not_processed = re.compile("^(tagAlign|fastq|fasta|rpkm|bowtie|psl)$")
+
     # generate the list of files
     current_files = get_file_set(current_release_dir, ".*\.gz$")
     if prev_release_dir == "-":
         prev_files = set()
+        prev_release_dir = ""
     else:
         prev_files = get_file_set(prev_release_dir, ".*\.gz$")
 
     # form the three derived sets
     removed_files = prev_files - current_files
     unchanged_files = current_files & prev_files
     new_files = current_files - prev_files
 
+    # warnings
+    warnings = []
+
     # the list of files that we'll be printing
     unchanged_tables_list = []
     unchanged_files_list  = []
     unchanged_wibs_list  = []
@@ -149,29 +157,36 @@
         if next_version(stem, version) in new_files:
             raise ValueError, "Unimplemented: newer version of %s found, can't deal with this yet" % name
         # check to make sure the files are really the same
         if not same_file(os.path.join(current_release_dir, f), os.path.join(prev_release_dir, f)):
-            raise ValueError, "file %s in %s and %s don't appear to be the same" \
-                % (name, current_release_dir, prev_release_dir)
-
+            warn = "file %s in %s and %s don't appear to be the same (type=%s)" % \
+                (name, current_release_dir, prev_release_dir, type)
+            warnings.append(warn)
+            print >>sys.stderr, warn
         if type == "wig":
             wib_path = get_wib_pathname(database, name)
             if not os.path.exists(wib_path):
-                raise ValueError, "could not find %s wib file for wig %s" % (wib_path, f)
+                warn = "could not find %s wib file for wig %s" % (wib_path, f)
+                warnings.append(warn)
+                print >>sys.stderr, warn
             if not table_exists(database, name):
-                raise ValueError, "table %s does not exist, from filetype %s" % (name, type)
+                warn = "table %s does not exist, from filetype %s" % (name, type)
+                warnings.append(warn)
+                print >>sys.stderr, warn
             unchanged_tables_list.append(name)
             unchanged_files_list.append(f)
             unchanged_wibs_list.append(wib_path)
-        elif type == "narrowPeak" or type.startswith("bedGraph"):
+        elif normal_processed.match(type):
             if not table_exists(database, name):
-                raise ValueError,  "table %s does not exist, from filetype %s" % (name, type)
+                warn = "table %s does not exist, from filetype %s" % (name, type)
+                warnings.append(warn)
+                print >>sys.stderr, warn
             unchanged_tables_list.append(name)
             unchanged_files_list.append(f)
-        elif type == "tagAlign" or type == "fastq":
+        elif not_processed.match(type):
             unchanged_files_list.append(f)
         else:
-            raise ValueError, "unknown type %s" % type
+            raise ValueError, "unknown type %s of file %s" % (type, f)
 
     # process the list of removed files
     for f in removed_files:
         name, type, extension = f.split(".")
@@ -179,23 +194,29 @@
 
         if type == "wig":
             wib_path = get_wib_pathname(database, name)
             if not os.path.exists(wib_path):
-                raise ValueError, "could not find %s wib file for wig %s" % (wib_path, f)
+                warn = "could not find %s wib file for wig %s" % (wib_path, f)
+                warnings.append(warn)
+                print >>sys.stderr, warn
             if not table_exists(database, name):
-                raise ValueError, "table %s does not exist, from filetype %s" % (name, type)
+                warn = "table %s does not exist, from filetype %s" % (name, type)
+                warnings.append(warn)
+                print >>sys.stderr, warn
             removed_tables_list.append(name)
             removed_files_list.append(f)
             removed_wibs_list.append(wib_path)
-        elif type == "narrowPeak" or type.startswith("bedGraph"):
+        elif normal_processed.match(type):
             if not table_exists(database, name):
-                raise ValueError,  "table %s does not exist, from filetype %s" % (name, type)
+                warn = "table %s does not exist, from filetype %s" % (name, type)
+                warnings.append(warn)
+                print >>sys.stderr, warn
             removed_tables_list.append(name)
             removed_files_list.append(f)
-        elif type == "tagAlign" or type == "fastq":
+        elif not_processed.match(type):
             removed_files_list.append(f)
         else:
-            raise ValueError, "unknown type %s" % type
+            raise ValueError, "unknown type %s of file %s" % (type, f)
 
     # process the list of new files
     for f in new_files:
         name, type, extension = f.split(".")
@@ -203,23 +224,29 @@
 
         if type == "wig":
             wib_path = get_wib_pathname(database, name)
             if not os.path.exists(wib_path):
-                raise ValueError, "could not find %s wib file for wig %s" % (wib_path, f)
+                warn = "could not find %s wib file for wig %s" % (wib_path, f)
+                warnings.append(warn)
+                print >>sys.stderr, warn
             if not table_exists(database, name):
-                raise ValueError, "table %s does not exist, from filetype %s" % (name, type)
+                warn = "table %s does not exist, from filetype %s" % (name, type)
+                warnings.append(warn)
+                print >>sys.stderr, warn
             new_tables_list.append(name)
-            new_files_list.append(os.path.join("/usr/local/apache/htdocs/goldenPath/hg18/encodeDCC", options.track_name, current_release_dir, f))
+            new_files_list.append(f)
             new_wibs_list.append(wib_path)
-        elif type == "narrowPeak" or type.startswith("bedGraph"):
+        elif normal_processed.match(type):
             if not table_exists(database, name):
-                raise ValueError,  "table %s does not exist, from filetype %s" % (name, type)
+                warn = "table %s does not exist, from filetype %s" % (name, type)
+                warnings.append(warn)
+                print >>sys.stderr, warn
             new_tables_list.append(name)
-            new_files_list.append(os.path.join("/usr/local/apache/htdocs/goldenPath/hg18/encodeDCC", options.track_name, current_release_dir, f))
-        elif type == "tagAlign" or type == "fastq":
-            new_files_list.append(os.path.join("/usr/local/apache/htdocs/goldenPath/hg18/encodeDCC", options.track_name, current_release_dir, f))
+            new_files_list.append(f)
+        elif not_processed.match(type):
+            new_files_list.append(f)
         else:
-            raise ValueError, "unknown type %s" % type
+            raise ValueError, "unknown type %s of file %s" % (type, f)
 
     # output some basic stats
     if options.verbose:
         print >>sys.stderr, "Counts:"
@@ -257,8 +284,18 @@
     # generate the header
     print "# generated with %s" % parser.get_version()
     print "This is a %s of the \"%s\"" % (current_release_dir, options.name)
     print "The composite track is %s" % options.track_name
+    
+    if len(warnings) > 0:
+        warn_header = "# WARNINGS "
+        print warn_header, "#" * len(warn_header)
+        c = 1
+        for w in warnings:
+            print  "%0d - %s" % (c, w)
+            c += 1
+        print "#" * 60
+
     print """
 Categories of tables and files('):
 A) Untouched - are on public browser and should remain
 B) Deprecated - are currently on RR but will no longer be needed and should not be referenced by the public site.