src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes 1.4

1.4 2010/03/31 22:55:51 krish
incorporated QA suggestions
Index: src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes,v
retrieving revision 1.3
retrieving revision 1.4
diff -b -B -U 4 -r1.3 -r1.4
--- src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes	17 Mar 2010 00:49:12 -0000	1.3
+++ src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes	31 Mar 2010 22:55:51 -0000	1.4
@@ -6,12 +6,8 @@
 import re
 import subprocess
 import md5
 
-# NOTES:
-#   fullpath name to ALL files
-#
-
 #### Classes ###################################################################
 
 #### Functions #################################################################
 
@@ -89,15 +85,16 @@
         releases of an ENCODE track.
     """
     if argv is None: argv = sys.argv
     # parse the args
-    parser = optparse.OptionParser(usage="Usage: %prog [options] database current_release prev_release",
+    parser = optparse.OptionParser(usage="%prog [options] database current_release (prev_release|-)",
         version="%prog 0.9")
     parser.add_option("-v", "--verbose", dest="verbose", action="store_true", default=False)
-    parser.add_option("-t", "--track-name", dest="track_name",
-        help="the name of the composite track by default this is the name of the current directory", metavar="N")
-    parser.add_option("-n", "--name", dest="name",
-        help="the English name of track", metavar="N", default="Untitled")
+    parser.add_option("-t", "--composite-name", dest="composite_name",
+        help="the name of the composite track by default this is the name of the current directory",
+        metavar="N", default=None)
+    parser.add_option("-n", "--track-name", dest="track_name",
+        help="the English name of track", metavar="N", default=None)
     parser.add_option("--files", dest="files_path", help="dump list of new files to F", metavar="F")
     parser.add_option("--tables", dest="tables_path", help="dump list of new tablesto F", metavar="F")
 
     global options
@@ -107,19 +104,31 @@
     if len(args) != 3:
         parser.print_help()
         sys.exit(10)
 
+    # default composite name is the current directory name
+    if options.composite_name == None:
+        options.composite_name = os.path.basename(os.getcwd())
+
+    # default track name is "ENCODE <composite name>"
     if options.track_name == None:
-        options.track_name = os.path.basename(os.getcwd())
+        options.track_name = "ENCODE %s" % options.composite_name
 
     # get the positional args
     database = args[0]
     current_release_dir = args[1]
     prev_release_dir = args[2]
 
     # some re we will be using
-    normal_processed = re.compile("^(narrowPeaks|narrowPeak|broadPeak|gtf|bedGraph|bed\d+)$")
-    not_processed = re.compile("^(tagAlign|fastq|fasta|rpkm|bowtie|psl)$")
+    table_and_file = re.compile("^(narrowPeaks|narrowPeak|broadPeak|gtf|bedGraph\d+|bed\d+)$")
+    wig = re.compile("^(wig)$")
+    file_only = re.compile("^(tagAlign|fastq|fasta|rpkm|bowtie|psl)$")
+
+    # if this is a new release (prev_release is "-"), add the full path to all files
+    path_prefix = ""
+    if prev_release_dir == "-":
+        path_prefix = "/usr/local/apache/htdocs/goldenPath/%s/encodeDCC/%s/%s/" \
+            % (database, options.composite_name, current_release_dir)
 
     # generate the list of files
     current_files = get_file_set(current_release_dir, ".*\.gz$")
     if prev_release_dir == "-":
@@ -161,9 +170,9 @@
             warn = "file %s in %s and %s don't appear to be the same (type=%s)" % \
                 (name, current_release_dir, prev_release_dir, type)
             warnings.append(warn)
             print >>sys.stderr, warn
-        if type == "wig":
+        if wig.match(type):
             wib_path = get_wib_pathname(database, name)
             if not os.path.exists(wib_path):
                 warn = "could not find %s wib file for wig %s" % (wib_path, f)
                 warnings.append(warn)
@@ -172,28 +181,28 @@
                 warn = "table %s does not exist, from filetype %s" % (name, type)
                 warnings.append(warn)
                 print >>sys.stderr, warn
             unchanged_tables_list.append(name)
-            unchanged_files_list.append(f)
+            unchanged_files_list.append(path_prefix + f)
             unchanged_wibs_list.append(wib_path)
-        elif normal_processed.match(type):
+        elif table_and_file.match(type):
             if not table_exists(database, name):
                 warn = "table %s does not exist, from filetype %s" % (name, type)
                 warnings.append(warn)
                 print >>sys.stderr, warn
             unchanged_tables_list.append(name)
-            unchanged_files_list.append(f)
-        elif not_processed.match(type):
-            unchanged_files_list.append(f)
+            unchanged_files_list.append(path_prefix + f)
+        elif file_only.match(type):
+            unchanged_files_list.append(path_prefix + f)
         else:
             raise ValueError, "unknown type %s of file %s" % (type, f)
 
     # process the list of removed files
     for f in removed_files:
         name, type, extension = f.split(".")
         assert extension == "gz"
 
-        if type == "wig":
+        if wig.match(type):
             wib_path = get_wib_pathname(database, name)
             if not os.path.exists(wib_path):
                 warn = "could not find %s wib file for wig %s" % (wib_path, f)
                 warnings.append(warn)
@@ -202,28 +211,28 @@
                 warn = "table %s does not exist, from filetype %s" % (name, type)
                 warnings.append(warn)
                 print >>sys.stderr, warn
             removed_tables_list.append(name)
-            removed_files_list.append(f)
+            removed_files_list.append(path_prefix + f)
             removed_wibs_list.append(wib_path)
-        elif normal_processed.match(type):
+        elif table_and_file.match(type):
             if not table_exists(database, name):
                 warn = "table %s does not exist, from filetype %s" % (name, type)
                 warnings.append(warn)
                 print >>sys.stderr, warn
             removed_tables_list.append(name)
-            removed_files_list.append(f)
-        elif not_processed.match(type):
-            removed_files_list.append(f)
+            removed_files_list.append(f + path_prefix)
+        elif file_only.match(type):
+            removed_files_list.append(path_prefix + f)
         else:
             raise ValueError, "unknown type %s of file %s" % (type, f)
 
     # process the list of new files
     for f in new_files:
         name, type, extension = f.split(".")
         assert extension == "gz"
 
-        if type == "wig":
+        if wig.match(type):
             wib_path = get_wib_pathname(database, name)
             if not os.path.exists(wib_path):
                 warn = "could not find %s wib file for wig %s" % (wib_path, f)
                 warnings.append(warn)
@@ -232,19 +241,19 @@
                 warn = "table %s does not exist, from filetype %s" % (name, type)
                 warnings.append(warn)
                 print >>sys.stderr, warn
             new_tables_list.append(name)
-            new_files_list.append(f)
+            new_files_list.append(path_prefix + f)
             new_wibs_list.append(wib_path)
-        elif normal_processed.match(type):
+        elif table_and_file.match(type):
             if not table_exists(database, name):
                 warn = "table %s does not exist, from filetype %s" % (name, type)
                 warnings.append(warn)
                 print >>sys.stderr, warn
             new_tables_list.append(name)
-            new_files_list.append(f)
-        elif not_processed.match(type):
-            new_files_list.append(f)
+            new_files_list.append(path_prefix + f)
+        elif file_only.match(type):
+            new_files_list.append(path_prefix + f)
         else:
             raise ValueError, "unknown type %s of file %s" % (type, f)
 
     # output some basic stats
@@ -282,10 +291,10 @@
         new_tables_file.close()
 
     # generate the header
     print "# generated with %s" % parser.get_version()
-    print "This is a %s of the \"%s\"" % (current_release_dir, options.name)
-    print "The composite track is %s" % options.track_name
+    print "This is a %s of the \"%s\"" % (current_release_dir, options.track_name)
+    print "The composite track is %s" % options.composite_name
     
     if len(warnings) > 0:
         warn_header = "# WARNINGS "
         print warn_header, "#" * len(warn_header)
@@ -303,8 +312,16 @@
    This list is provided for completeness. Any files marked here as in gbdb may be eliminated.
 C) New - are only currently on test but will need to be pushed to the RR.
 D) Additional items of note
 """
+    
+    # some summary counts of current files, i.e. new + untouched
+    print "Summary total counts for %s (new+untouched):" % current_release_dir
+    print "    Tables: %d" % (len(unchanged_tables_list) + len(new_tables_list))
+    print "    Files: %d" % (len(unchanged_files_list) + len(new_files_list))
+    print "    Wibs: %d" % (len(unchanged_wibs_list) + len(new_wibs_list))
+    print
+
     # untouched list
     print "A) Untouched Tables (%d):" % len(unchanged_tables_list)
     unchanged_tables_list.sort()
     for i in unchanged_tables_list:
@@ -312,12 +329,16 @@
     print
 
     print "A') Untouched Files (%d downloadables, %d wibs):" % (len(unchanged_files_list),
                                                            len(unchanged_wibs_list))
+    if prev_release_dir == "":
+        assert len(unchanged_files_list) == 0
+        assert len(unchanged_wibs_list) == 0
+    else:
     print "    current location on alpha:"
-    print "      /usr/local/apache/htdocs/goldenPath/hg18/encodeDCC/%s/%s/" % (options.track_name, current_release_dir)
+        print "      /usr/local/apache/htdocs/goldenPath/%s/encodeDCC/%s/%s/" % (database, options.composite_name, current_release_dir)
     print "    on RR:"
-    print "      {...}/goldenPath/encodeDCC/%s/" % options.track_name
+        print "      /usr/local/apache/htdocs/goldenPath/%s/encodeDCC/%s/" % (database, options.composite_name)
     print
     unchanged_files_list.sort()
     for i in unchanged_files_list:
         print i
@@ -336,14 +356,18 @@
     print
 
     print "B') Deprecated files (%d downloadables, %d wibs):" %(len(removed_files_list),
                                                                 len(removed_wibs_list))
+    if prev_release_dir == "":
+        assert len(removed_files_list) == 0
+        assert len(removed_wibs_list) == 0
+    else:
     print "    NOTE: NO FILES SHOULD BE REMOVED from the downloads directory on hgdownloads (RR)."
     print "    This list is provided for completeness. Any files marked here as in gbdb may be eliminated."
     print "    current location on alpha:"
-    print "      /usr/local/apache/htdocs/goldenPath/hg18/encodeDCC/%s/%s/" % (options.track_name, prev_release_dir)
+        print "      /usr/local/apache/htdocs/goldenPath/%s/encodeDCC/%s/%s/" % (database, options.composite_name, prev_release_dir)
     print "    on RR:"
-    print "      {...}/goldenPath/encodeDCC/%s/" % options.track_name
+        print "      /usr/local/apache/htdocs/goldenPath/%s/encodeDCC/%s/" % (database, options.composite_name)
     print
 
     removed_files_list.sort()
     for i in removed_files_list:
@@ -364,11 +388,11 @@
 
     print "C') New files (%d downloadables, %d wibs):" % (len(new_files_list),
                                                          len(new_wibs_list))
     print "    current location on alpha:"
-    print "      /usr/local/apache/htdocs/goldenPath/hg18/encodeDCC/%s/%s/" % (options.track_name, current_release_dir)
+    print "      /usr/local/apache/htdocs/goldenPath/%s/encodeDCC/%s/%s/" % (database, options.composite_name, current_release_dir)
     print "    NOT on RR but must be placed in:"
-    print "      {...}/goldenPath/encodeDCC/%s/" % options.track_name
+    print "      /usr/local/apache/htdocs/goldenPath/%s/encodeDCC/%s/" % (database, options.composite_name)
     print
 
     new_files_list.sort()
     for i in new_files_list:
@@ -381,11 +405,11 @@
     print
 
     print "D) Additional items:"
     print "    current location on alpha:"
-    print "      /usr/local/apache/htdocs/goldenPath/hg18/encodeDCC/%s/%s/" % (options.track_name, current_release_dir)
+    print "      /usr/local/apache/htdocs/goldenPath/%s/encodeDCC/%s/%s/" % (database, options.composite_name, current_release_dir)
     print "    should be placed on the RR in (overwritting any existing copy):"
-    print "      {...}/goldenPath/encodeDCC/%s/" % options.track_name
+    print "      /usr/local/apache/htdocs/goldenPath/%s/encodeDCC/%s/" % (database, options.composite_name)
     print
     print "index.html"
     print "files.txt"
     print "md5sum.txt"