src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes 1.9

1.9 2010/04/07 22:08:37 krish
added support for .bb files
Index: src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes,v
retrieving revision 1.8
retrieving revision 1.9
diff -b -B -U 4 -r1.8 -r1.9
--- src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes	5 Apr 2010 18:37:18 -0000	1.8
+++ src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes	7 Apr 2010 22:08:37 -0000	1.9
@@ -49,10 +49,10 @@
 def next_version(name, version):
     """return the next version of a name/version pair"""
     return "%sV%d" % (name, version + 1)
 
-def get_wib_pathname(database, table_name):
-    """extract the wib pathaname of a wiggle table"""
+def get_gbdb_pathname(database, table_name):
+    """extract the gbdb pathname of a gbdb table"""
     query = "SELECT file FROM %s LIMIT 1;" % table_name
     command = "hgsql %s --skip-column-names -e \"%s;\"" % (database, query)
     proc = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE,
                                                  stdin=subprocess.PIPE,
@@ -61,8 +61,19 @@
     status_code = proc.returncode;
     if status_code == 0:
         return name
     else:
+        query = "SELECT fileName FROM %s LIMIT 1;" % table_name
+        command = "hgsql %s --skip-column-names -e \"%s;\"" % (database, query)
+        proc = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE,
+                                                     stdin=subprocess.PIPE,
+                                                     stderr=subprocess.PIPE)
+        name = proc.communicate()[0].rstrip()
+        status_code = proc.returncode;
+
+        if status_code == 0:
+            return name
+        else:
         return None
 
 def same_file(x, y):
     """checks if two files are the same with various definitions of same"""
@@ -119,10 +130,10 @@
     prev_release_dir = args[2]
 
     # some re we will be using
     table_and_file = re.compile("^(narrowPeaks|narrowPeak|broadPeak|gtf|bedGraph\d+|bed\d+)$")
-    wig = re.compile("^(wig)$")
-    file_only = re.compile("^(tagAlign|fastq|fasta|rpkm|bowtie|psl|csqual|csfasta|pairedTagAlign)$")
+    gbdb = re.compile("^(wig|tagAlign)$")
+    file_only = re.compile("^(fastq|fasta|rpkm|bowtie|psl|csqual|csfasta|pairedTagAlign)$")
 
     # if new relase, add the full path to all files
     path_prefix = ""
     if prev_release_dir == "-":
@@ -147,15 +158,15 @@
 
     # the list of files that we'll be printing
     unchanged_tables_list = []
     unchanged_files_list  = []
-    unchanged_wibs_list  = []
+    unchanged_gbdbs_list  = []
     removed_tables_list = []
     removed_files_list  = []
-    removed_wibs_list  = []
+    removed_gbdbs_list  = []
     new_tables_list = []
     new_files_list  = []
-    new_wibs_list  = []
+    new_gbdbs_list  = []
 
     # process the list of unchanged files
     for f in unchanged_files:
         name, type, extension = f.split(".")
@@ -170,28 +181,32 @@
             warn = "file %s in %s and %s don't appear to be the same (type=%s)" % \
                 (name, current_release_dir, prev_release_dir, type)
             warnings.append(warn)
             print >>sys.stderr, warn
-        if wig.match(type):
-            wib_path = get_wib_pathname(database, name)
-            if not os.path.exists(wib_path):
-                warn = "could not find %s wib file for wig %s" % (wib_path, f)
+
+        if gbdb.match(type):
+            unchanged_files_list.append(path_prefix + f)
+            if table_exists(database, name):
+                unchanged_tables_list.append(name)
+                gbdb_path = get_gbdb_pathname(database, name)
+                if gbdb_path != None and os.path.exists(gbdb_path):
+                    unchanged_gbdbs_list.append(gbdb_path)
+                else:
+                    warn = "could not find %s file for %s" % (gbdb_path, f)
                 warnings.append(warn)
                 print >>sys.stderr, warn
-            if not table_exists(database, name):
+            else:
                 warn = "table %s does not exist, from filetype %s" % (name, type)
                 warnings.append(warn)
                 print >>sys.stderr, warn
-            unchanged_tables_list.append(name)
-            unchanged_files_list.append(path_prefix + f)
-            unchanged_wibs_list.append(wib_path)
         elif table_and_file.match(type):
-            if not table_exists(database, name):
+            unchanged_files_list.append(path_prefix + f)
+            if table_exists(database, name):
+                unchanged_tables_list.append(name)
+            else:
                 warn = "table %s does not exist, from filetype %s" % (name, type)
                 warnings.append(warn)
                 print >>sys.stderr, warn
-            unchanged_tables_list.append(name)
-            unchanged_files_list.append(path_prefix + f)
         elif file_only.match(type):
             unchanged_files_list.append(path_prefix + f)
         else:
             raise ValueError, "unknown type %s of file %s" % (type, f)
@@ -200,28 +217,31 @@
     for f in removed_files:
         name, type, extension = f.split(".")
         assert extension == "gz"
 
-        if wig.match(type):
-            wib_path = get_wib_pathname(database, name)
-            if not os.path.exists(wib_path):
-                warn = "could not find %s wib file for wig %s" % (wib_path, f)
+        if gbdb.match(type):
+            removed_files_list.append(path_prefix + f)
+            if table_exists(database, name):
+                removed_tables_list.append(name)
+                gbdb_path = get_gbdb_pathname(database, name)
+                if gbdb_path != None and os.path.exists(gbdb_path):
+                    removed_gbdbs_list.append(gbdb_path)
+                else:
+                    warn = "could not find %s file for %s" % (gbdb_path, f)
                 warnings.append(warn)
                 print >>sys.stderr, warn
-            if not table_exists(database, name):
+            else:
                 warn = "table %s does not exist, from filetype %s" % (name, type)
                 warnings.append(warn)
                 print >>sys.stderr, warn
-            removed_tables_list.append(name)
-            removed_files_list.append(path_prefix + f)
-            removed_wibs_list.append(wib_path)
         elif table_and_file.match(type):
-            if not table_exists(database, name):
+            removed_files_list.append(path_prefix + f)
+            if table_exists(database, name):
+                removed_tables_list.append(name)
+            else:
                 warn = "table %s does not exist, from filetype %s" % (name, type)
                 warnings.append(warn)
                 print >>sys.stderr, warn
-            removed_tables_list.append(name)
-            removed_files_list.append(f + path_prefix)
         elif file_only.match(type):
             removed_files_list.append(path_prefix + f)
         else:
             raise ValueError, "unknown type %s of file %s" % (type, f)
@@ -230,28 +251,31 @@
     for f in new_files:
         name, type, extension = f.split(".")
         assert extension == "gz"
 
-        if wig.match(type):
-            wib_path = get_wib_pathname(database, name)
-            if not os.path.exists(wib_path):
-                warn = "could not find %s wib file for wig %s" % (wib_path, f)
+        if gbdb.match(type):
+            new_files_list.append(path_prefix + f)
+            if table_exists(database, name):
+                new_tables_list.append(name)
+                gbdb_path = get_gbdb_pathname(database, name)
+                if gbdb_path != None and os.path.exists(gbdb_path):
+                    new_gbdbs_list.append(gbdb_path)
+                else:
+                    warn = "could not find %s file for %s" % (gbdb_path, f)
                 warnings.append(warn)
                 print >>sys.stderr, warn
-            if not table_exists(database, name):
+            else:
                 warn = "table %s does not exist, from filetype %s" % (name, type)
                 warnings.append(warn)
                 print >>sys.stderr, warn
-            new_tables_list.append(name)
-            new_files_list.append(path_prefix + f)
-            new_wibs_list.append(wib_path)
         elif table_and_file.match(type):
-            if not table_exists(database, name):
+            new_files_list.append(path_prefix + f)
+            if table_exists(database, name):
+                new_tables_list.append(name)
+            else:
                 warn = "table %s does not exist, from filetype %s" % (name, type)
                 warnings.append(warn)
                 print >>sys.stderr, warn
-            new_tables_list.append(name)
-            new_files_list.append(path_prefix + f)
         elif file_only.match(type):
             new_files_list.append(path_prefix + f)
         else:
             raise ValueError, "unknown type %s of file %s" % (type, f)
@@ -260,25 +284,25 @@
     if options.verbose:
         print >>sys.stderr, "Counts:"
         print >>sys.stderr, "  unchanged tables: %d" % len(unchanged_tables_list)
         print >>sys.stderr, "  unchanged files: %d" % len(unchanged_files_list)
-        print >>sys.stderr, "  unchanged wibs: %d" % len(unchanged_wibs_list)
+        print >>sys.stderr, "  unchanged gbdbs: %d" % len(unchanged_gbdbs_list)
         print >>sys.stderr, "  removed tables: %d" % len(removed_tables_list)
         print >>sys.stderr, "  removed files: %d" % len(removed_files_list)
-        print >>sys.stderr, "  removed wibs: %d" % len(removed_wibs_list)
+        print >>sys.stderr, "  removed gbdbs: %d" % len(removed_gbdbs_list)
         print >>sys.stderr, "  new tables: %d" % len(new_tables_list)
         print >>sys.stderr, "  new files: %d" % len(new_files_list)
-        print >>sys.stderr, "  new wibs: %d" % len(new_wibs_list)
+        print >>sys.stderr, "  new gbdbs: %d" % len(new_gbdbs_list)
 
     # if asked, save the list of new files
     if options.files_path:
         new_files_file = open(options.files_path, "w")
         new_files_list.sort()
         for i in new_files_list:
             print >>new_files_file, i
         print >>new_files_file
-        new_wibs_list.sort()
-        for i in new_wibs_list:
+        new_gbdbs_list.sort()
+        for i in new_gbdbs_list:
             print >>new_files_file, i
         new_files_file.close()
 
     # if asked, generate list of new tables
@@ -296,9 +320,9 @@
     print "The composite track is %s" % options.composite_name
     
     if len(warnings) > 0:
         warn_header = "# WARNINGS "
-        print warn_header, "#" * len(warn_header)
+        print warn_header + "#" * (60 - len(warn_header))
         c = 1
         for w in warnings:
             print  "%0d - %s" % (c, w)
             c += 1
@@ -314,12 +338,12 @@
 D) Additional items of note
 """
     
     # some summary counts of current files, i.e. new + untouched
-    print "Summary total counts for %s (new+untouched):" % current_release_dir
+    print "Summary total counts for %s (new+untouched, not counting D):" % current_release_dir
     print "    Tables: %d" % (len(unchanged_tables_list) + len(new_tables_list))
     print "    Files: %d" % (len(unchanged_files_list) + len(new_files_list))
-    print "    Wibs: %d" % (len(unchanged_wibs_list) + len(new_wibs_list))
+    print "    Gbdbs: %d" % (len(unchanged_gbdbs_list) + len(new_gbdbs_list))
     print
 
     # untouched list
     print "A) Untouched Tables (%d):" % len(unchanged_tables_list)
@@ -327,13 +351,13 @@
     for i in unchanged_tables_list:
         print i
     print
 
-    print "A') Untouched Files (%d downloadables, %d wibs):" % (len(unchanged_files_list),
-                                                           len(unchanged_wibs_list))
+    print "A') Untouched Files (%d downloadables, %d gbdbs):" % (len(unchanged_files_list),
+                                                           len(unchanged_gbdbs_list))
     if prev_release_dir == "":
         assert len(unchanged_files_list) == 0
-        assert len(unchanged_wibs_list) == 0
+        assert len(unchanged_gbdbs_list) == 0
     else:
         print "    current location on alpha:"
         print "      /usr/local/apache/htdocs/goldenPath/%s/encodeDCC/%s/%s/" % (database, options.composite_name, current_release_dir)
         print "    on RR:"
@@ -342,10 +366,10 @@
         unchanged_files_list.sort()
         for i in unchanged_files_list:
             print i
         print
-        unchanged_wibs_list.sort()
-        for i in unchanged_wibs_list:
+        unchanged_gbdbs_list.sort()
+        for i in unchanged_gbdbs_list:
             print i
     print
 
     # eliminated list
@@ -354,13 +378,13 @@
     for i in removed_tables_list:
         print i
     print
 
-    print "B') Deprecated files (%d downloadables, %d wibs):" %(len(removed_files_list),
-                                                                len(removed_wibs_list))
+    print "B') Deprecated files (%d downloadables, %d gbdbs):" %(len(removed_files_list),
+                                                                len(removed_gbdbs_list))
     if prev_release_dir == "":
         assert len(removed_files_list) == 0
-        assert len(removed_wibs_list) == 0
+        assert len(removed_gbdbs_list) == 0
     else:
         print "    NOTE: NO FILES SHOULD BE REMOVED from the downloads directory on hgdownloads (RR)."
         print "    This list is provided for completeness. Any files marked here as in gbdb may be eliminated."
         print "    current location on alpha:"
@@ -372,10 +396,10 @@
         removed_files_list.sort()
         for i in removed_files_list:
             print i
         print
-        removed_wibs_list.sort()
-        for i in removed_wibs_list:
+        removed_gbdbs_list.sort()
+        for i in removed_gbdbs_list:
             print i
 
     print
 
@@ -385,10 +409,10 @@
     for i in new_tables_list:
         print i
     print
 
-    print "C') New files (%d downloadables, %d wibs):" % (len(new_files_list),
-                                                         len(new_wibs_list))
+    print "C') New files (%d downloadables, %d gbdbs):" % (len(new_files_list),
+                                                         len(new_gbdbs_list))
     print "    current location on alpha:"
     print "      /usr/local/apache/htdocs/goldenPath/%s/encodeDCC/%s/%s/" % (database, options.composite_name, current_release_dir)
     print "    NOT on RR but must be placed in:"
     print "      /usr/local/apache/htdocs/goldenPath/%s/encodeDCC/%s/" % (database, options.composite_name)
@@ -397,10 +421,10 @@
     new_files_list.sort()
     for i in new_files_list:
         print i
     print
-    new_wibs_list.sort()
-    for i in new_wibs_list:
+    new_gbdbs_list.sort()
+    for i in new_gbdbs_list:
         print i
 
     print