src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes 1.4
1.4 2010/03/31 22:55:51 krish
incorporated QA suggestions
Index: src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes,v
retrieving revision 1.3
retrieving revision 1.4
diff -b -B -U 4 -r1.3 -r1.4
--- src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes 17 Mar 2010 00:49:12 -0000 1.3
+++ src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes 31 Mar 2010 22:55:51 -0000 1.4
@@ -6,12 +6,8 @@
import re
import subprocess
import md5
-# NOTES:
-# fullpath name to ALL files
-#
-
#### Classes ###################################################################
#### Functions #################################################################
@@ -89,15 +85,16 @@
releases of an ENCODE track.
"""
if argv is None: argv = sys.argv
# parse the args
- parser = optparse.OptionParser(usage="Usage: %prog [options] database current_release prev_release",
+ parser = optparse.OptionParser(usage="%prog [options] database current_release (prev_release|-)",
version="%prog 0.9")
parser.add_option("-v", "--verbose", dest="verbose", action="store_true", default=False)
- parser.add_option("-t", "--track-name", dest="track_name",
- help="the name of the composite track by default this is the name of the current directory", metavar="N")
- parser.add_option("-n", "--name", dest="name",
- help="the English name of track", metavar="N", default="Untitled")
+ parser.add_option("-t", "--composite-name", dest="composite_name",
+ help="the name of the composite track by default this is the name of the current directory",
+ metavar="N", default=None)
+ parser.add_option("-n", "--track-name", dest="track_name",
+ help="the English name of track", metavar="N", default=None)
parser.add_option("--files", dest="files_path", help="dump list of new files to F", metavar="F")
parser.add_option("--tables", dest="tables_path", help="dump list of new tablesto F", metavar="F")
global options
@@ -107,19 +104,31 @@
if len(args) != 3:
parser.print_help()
sys.exit(10)
+ # default composite name is the current directory name
+ if options.composite_name == None:
+ options.composite_name = os.path.basename(os.getcwd())
+
+ # default track name is "ENCODE <composite_name>"
if options.track_name == None:
- options.track_name = os.path.basename(os.getcwd())
+ options.track_name = "ENCODE %s" % options.composite_name
# get the positional args
database = args[0]
current_release_dir = args[1]
prev_release_dir = args[2]
# some re we will be using
- normal_processed = re.compile("^(narrowPeaks|narrowPeak|broadPeak|gtf|bedGraph|bed\d+)$")
- not_processed = re.compile("^(tagAlign|fastq|fasta|rpkm|bowtie|psl)$")
+ table_and_file = re.compile("^(narrowPeaks|narrowPeak|broadPeak|gtf|bedGraph\d+|bed\d+)$")
+ wig = re.compile("^(wig)$")
+ file_only = re.compile("^(tagAlign|fastq|fasta|rpkm|bowtie|psl)$")
+
+ # if this is a new release (prev_release is "-"), add the full path to all files
+ path_prefix = ""
+ if prev_release_dir == "-":
+ path_prefix = "/usr/local/apache/htdocs/goldenPath/%s/encodeDCC/%s/%s/" \
+ % (database, options.composite_name, current_release_dir)
# generate the list of files
current_files = get_file_set(current_release_dir, ".*\.gz$")
if prev_release_dir == "-":
@@ -161,9 +170,9 @@
warn = "file %s in %s and %s don't appear to be the same (type=%s)" % \
(name, current_release_dir, prev_release_dir, type)
warnings.append(warn)
print >>sys.stderr, warn
- if type == "wig":
+ if wig.match(type):
wib_path = get_wib_pathname(database, name)
if not os.path.exists(wib_path):
warn = "could not find %s wib file for wig %s" % (wib_path, f)
warnings.append(warn)
@@ -172,28 +181,28 @@
warn = "table %s does not exist, from filetype %s" % (name, type)
warnings.append(warn)
print >>sys.stderr, warn
unchanged_tables_list.append(name)
- unchanged_files_list.append(f)
+ unchanged_files_list.append(path_prefix + f)
unchanged_wibs_list.append(wib_path)
- elif normal_processed.match(type):
+ elif table_and_file.match(type):
if not table_exists(database, name):
warn = "table %s does not exist, from filetype %s" % (name, type)
warnings.append(warn)
print >>sys.stderr, warn
unchanged_tables_list.append(name)
- unchanged_files_list.append(f)
- elif not_processed.match(type):
- unchanged_files_list.append(f)
+ unchanged_files_list.append(path_prefix + f)
+ elif file_only.match(type):
+ unchanged_files_list.append(path_prefix + f)
else:
raise ValueError, "unknown type %s of file %s" % (type, f)
# process the list of removed files
for f in removed_files:
name, type, extension = f.split(".")
assert extension == "gz"
- if type == "wig":
+ if wig.match(type):
wib_path = get_wib_pathname(database, name)
if not os.path.exists(wib_path):
warn = "could not find %s wib file for wig %s" % (wib_path, f)
warnings.append(warn)
@@ -202,28 +211,28 @@
warn = "table %s does not exist, from filetype %s" % (name, type)
warnings.append(warn)
print >>sys.stderr, warn
removed_tables_list.append(name)
- removed_files_list.append(f)
+ removed_files_list.append(path_prefix + f)
removed_wibs_list.append(wib_path)
- elif normal_processed.match(type):
+ elif table_and_file.match(type):
if not table_exists(database, name):
warn = "table %s does not exist, from filetype %s" % (name, type)
warnings.append(warn)
print >>sys.stderr, warn
removed_tables_list.append(name)
- removed_files_list.append(f)
- elif not_processed.match(type):
- removed_files_list.append(f)
+ removed_files_list.append(path_prefix + f)
+ elif file_only.match(type):
+ removed_files_list.append(path_prefix + f)
else:
raise ValueError, "unknown type %s of file %s" % (type, f)
# process the list of new files
for f in new_files:
name, type, extension = f.split(".")
assert extension == "gz"
- if type == "wig":
+ if wig.match(type):
wib_path = get_wib_pathname(database, name)
if not os.path.exists(wib_path):
warn = "could not find %s wib file for wig %s" % (wib_path, f)
warnings.append(warn)
@@ -232,19 +241,19 @@
warn = "table %s does not exist, from filetype %s" % (name, type)
warnings.append(warn)
print >>sys.stderr, warn
new_tables_list.append(name)
- new_files_list.append(f)
+ new_files_list.append(path_prefix + f)
new_wibs_list.append(wib_path)
- elif normal_processed.match(type):
+ elif table_and_file.match(type):
if not table_exists(database, name):
warn = "table %s does not exist, from filetype %s" % (name, type)
warnings.append(warn)
print >>sys.stderr, warn
new_tables_list.append(name)
- new_files_list.append(f)
- elif not_processed.match(type):
- new_files_list.append(f)
+ new_files_list.append(path_prefix + f)
+ elif file_only.match(type):
+ new_files_list.append(path_prefix + f)
else:
raise ValueError, "unknown type %s of file %s" % (type, f)
# output some basic stats
@@ -282,10 +291,10 @@
new_tables_file.close()
# generate the header
print "# generated with %s" % parser.get_version()
- print "This is a %s of the \"%s\"" % (current_release_dir, options.name)
- print "The composite track is %s" % options.track_name
+ print "This is a %s of the \"%s\"" % (current_release_dir, options.track_name)
+ print "The composite track is %s" % options.composite_name
if len(warnings) > 0:
warn_header = "# WARNINGS "
print warn_header, "#" * len(warn_header)
@@ -303,8 +312,16 @@
This list is provided for completeness. Any files marked here as in gbdb may be eliminated.
C) New - are only currently on test but will need to be pushed to the RR.
D) Additional items of note
"""
+
+ # some summary counts of current files, i.e. new + untouched
+ print "Summary total counts for %s (new+untouched):" % current_release_dir
+ print " Tables: %d" % (len(unchanged_tables_list) + len(new_tables_list))
+ print " Files: %d" % (len(unchanged_files_list) + len(new_files_list))
+ print " Wibs: %d" % (len(unchanged_wibs_list) + len(new_wibs_list))
+ print
+
# untouched list
print "A) Untouched Tables (%d):" % len(unchanged_tables_list)
unchanged_tables_list.sort()
for i in unchanged_tables_list:
@@ -312,12 +329,16 @@
print
print "A') Untouched Files (%d downloadables, %d wibs):" % (len(unchanged_files_list),
len(unchanged_wibs_list))
+ if prev_release_dir == "-":
+ assert len(unchanged_files_list) == 0
+ assert len(unchanged_wibs_list) == 0
+ else:
print " current location on alpha:"
- print " /usr/local/apache/htdocs/goldenPath/hg18/encodeDCC/%s/%s/" % (options.track_name, current_release_dir)
+ print " /usr/local/apache/htdocs/goldenPath/%s/encodeDCC/%s/%s/" % (database, options.composite_name, current_release_dir)
print " on RR:"
- print " {...}/goldenPath/encodeDCC/%s/" % options.track_name
+ print " /usr/local/apache/htdocs/goldenPath/%s/encodeDCC/%s/" % (database, options.composite_name)
print
unchanged_files_list.sort()
for i in unchanged_files_list:
print i
@@ -336,14 +356,18 @@
print
print "B') Deprecated files (%d downloadables, %d wibs):" %(len(removed_files_list),
len(removed_wibs_list))
+ if prev_release_dir == "-":
+ assert len(removed_files_list) == 0
+ assert len(removed_wibs_list) == 0
+ else:
print " NOTE: NO FILES SHOULD BE REMOVED from the downloads directory on hgdownloads (RR)."
print " This list is provided for completeness. Any files marked here as in gbdb may be eliminated."
print " current location on alpha:"
- print " /usr/local/apache/htdocs/goldenPath/hg18/encodeDCC/%s/%s/" % (options.track_name, prev_release_dir)
+ print " /usr/local/apache/htdocs/goldenPath/%s/encodeDCC/%s/%s/" % (database, options.composite_name, prev_release_dir)
print " on RR:"
- print " {...}/goldenPath/encodeDCC/%s/" % options.track_name
+ print " /usr/local/apache/htdocs/goldenPath/%s/encodeDCC/%s/" % (database, options.composite_name)
print
removed_files_list.sort()
for i in removed_files_list:
@@ -364,11 +388,11 @@
print "C') New files (%d downloadables, %d wibs):" % (len(new_files_list),
len(new_wibs_list))
print " current location on alpha:"
- print " /usr/local/apache/htdocs/goldenPath/hg18/encodeDCC/%s/%s/" % (options.track_name, current_release_dir)
+ print " /usr/local/apache/htdocs/goldenPath/%s/encodeDCC/%s/%s/" % (database, options.composite_name, current_release_dir)
print " NOT on RR but must be placed in:"
- print " {...}/goldenPath/encodeDCC/%s/" % options.track_name
+ print " /usr/local/apache/htdocs/goldenPath/%s/encodeDCC/%s/" % (database, options.composite_name)
print
new_files_list.sort()
for i in new_files_list:
@@ -381,11 +405,11 @@
print
print "D) Additional items:"
print " current location on alpha:"
- print " /usr/local/apache/htdocs/goldenPath/hg18/encodeDCC/%s/%s/" % (options.track_name, current_release_dir)
+ print " /usr/local/apache/htdocs/goldenPath/%s/encodeDCC/%s/%s/" % (database, options.composite_name, current_release_dir)
print " should be placed on the RR in (overwritting any existing copy):"
- print " {...}/goldenPath/encodeDCC/%s/" % options.track_name
+ print " /usr/local/apache/htdocs/goldenPath/%s/encodeDCC/%s/" % (database, options.composite_name)
print
print "index.html"
print "files.txt"
print "md5sum.txt"