src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes 1.3
1.3 2010/03/17 00:49:12 krish
new types support, warnings now added to notes file
Index: src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 4 -r1.2 -r1.3
--- src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes 15 Mar 2010 23:34:11 -0000 1.2
+++ src/hg/encode/encodeMkChangeNotes/encodeMkChangeNotes 17 Mar 2010 00:49:12 -0000 1.3
@@ -115,20 +115,28 @@
database = args[0]
current_release_dir = args[1]
prev_release_dir = args[2]
+ # regexes classifying file types: those checked against database tables vs. those tracked as files only
+ normal_processed = re.compile("^(narrowPeaks|narrowPeak|broadPeak|gtf|bedGraph|bed\d+)$")
+ not_processed = re.compile("^(tagAlign|fastq|fasta|rpkm|bowtie|psl)$")
+
# generate the list of files
current_files = get_file_set(current_release_dir, ".*\.gz$")
if prev_release_dir == "-":
prev_files = set()
+ prev_release_dir = ""
else:
prev_files = get_file_set(prev_release_dir, ".*\.gz$")
# form the three derived sets
removed_files = prev_files - current_files
unchanged_files = current_files & prev_files
new_files = current_files - prev_files
+ # warnings
+ warnings = []
+
# the list of files that we'll be printing
unchanged_tables_list = []
unchanged_files_list = []
unchanged_wibs_list = []
@@ -149,29 +157,36 @@
if next_version(stem, version) in new_files:
raise ValueError, "Unimplemented: newer version of %s found, can't deal with this yet" % name
# check to make sure the files are really the same
if not same_file(os.path.join(current_release_dir, f), os.path.join(prev_release_dir, f)):
- raise ValueError, "file %s in %s and %s don't appear to be the same" \
- % (name, current_release_dir, prev_release_dir)
-
+ warn = "file %s in %s and %s don't appear to be the same (type=%s)" % \
+ (name, current_release_dir, prev_release_dir, type)
+ warnings.append(warn)
+ print >>sys.stderr, warn
if type == "wig":
wib_path = get_wib_pathname(database, name)
if not os.path.exists(wib_path):
- raise ValueError, "could not find %s wib file for wig %s" % (wib_path, f)
+ warn = "could not find %s wib file for wig %s" % (wib_path, f)
+ warnings.append(warn)
+ print >>sys.stderr, warn
if not table_exists(database, name):
- raise ValueError, "table %s does not exist, from filetype %s" % (name, type)
+ warn = "table %s does not exist, from filetype %s" % (name, type)
+ warnings.append(warn)
+ print >>sys.stderr, warn
unchanged_tables_list.append(name)
unchanged_files_list.append(f)
unchanged_wibs_list.append(wib_path)
- elif type == "narrowPeak" or type.startswith("bedGraph"):
+ elif normal_processed.match(type):
if not table_exists(database, name):
- raise ValueError, "table %s does not exist, from filetype %s" % (name, type)
+ warn = "table %s does not exist, from filetype %s" % (name, type)
+ warnings.append(warn)
+ print >>sys.stderr, warn
unchanged_tables_list.append(name)
unchanged_files_list.append(f)
- elif type == "tagAlign" or type == "fastq":
+ elif not_processed.match(type):
unchanged_files_list.append(f)
else:
- raise ValueError, "unknown type %s" % type
+ raise ValueError, "unknown type %s of file %s" % (type, f)
# process the list of removed files
for f in removed_files:
name, type, extension = f.split(".")
@@ -179,23 +194,29 @@
if type == "wig":
wib_path = get_wib_pathname(database, name)
if not os.path.exists(wib_path):
- raise ValueError, "could not find %s wib file for wig %s" % (wib_path, f)
+ warn = "could not find %s wib file for wig %s" % (wib_path, f)
+ warnings.append(warn)
+ print >>sys.stderr, warn
if not table_exists(database, name):
- raise ValueError, "table %s does not exist, from filetype %s" % (name, type)
+ warn = "table %s does not exist, from filetype %s" % (name, type)
+ warnings.append(warn)
+ print >>sys.stderr, warn
removed_tables_list.append(name)
removed_files_list.append(f)
removed_wibs_list.append(wib_path)
- elif type == "narrowPeak" or type.startswith("bedGraph"):
+ elif normal_processed.match(type):
if not table_exists(database, name):
- raise ValueError, "table %s does not exist, from filetype %s" % (name, type)
+ warn = "table %s does not exist, from filetype %s" % (name, type)
+ warnings.append(warn)
+ print >>sys.stderr, warn
removed_tables_list.append(name)
removed_files_list.append(f)
- elif type == "tagAlign" or type == "fastq":
+ elif not_processed.match(type):
removed_files_list.append(f)
else:
- raise ValueError, "unknown type %s" % type
+ raise ValueError, "unknown type %s of file %s" % (type, f)
# process the list of new files
for f in new_files:
name, type, extension = f.split(".")
@@ -203,23 +224,29 @@
if type == "wig":
wib_path = get_wib_pathname(database, name)
if not os.path.exists(wib_path):
- raise ValueError, "could not find %s wib file for wig %s" % (wib_path, f)
+ warn = "could not find %s wib file for wig %s" % (wib_path, f)
+ warnings.append(warn)
+ print >>sys.stderr, warn
if not table_exists(database, name):
- raise ValueError, "table %s does not exist, from filetype %s" % (name, type)
+ warn = "table %s does not exist, from filetype %s" % (name, type)
+ warnings.append(warn)
+ print >>sys.stderr, warn
new_tables_list.append(name)
- new_files_list.append(os.path.join("/usr/local/apache/htdocs/goldenPath/hg18/encodeDCC", options.track_name, current_release_dir, f))
+ new_files_list.append(f)
new_wibs_list.append(wib_path)
- elif type == "narrowPeak" or type.startswith("bedGraph"):
+ elif normal_processed.match(type):
if not table_exists(database, name):
- raise ValueError, "table %s does not exist, from filetype %s" % (name, type)
+ warn = "table %s does not exist, from filetype %s" % (name, type)
+ warnings.append(warn)
+ print >>sys.stderr, warn
new_tables_list.append(name)
- new_files_list.append(os.path.join("/usr/local/apache/htdocs/goldenPath/hg18/encodeDCC", options.track_name, current_release_dir, f))
- elif type == "tagAlign" or type == "fastq":
- new_files_list.append(os.path.join("/usr/local/apache/htdocs/goldenPath/hg18/encodeDCC", options.track_name, current_release_dir, f))
+ new_files_list.append(f)
+ elif not_processed.match(type):
+ new_files_list.append(f)
else:
- raise ValueError, "unknown type %s" % type
+ raise ValueError, "unknown type %s of file %s" % (type, f)
# output some basic stats
if options.verbose:
print >>sys.stderr, "Counts:"
@@ -257,8 +284,18 @@
# generate the header
print "# generated with %s" % parser.get_version()
print "This is a %s of the \"%s\"" % (current_release_dir, options.name)
print "The composite track is %s" % options.track_name
+
+ if len(warnings) > 0:
+ warn_header = "# WARNINGS "
+ print warn_header, "#" * len(warn_header)
+ c = 1
+ for w in warnings:
+ print "%0d - %s" % (c, w)
+ c += 1
+ print "#" * 60
+
print """
Categories of tables and files('):
A) Untouched - are on public browser and should remain
B) Deprecated - are currently on RR but will no longer be needed and should not be referenced by the public site.