src/hg/encode/encodeCharts/encodeTimeline.py 1.1

1.1 2010/04/07 19:10:30 bsuh
Initial check-in of python script to generate timeline html
Index: src/hg/encode/encodeCharts/encodeTimeline.py
===================================================================
RCS file: src/hg/encode/encodeCharts/encodeTimeline.py
diff -N src/hg/encode/encodeCharts/encodeTimeline.py
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/hg/encode/encodeCharts/encodeTimeline.py	7 Apr 2010 19:10:30 -0000	1.1
@@ -0,0 +1,223 @@
+#!/usr/bin/env python
+
+import datetime
+import os
+import re
+import sys
+
+import gviz_api
+
+# Directory containing the report files (newreport.YYYY-MM-DD.dcc.txt)
+reportDir = "/hive/groups/encode/dcc/reports"
+
+# File listing the important events and dates, one "date<TAB>label" per line
+importantDatesFile = "/hive/users/bsuh/encode/important.dates.tab"
+
+# Given the directory of reports, find the latest report.
+# Report files are named newreport.YYYY-MM-DD.dcc.txt; other names,
+# including ones with extra trailing text such as ".bak", are ignored.
+# Return the filename (not the full path) of the latest report.
+# Calls sys.exit(-1) if the directory can't be read or holds no report.
+def getRecentReport (reportDir):
+  # Regex for the report file; '$' keeps match() from accepting a suffix
+  pattern = re.compile(r"newreport\.(\d{4})-(\d{2})-(\d{2})\.dcc\.txt$")
+  try:
+    dirList = os.listdir(reportDir)
+  except OSError:
+    sys.stderr.write("Error: Can't open dir '%s'\n" % reportDir)
+    sys.exit(-1)
+
+  # Scan the report directory and find the most recent report
+  currentDate = currentFile = None
+  for f in dirList:
+    m = pattern.match(f)
+    if not m:
+      continue
+    # (year, month, day) int tuples compare in chronological order
+    date = tuple([int(g) for g in m.groups()])
+    if currentDate is None or date > currentDate:
+      currentDate = date
+      currentFile = f
+
+  if currentFile is None:
+    sys.stderr.write("Error: Can't find a report file in dir '%s'\n" % reportDir)
+    sys.exit(-1)
+
+  return currentFile
+
+# Read and parse the important dates file ("YYYYMMDD<tab>label" per line;
+# blank lines and '#' comment lines are skipped, label may contain tabs).
+# Return a dict where key = event date (int) and value = event label;
+# calls sys.exit(-1) if the file can't be opened.
+def readImportantDatesFile (file):
+  importantDateHash = {}
+  try:
+    f = open(file, "r")
+  except IOError:
+    sys.stderr.write("Error: Can't open file '%s'\n" % file)
+    sys.exit(-1)
+  try:
+    for line in f:
+      line = line.rstrip()
+      if line and not line.startswith('#'):
+        (date, text) = line.split('\t', 1)
+        importantDateHash[int(date)] = text
+  finally:
+    f.close()
+  return importantDateHash
+
+# Convert dates into the int format YYYYMMDD.
+# Accepts MM/DD/YY (read as 20YY) or YYYY-MM-DD at the start of the string;
+# anything else is returned unchanged, so callers can test for an int.
+def convertDate (d):
+  # MM/DD/YY: groups are (month, day, two-digit year)
+  usMatch = re.compile("(\d{2})\/(\d{2})\/(\d{2})").match(d)
+  if usMatch:
+    (month, day, year) = [int(g) for g in usMatch.groups()]
+    return 20000000 + year * 10000 + month * 100 + day
+
+  # YYYY-MM-DD: groups are (year, month, day)
+  isoMatch = re.compile("(\d{4})\-(\d{2})\-(\d{2})").match(d)
+  if isoMatch:
+    (year, month, day) = [int(g) for g in isoMatch.groups()]
+    return year * 10000 + month * 100 + day
+
+  # Not a recognized date format
+  return d
+
+# Parse report file and return result in the proper format
+#   for the Google Visualization API
+#
+# reportDir: directory holding the newreport.YYYY-MM-DD.dcc.txt files; the
+#   most recent one (per getRecentReport) is parsed.
+# importantDatesFile: tab-separated file of event dates and labels.
+#
+# Returns a list of rows sorted by date, one row per date on which anything
+# was submitted, released, or flagged as an important event:
+#   [datetime.date, releases that day, cumulative releases,
+#    submits that day, cumulative submits, event label or ""]
+# Calls sys.exit(-1) if the report file can't be opened.
+def getDataArray (reportDir, importantDatesFile):
+
+  importantDateHash = readImportantDatesFile(importantDatesFile)
+
+  submitHash = {}
+  releaseHash = {}
+  currentFile = getRecentReport(reportDir)
+  fullFilePath = reportDir + "/" + currentFile
+
+  try:
+    f = open(fullFilePath, "r")
+  except IOError:
+    # Report the path: 'f' is never bound when open() fails
+    sys.stderr.write("Error: Can't open file '%s'\n" % fullFilePath)
+    sys.exit(-1)
+
+  sys.stderr.write("Parsing file: %s\n" % (fullFilePath))
+  try:
+    for line in f:
+      line = line.rstrip()
+      if (line.startswith('Project')):
+        # Skip the header line
+        continue
+
+      # The submit and release date are in fields 6 and 7 (zero-based)
+      splitArray = line.split('\t')
+      if len(splitArray) < 8:
+        # Blank or truncated line: no date fields to count
+        continue
+
+      # Convert dates into ints; unparseable dates come back as strings
+      submitDate = convertDate(splitArray[6])
+      releaseDate = convertDate(splitArray[7])
+
+      # Accumulate per-day counts in the hashes
+      if isinstance(submitDate, int):
+        submitHash[submitDate] = submitHash.get(submitDate, 0) + 1
+
+      if isinstance(releaseDate, int):
+        releaseHash[releaseDate] = releaseHash.get(releaseDate, 0) + 1
+  finally:
+    f.close()
+
+  # Get the union of all possible dates
+  unionDates = set(submitHash)
+  unionDates.update(releaseHash)
+  unionDates.update(importantDateHash)
+
+  submitSum = 0
+  releaseSum = 0
+
+  # Populate dataArray with the contents of the data matrix
+  dataArray = []
+  for date in sorted(unionDates):
+    submitValue = submitHash.get(date, 0)
+    releaseValue = releaseHash.get(date, 0)
+
+    submitSum += submitValue
+    releaseSum += releaseValue
+
+    # Single row of data; date is YYYYMMDD, so split it arithmetically
+    array = [
+      datetime.date(date // 10000, date // 100 % 100, date % 100),
+      releaseValue,
+      releaseSum,
+      submitValue,
+      submitSum,
+      importantDateHash.get(date, ""),
+    ]
+    dataArray.append(array)
+
+  return dataArray
+
+# Build the timeline web page and print it to stdout.
+def main():
+  # Headers for the columns in the data matrix, as (name, type) pairs
+  description = [
+    ("date", "date"),
+    ("release", "number"),
+    ("release_cumul", "number"),
+    ("submit", "number"),
+    ("submit_cumul", "number"),
+    ("events", "string"),
+  ]
+  data_table = gviz_api.DataTable(description)
+
+  # Load the matrix built from the latest report and important dates
+  data_table.LoadData(getDataArray(reportDir, importantDatesFile))
+
+  # Convert to JavaScript code; the page's drawChart() uses jscode_data
+  jscode = data_table.ToJSCode("jscode_data")
+
+  # Print out the webpage. To serve it dynamically instead, first print
+  # a "Content-type: text/html" header followed by an empty line.
+  print page_template % {"jscode": jscode}
+
+# The html template. The %(jscode)s placeholder is filled in by main()
+page_template = """
+<html>
+  <head>
+    <script type='text/javascript' src='http://www.google.com/jsapi'></script>
+    <script type='text/javascript'>
+      google.load('visualization', '1', {'packages':['annotatedtimeline']});
+
+      google.setOnLoadCallback(drawChart);
+      function drawChart() {
+        %(jscode)s
+
+        var chart = new google.visualization.AnnotatedTimeLine(document.getElementById('chart_div'));
+        chart.draw(jscode_data, {displayAnnotations: true, displayAnnotationsFilter: true, fill:25, thickness:3, annotationsWidth: 15});
+      }
+    </script>
+    <title>ENCODE Cumulative Submit and Release Timeline</title>
+  </head>
+
+  <body>
+    <div id='chart_div' style='width: 854px; height: 480px;'></div>
+  </body>
+</html>
+"""
+
+# Script entry point: main() returns None, so the exit status is 0
+if __name__ == '__main__':
+  sys.exit(main())