src/hg/encode/encodeCharts/encodeTimeline.py 1.4

1.4 2010/06/02 00:18:38 bsuh
Initial check-in of ENCODE Report Charts cgis
Index: src/hg/encode/encodeCharts/encodeTimeline.py
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/encode/encodeCharts/encodeTimeline.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -b -B -U 4 -r1.3 -r1.4
--- src/hg/encode/encodeCharts/encodeTimeline.py	8 Apr 2010 00:29:15 -0000	1.3
+++ src/hg/encode/encodeCharts/encodeTimeline.py	2 Jun 2010 00:18:38 -0000	1.4
@@ -1,112 +1,43 @@
 #!/hive/groups/recon/local/bin/python
+# Requires Python 2.6 (e.g. for the json module); the default python on hgwdev is 2.4.
+
+"""CGI script that outputs the timeline of ENCODE submissions and
+releases as a Google Visualization Annotated Timeline.
+"""
 
 import cgitb
 import datetime
-import os
-import re
+import json
 import sys
 
+# Import local modules found in "/hive/groups/encode/dcc/charts"
+sys.path.append("/hive/groups/encode/dcc/charts")
 import gviz_api
+import encodeReportLib
 
-cgitb.enable()
-
-# Directory containing the report files
-reportDir = "/hive/groups/encode/dcc/reports"
-
-# File listing the important events and dates
-importantDatesFile = "/hive/groups/encode/dcc/charts/important.dates.tab"
-
-# Given the directory of reports, find the latest report
-# Return the filename of the latest report
-def getRecentReport (reportDir):
-  # Regex for the report file
-  pattern = re.compile("newreport\.(\d{4})\-(\d{2})\-(\d{2})\.dcc\.txt")
-
-  # Scan the report directory and find the most recent report
-  currentDate = 19010101
-  currentFile = "NULL"
-
-  try:
-    dirList = os.listdir(reportDir)
-  except:
-    print >> sys.stderr, "Error: Can't open dir '%s'" % reportDir
-    sys.exit(-1)
-
-  for f in dirList:
-    m = pattern.match(f)
-    if m:
-      # Convert date into an int
-      date = int(m.group(1)) * 10000 + int(m.group(2)) * 100 + int(m.group(3))
-      if date > currentDate:
-        # Update the current latest date
-        currentDate = date
-        currentFile = f
-
-  if currentFile == "NULL":
-    print >> sys.stderr, "Error: Can't find a report file in dir '%s'" % reportDir
-    sys.exit(-1)
-
-  return currentFile, currentDate
-
-# Read and parse the important dates file
-# Return a dict where key = event date and value = event label
-def readImportantDatesFile (file):
-
-  importantDateHash = {}
+__author__  = "Bernard Suh"
+__email__   = "bsuh@soe.ucsc.edu"
+__version__ = "1.0.0"
 
-  try:
-    f = open(file, "r")
-  except:
-    print >> sys.stderr, "Error: Can't open file '%s'" % file
-    sys.exit(-1)
-  for line in f:
-    line = line.rstrip()
-    if line.startswith('#'):
-      continue
-
-    (date, text) = line.split('\t')
-    importantDateHash[int(date)] = text
-
-  return importantDateHash
-
-# Convert dates into the int format YYYYMMDD
-def convertDate (d):
-
-  # Convert MM/DD/YY
-  pattern = re.compile("(\d{2})\/(\d{2})\/(\d{2})")
-  m = pattern.match(d)
-  if m:
-    dateNum = 20000000 + int(m.group(3)) * 10000 + int(m.group(1)) * 100 + int(m.group(2))
-    return dateNum
-
-  # Convert YYYY-MM-DD
-  pattern = re.compile("(\d{4})\-(\d{2})\-(\d{2})")
-  m = pattern.match(d)
-  if m:
-    dateNum = int(m.group(1)) * 10000 + int(m.group(2)) * 100 + int(m.group(3))
-    return dateNum
-
-  return d
+cgitb.enable()
 
 # Parse report file and return result in the proper format
 #   for the Google Visualization API
-def getDataArray (reportDir, importantDatesFile):
+def processReportFile (reportFile, currentDate):
 
-  importantDateHash = readImportantDatesFile(importantDatesFile)
+  importantDateHash = encodeReportLib.readImportantDatesFile(currentDate)
 
   submitHash = {}
   releaseHash = {}
-  currentFile, currentDate = getRecentReport(reportDir)
-  fullFilePath = reportDir + "/" + currentFile
 
   try:
-    f = open(fullFilePath, "r")
+    f = open(reportFile, "r")
   except:
     print >> sys.stderr, "Error: Can't open file '%s'" % f
     sys.exit(-1)
 
-  print >> sys.stderr, "Parsing file: %s" % (fullFilePath)
+  print >> sys.stderr, "Parsing file: %s" % (reportFile)
   for line in f:
     line = line.rstrip()
     if (line.startswith('Project')):
       # Skip the header line
@@ -117,10 +48,10 @@
     startDate = splitArray[6]
     endDate = splitArray[7]
 
     # Convert dates into ints
-    submitDate = convertDate(startDate)
-    releaseDate = convertDate(endDate)
+    submitDate = encodeReportLib.convertDate(startDate)
+    releaseDate = encodeReportLib.convertDate(endDate)
 
     # Accumulate dates in hash
     if isinstance(submitDate, int):
       if not submitDate in submitHash:
@@ -130,11 +61,14 @@
     if isinstance(releaseDate, int):
       if not releaseDate in releaseHash:
         releaseHash[releaseDate] = 0
       releaseHash[releaseDate] += 1
+  f.close()
 
   # Get the union of all possible dates
-  unionDates = set.union(set(submitHash.keys()), set(releaseHash.keys()), set(importantDateHash.keys()))
+  unionDates = set.union(set(submitHash.keys()), 
+                         set(releaseHash.keys()), 
+                         set(importantDateHash.keys()))
 
   submitValue = 0
   submitSum = 0
   releaseValue = 0
@@ -159,73 +93,62 @@
     annotText = ""
     if date in importantDateHash:
       annotText = importantDateHash[date]
 
-#    print "%d\t%d\t%d\t%d\t%d\t%s" % (date, releaseValue, releaseSum, submitValue, submitSum, annotText)
     # Single row of data
     array = []
-    array.append(datetime.date(int(dateString[0:4]), int(dateString[4:6]), int(dateString[6:8])))
+    array.append(datetime.date(int(dateString[0:4]), int(dateString[4:6]), 
+                               int(dateString[6:8])))
     array.append(releaseValue)
     array.append(releaseSum)
     array.append(submitValue)
     array.append(submitSum)
     array.append(annotText)
     dataArray.append(array)
 
-  return dataArray, currentDate
+  return dataArray
 
 def main():
   # Headers for the columns in the data matrix
-  description = [("date", "date"), ("release", "number"), ("release_cumul", "number"), ("submit", "number"), ("submit_cumul", "number"), ("events", "string") ]
+  description = [ ("date", "date"), ("release", "number"), 
+                  ("release_cumul", "number"), ("submit", "number"), 
+                  ("submit_cumul", "number"), ("events", "string") ]
 
   # Create the data table 
   data_table = gviz_api.DataTable(description)
 
+  currentFile, currentDate = encodeReportLib.getRecentReport()
+
   # Create and load the matrix
-  matrix, reportDate = getDataArray(reportDir, importantDatesFile)
+  matrix = processReportFile(currentFile, currentDate)
   data_table.LoadData(matrix)
 
-  reportDate = str(reportDate)
-  reportDateObj = datetime.date(int(reportDate[0:4]), int(reportDate[4:6]), int(reportDate[6:8]))
-  dateStamp = reportDateObj.strftime("%b %d, %Y")
-
   # Convert to JavaScript code
   jscode = data_table.ToJSCode("jscode_data")
 
-  # Commented out but could serve this page dynamically
-  print "Content-type: text/html"
-  print
+  # Set variables for HTML output
+  template_vars = {}
+  template_vars['jscode'] = jscode
+  template_vars['dateStamp'] = encodeReportLib.dateIntToDateStr(currentDate)
+  template_vars['title'] = "ENCODE Cumulative Release and Submit Timeline"
+  template_vars['packageName'] = 'annotatedtimeline'
+  template_vars['visClass'] = 'AnnotatedTimeLine'
+  template_vars['style'] = 'style="width:854; height:480"'
+
+  # Set the chart specific configuration options
+  chart_config = {}
+  chart_config['annotationsWidth'] = 15
+  chart_config['displayAnnotations'] = 'true'
+  chart_config['displayAnnotationsFilter'] = 'true'
+  chart_config['fill'] = 25
+  chart_config['thickness'] = 3
+  chart_config['width'] = 854
+  chart_config['height'] = 480
+  template_vars['chart_config'] = json.dumps(chart_config)
 
-  # Print out the webpage
-  print page_template % vars()
+  encodeReportLib.renderHtml(template_vars, 0)
 
   return
 
-# The html template. Will be filled in by string subs
-page_template = """
-<html>
-  <head>
-    <script type='text/javascript' src='http://www.google.com/jsapi'></script>
-    <script type='text/javascript'>
-      google.load('visualization', '1', {'packages':['annotatedtimeline']});
-
-      google.setOnLoadCallback(drawChart);
-      function drawChart() {
-        %(jscode)s
-
-        var chart = new google.visualization.AnnotatedTimeLine(document.getElementById('chart_div'));
-        chart.draw(jscode_data, {displayAnnotations: true, displayAnnotationsFilter: true, fill:25, thickness:3, annotationsWidth: 15});
-      }
-    </script>
-    <title>ENCODE Cumulative Submit and Release Timeline</title>
-  </head>
-
-  <body>
-    <h2>ENCODE Cumulative Submit and Release Timeline <br><font size="-1">(Report Date: %(dateStamp)s)</font></h2>
-    <div id='chart_div' style='width: 854px; height: 480px;'></div>
-  </body>
-</html>
-"""
-
 if __name__ == '__main__':
   main()
   sys.exit(0)