src/hg/encode/encodeCharts/encodeTimeline.py 1.4
1.4 2010/06/02 00:18:38 bsuh
Initial check-in of ENCODE Report Charts cgis
Index: src/hg/encode/encodeCharts/encodeTimeline.py
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/encode/encodeCharts/encodeTimeline.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -b -B -U 1000000 -r1.3 -r1.4
--- src/hg/encode/encodeCharts/encodeTimeline.py 8 Apr 2010 00:29:15 -0000 1.3
+++ src/hg/encode/encodeCharts/encodeTimeline.py 2 Jun 2010 00:18:38 -0000 1.4
@@ -1,231 +1,154 @@
#!/hive/groups/recon/local/bin/python
+# Requires Python 2.6; the current default python on hgwdev is 2.4
+
+"""CGI script that outputs the timeline of ENCODE submissions and
+releases as a Google Visualization Annotated Timeline.
+"""
import cgitb
import datetime
-import os
-import re
+import json
import sys
+# Import local modules found in "/hive/groups/encode/dcc/charts"
+sys.path.append("/hive/groups/encode/dcc/charts")
import gviz_api
+import encodeReportLib
-cgitb.enable()
-
-# Directory containing the report files
-reportDir = "/hive/groups/encode/dcc/reports"
-
-# File listing the important events and dates
-importantDatesFile = "/hive/groups/encode/dcc/charts/important.dates.tab"
-
-# Given the directory of reports, find the latest report
-# Return the filename of the latest report
-def getRecentReport (reportDir):
- # Regex for the report file
- pattern = re.compile("newreport\.(\d{4})\-(\d{2})\-(\d{2})\.dcc\.txt")
-
- # Scan the report directory and find the most recent report
- currentDate = 19010101
- currentFile = "NULL"
-
- try:
- dirList = os.listdir(reportDir)
- except:
- print >> sys.stderr, "Error: Can't open dir '%s'" % reportDir
- sys.exit(-1)
-
- for f in dirList:
- m = pattern.match(f)
- if m:
- # Convert date into an int
- date = int(m.group(1)) * 10000 + int(m.group(2)) * 100 + int(m.group(3))
- if date > currentDate:
- # Update the current latest date
- currentDate = date
- currentFile = f
-
- if currentFile == "NULL":
- print >> sys.stderr, "Error: Can't find a report file in dir '%s'" % reportDir
- sys.exit(-1)
-
- return currentFile, currentDate
-
-# Read and parse the important dates file
-# Return a dict where key = event date and value = event label
-def readImportantDatesFile (file):
-
- importantDateHash = {}
+__author__ = "Bernard Suh"
+__email__ = "bsuh@soe.ucsc.edu"
+__version__ = "1.0.0"
- try:
- f = open(file, "r")
- except:
- print >> sys.stderr, "Error: Can't open file '%s'" % file
- sys.exit(-1)
- for line in f:
- line = line.rstrip()
- if line.startswith('#'):
- continue
-
- (date, text) = line.split('\t')
- importantDateHash[int(date)] = text
-
- return importantDateHash
-
-# Convert dates into the int format YYYYMMDD
-def convertDate (d):
-
- # Convert MM/DD/YY
- pattern = re.compile("(\d{2})\/(\d{2})\/(\d{2})")
- m = pattern.match(d)
- if m:
- dateNum = 20000000 + int(m.group(3)) * 10000 + int(m.group(1)) * 100 + int(m.group(2))
- return dateNum
-
- # Convert YYYY-MM-DD
- pattern = re.compile("(\d{4})\-(\d{2})\-(\d{2})")
- m = pattern.match(d)
- if m:
- dateNum = int(m.group(1)) * 10000 + int(m.group(2)) * 100 + int(m.group(3))
- return dateNum
-
- return d
+cgitb.enable()
# Parse report file and return result in the proper format
# for the Google Visualization API
def processReportFile(reportFile, currentDate):
    """Tally submit and release dates from a report file into timeline rows.

    Arguments:
    reportFile  -- path of the tab-separated report file to read
    currentDate -- passed unchanged to
                   encodeReportLib.readImportantDatesFile()

    Returns a list of rows ordered by date. Each row is
    [datetime.date, release, release_cumul, submit, submit_cumul, events]
    where release/submit are the counts for that date, the *_cumul values
    are running totals over all earlier rows plus this one, and events is
    the annotation text for that date ("" when there is none).

    Calls sys.exit(-1), after writing a message to stderr, when reportFile
    cannot be opened.
    """
    importantDateHash = encodeReportLib.readImportantDatesFile(currentDate)

    try:
        f = open(reportFile, "r")
    except IOError:
        # Report the path itself: 'f' is never bound when open() fails, so
        # formatting it here would raise instead of printing the message.
        sys.stderr.write("Error: Can't open file '%s'\n" % reportFile)
        sys.exit(-1)

    sys.stderr.write("Parsing file: %s\n" % reportFile)

    submitHash = {}
    releaseHash = {}

    # 'with' guarantees the handle is closed even if parsing raises
    with f:
        for line in f:
            line = line.rstrip()
            if line.startswith('Project'):
                # Skip the header line
                continue

            # The submit and release date are in fields 6 and 7
            splitArray = line.split('\t')
            if len(splitArray) < 7:
                # Blank or truncated line: no submit date to read
                continue

            # NOTE(review): convertDate() lives in encodeReportLib; the code
            # below relies on it returning an int (apparently YYYYMMDD, given
            # the slicing further down) for dates it recognises and a
            # non-int otherwise -- confirm.
            submitDate = encodeReportLib.convertDate(splitArray[6])
            releaseDate = None
            if len(splitArray) > 7:
                # rstrip() above can remove trailing empty fields, so the
                # release column may legitimately be absent
                releaseDate = encodeReportLib.convertDate(splitArray[7])

            # Accumulate dates in hash; non-int results are ignored
            if isinstance(submitDate, int):
                submitHash[submitDate] = submitHash.get(submitDate, 0) + 1
            if isinstance(releaseDate, int):
                releaseHash[releaseDate] = releaseHash.get(releaseDate, 0) + 1

    # Get the union of all possible dates
    unionDates = set.union(set(submitHash.keys()),
                           set(releaseHash.keys()),
                           set(importantDateHash.keys()))

    submitSum = 0
    releaseSum = 0

    # Populate dataArray with the contents of the data matrix
    dataArray = []
    for date in sorted(unionDates):
        submitValue = submitHash.get(date, 0)
        releaseValue = releaseHash.get(date, 0)
        submitSum += submitValue
        releaseSum += releaseValue

        annotText = ""
        if date in importantDateHash:
            annotText = importantDateHash[date]

        # Split the date key into year / month / day by character position
        dateString = str(date)
        day = datetime.date(int(dateString[0:4]), int(dateString[4:6]),
                            int(dateString[6:8]))

        # Single row of data, in the column order declared in main()
        dataArray.append([day, releaseValue, releaseSum,
                          submitValue, submitSum, annotText])

    return dataArray
def main():
    """Build the timeline data table and render it as an HTML page.

    Loads the rows produced by processReportFile() for the report returned
    by encodeReportLib.getRecentReport() into a gviz_api.DataTable, converts
    the table to JavaScript, and passes the page variables and chart options
    to encodeReportLib.renderHtml().
    """
    # Column (id, type) pairs; the order must match the rows built by
    # processReportFile()
    columns = [
        ("date", "date"),
        ("release", "number"),
        ("release_cumul", "number"),
        ("submit", "number"),
        ("submit_cumul", "number"),
        ("events", "string"),
    ]
    table = gviz_api.DataTable(columns)

    # Locate the report to chart, then fill the table from it
    reportPath, reportDate = encodeReportLib.getRecentReport()
    table.LoadData(processReportFile(reportPath, reportDate))

    # Chart specific configuration options, serialized to JSON below
    chartOptions = {
        'annotationsWidth': 15,
        'displayAnnotations': 'true',
        'displayAnnotationsFilter': 'true',
        'fill': 25,
        'thickness': 3,
        'width': 854,
        'height': 480,
    }

    # Variables for the HTML output
    pageVars = {
        'jscode': table.ToJSCode("jscode_data"),
        'dateStamp': encodeReportLib.dateIntToDateStr(reportDate),
        'title': "ENCODE Cumulative Release and Submit Timeline",
        'packageName': 'annotatedtimeline',
        'visClass': 'AnnotatedTimeLine',
        'style': 'style="width:854; height:480"',
        'chart_config': json.dumps(chartOptions),
    }

    encodeReportLib.renderHtml(pageVars, 0)
-# The html template. Will be filled in by string subs
-page_template = """
-<html>
- <head>
- <script type='text/javascript' src='http://www.google.com/jsapi'></script>
- <script type='text/javascript'>
- google.load('visualization', '1', {'packages':['annotatedtimeline']});
-
- google.setOnLoadCallback(drawChart);
- function drawChart() {
- %(jscode)s
-
- var chart = new google.visualization.AnnotatedTimeLine(document.getElementById('chart_div'));
- chart.draw(jscode_data, {displayAnnotations: true, displayAnnotationsFilter: true, fill:25, thickness:3, annotationsWidth: 15});
- }
- </script>
- <title>ENCODE Cumulative Submit and Release Timeline</title>
- </head>
-
- <body>
- <h2>ENCODE Cumulative Submit and Release Timeline <br><font size="-1">(Report Date: %(dateStamp)s)</font></h2>
- <div id='chart_div' style='width: 854px; height: 480px;'></div>
- </body>
-</html>
-"""
-
# Script entry point: run the CGI only when executed directly, then
# terminate with a success status.
if __name__ == '__main__':
    main()
    raise SystemExit(0)