src/hg/encode/encodeCharts/encodeTimeline.py 1.3
1.3 2010/04/08 00:29:15 bsuh
Updated path to important dates file
Index: src/hg/encode/encodeCharts/encodeTimeline.py
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/encode/encodeCharts/encodeTimeline.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 1000000 -r1.2 -r1.3
--- src/hg/encode/encodeCharts/encodeTimeline.py 7 Apr 2010 23:13:58 -0000 1.2
+++ src/hg/encode/encodeCharts/encodeTimeline.py 8 Apr 2010 00:29:15 -0000 1.3
@@ -1,231 +1,231 @@
#!/hive/groups/recon/local/bin/python
import cgitb
import datetime
import os
import re
import sys
import gviz_api
cgitb.enable()
# Directory scanned for report files named newreport.YYYY-MM-DD.dcc.txt
reportDir = "/hive/groups/encode/dcc/reports"
# Tab-separated file of important events: one "YYYYMMDD<TAB>label" entry per
# line; lines starting with '#' are ignored
-importantDatesFile = "/hive/users/bsuh/encode/important.dates.tab"
+importantDatesFile = "/hive/groups/encode/dcc/charts/important.dates.tab"
def getRecentReport(reportDir):
    """Find the most recent report file in a directory.

    Report files are named ``newreport.YYYY-MM-DD.dcc.txt``; the date embedded
    in the file name decides which report is the latest.

    Args:
        reportDir: path of the directory to scan.

    Returns:
        A ``(filename, date)`` tuple: ``filename`` is the bare name of the
        newest report (without the directory part) and ``date`` is its date
        as an int in YYYYMMDD form.

    Prints a message to stderr and exits the process with status -1 if the
    directory cannot be listed or holds no report dated after 1901-01-01.
    """
    # Anchored at the end so that leftovers such as "...dcc.txt.bak" or editor
    # backups ("...dcc.txt~") are not mistaken for a report file.
    pattern = re.compile(r"newreport\.(\d{4})-(\d{2})-(\d{2})\.dcc\.txt$")

    try:
        dirList = os.listdir(reportDir)
    except OSError:
        sys.stderr.write("Error: Can't open dir '%s'\n" % reportDir)
        sys.exit(-1)

    # Scan the directory, keeping the file with the largest date seen so far
    currentDate = 19010101
    currentFile = None
    for f in dirList:
        m = pattern.match(f)
        if not m:
            continue
        # Fold the date into an int so that dates compare numerically
        year, month, day = [int(g) for g in m.groups()]
        date = year * 10000 + month * 100 + day
        if date > currentDate:
            currentDate = date
            currentFile = f

    if currentFile is None:
        sys.stderr.write("Error: Can't find a report file in dir '%s'\n" % reportDir)
        sys.exit(-1)
    return currentFile, currentDate
def readImportantDatesFile(file):
    """Read and parse the important dates file.

    Each data line has the form ``date<TAB>label`` where ``date`` is an
    integer. Blank lines and lines starting with '#' are skipped. Everything
    after the first tab is the label, so a label may itself contain tabs; a
    line without a tab yields an empty label.

    Args:
        file: path of the important dates file. (The name shadows the
            Python 2 builtin but is kept for keyword-argument callers.)

    Returns:
        A dict where key = event date (int) and value = event label (str).

    Raises:
        ValueError: if the date field of a data line is not an integer.

    Prints a message to stderr and exits the process with status -1 if the
    file cannot be opened.
    """
    try:
        f = open(file, "r")
    except (IOError, OSError):
        sys.stderr.write("Error: Can't open file '%s'\n" % file)
        sys.exit(-1)

    importantDateHash = {}
    # try/finally rather than "with" so the handle is closed on every path
    # without requiring a newer interpreter than the rest of the script does
    try:
        for line in f:
            line = line.rstrip()
            if not line or line.startswith('#'):
                continue
            date, _, text = line.partition('\t')
            importantDateHash[int(date)] = text
    finally:
        f.close()
    return importantDateHash
def convertDate(d):
    """Convert a date string into an int of the form YYYYMMDD.

    Two layouts are recognised, both matched at the start of the string:
    ``MM/DD/YY`` (the two-digit year is taken to be 20YY) and ``YYYY-MM-DD``.

    Args:
        d: the date string to convert.

    Returns:
        The date as an int in YYYYMMDD form, or ``d`` itself, unchanged, when
        it starts with neither layout.
    """
    # MM/DD/YY
    usMatch = re.match(r"(\d{2})\/(\d{2})\/(\d{2})", d)
    if usMatch is not None:
        month, day, shortYear = [int(g) for g in usMatch.groups()]
        return 20000000 + shortYear * 10000 + month * 100 + day

    # YYYY-MM-DD
    isoMatch = re.match(r"(\d{4})\-(\d{2})\-(\d{2})", d)
    if isoMatch is not None:
        year, month, day = [int(g) for g in isoMatch.groups()]
        return year * 10000 + month * 100 + day

    # Not a recognised date: hand the input back untouched
    return d
def getDataArray(reportDir, importantDatesFile):
    """Parse the latest report and build the rows for the data table.

    The newest report in ``reportDir`` is read line by line. For every line
    except the header (which starts with 'Project'), the submit date and the
    release date are taken from tab-separated fields 6 and 7 (zero-based) and
    counted per day. Fields that are missing or do not convert to a date are
    ignored.

    Args:
        reportDir: directory containing the report files.
        importantDatesFile: path of the important dates file.

    Returns:
        A ``(dataArray, currentDate)`` tuple. ``dataArray`` holds one row per
        date that has a submission, a release or an important event, in
        ascending date order; each row is
        ``[datetime.date, releases that day, cumulative releases,
        submissions that day, cumulative submissions, event label or ""]``.
        ``currentDate`` is the date of the report file as a YYYYMMDD int.

    Prints a message to stderr and exits the process with status -1 if the
    report file cannot be opened.
    """
    importantDateHash = readImportantDatesFile(importantDatesFile)
    currentFile, currentDate = getRecentReport(reportDir)
    fullFilePath = reportDir + "/" + currentFile

    try:
        f = open(fullFilePath, "r")
    except (IOError, OSError):
        # Name the path here: 'f' is not bound when open() fails
        sys.stderr.write("Error: Can't open file '%s'\n" % fullFilePath)
        sys.exit(-1)
    sys.stderr.write("Parsing file: %s\n" % fullFilePath)

    # Per-day counts, keyed by YYYYMMDD int
    submitHash = {}
    releaseHash = {}
    try:
        for line in f:
            line = line.rstrip()
            if line.startswith('Project'):
                # Skip the header line
                continue
            splitArray = line.split('\t')
            # The submit and release date are in fields 6 and 7
            for fieldIndex, dateHash in ((6, submitHash), (7, releaseHash)):
                # rstrip() drops trailing empty columns, so a row may be short
                if fieldIndex >= len(splitArray):
                    continue
                date = convertDate(splitArray[fieldIndex])
                # convertDate returns the raw string when it is not a date
                if isinstance(date, int):
                    dateHash[date] = dateHash.get(date, 0) + 1
    finally:
        f.close()

    # Get the union of all possible dates
    unionDates = set(submitHash) | set(releaseHash) | set(importantDateHash)

    # Populate dataArray with the contents of the data matrix
    submitSum = 0
    releaseSum = 0
    dataArray = []
    for date in sorted(unionDates):
        submitValue = submitHash.get(date, 0)
        releaseValue = releaseHash.get(date, 0)
        submitSum += submitValue
        releaseSum += releaseValue
        annotText = importantDateHash.get(date, "")

        dateString = str(date)
        day = datetime.date(int(dateString[0:4]), int(dateString[4:6]), int(dateString[6:8]))
        # Single row of data
        dataArray.append([day, releaseValue, releaseSum, submitValue, submitSum, annotText])
    return dataArray, currentDate
def main():
    """Write the timeline page to stdout as a CGI response.

    Builds the data table from the latest report, converts it to JavaScript
    and substitutes it, together with the report date, into page_template.
    """
    # Headers for the columns in the data matrix: (column id, column type)
    description = [
        ("date", "date"),
        ("release", "number"),
        ("release_cumul", "number"),
        ("submit", "number"),
        ("submit_cumul", "number"),
        ("events", "string"),
    ]

    # Create the data table, then create and load the matrix
    data_table = gviz_api.DataTable(description)
    matrix, reportDate = getDataArray(reportDir, importantDatesFile)
    data_table.LoadData(matrix)

    # Turn the YYYYMMDD report date into a readable stamp
    digits = str(reportDate)
    year, month, day = int(digits[0:4]), int(digits[4:6]), int(digits[6:8])
    dateStamp = datetime.date(year, month, day).strftime("%b %d, %Y")

    # Convert to JavaScript code
    jscode = data_table.ToJSCode("jscode_data")

    # CGI response header, then the blank line that ends the headers
    sys.stdout.write("Content-type: text/html\n")
    sys.stdout.write("\n")

    # Print out the webpage; the template uses only these two placeholders
    substitutions = {"jscode": jscode, "dateStamp": dateStamp}
    sys.stdout.write(page_template % substitutions + "\n")
# The HTML page template. main() fills it in with %-style string substitution:
# %(jscode)s receives the JavaScript that defines the "jscode_data" table drawn
# by drawChart(), and %(dateStamp)s receives the formatted report date.
page_template = """
<html>
<head>
<script type='text/javascript' src='http://www.google.com/jsapi'></script>
<script type='text/javascript'>
google.load('visualization', '1', {'packages':['annotatedtimeline']});
google.setOnLoadCallback(drawChart);
function drawChart() {
%(jscode)s
var chart = new google.visualization.AnnotatedTimeLine(document.getElementById('chart_div'));
chart.draw(jscode_data, {displayAnnotations: true, displayAnnotationsFilter: true, fill:25, thickness:3, annotationsWidth: 15});
}
</script>
<title>ENCODE Cumulative Submit and Release Timeline</title>
</head>
<body>
<h2>ENCODE Cumulative Submit and Release Timeline <br><font size="-1">(Report Date: %(dateStamp)s)</font></h2>
<div id='chart_div' style='width: 854px; height: 480px;'></div>
</body>
</html>
"""
# Script entry point: emit the page, then exit with a success status
if __name__ == '__main__':
    main()
    sys.exit(0)