3aee104cf4c1e245dd020f743fbc58c17fd75976
mmaddren
  Mon Apr 9 12:12:44 2012 -0700
added encode.py to store global constants and other encode stuff, and made all other libraries interface correctly with it
diff --git python/lib/ucscgenomics/geo.py python/lib/ucscgenomics/geo.py
index 1dc6c03..490bec8 100644
--- python/lib/ucscgenomics/geo.py
+++ python/lib/ucscgenomics/geo.py
@@ -21,37 +21,61 @@
     'chromatin': 'nuclear RNA',
     'cell': 'total RNA'
 }
 
 # map our instrument names to GEO's names
 instrumentModels = {
     'Illumina_GA2x': 'Illumina Genome Analyzer II',
     'Illumina_GA2': 'Illumina Genome Analyzer II',
     'Illumina_HiSeq_2000': 'Illumina HiSeq 2000',
     'Illumina_GA1': 'Illumina Genome Analyzer',
     'Illumina_GA1_or_GA2': 'Illumina Genome Analyzer, Illumina Genome Analyzer II',
     'SOLiD_Unknown': 'SOLiD',
     'Unknown': 'Illumina Genome Analyzer'
 }
 
+class Submission(object):
+    
+    @property
+    def accessions(self):
+        return self._accessions
+        
+    @property
+    def dateSubmitted(self):
+        return self._submitted
+    
+    @property
+    def dateUpdated(self):
+        return self._updated
+    
+    def __init__(self, geoId):
+        html = getHtml(geoId)
+        self._accessions = getAccessions(html)
+        self._submitted = getDateSubmitted(html)
+        self._updated = getDateUpdated(html)
+
 def getHtml(geoId):
     try:
         response = urllib2.urlopen('http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=%s' % geoId)
     except:
         return None
     return response.read()
     
-def getGeo(geoId):
-    return re.findall('(GSM[0-9]+)</a></td>\n<td valign="top">([^<]+)</td>', getHtml(geoId))
+def getAccessions(html):
+    gsms = re.findall('(GSM[0-9]+)</a></td>\n<td valign="top">([^<]+)</td>', html)
+    d = dict()
+    for gsm in gsms:
+        d[gsm[1]] = gsm[0]
+    return d
     
-def getDateSubmitted(geoId):
-    datestr = re.search('<td>Submission date</td>\n<td>([^<]+)</td>', getHtml(geoId))
+def getDateSubmitted(html):
+    datestr = re.search('<td>Submission date</td>\n<td>([^<]+)</td>', html)
     if datestr == None:
         return None
     return datetime.datetime.strptime(datestr.group(1), '%b %d, %Y')
     
-def getDateUpdated(geoId):
-    datestr = re.search('<td>Last update date</td>\n<td>([^<]+)</td>', getHtml(geoId))
+def getDateUpdated(html):
+    datestr = re.search('<td>Last update date</td>\n<td>([^<]+)</td>', html)
     if datestr == None:
         return None
     return datetime.datetime.strptime(datestr.group(1), '%b %d, %Y')