python/ucscgenomics/rafile/read.py dc9640c570df4ad11a2467aaa6cb9fa9ed13615d

dc9640c570df4ad11a2467aaa6cb9fa9ed13615d
mmaddren
  Thu Jan 20 14:49:42 2011 -0800
added initial test script for rafile read
diff --git python/ucscgenomics/rafile/read.py python/ucscgenomics/rafile/read.py
index ef43590..3216ed5 100644
--- python/ucscgenomics/rafile/read.py
+++ python/ucscgenomics/rafile/read.py
@@ -1,45 +1,77 @@
 #
 # rafile/read.py
 # 
-# PURPOSE UNCLEAR
+# takes in an RA file and creates a dictionary structure for each one. Then puts
+# all entries into 1 dictionary, which is indexed by the value of the key-value 
+# pair specified by keyField (typically the object's name) which has to be the
+# first line in each entry. An example input with its created raDict follows.
+#
+# EXAMPLE:
+#
+# INPUT:
+#     readRaFile(file, key1)
+#
+#     file:     
+#         key1 valueA
+#         key2 valueB
+#
+#         key1 valueC
+#         key2 valueD
+# 
+# OUTPUT:
+#     raDict
+#         value1
+#             key1 valueA
+#             key2 valueB
+#         value3
+#             key1 valueC
+#             key2 valueD
+#
+#
+#
 #
 
 import re
 import radict
 
 def readRaFile(filePath, keyField):
 
     file = open(filePath, 'r')
     raDict = radict.RaDict()
     raEntry = radict.EntryDict()
     raKey = None
 
     for line in file:
 
         line = line.strip()
 
+        # remove all commented lines
         if (line.startswith('#')):
             continue
 
+        # a blank line indicates we need to move to the next entry
         if (len(line) == 0):
             raKey = None
             raEntry = None
             continue
 
+        # check if we're at the first key in a new entry
         if (line.split()[0] == keyField):
             raKey = line.split()[1]
             raEntry = radict.EntryDict()
             raEntry.add(keyField, raKey)
             raDict.add(raKey, raEntry)
 
+        # otherwise we should be somewhere in the middle of an entry
         elif (raEntry != None):
             splits = line.split()            
             raEntry.add(splits[0], splits[1])
 
+        # we'll only get here if we didn't find the keyField at the beginning
         else:
             print 'Error: Key missing - <' + keyField + '> before line <' + line + '>.'
             return None
 
     file.close()
     return raDict