2fdb988d482d7928ad969a0e0c1706446b31846c mmaddren Thu Jan 20 16:04:45 2011 -0800 updated ra scripts diff --git python/ucscgenomics/rafile/read.py python/ucscgenomics/rafile/read.py index 3216ed5..577ddd1 100644 --- python/ucscgenomics/rafile/read.py +++ python/ucscgenomics/rafile/read.py @@ -1,77 +1,85 @@ # # rafile/read.py # # takes in an RA file and creates a dictionary structure for each one. Then puts # all entries into 1 dictionary, which is indexed by the value of the key-value # pair specified by keyField (typically the object's name) which has to be the # first line in each entry. An example input with its created raDict follows. # # EXAMPLE: # # INPUT: # readRaFile(file, key1) # # file: # key1 valueA # key2 valueB # # key1 valueC # key2 valueD # # OUTPUT: # raDict # value1 # key1 valueA # key2 valueB # value3 # key1 valueC # key2 valueD # -# -# -# +import sys import re import radict def readRaFile(filePath, keyField): file = open(filePath, 'r') raDict = radict.RaDict() raEntry = radict.EntryDict() raKey = None for line in file: line = line.strip() # remove all commented lines if (line.startswith('#')): + raDict.addComment(line) continue # a blank line indicates we need to move to the next entry if (len(line) == 0): raKey = None raEntry = None continue # check if we're at the first key in a new entry - if (line.split()[0] == keyField): - raKey = line.split()[1] + if (line.split()[0].strip() == keyField): + if len(line.split()) < 2: + print 'ERROR: blank key on <' + line + '>' + sys.exit(1) + + raKey = line.split(' ', 1)[1].strip() raEntry = radict.EntryDict() raEntry.add(keyField, raKey) raDict.add(raKey, raEntry) # otherwise we should be somewhere in the middle of an entry elif (raEntry != None): - splits = line.split() - raEntry.add(splits[0], splits[1]) + raKey = line.split()[0].strip() + raVal = '' + + if len(line.split()) > 1: + raVal = line.split(' ', 1)[1].strip() + + raEntry.add(raKey, raVal) # we'll only get here if we didn't find the keyField at the beginning else: print 'Error: Key missing - <' + keyField + '> before line <' + line + '>.' return None file.close() return raDict