src/lib/ra.c 1.14
1.14 2009/11/23 07:33:44 kent
Adding a few new .ra parsing routines.
Index: src/lib/ra.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/lib/ra.c,v
retrieving revision 1.13
retrieving revision 1.14
diff -b -B -U 1000000 -r1.13 -r1.14
--- src/lib/ra.c 2 Feb 2009 23:52:07 -0000 1.13
+++ src/lib/ra.c 23 Nov 2009 07:33:44 -0000 1.14
@@ -1,205 +1,239 @@
/* Stuff to parse .ra files. Ra files are simple text databases.
* The database is broken into records by blank lines.
* Each field takes a line. The name of the field is the first
* word in the line. The value of the field is the rest of the line.
*
* This file is copyright 2002 Jim Kent, but license is hereby
* granted for all use - public, private or commercial. */
#include "common.h"
#include "linefile.h"
#include "hash.h"
#include "localmem.h"
#include "ra.h"
static char const rcsid[] = "$Id$";
-struct hash *raNextRecord(struct lineFile *lf)
-/* Return a hash containing next record.
- * Returns NULL at end of file. freeHash this
- * when done. Note this will free the hash
- * keys and values as well, so you'll have to
- * cloneMem them if you want them for later. */
+boolean raSkipLeadingEmptyLines(struct lineFile *lf)
+/* Skip leading blank lines and lines whose first non-space character is '#'. Returns FALSE if end of file comes first; otherwise calls lineFileReuse on the first other line and returns TRUE. */
{
-struct hash *hash = NULL;
-char *line, *key, *val;
-
+char *line;
/* Skip leading empty lines and comments. */
for (;;)
{
if (!lineFileNext(lf, &line, NULL))
- return NULL;
+ return FALSE;
line = skipLeadingSpaces(line);
if (line[0] != 0 )
{
if (line[0] == '#')
continue;
else
break;
}
}
lineFileReuse(lf);
+return TRUE;
+}
+
+boolean raNextTagVal(struct lineFile *lf, char **retTag, char **retVal)
+/* Read next line, skipping '#' comment lines. Return FALSE at end of file, at a blank line, or at a
+ * line starting with #EOF. Otherwise set *retTag to the first word and *retVal to trimSpaces of the rest, and return TRUE */
+{
+char *line;
for (;;)
{
if (!lineFileNext(lf, &line, NULL))
- break;
+ return FALSE;
line = skipLeadingSpaces(line);
if (line[0] == 0)
- break;
+ return FALSE;;
if (line[0] == '#')
{
if (startsWith("#EOF", line))
- return NULL;
+ return FALSE;
else
continue;
}
+ break;
+ }
+*retTag = nextWord(&line);
+*retVal = trimSpaces(line);
+return TRUE;
+}
+
+struct hash *raNextRecord(struct lineFile *lf)
+/* Return a hash containing the next record, or NULL at end of file.
+ * Leading blank and comment lines are skipped first; tag/value lines are
+ * then added until raNextTagVal returns FALSE. Each value is copied into
+ * the hash's local memory (hash->lm). freeHash this when done; that frees
+ * keys and values too, so cloneMem them if you want them for later. */
+{
+struct hash *hash = NULL;
+char *key, *val;
+
+if (!raSkipLeadingEmptyLines(lf))
+ return NULL;
+while (raNextTagVal(lf, &key, &val))
+ {
if (hash == NULL)
hash = newHash(7);
- key = nextWord(&line);
- val = trimSpaces(line);
- if (line == NULL)
- line = "";
val = lmCloneString(hash->lm, val);
hashAdd(hash, key, val);
}
return hash;
}
+struct slPair *raNextRecordAsSlPairList(struct lineFile *lf)
+/* Return next ra record as a slPair list instead of a hash, or NULL at end of file. Handy if you want to preserve the order.
+ * Each value is a cloneString copy of the line's value. Do a slPairFreeValsAndList on result when done. */
+{
+struct slPair *list = NULL;
+char *key, *val;
+if (!raSkipLeadingEmptyLines(lf))
+ return NULL;
+while (raNextTagVal(lf, &key, &val))
+ slPairAdd(&list, key, cloneString(val));
+slReverse(&list);
+return list;
+}
+
struct hash *raFromString(char *string)
/* Build a hash of key/value pairs from a string holding one
 * "key value" pair per line.  The first word of each line is the
 * key; the rest of the line, with leading white space skipped, is
 * the value, which is copied into the hash's local memory.  Parsing
 * is done on a clone of the string, which is freed before returning.
 * As with raNextRecord, freeHash the result when done. */
{
char *copy = cloneString(string);
struct hash *ra = newHash(7);
char *pos = skipLeadingSpaces(copy);

while (pos != NULL && pos[0] != 0)
    {
    /* Terminate the current line and remember where the next starts. */
    char *rest = strchr(pos, '\n');
    char *tag, *text;
    if (rest != NULL)
        {
        *rest = 0;
        rest += 1;
        }
    tag = nextWord(&pos);
    text = lmCloneString(ra->lm, skipLeadingSpaces(pos));
    hashAdd(ra, tag, text);
    pos = skipLeadingSpaces(rest);
    }
freeMem(copy);
return ra;
}
char *raFoldInOneRetName(struct lineFile *lf, struct hash *hashOfHash)
/* Fold one record from an ra file into hashOfHash, a hash whose
 * values are themselves ra hashes keyed by record name.  The first
 * nonempty non-comment line must be a "name" line, otherwise this
 * aborts.  If hashOfHash has no ra under that name a new one is
 * made and added; fields already present in the ra are overridden
 * by fields of the same name in the record.
 * Returns the value of the ra's "name" field, or NULL when there is
 * no further nonempty non-comment line to read. */
{
char *line, *tag, *recName;
struct hash *ra;
struct hashEl *el;

/* The record has to open with a name line. */
if (!lineFileNextReal(lf, &line))
    return NULL;
tag = nextWord(&line);
if (!sameString(tag, "name"))
    errAbort("Expecting 'name' line %d of %s, got %s",
        lf->lineIx, lf->fileName, tag);
recName = nextWord(&line);
if (recName == NULL)
    errAbort("Short name field line %d of %s", lf->lineIx, lf->fileName);

/* Look up the ra for this name, creating and registering it
 * if it is not there yet. */
ra = hashFindVal(hashOfHash, recName);
if (ra == NULL)
    {
    ra = newHash(7);
    hashAdd(hashOfHash, recName, ra);
    hashAdd(ra, "name", lmCloneString(ra->lm, recName));
    }

/* Everything up to the next blank line or end of file is a field;
 * lines starting with '#' are ignored. */
while (lineFileNext(lf, &line, NULL))
    {
    char *value;
    line = skipLeadingSpaces(line);
    if (line[0] == 0)
        break;
    if (line[0] == '#')
        continue;
    tag = nextWord(&line);
    value = skipLeadingSpaces(line);
    if (value == NULL)
        value = "";    /* Tag with nothing after it gets an empty value. */
    el = hashLookup(ra, tag);
    if (el != NULL)
        el->val = lmCloneString(ra->lm, value);
    else
        hashAdd(ra, tag, lmCloneString(ra->lm, value));
    }
return hashFindVal(ra, "name");
}
boolean raFoldInOne(struct lineFile *lf, struct hash *hashOfHash)
/* Fold one record from lf into hashOfHash by calling raFoldInOneRetName.
 * Returns TRUE if that call returned a name, FALSE if it returned NULL. */
{
return raFoldInOneRetName(lf, hashOfHash) != NULL;
}
void raFoldIn(char *fileName, struct hash *hashOfHash)
/* Fold every ra record in fileName into hashOfHash, adding new
 * ra's and fields and overriding fields of the same name that are
 * already there.  Does nothing if lineFileMayOpen returns NULL for
 * the file.  Aborts if the same record name comes up twice within
 * this one file. */
{
struct lineFile *lf = lineFileMayOpen(fileName, TRUE);
struct hash *seen;
char *name;

if (lf == NULL)
    return;

/* seen holds the names read from this file so far. */
seen = hashNew(0);
for (;;)
    {
    name = raFoldInOneRetName(lf, hashOfHash);
    if (name == NULL)
        break;
    if (hashLookup(seen, name))
        errAbort("%s duplicated in record ending line %d of %s", name,
            lf->lineIx, lf->fileName);
    hashAdd(seen, name, NULL);
    }
lineFileClose(&lf);
hashFree(&seen);
}
struct hash *raReadSingle(char *fileName)
/* Open fileName, read its first ra record with raNextRecord, close
 * the file, and return whatever raNextRecord gave back. */
{
struct lineFile *input = lineFileOpen(fileName, TRUE);
struct hash *firstRecord = raNextRecord(input);

lineFileClose(&input);
return firstRecord;
}
struct hash *raReadAll(char *fileName, char *keyField)
/* Read all ra records in fileName and return them in one hash whose
 * values are the per-record hashes.  Each record is stored under the
 * value it has for keyField; a record without that field causes an
 * abort. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct hash *all = hashNew(0);
struct hash *record;

for (record = raNextRecord(lf); record != NULL; record = raNextRecord(lf))
    {
    char *keyVal = hashFindVal(record, keyField);
    if (keyVal == NULL)
        errAbort("Couldn't find key field %s line %d of %s",
            keyField, lf->lineIx, lf->fileName);
    hashAdd(all, keyVal, record);
    }
lineFileClose(&lf);
return all;
}
+