src/lib/ra.c 814da1fd6d9e117ec28ae41a2d560dbd957669de

814da1fd6d9e117ec28ae41a2d560dbd957669de
tdreszer
  Tue Nov 29 12:47:39 2011 -0800
Improved cv efficiency by only reading the cv.ra file once.
diff --git src/lib/ra.c src/lib/ra.c
index 72832d0..8116513 100644
--- src/lib/ra.c
+++ src/lib/ra.c
@@ -303,60 +303,95 @@
     if (key == NULL)
         errAbort("Couldn't find key field %s line %d of %s",
 		keyField, lf->lineIx, lf->fileName);
     hashAdd(bigHash, key, hash);
     }
 lineFileClose(&lf);
 return bigHash;
 }
 
 struct hash *raReadWithFilter(char *fileName, char *keyField,char *filterKey,char *filterValue)
 /* Return hash that contains all filtered ra records in file keyed by given field, which must exist.
  * The values of the hash are themselves hashes.  The filter is a key/value pair that must exist.
  * Example raReadWithFilter(file,"term","type","antibody"): returns hash of hashes of every term with type=antibody */
 {
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
-struct hash *bigHash = hashNew(10);
+struct hash *bigHash = hashNew(14); // NOTE(review): larger size argument; presumably a power-of-two exponent - confirm against hashNew
 struct hash *hash;
 while ((hash = raNextRecord(lf)) != NULL)
     {
     char *key = hashFindVal(hash, keyField);
     if (key == NULL)
         errAbort("Couldn't find key field %s line %d of %s",
                 keyField, lf->lineIx, lf->fileName);
     if (filterKey != NULL)
         {
         char *filter = hashFindVal(hash, filterKey);
         if (filter == NULL)
             {
             hashFree(&hash);
             continue;
             }
         if (filterValue != NULL && differentString(filterValue,filter))
             {
             hashFree(&hash);
             continue;
             }
         }
         hashAdd(bigHash, key, hash);
     }
 lineFileClose(&lf);
 if (hashNumEntries(bigHash) == 0)
     hashFree(&bigHash);
 return bigHash;
 }
 
+struct hash *raReadThreeLevels(char *fileName, char *lowKeyField, char *middleKeyField)
+/* Return 3 level hash that contains all ra records in file.  Every record must have both
+ * lowKeyField and middleKeyField (errAbort is called otherwise).  Records are grouped into one
+ * sub hash per distinct middleKeyField value, and keyed within it by their lowKeyField value.
+ * Example raReadThreeLevels("cv.ra","term","type"): returns hash of 'type' hashes of 'term' hashes of every cv.ra stanza */
+{
+struct lineFile *lf = lineFileOpen(fileName, TRUE);
+struct hash *topHash = hashNew(0); // Not expecting that many types
+struct hash *bottomHash; // one record at a time, as returned by raNextRecord
+while ((bottomHash = raNextRecord(lf)) != NULL)
+    {
+    char *lowKey = hashFindVal(bottomHash, lowKeyField);
+    if (lowKey == NULL)
+        errAbort("Couldn't find key field %s line %d of %s",
+                lowKeyField, lf->lineIx, lf->fileName);
+
+    char *middleKey = hashFindVal(bottomHash, middleKeyField);
+    if (middleKey == NULL)
+        errAbort("Couldn't find middle key field %s line %d of %s",
+                middleKeyField, lf->lineIx, lf->fileName);
+
+    struct hash *middleHash = hashFindVal(topHash, middleKey); // sub hash for this middleKey, if already created
+    if (middleHash == NULL)
+        {
+        middleHash = hashNew(16); // could be quite a few terms per type.
+        hashAdd(topHash, middleKey, middleHash);
+        }
+    hashAdd(middleHash, lowKey, bottomHash); // the record hash becomes the value stored under lowKey
+    }
+lineFileClose(&lf);
+if (hashNumEntries(topHash) == 0) // no records were added: free the empty hash instead of returning it
+    hashFree(&topHash); // NOTE(review): presumably leaves topHash NULL so the caller receives NULL - confirm
+return topHash;
+}
+
 struct hash *raTagVals(char *fileName, char *tag)
 /* Return a hash of all values of given tag seen in any stanza of ra file. */
 {
 struct hash *hash = hashNew(0);
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 char *line;
 while (lineFileNextFullReal(lf, &line))
     {
     char *word = nextWord(&line);
     if (sameString(word, tag))
         {
 	char *val = trimSpaces(line);
 	if (!hashLookup(hash, val))
 	    hashAdd(hash, val, NULL);
 	}