814da1fd6d9e117ec28ae41a2d560dbd957669de tdreszer Tue Nov 29 12:47:39 2011 -0800 Improved cv efficiency by only reading the cv.ra file once. diff --git src/lib/ra.c src/lib/ra.c index 72832d0..8116513 100644 --- src/lib/ra.c +++ src/lib/ra.c @@ -303,60 +303,95 @@ if (key == NULL) errAbort("Couldn't find key field %s line %d of %s", keyField, lf->lineIx, lf->fileName); hashAdd(bigHash, key, hash); } lineFileClose(&lf); return bigHash; } struct hash *raReadWithFilter(char *fileName, char *keyField,char *filterKey,char *filterValue) /* Return hash that contains all filtered ra records in file keyed by given field, which must exist. * The values of the hash are themselves hashes. The filter is a key/value pair that must exist. * Example raReadWithFilter(file,"term","type","antibody"): returns hash of hashes of every term with type=antibody */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); -struct hash *bigHash = hashNew(10); +struct hash *bigHash = hashNew(14); struct hash *hash; while ((hash = raNextRecord(lf)) != NULL) { char *key = hashFindVal(hash, keyField); if (key == NULL) errAbort("Couldn't find key field %s line %d of %s", keyField, lf->lineIx, lf->fileName); if (filterKey != NULL) { char *filter = hashFindVal(hash, filterKey); if (filter == NULL) { hashFree(&hash); continue; } if (filterValue != NULL && differentString(filterValue,filter)) { hashFree(&hash); continue; } } hashAdd(bigHash, key, hash); } lineFileClose(&lf); if (hashNumEntries(bigHash) == 0) hashFree(&bigHash); return bigHash; } +struct hash *raReadThreeLevels(char *fileName, char *lowKeyField, char *middleKeyField) +/* Return 3 level hash that contains all ra records in file: the top hash is keyed by each record's middleKeyField value, + * each sub hash is keyed by the record's lowKeyField value, and each sub hash value is the record's own hash. 
+ * Both fields must exist in every record (errAbort otherwise). Example raReadThreeLevels("cv.ra","term","type"): + * returns hash of 'type' hashes of 'term' hashes of every stanza in cv.ra. The top hash is freed before return if no records were read. */ +{ +struct lineFile *lf = lineFileOpen(fileName, TRUE); +struct hash *topHash = hashNew(0); // Not expecting that many types +struct hash *bottomHash; +while ((bottomHash = raNextRecord(lf)) != NULL) + { + char *lowKey = hashFindVal(bottomHash, lowKeyField); + if (lowKey == NULL) + errAbort("Couldn't find key field %s line %d of %s", + lowKeyField, lf->lineIx, lf->fileName); + + char *middleKey = hashFindVal(bottomHash, middleKeyField); + if (middleKey == NULL) + errAbort("Couldn't find middle key field %s line %d of %s", + middleKeyField, lf->lineIx, lf->fileName); + + struct hash *middleHash = hashFindVal(topHash, middleKey); + if (middleHash == NULL) + { + middleHash = hashNew(16); // could be quite a few terms per type. + hashAdd(topHash, middleKey, middleHash); + } + hashAdd(middleHash, lowKey, bottomHash); + } +lineFileClose(&lf); +if (hashNumEntries(topHash) == 0) + hashFree(&topHash); +return topHash; +} + struct hash *raTagVals(char *fileName, char *tag) /* Return a hash of all values of given tag seen in any stanza of ra file. */ { struct hash *hash = hashNew(0); struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line; while (lineFileNextFullReal(lf, &line)) { char *word = nextWord(&line); if (sameString(word, tag)) { char *val = trimSpaces(line); if (!hashLookup(hash, val)) hashAdd(hash, val, NULL); }