c01f112aa3087b09f57757246028b93babb2c335 kent Tue Dec 21 14:10:25 2021 -0800 Adding new built-in function lookup(string, twoColFile) diff --git src/lib/strex.c src/lib/strex.c index ba8e4d4..7267328 100644 --- src/lib/strex.c +++ src/lib/strex.c @@ -44,30 +44,31 @@ enum strexBuiltInFunc /* One of these for each builtIn. We'll just do a switch to implement. * Each built in function needs a value here, to keep it simple there's * aa correspondence between these names and the built in function name */ { strexBuiltInTrim, strexBuiltInBetween, strexBuiltInWord, strexBuiltInNow, strexBuiltInMd5, strexBuiltInChop, strexBuiltInUncsv, strexBuiltInUntsv, strexBuiltInReplace, strexBuiltInFix, + strexBuiltInLookup, strexBuiltInStrip, strexBuiltInLen, strexBuiltInSymbol, strexBuiltInSymbolId, strexBuiltInLower, strexBuiltInUpper, strexBuiltInIn, strexBuiltInStarts, strexBuiltInEnds, strexBuiltInSame, strexBuiltInTidy, strexBuiltInWarn, strexBuiltInError, strexBuiltInLetterRange, strexBuiltInWordRange, @@ -180,30 +181,31 @@ strexTypeInt, strexTypeInt}; /* There's one element here for each built in function. There's also a few switches you'll need to * fill in if you add a new built in function. */ static struct strexBuiltIn builtins[] = { { "trim", strexBuiltInTrim, strexTypeString, 1, oneString, }, { "between", strexBuiltInBetween, strexTypeString, 3, threeStrings }, { "word", strexBuiltInWord, strexTypeString, 2, stringInt }, { "now", strexBuiltInNow, strexTypeString, 0, NULL }, { "md5", strexBuiltInMd5, strexTypeString, 1, oneString }, { "chop", strexBuiltInChop, strexTypeString, 3, stringStringInt }, { "uncsv", strexBuiltInUncsv, strexTypeString, 2, stringInt }, { "untsv", strexBuiltInUntsv, strexTypeString, 2, stringInt }, { "replace", strexBuiltInReplace, strexTypeString, 3, threeStrings }, { "fix", strexBuiltInFix, strexTypeString, 3, threeStrings }, + { "lookup", strexBuiltInLookup, strexTypeString, 2, twoStrings}, { "strip", strexBuiltInStrip, strexTypeString, 2, twoStrings }, { "len", strexBuiltInLen, strexTypeInt, 1, oneString}, { "symbol", strexBuiltInSymbol, strexTypeString, 2, twoStrings }, { "symbol_id", strexBuiltInSymbolId, strexTypeString, 2, twoStrings }, { "upper", strexBuiltInUpper, strexTypeString, 1, oneString }, { "lower", strexBuiltInLower, strexTypeString, 1, oneString }, { "in", strexBuiltInIn, strexTypeBoolean, 2, twoStrings }, { "starts_with", strexBuiltInStarts, strexTypeBoolean, 2, twoStrings}, { "ends_with", strexBuiltInEnds, strexTypeBoolean, 2, twoStrings}, { "same", strexBuiltInSame, strexTypeBoolean, 2, twoStrings}, { "tidy", strexBuiltInTidy, strexTypeString, 3, threeStrings }, { "warn", strexBuiltInWarn, strexTypeString, 1, oneString}, { "error", strexBuiltInError, strexTypeString, 1, oneString}, { "letter_range", strexBuiltInLetterRange, strexTypeString, 3, stringIntInt}, { "word_range", strexBuiltInWordRange, strexTypeString, 3, stringIntInt}, @@ -1964,30 +1966,63 @@ { idHash = hashNew(0); hashAdd(prefixHash, prefix, idHash); } char *id = hashFindVal(idHash, original); if (id == NULL) { char symbuf[128]; safef(symbuf, sizeof(symbuf), "%s%d", prefix, idHash->elCount + 1); id = lmCloneString(idHash->lm, symbuf); hashAdd(idHash, original, id); } return id; } +static struct hash *hashTwoColTsv(char *fileName) +/* Given a two column file (key, value) return a hash. */ +{ +struct lineFile *lf = lineFileOpen(fileName, TRUE); +struct hash *hash = hashNew(16); +char *row[3]; +int fields = 0; +while ((fields = lineFileChopTab(lf, row)) != 0) + { + lineFileExpectWords(lf, 2, fields); + char *name = row[0]; + char *value = lmCloneString(hash->lm, row[1]); + hashAdd(hash, name, value); + } +lineFileClose(&lf); +return hash; +} + +static char *lookupInTwoColFile(char *original, char *tsvName) +/* Lookup value in a two column file which we cache */ +{ +static struct hash *fileHash = NULL; +if (fileHash == NULL) + fileHash = hashNew(0); +struct hash *idHash = hashFindVal(fileHash, tsvName); +if (idHash == NULL) + { + idHash = hashTwoColTsv(tsvName); + hashAdd(fileHash, tsvName, idHash); + } +return emptyForNull(hashFindVal(idHash, original)); +} + char *finalMatchToSubstring(char *haystack,char *needle) /* Return the final position of needle in haystack */ { char *match = NULL; for (;;) { haystack = strstr(haystack, needle); if (haystack == NULL) break; match = haystack; haystack += 1; // Don't repeat last match } return match; } @@ -2074,30 +2109,37 @@ break; } case strexBuiltInFix: { struct strexEval string = strexLocalEval(p->children, run); struct strexEval oldVal = strexLocalEval(p->children->next, run); struct strexEval newVal = strexLocalEval(p->children->next->next, run); if (sameString(string.val.s, oldVal.val.s)) { res.val.s = newVal.val.s; } else res.val.s = string.val.s; break; } + case strexBuiltInLookup: + { + struct strexEval string = strexLocalEval(p->children, run); + struct strexEval fileName = strexLocalEval(p->children->next, run); + res.val.s = lookupInTwoColFile(string.val.s, fileName.val.s); + break; + } case strexBuiltInStrip: { struct strexEval a = strexLocalEval(p->children, run); struct strexEval b = strexLocalEval(p->children->next, run); res.val.s = stripAll(a.val.s, b.val.s, lm); break; } case strexBuiltInLen: { struct strexEval a = strexLocalEval(p->children, run); res.val.i = strlen(a.val.s); break; } case strexBuiltInSymbol: // Convert string to something could use as a C language symbol {