c01f112aa3087b09f57757246028b93babb2c335
kent
  Tue Dec 21 14:10:25 2021 -0800
Adding new built-in function lookup(string, twoColFile)

diff --git src/lib/strex.c src/lib/strex.c
index ba8e4d4..7267328 100644
--- src/lib/strex.c
+++ src/lib/strex.c
@@ -44,30 +44,31 @@
 enum strexBuiltInFunc
 /* One of these for each builtIn.  We'll just do a switch to implement.
  * Each built in function needs a value here, to keep it simple there's
  * a correspondence between these names and the built in function name */
     {
     strexBuiltInTrim,
     strexBuiltInBetween,
     strexBuiltInWord,
     strexBuiltInNow,
     strexBuiltInMd5,
     strexBuiltInChop,
     strexBuiltInUncsv,
     strexBuiltInUntsv,
     strexBuiltInReplace,
     strexBuiltInFix,
+    strexBuiltInLookup,
     strexBuiltInStrip,
     strexBuiltInLen,
     strexBuiltInSymbol,
     strexBuiltInSymbolId,
     strexBuiltInLower,
     strexBuiltInUpper,
     strexBuiltInIn, 
     strexBuiltInStarts,
     strexBuiltInEnds,
     strexBuiltInSame,
     strexBuiltInTidy,
     strexBuiltInWarn,
     strexBuiltInError,
     strexBuiltInLetterRange,
     strexBuiltInWordRange,
@@ -180,30 +181,31 @@
     strexTypeInt, strexTypeInt};
 
 /* There's one element here for each built in function.  There's also a few switches you'll need to
  * fill in if you add a new built in function. */
 static struct strexBuiltIn builtins[] = {
     { "trim", strexBuiltInTrim, strexTypeString, 1, oneString, },
     { "between", strexBuiltInBetween, strexTypeString, 3, threeStrings },
     { "word", strexBuiltInWord, strexTypeString, 2, stringInt },
     { "now", strexBuiltInNow, strexTypeString, 0, NULL },
     { "md5", strexBuiltInMd5, strexTypeString, 1, oneString },
     { "chop", strexBuiltInChop, strexTypeString, 3, stringStringInt },
     { "uncsv", strexBuiltInUncsv, strexTypeString, 2, stringInt },
     { "untsv", strexBuiltInUntsv, strexTypeString, 2, stringInt },
     { "replace", strexBuiltInReplace, strexTypeString, 3, threeStrings },
     { "fix", strexBuiltInFix, strexTypeString, 3, threeStrings },
+    { "lookup", strexBuiltInLookup, strexTypeString, 2, twoStrings},
     { "strip", strexBuiltInStrip, strexTypeString, 2, twoStrings },
     { "len", strexBuiltInLen, strexTypeInt, 1, oneString},
     { "symbol", strexBuiltInSymbol, strexTypeString, 2, twoStrings },
     { "symbol_id", strexBuiltInSymbolId, strexTypeString, 2, twoStrings },
     { "upper", strexBuiltInUpper, strexTypeString, 1, oneString },
     { "lower", strexBuiltInLower, strexTypeString, 1, oneString },
     { "in", strexBuiltInIn, strexTypeBoolean, 2, twoStrings },
     { "starts_with", strexBuiltInStarts, strexTypeBoolean, 2, twoStrings}, 
     { "ends_with", strexBuiltInEnds, strexTypeBoolean, 2, twoStrings}, 
     { "same", strexBuiltInSame, strexTypeBoolean, 2, twoStrings}, 
     { "tidy", strexBuiltInTidy, strexTypeString, 3, threeStrings },
     { "warn", strexBuiltInWarn, strexTypeString, 1, oneString},
     { "error", strexBuiltInError, strexTypeString, 1, oneString},
     { "letter_range", strexBuiltInLetterRange, strexTypeString, 3, stringIntInt},
     { "word_range", strexBuiltInWordRange, strexTypeString, 3, stringIntInt},
@@ -1964,30 +1966,63 @@
     {
     idHash = hashNew(0);
     hashAdd(prefixHash, prefix, idHash);
     }
 char *id = hashFindVal(idHash, original);
 if (id == NULL)
     {
     char symbuf[128];
     safef(symbuf, sizeof(symbuf), "%s%d", prefix, idHash->elCount + 1);
     id = lmCloneString(idHash->lm, symbuf);
     hashAdd(idHash, original, id);
     }
 return id;
 }
 
+static struct hash *hashTwoColTsv(char *fileName)
+/* Load the two column tab-separated file fileName into a new hash: column one is the key, column two the value. */
+{
+struct lineFile *lf = lineFileOpen(fileName, TRUE);
+struct hash *table = hashNew(16);
+char *cols[3];  /* one slot beyond the two columns expected */
+for (;;)
+    {
+    int colCount = lineFileChopTab(lf, cols);
+    if (colCount == 0)
+        break;
+    lineFileExpectWords(lf, 2, colCount);
+    hashAdd(table, cols[0], lmCloneString(table->lm, cols[1]));  /* value is cloned into the hash's lm */
+    }
+lineFileClose(&lf);
+return table;
+}
+
+static char *lookupInTwoColFile(char *original, char *tsvName)
+/* Return emptyForNull() of the value stored under original in two column file tsvName; loaded files are cached. */
+{
+static struct hash *loadedFiles = NULL;  /* file name -> hash built from that file */
+if (!loadedFiles)
+    loadedFiles = hashNew(0);
+struct hash *table = hashFindVal(loadedFiles, tsvName);
+if (!table)
+    {
+    table = hashTwoColTsv(tsvName);  /* first request for this file: read it once */
+    hashAdd(loadedFiles, tsvName, table);
+    }
+return emptyForNull(hashFindVal(table, original));
+}
+
 char *finalMatchToSubstring(char *haystack,char *needle)
 /* Return pointer to the last occurrence of needle in haystack, or NULL if there is none.
  * An empty needle matches at the terminating zero of haystack. */
 {
 char *match = NULL;
 while ((haystack = strstr(haystack, needle)) != NULL)
     {
     match = haystack;
     if (*haystack == '\0')  // Only an empty needle matches here; stepping on would leave the string
         break;
     haystack += 1;  // Don't repeat last match
     }
 return match;
 }
 
@@ -2074,30 +2109,37 @@
 	break;
 	}
     case strexBuiltInFix:
         {
         struct strexEval string = strexLocalEval(p->children, run);
         struct strexEval oldVal = strexLocalEval(p->children->next, run);
         struct strexEval newVal = strexLocalEval(p->children->next->next, run);
 	if (sameString(string.val.s, oldVal.val.s))
 	    {
 	    res.val.s = newVal.val.s;
 	    }
 	else
 	    res.val.s = string.val.s;
 	break;
 	}
+    case strexBuiltInLookup:
+        {
+        struct strexEval string = strexLocalEval(p->children, run);
+        struct strexEval fileName = strexLocalEval(p->children->next, run);
+	res.val.s = lookupInTwoColFile(string.val.s, fileName.val.s);
+	break;
+	}
     case strexBuiltInStrip:
         {
         struct strexEval a = strexLocalEval(p->children, run);
         struct strexEval b = strexLocalEval(p->children->next, run);
 	res.val.s = stripAll(a.val.s, b.val.s, lm);
 	break;
 	}
     case strexBuiltInLen:
         {
         struct strexEval a = strexLocalEval(p->children, run);
 	res.val.i = strlen(a.val.s);
 	break;
 	}
     case strexBuiltInSymbol:  // Convert string to something could use as a C language symbol
         {