1b99bc125d897c0712377c4fb4d23ff9f6cbca9f
angie
  Fri Feb 25 11:43:27 2011 -0800
New src/inc/regexHelper.h module replaces the regex convenience functions that were previously down in hg/lib/hgFindSpecCustom.c.
The primary motivation for this is so I can use regex functions in
the new src/lib/vcf.c for Feature #2821 (VCF parser), but I hope
this will make it easier in general to use regexes in any new code.

diff --git src/hg/lib/hgFindSpecCustom.c src/hg/lib/hgFindSpecCustom.c
index 5d9c358..b4f9eba 100644
--- src/hg/lib/hgFindSpecCustom.c
+++ src/hg/lib/hgFindSpecCustom.c
@@ -1,119 +1,66 @@
 /* hgFindSpecCustom - custom (not autoSQL generated) code for working
  * with hgFindSpec.  This code is concerned with making the hgFindSpec
  * MySQL table out of the trackDb.ra files. */
 
 #include "common.h"
 #include "linefile.h"
 #include "jksql.h"
 #include "hgFindSpec.h"
 #include "hdb.h"
 #include "hui.h"
 #include "ra.h"
 #include "hash.h"
 #include "obscure.h"
-#include <regex.h>
+#include "regexHelper.h"
 #include "trackDb.h"
 
-static char const rcsid[] = "$Id: hgFindSpecCustom.c,v 1.16 2010/03/29 23:11:37 angie Exp $";
-
 /* ----------- End of AutoSQL generated code --------------------- */
 
-#define REGEX_OPTIONS (REG_NOSUB | REG_EXTENDED | REG_ICASE)
-#define REGEX_SUBSTR_OPTIONS (REG_EXTENDED | REG_ICASE)
-
-static regex_t *compileRegex(char *exp, char *what, int compileFlags)
-/* Compile exp (or die with an informative-as-possible error message). 
- * Cache pre-compiled regex's internally (so don't free result after use). */
-{
-static struct hash *reHash = NULL;
-struct hashEl *hel = NULL;
-
-if (reHash == NULL)
-    reHash = newHash(10);
-hel = hashLookup(reHash, exp);
-if (hel != NULL)
-    return((regex_t *)hel->val);
-else
-    {
-    regex_t *compiledExp = NULL;
-    int errNum = 0;
-    AllocVar(compiledExp);
-    errNum = regcomp(compiledExp, exp, compileFlags);
-    if (errNum != 0)
-	{
-	char errBuf[512];
-	regerror(errNum, compiledExp, errBuf, sizeof(errBuf));
-	errAbort("%s \"%s\" got regular expression compilation error %d:\n%s\n",
-		 what, exp, errNum, errBuf);
-	}
-    hashAdd(reHash, exp, compiledExp);
-    return(compiledExp);
-    }
-}
-
-boolean matchRegex(char *name, char *exp)
-/* Return TRUE if name matches the regular expression pattern
- * (case insensitive). */
-{
-regex_t *compiledExp = compileRegex(exp, "Regular expression", REGEX_OPTIONS);
-return(regexec(compiledExp, name, 0, NULL, 0) == 0);
-}
-
-boolean matchRegexSubstr(char *name, char *exp, regmatch_t substrArr[],
-			 size_t substrArrSize)
-/* Return TRUE if name matches exp (case insensitive); regexec fills in 
- * substrArr with substring offsets. */
-{
-regex_t *compiledExp = compileRegex(exp, "Regular expression w/substrings",
-				    REGEX_SUBSTR_OPTIONS);
-return(regexec(compiledExp, name, substrArrSize, substrArr, 0) == 0);
-}
-
-
 static void anchorTermRegex(struct hgFindSpec *hfs)
 /* termRegex must match the whole term.  If it doesn't already start with 
  * ^ and end in $, add those (no need to make the trackDb.ra file even 
  * harder to read with those extra magic chars :). */
 {
 if (isNotEmpty(hfs->termRegex))
     {
     char *orig = hfs->termRegex;
     char first = orig[0];
     char last  = orig[strlen(orig)-1];
     char buf[512];
     safef(buf, sizeof(buf), "%s%s%s",
 	  (first == '^') ? "" : "^",
 	  orig,
 	  (last  == '$') ? "" : "$");
     freeMem(hfs->termRegex);
     hfs->termRegex = cloneString(buf);
     }
 else if (hfs->termRegex == NULL)
     hfs->termRegex = "";
 }
 
 
 static void checkTermRegex(struct hgFindSpec *hfs)
 /* Make sure termRegex compiles OK. */
 {
 if (isNotEmpty(hfs->termRegex))
     {
-    regex_t *compiledExp = NULL;
     char buf[256];
     safef(buf, sizeof(buf), "hfsPolish: search %s: termRegex", hfs->searchName);
-    compiledExp = compileRegex(hfs->termRegex, buf, REGEX_OPTIONS);
+    const regex_t *compiledExp = regexCompile(hfs->termRegex, buf,
+					      (REG_EXTENDED | REG_ICASE | REG_NOSUB));
+    compiledExp = NULL;  // Avoid compiler warning about unused variable / return value
     }
 }
 
 static void escapeTermRegex(struct hgFindSpec *hfs)
 /* Escape any '\' characters in termRegex for sql storage. */
 {
 if (isNotEmpty(hfs->termRegex))
     {
     char *orig = hfs->termRegex;
     hfs->termRegex = makeEscapedString(orig, '\\');
     freeMem(orig);
     }
 }
 
 
@@ -160,94 +107,94 @@
     }
 return(queryFormat);
 }
 
 static char *queryFormatRegex =
     "^select [[:alnum:]]+, ?[[:alnum:]]+, ?[[:alnum:]]+, ?[[:alnum:]]+ "
     "from %s where [[:alnum:]]+ (r?like|=) ['\"]?.*%s.*['\"]?$";
 static char *exactTermFormatRegex = "['\"]?.*%s.*['\"]?$";
 static char *prefixTermFormatRegex = "['\"]?%s.*%%['\"]?$";
 
 static void checkQueryFormat(struct hgFindSpec *hfs)
 /* Make sure query looks right and jives with searchMethod. */
 {
 if (isNotEmpty(hfs->query) && !hgFindSpecSetting(hfs, "dontCheckQueryFormat"))
     {
-    if (! matchRegex(hfs->query, queryFormatRegex))
+    if (! regexMatchNoCase(hfs->query, queryFormatRegex))
 	errAbort("hfsPolish: search %s: query needs to be of the format "
 		 "\"select field1,field2,field3,field4 from %%s "
 		 "where field4 like '%%s'\" "
 		 "(for prefix, '%%s%%%%'; for fuzzy, '%%%%%%s%%%%'), "
 		 "but instead is this:\n%s",
 		 hfs->searchName, hfs->query);
     if (isNotEmpty(hfs->xrefQuery))
 	{
-	if (!matchRegex(hfs->query, exactTermFormatRegex))
+	if (!regexMatchNoCase(hfs->query, exactTermFormatRegex))
 	    errAbort("hfsPolish: search %s: there is an xrefQuery so query "
 		     "needs to end with %s (exact match to xref results).",
 		     hfs->searchName, exactTermFormat);
 	}
     else
 	{
 	if (sameString(hfs->searchMethod, "fuzzy") &&
 	    !endsWith(hfs->query, fuzzyTermFormat))
 	    errAbort("hfsPolish: search %s: searchMethod is fuzzy so query "
 		     "needs to end with %s.",
 		     hfs->searchName, fuzzyTermFormat);
 	else if (sameString(hfs->searchMethod, "prefix") &&
-		 !matchRegex(hfs->query, prefixTermFormatRegex))
+		 !regexMatchNoCase(hfs->query, prefixTermFormatRegex))
 	    errAbort("hfsPolish: search %s: searchMethod is prefix so query "
 		     "needs to end with %s.",
 		     hfs->searchName, prefixTermFormat);
 	
 	else if (sameString(hfs->searchMethod, "exact") &&
-		 !matchRegex(hfs->query, exactTermFormatRegex))
+		 !regexMatchNoCase(hfs->query, exactTermFormatRegex))
 	    errAbort("hfsPolish: search %s: searchMethod is exact so query "
 		     "needs to end with %s.",
 		     hfs->searchName, exactTermFormat);
 	}
     }
 }
 
 static char *xrefQueryFormatRegex =
     "select [[:alnum:]]+, ?[[:alnum:]]+(\\([^)]+\\))? from %s where [[:alnum:]]+ (like|=) ['\"]?[%s]+['\"]?";
 
 static void checkXrefQueryFormat(struct hgFindSpec *hfs)
 /* Make sure xrefQuery looks right and jives with searchMethod. */
 {
 if (isNotEmpty(hfs->xrefQuery) &&
     !hgFindSpecSetting(hfs, "dontCheckXrefQueryFormat"))
     {
-    if (! matchRegex(hfs->xrefQuery, xrefQueryFormatRegex))
+    if (! regexMatchNoCase(hfs->xrefQuery, xrefQueryFormatRegex))
 	errAbort("hfsPolish: search %s: xrefQuery needs to be of the format "
 		 "\"select field1,field2 from %%s where field2 like '%%s'\" "
 		 "(for prefix, '%%s%%%%'; for exact, '%%%%%%s%%%%'), "
 		 "but instead is this:\n%s",
 		 hfs->searchName, hfs->xrefQuery);
     if (sameString(hfs->searchMethod, "fuzzy") &&
 	!endsWith(hfs->xrefQuery, fuzzyTermFormat))
 	errAbort("hfsPolish: search %s: searchMethod is fuzzy so xrefQuery "
 		 "needs to end with %s.",
 		 hfs->searchName, fuzzyTermFormat);
     else if (sameString(hfs->searchMethod, "prefix") &&
-	     !matchRegex(hfs->xrefQuery, prefixTermFormatRegex))
+	     !regexMatchNoCase(hfs->xrefQuery, prefixTermFormatRegex))
 	errAbort("hfsPolish: search %s: searchMethod is prefix so xrefQuery "
 		 "needs to end with %s.",
 		 hfs->searchName, prefixTermFormat);
 	
     else if (sameString(hfs->searchMethod, "exact") &&
-	     !matchRegex(hfs->xrefQuery, exactTermFormatRegex))
+	     !regexMatchNoCase(hfs->xrefQuery, exactTermFormatRegex))
 	errAbort("hfsPolish: search %s: searchMethod is exact so xrefQuery "
 		 " needs to end with %s.",
 		 hfs->searchName, exactTermFormat);
     }
 }
 
 
 static void hgFindSpecPolish(char *db, struct hgFindSpec *hfs)
 /* Fill in missing values with defaults, check for consistency. */
 {
 /* At least one of {searchName, searchTable} must be defined. */
 if ((hfs->searchName == NULL) && (hfs->searchTable == NULL))
     errAbort("hfsPolish: searchName or searchTable must be defined.\n");
 if (hfs->searchName == NULL)
     hfs->searchName = cloneString(hfs->searchTable);