1b99bc125d897c0712377c4fb4d23ff9f6cbca9f angie Fri Feb 25 11:43:27 2011 -0800 New src/inc/regexHelper.h module replaces the regex convenience functions that were previously down in hg/lib/hgFindSpecCustom.c. The primary motivation for this is so I can use regex functions in the new src/lib/vcf.c for Feature #2821 (VCF parser), but I hope this will make it easier in general to use regexes in any new code. diff --git src/hg/lib/hgFindSpecCustom.c src/hg/lib/hgFindSpecCustom.c index 5d9c358..b4f9eba 100644 --- src/hg/lib/hgFindSpecCustom.c +++ src/hg/lib/hgFindSpecCustom.c @@ -1,119 +1,66 @@ /* hgFindSpecCustom - custom (not autoSQL generated) code for working * with hgFindSpec. This code is concerned with making the hgFindSpec * MySQL table out of the trackDb.ra files. */ #include "common.h" #include "linefile.h" #include "jksql.h" #include "hgFindSpec.h" #include "hdb.h" #include "hui.h" #include "ra.h" #include "hash.h" #include "obscure.h" -#include <regex.h> +#include "regexHelper.h" #include "trackDb.h" -static char const rcsid[] = "$Id: hgFindSpecCustom.c,v 1.16 2010/03/29 23:11:37 angie Exp $"; - /* ----------- End of AutoSQL generated code --------------------- */ -#define REGEX_OPTIONS (REG_NOSUB | REG_EXTENDED | REG_ICASE) -#define REGEX_SUBSTR_OPTIONS (REG_EXTENDED | REG_ICASE) - -static regex_t *compileRegex(char *exp, char *what, int compileFlags) -/* Compile exp (or die with an informative-as-possible error message). - * Cache pre-compiled regex's internally (so don't free result after use). 
*/ -{ -static struct hash *reHash = NULL; -struct hashEl *hel = NULL; - -if (reHash == NULL) - reHash = newHash(10); -hel = hashLookup(reHash, exp); -if (hel != NULL) - return((regex_t *)hel->val); -else - { - regex_t *compiledExp = NULL; - int errNum = 0; - AllocVar(compiledExp); - errNum = regcomp(compiledExp, exp, compileFlags); - if (errNum != 0) - { - char errBuf[512]; - regerror(errNum, compiledExp, errBuf, sizeof(errBuf)); - errAbort("%s \"%s\" got regular expression compilation error %d:\n%s\n", - what, exp, errNum, errBuf); - } - hashAdd(reHash, exp, compiledExp); - return(compiledExp); - } -} - -boolean matchRegex(char *name, char *exp) -/* Return TRUE if name matches the regular expression pattern - * (case insensitive). */ -{ -regex_t *compiledExp = compileRegex(exp, "Regular expression", REGEX_OPTIONS); -return(regexec(compiledExp, name, 0, NULL, 0) == 0); -} - -boolean matchRegexSubstr(char *name, char *exp, regmatch_t substrArr[], - size_t substrArrSize) -/* Return TRUE if name matches exp (case insensitive); regexec fills in - * substrArr with substring offsets. */ -{ -regex_t *compiledExp = compileRegex(exp, "Regular expression w/substrings", - REGEX_SUBSTR_OPTIONS); -return(regexec(compiledExp, name, substrArrSize, substrArr, 0) == 0); -} - - static void anchorTermRegex(struct hgFindSpec *hfs) /* termRegex must match the whole term. If it doesn't already start with * ^ and end in $, add those (no need to make the trackDb.ra file even * harder to read with those extra magic chars :). */ { if (isNotEmpty(hfs->termRegex)) { char *orig = hfs->termRegex; char first = orig[0]; char last = orig[strlen(orig)-1]; char buf[512]; safef(buf, sizeof(buf), "%s%s%s", (first == '^') ? "" : "^", orig, (last == '$') ? "" : "$"); freeMem(hfs->termRegex); hfs->termRegex = cloneString(buf); } else if (hfs->termRegex == NULL) hfs->termRegex = ""; } static void checkTermRegex(struct hgFindSpec *hfs) /* Make sure termRegex compiles OK. 
*/ { if (isNotEmpty(hfs->termRegex)) { - regex_t *compiledExp = NULL; char buf[256]; safef(buf, sizeof(buf), "hfsPolish: search %s: termRegex", hfs->searchName); - compiledExp = compileRegex(hfs->termRegex, buf, REGEX_OPTIONS); + const regex_t *compiledExp = regexCompile(hfs->termRegex, buf, + (REG_EXTENDED | REG_ICASE | REG_NOSUB)); + compiledExp = NULL; // Avoid compiler warning about unused variable / return value } } static void escapeTermRegex(struct hgFindSpec *hfs) /* Escape any '\' characters in termRegex for sql storage. */ { if (isNotEmpty(hfs->termRegex)) { char *orig = hfs->termRegex; hfs->termRegex = makeEscapedString(orig, '\\'); freeMem(orig); } } @@ -160,94 +107,94 @@ } return(queryFormat); } static char *queryFormatRegex = "^select [[:alnum:]]+, ?[[:alnum:]]+, ?[[:alnum:]]+, ?[[:alnum:]]+ " "from %s where [[:alnum:]]+ (r?like|=) ['\"]?.*%s.*['\"]?$"; static char *exactTermFormatRegex = "['\"]?.*%s.*['\"]?$"; static char *prefixTermFormatRegex = "['\"]?%s.*%%['\"]?$"; static void checkQueryFormat(struct hgFindSpec *hfs) /* Make sure query looks right and jives with searchMethod. */ { if (isNotEmpty(hfs->query) && !hgFindSpecSetting(hfs, "dontCheckQueryFormat")) { - if (! matchRegex(hfs->query, queryFormatRegex)) + if (! 
regexMatchNoCase(hfs->query, queryFormatRegex)) errAbort("hfsPolish: search %s: query needs to be of the format " "\"select field1,field2,field3,field4 from %%s " "where field4 like '%%s'\" " "(for prefix, '%%s%%%%'; for fuzzy, '%%%%%%s%%%%'), " "but instead is this:\n%s", hfs->searchName, hfs->query); if (isNotEmpty(hfs->xrefQuery)) { - if (!matchRegex(hfs->query, exactTermFormatRegex)) + if (!regexMatchNoCase(hfs->query, exactTermFormatRegex)) errAbort("hfsPolish: search %s: there is an xrefQuery so query " "needs to end with %s (exact match to xref results).", hfs->searchName, exactTermFormat); } else { if (sameString(hfs->searchMethod, "fuzzy") && !endsWith(hfs->query, fuzzyTermFormat)) errAbort("hfsPolish: search %s: searchMethod is fuzzy so query " "needs to end with %s.", hfs->searchName, fuzzyTermFormat); else if (sameString(hfs->searchMethod, "prefix") && - !matchRegex(hfs->query, prefixTermFormatRegex)) + !regexMatchNoCase(hfs->query, prefixTermFormatRegex)) errAbort("hfsPolish: search %s: searchMethod is prefix so query " "needs to end with %s.", hfs->searchName, prefixTermFormat); else if (sameString(hfs->searchMethod, "exact") && - !matchRegex(hfs->query, exactTermFormatRegex)) + !regexMatchNoCase(hfs->query, exactTermFormatRegex)) errAbort("hfsPolish: search %s: searchMethod is exact so query " "needs to end with %s.", hfs->searchName, exactTermFormat); } } } static char *xrefQueryFormatRegex = "select [[:alnum:]]+, ?[[:alnum:]]+(\\([^)]+\\))? from %s where [[:alnum:]]+ (like|=) ['\"]?[%s]+['\"]?"; static void checkXrefQueryFormat(struct hgFindSpec *hfs) /* Make sure xrefQuery looks right and jives with searchMethod. */ { if (isNotEmpty(hfs->xrefQuery) && !hgFindSpecSetting(hfs, "dontCheckXrefQueryFormat")) { - if (! matchRegex(hfs->xrefQuery, xrefQueryFormatRegex)) + if (! 
regexMatchNoCase(hfs->xrefQuery, xrefQueryFormatRegex)) errAbort("hfsPolish: search %s: xrefQuery needs to be of the format " "\"select field1,field2 from %%s where field2 like '%%s'\" " "(for prefix, '%%s%%%%'; for exact, '%%%%%%s%%%%'), " "but instead is this:\n%s", hfs->searchName, hfs->xrefQuery); if (sameString(hfs->searchMethod, "fuzzy") && !endsWith(hfs->xrefQuery, fuzzyTermFormat)) errAbort("hfsPolish: search %s: searchMethod is fuzzy so xrefQuery " "needs to end with %s.", hfs->searchName, fuzzyTermFormat); else if (sameString(hfs->searchMethod, "prefix") && - !matchRegex(hfs->xrefQuery, prefixTermFormatRegex)) + !regexMatchNoCase(hfs->xrefQuery, prefixTermFormatRegex)) errAbort("hfsPolish: search %s: searchMethod is prefix so xrefQuery " "needs to end with %s.", hfs->searchName, prefixTermFormat); else if (sameString(hfs->searchMethod, "exact") && - !matchRegex(hfs->xrefQuery, exactTermFormatRegex)) + !regexMatchNoCase(hfs->xrefQuery, exactTermFormatRegex)) errAbort("hfsPolish: search %s: searchMethod is exact so xrefQuery " " needs to end with %s.", hfs->searchName, exactTermFormat); } } static void hgFindSpecPolish(char *db, struct hgFindSpec *hfs) /* Fill in missing values with defaults, check for consistency. */ { /* At least one of {searchName, searchTable} must be defined. */ if ((hfs->searchName == NULL) && (hfs->searchTable == NULL)) errAbort("hfsPolish: searchName or searchTable must be defined.\n"); if (hfs->searchName == NULL) hfs->searchName = cloneString(hfs->searchTable);