aaf72102b545c05c42f66b7a3fc22d65b1ecf4fe angie Mon Aug 8 14:12:39 2016 -0700 Added recognition of a small subset of HGVS terms: coding (c.) SNVs relative to RefSeq NM_ or LRG transcript IDs, and protein (p.) simple substitutions relative to NP_. Also accepted (not HGVS but similar and popular): geneSymbol and abbreviated protein subst like "ALK G1494E". hgFind will map terms to the current genome if possible, and will display warnings about unrecognized accessions, out-of-bounds coordinates and mismatching reference alleles. refs #15071, #15554 diff --git src/lib/regexHelper.c src/lib/regexHelper.c index 04e8ab5..fe2e938 100644 --- src/lib/regexHelper.c +++ src/lib/regexHelper.c @@ -79,15 +79,59 @@ regmatch_t substrArr[], size_t substrArrSize) /* Return TRUE if string matches regular expression exp (case sensitive); * regexec fills in substrArr with substring offsets. */ { return regexMatchSubstrMaybeCase(string, exp, substrArr, substrArrSize, FALSE); } boolean regexMatchSubstrNoCase(const char *string, const char *exp, regmatch_t substrArr[], size_t substrArrSize) /* Return TRUE if string matches regular expression exp (case insensitive); * regexec fills in substrArr with substring offsets. */ { return regexMatchSubstrMaybeCase(string, exp, substrArr, substrArrSize, TRUE); } +void regexSubstringCopy(const char *string, const regmatch_t substr, + char *buf, size_t bufSize) +/* Copy the substring of string that runs from offset substr.rm_so up to offset substr.rm_eo into buf, passing bufSize to safencpy() as the size of buf. + * If the substring was not matched (regexSubstrMatched() is false) then make buf an empty string; that path stores a NUL through buf without looking at bufSize, so buf must point to at least one writable byte. */ +{ +if (regexSubstrMatched(substr)) + safencpy(buf, bufSize, string + substr.rm_so, substr.rm_eo - substr.rm_so); +else + *buf = '\0'; +} + +char *regexSubstringClone(const char *string, const regmatch_t substr) +/* Return a copy of the substring of string between offsets substr.rm_so and substr.rm_eo, placed in memory obtained from needMem() and filled in by regexSubstringCopy(). + * If the substring was not matched then return a cloned empty string from cloneString() instead. Presumably the caller owns and frees the result in both cases -- confirm against the needMem/cloneString conventions.
*/ +{ +char *clone = NULL; +if (regexSubstrMatched(substr)) + { + int len = substr.rm_eo - substr.rm_so; + clone = needMem(len + 1); /* one extra byte for the terminating NUL */ + regexSubstringCopy(string, substr, clone, len + 1); + } +else + clone = cloneString(""); +return clone; +} + +int regexSubstringInt(const char *string, const regmatch_t substr) +/* Return the integer value of the substring specified by substr, obtained by copying it into a local buffer and converting with atoi(). + * If substr was not matched, return 0; you can check first with regexSubstrMatched() if + * that's not the desired behavior for unmatched substr. NOTE(review): atoi() gives no indication of overflow or of non-numeric text, and the local buffer is sized directly from the match offsets -- confirm callers only pass short runs of digits. */ +{ +int val = 0; +if (regexSubstrMatched(substr)) + { + int len = substr.rm_eo - substr.rm_so; + char buf[len+1]; /* variable-length array: match length plus terminating NUL */ + regexSubstringCopy(string, substr, buf, sizeof(buf)); + val = atoi(buf); + } +else + val = 0; /* same value val was initialized with; kept explicit */ +return val; +}