aaf72102b545c05c42f66b7a3fc22d65b1ecf4fe
angie
  Mon Aug 8 14:12:39 2016 -0700
Added recognition of a small subset of HGVS terms: coding (c.) SNVs relative to RefSeq NM_ or LRG transcript IDs,
and protein (p.) simple substitutions relative to NP_.
Also accepted (not HGVS but similar and popular): geneSymbol and abbreviated protein subst like "ALK G1494E".
hgFind will map terms to the current genome if possible, and will display warnings about unrecognized accessions,
out-of-bounds coordinates and mismatching reference alleles.
refs #15071, #15554

diff --git src/lib/regexHelper.c src/lib/regexHelper.c
index 04e8ab5..fe2e938 100644
--- src/lib/regexHelper.c
+++ src/lib/regexHelper.c
@@ -79,15 +79,59 @@
 			 regmatch_t substrArr[], size_t substrArrSize)
 /* Return TRUE if string matches regular expression exp (case sensitive);
  * regexec fills in substrArr with substring offsets. */
 {
 return regexMatchSubstrMaybeCase(string, exp, substrArr, substrArrSize, FALSE);
 }
 
 boolean regexMatchSubstrNoCase(const char *string, const char *exp,
 			       regmatch_t substrArr[], size_t substrArrSize)
 /* Return TRUE if string matches regular expression exp (case insensitive);
  * regexec fills in substrArr with substring offsets. */
 {
 return regexMatchSubstrMaybeCase(string, exp, substrArr, substrArrSize, TRUE);
 }
 
+void regexSubstringCopy(const char *string, const regmatch_t substr,
+                        char *buf, size_t bufSize)
+/* Put the piece of string delimited by substr.rm_so..substr.rm_eo into buf (bufSize is handed
+ * to safencpy as buf's size).  If regexSubstrMatched(substr) is false, buf becomes "". */
+{
+if (!regexSubstrMatched(substr))
+    *buf = '\0';
+else
+    safencpy(buf, bufSize, string + substr.rm_so, substr.rm_eo - substr.rm_so);
+}
+
+char *regexSubstringClone(const char *string, const regmatch_t substr)
+/* Return a freshly allocated copy of the piece of string delimited by substr's offsets.
+ * If substr was not matched, the result is cloneString("") instead.  Either way the
+ * returned memory comes from needMem() or cloneString(). */
+{
+int len;
+char *clone;
+/* Unmatched substr: hand back a cloned empty string. */
+if (!regexSubstrMatched(substr))
+    return cloneString("");
+len = substr.rm_eo - substr.rm_so;
+clone = needMem(len + 1);	/* one extra byte for the terminator */
+regexSubstringCopy(string, substr, clone, len + 1);
+return clone;
+}
+
+int regexSubstringInt(const char *string, const regmatch_t substr)
+/* Convert the substring delimited by substr's offsets to an int using atoi().
+ * An unmatched substr yields 0, the same value a matched "0" would give; call
+ * regexSubstrMatched() beforehand if those two cases need to be told apart. */
+{
+if (!regexSubstrMatched(substr))
+    return 0;
+else
+    {
+    /* atoi() reads up to a terminator, so copy only the matched span into a
+     * stack buffer sized for it (span length plus one for the terminator). */
+    int spanLen = substr.rm_eo - substr.rm_so;
+    char digits[spanLen+1];
+    regexSubstringCopy(string, substr, digits, sizeof(digits));
+    return atoi(digits);
+    }
+}